diff --git a/spark-2.4.0-bin-hadoop2.7/LICENSE b/spark-2.4.0-bin-hadoop2.7/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..b94ea90de08be05f05c8c4c0b47026502f871121
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/LICENSE
@@ -0,0 +1,518 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+------------------------------------------------------------------------------------
+This project bundles some components that are also licensed under the Apache
+License Version 2.0:
+
+commons-beanutils:commons-beanutils
+org.apache.zookeeper:zookeeper
+oro:oro
+commons-configuration:commons-configuration
+commons-digester:commons-digester
+com.chuusai:shapeless_2.11
+com.googlecode.javaewah:JavaEWAH
+com.twitter:chill-java
+com.twitter:chill_2.11
+com.univocity:univocity-parsers
+javax.jdo:jdo-api
+joda-time:joda-time
+net.sf.opencsv:opencsv
+org.apache.derby:derby
+org.objenesis:objenesis
+org.roaringbitmap:RoaringBitmap
+org.scalanlp:breeze-macros_2.11
+org.scalanlp:breeze_2.11
+org.typelevel:macro-compat_2.11
+org.yaml:snakeyaml
+org.apache.xbean:xbean-asm5-shaded
+com.squareup.okhttp3:logging-interceptor
+com.squareup.okhttp3:okhttp
+com.squareup.okio:okio
+org.apache.spark:spark-catalyst_2.11
+org.apache.spark:spark-kvstore_2.11
+org.apache.spark:spark-launcher_2.11
+org.apache.spark:spark-mllib-local_2.11
+org.apache.spark:spark-network-common_2.11
+org.apache.spark:spark-network-shuffle_2.11
+org.apache.spark:spark-sketch_2.11
+org.apache.spark:spark-tags_2.11
+org.apache.spark:spark-unsafe_2.11
+commons-httpclient:commons-httpclient
+com.vlkan:flatbuffers
+com.ning:compress-lzf
+io.airlift:aircompressor
+io.dropwizard.metrics:metrics-core
+io.dropwizard.metrics:metrics-ganglia
+io.dropwizard.metrics:metrics-graphite
+io.dropwizard.metrics:metrics-json
+io.dropwizard.metrics:metrics-jvm
+org.iq80.snappy:snappy
+com.clearspring.analytics:stream
+com.jamesmurty.utils:java-xmlbuilder
+commons-codec:commons-codec
+commons-collections:commons-collections
+io.fabric8:kubernetes-client
+io.fabric8:kubernetes-model
+io.netty:netty
+io.netty:netty-all
+net.hydromatic:eigenbase-properties
+net.sf.supercsv:super-csv
+org.apache.arrow:arrow-format
+org.apache.arrow:arrow-memory
+org.apache.arrow:arrow-vector
+org.apache.calcite:calcite-avatica
+org.apache.calcite:calcite-core
+org.apache.calcite:calcite-linq4j
+org.apache.commons:commons-crypto
+org.apache.commons:commons-lang3
+org.apache.hadoop:hadoop-annotations
+org.apache.hadoop:hadoop-auth
+org.apache.hadoop:hadoop-client
+org.apache.hadoop:hadoop-common
+org.apache.hadoop:hadoop-hdfs
+org.apache.hadoop:hadoop-mapreduce-client-app
+org.apache.hadoop:hadoop-mapreduce-client-common
+org.apache.hadoop:hadoop-mapreduce-client-core
+org.apache.hadoop:hadoop-mapreduce-client-jobclient
+org.apache.hadoop:hadoop-mapreduce-client-shuffle
+org.apache.hadoop:hadoop-yarn-api
+org.apache.hadoop:hadoop-yarn-client
+org.apache.hadoop:hadoop-yarn-common
+org.apache.hadoop:hadoop-yarn-server-common
+org.apache.hadoop:hadoop-yarn-server-web-proxy
+org.apache.httpcomponents:httpclient
+org.apache.httpcomponents:httpcore
+org.apache.orc:orc-core
+org.apache.orc:orc-mapreduce
+org.mortbay.jetty:jetty
+org.mortbay.jetty:jetty-util
+com.jolbox:bonecp
+org.json4s:json4s-ast_2.11
+org.json4s:json4s-core_2.11
+org.json4s:json4s-jackson_2.11
+org.json4s:json4s-scalap_2.11
+com.carrotsearch:hppc
+com.fasterxml.jackson.core:jackson-annotations
+com.fasterxml.jackson.core:jackson-core
+com.fasterxml.jackson.core:jackson-databind
+com.fasterxml.jackson.dataformat:jackson-dataformat-yaml
+com.fasterxml.jackson.module:jackson-module-jaxb-annotations
+com.fasterxml.jackson.module:jackson-module-paranamer
+com.fasterxml.jackson.module:jackson-module-scala_2.11
+com.github.mifmif:generex
+com.google.code.findbugs:jsr305
+com.google.code.gson:gson
+com.google.inject:guice
+com.google.inject.extensions:guice-servlet
+com.twitter:parquet-hadoop-bundle
+commons-beanutils:commons-beanutils-core
+commons-cli:commons-cli
+commons-dbcp:commons-dbcp
+commons-io:commons-io
+commons-lang:commons-lang
+commons-logging:commons-logging
+commons-net:commons-net
+commons-pool:commons-pool
+io.fabric8:zjsonpatch
+javax.inject:javax.inject
+javax.validation:validation-api
+log4j:apache-log4j-extras
+log4j:log4j
+net.sf.jpam:jpam
+org.apache.avro:avro
+org.apache.avro:avro-ipc
+org.apache.avro:avro-mapred
+org.apache.commons:commons-compress
+org.apache.commons:commons-math3
+org.apache.curator:curator-client
+org.apache.curator:curator-framework
+org.apache.curator:curator-recipes
+org.apache.directory.api:api-asn1-api
+org.apache.directory.api:api-util
+org.apache.directory.server:apacheds-i18n
+org.apache.directory.server:apacheds-kerberos-codec
+org.apache.htrace:htrace-core
+org.apache.ivy:ivy
+org.apache.mesos:mesos
+org.apache.parquet:parquet-column
+org.apache.parquet:parquet-common
+org.apache.parquet:parquet-encoding
+org.apache.parquet:parquet-format
+org.apache.parquet:parquet-hadoop
+org.apache.parquet:parquet-jackson
+org.apache.thrift:libfb303
+org.apache.thrift:libthrift
+org.codehaus.jackson:jackson-core-asl
+org.codehaus.jackson:jackson-mapper-asl
+org.datanucleus:datanucleus-api-jdo
+org.datanucleus:datanucleus-core
+org.datanucleus:datanucleus-rdbms
+org.lz4:lz4-java
+org.spark-project.hive:hive-beeline
+org.spark-project.hive:hive-cli
+org.spark-project.hive:hive-exec
+org.spark-project.hive:hive-jdbc
+org.spark-project.hive:hive-metastore
+org.xerial.snappy:snappy-java
+stax:stax-api
+xerces:xercesImpl
+org.codehaus.jackson:jackson-jaxrs
+org.codehaus.jackson:jackson-xc
+org.eclipse.jetty:jetty-client
+org.eclipse.jetty:jetty-continuation
+org.eclipse.jetty:jetty-http
+org.eclipse.jetty:jetty-io
+org.eclipse.jetty:jetty-jndi
+org.eclipse.jetty:jetty-plus
+org.eclipse.jetty:jetty-proxy
+org.eclipse.jetty:jetty-security
+org.eclipse.jetty:jetty-server
+org.eclipse.jetty:jetty-servlet
+org.eclipse.jetty:jetty-servlets
+org.eclipse.jetty:jetty-util
+org.eclipse.jetty:jetty-webapp
+org.eclipse.jetty:jetty-xml
+
+core/src/main/java/org/apache/spark/util/collection/TimSort.java
+core/src/main/resources/org/apache/spark/ui/static/bootstrap*
+core/src/main/resources/org/apache/spark/ui/static/jsonFormatter*
+core/src/main/resources/org/apache/spark/ui/static/vis*
+docs/js/vendor/bootstrap.js
+
+
+------------------------------------------------------------------------------------
+This product bundles various third-party components under other open source licenses.
+This section summarizes those components and their licenses. See licenses-binary/
+for text of these licenses.
+
+
+BSD 2-Clause
+------------
+
+com.github.luben:zstd-jni
+javolution:javolution
+com.esotericsoftware:kryo-shaded
+com.esotericsoftware:minlog
+com.esotericsoftware:reflectasm
+com.google.protobuf:protobuf-java
+org.codehaus.janino:commons-compiler
+org.codehaus.janino:janino
+jline:jline
+org.jodd:jodd-core
+
+
+BSD 3-Clause
+------------
+
+dk.brics.automaton:automaton
+org.antlr:antlr-runtime
+org.antlr:ST4
+org.antlr:stringtemplate
+org.antlr:antlr4-runtime
+antlr:antlr
+com.github.fommil.netlib:core
+com.thoughtworks.paranamer:paranamer
+org.scala-lang:scala-compiler
+org.scala-lang:scala-library
+org.scala-lang:scala-reflect
+org.scala-lang.modules:scala-parser-combinators_2.11
+org.scala-lang.modules:scala-xml_2.11
+org.fusesource.leveldbjni:leveldbjni-all
+net.sourceforge.f2j:arpack_combined_all
+xmlenc:xmlenc
+net.sf.py4j:py4j
+org.jpmml:pmml-model
+org.jpmml:pmml-schema
+
+python/lib/py4j-*-src.zip
+python/pyspark/cloudpickle.py
+python/pyspark/join.py
+core/src/main/resources/org/apache/spark/ui/static/d3.min.js
+
+The CSS style for the navigation sidebar of the documentation was originally
+submitted by Óscar Nájera for the scikit-learn project. The scikit-learn project
+is distributed under the 3-Clause BSD license.
+
+
+MIT License
+-----------
+
+org.spire-math:spire-macros_2.11
+org.spire-math:spire_2.11
+org.typelevel:machinist_2.11
+net.razorvine:pyrolite
+org.slf4j:jcl-over-slf4j
+org.slf4j:jul-to-slf4j
+org.slf4j:slf4j-api
+org.slf4j:slf4j-log4j12
+com.github.scopt:scopt_2.11
+
+core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js
+core/src/main/resources/org/apache/spark/ui/static/*dataTables*
+core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js
+core/src/main/resources/org/apache/spark/ui/static/jquery*
+core/src/main/resources/org/apache/spark/ui/static/sorttable.js
+docs/js/vendor/anchor.min.js
+docs/js/vendor/jquery*
+docs/js/vendor/modernizr*
+
+
+Common Development and Distribution License (CDDL) 1.0
+------------------------------------------------------
+
+javax.activation:activation  http://www.oracle.com/technetwork/java/javase/tech/index-jsp-138795.html
+javax.xml.stream:stax-api    https://jcp.org/en/jsr/detail?id=173
+
+
+Common Development and Distribution License (CDDL) 1.1
+------------------------------------------------------
+
+javax.annotation:javax.annotation-api    https://jcp.org/en/jsr/detail?id=250
+javax.servlet:javax.servlet-api   https://javaee.github.io/servlet-spec/
+javax.transaction:jta http://www.oracle.com/technetwork/java/index.html
+javax.ws.rs:javax.ws.rs-api https://github.com/jax-rs
+javax.xml.bind:jaxb-api    https://github.com/javaee/jaxb-v2
+org.glassfish.hk2:hk2-api https://github.com/javaee/glassfish
+org.glassfish.hk2:hk2-locator (same)
+org.glassfish.hk2:hk2-utils
+org.glassfish.hk2:osgi-resource-locator
+org.glassfish.hk2.external:aopalliance-repackaged
+org.glassfish.hk2.external:javax.inject
+org.glassfish.jersey.bundles.repackaged:jersey-guava
+org.glassfish.jersey.containers:jersey-container-servlet
+org.glassfish.jersey.containers:jersey-container-servlet-core
+org.glassfish.jersey.core:jersey-client
+org.glassfish.jersey.core:jersey-common
+org.glassfish.jersey.core:jersey-server
+org.glassfish.jersey.media:jersey-media-jaxb
+
+
+Mozilla Public License (MPL) 1.1
+--------------------------------
+
+com.github.rwl:jtransforms https://sourceforge.net/projects/jtransforms/
+
+
+Python Software Foundation License
+----------------------------------
+
+pyspark/heapq3.py
+
+
+Public Domain
+-------------
+
+aopalliance:aopalliance
+net.iharder:base64
+org.tukaani:xz
+
+
+Creative Commons CC0 1.0 Universal Public Domain Dedication
+-----------------------------------------------------------
+(see LICENSE-CC0.txt)
+
+data/mllib/images/kittens/29.5.a_b_EGDP022204.jpg
+data/mllib/images/kittens/54893.jpg
+data/mllib/images/kittens/DP153539.jpg
+data/mllib/images/kittens/DP802813.jpg
+data/mllib/images/multi-channel/chr30.4.184.jpg
diff --git a/spark-2.4.0-bin-hadoop2.7/NOTICE b/spark-2.4.0-bin-hadoop2.7/NOTICE
new file mode 100644
index 0000000000000000000000000000000000000000..b707c436983f7613d883af3e8ef33a283a620e29
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/NOTICE
@@ -0,0 +1,1174 @@
+Apache Spark
+Copyright 2014 and onwards The Apache Software Foundation.
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+
+Export Control Notice
+---------------------
+
+This distribution includes cryptographic software. The country in which you currently reside may have
+restrictions on the import, possession, use, and/or re-export to another country, of encryption software.
+BEFORE using any encryption software, please check your country's laws, regulations and policies concerning
+the import, possession, or use, and re-export of encryption software, to see if this is permitted. See
+<http://www.wassenaar.org/> for more information.
+
+The U.S. Government Department of Commerce, Bureau of Industry and Security (BIS), has classified this
+software as Export Commodity Control Number (ECCN) 5D002.C.1, which includes information security software
+using or performing cryptographic functions with asymmetric algorithms. The form and manner of this Apache
+Software Foundation distribution makes it eligible for export under the License Exception ENC Technology
+Software Unrestricted (TSU) exception (see the BIS Export Administration Regulations, Section 740.13) for
+both object code and source code.
+
+The following provides more details on the included cryptographic software:
+
+This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to
+support authentication, and encryption and decryption of data sent across the network between
+services.
+
+
+// ------------------------------------------------------------------
+// NOTICE file corresponding to the section 4d of The Apache License,
+// Version 2.0, in this case for
+// ------------------------------------------------------------------
+
+Hive Beeline
+Copyright 2016 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+Apache Avro
+Copyright 2009-2014 The Apache Software Foundation
+
+This product currently only contains code developed by authors
+of specific components, as identified by the source code files;
+if such notes are missing, files have been created by
+Tatu Saloranta.
+
+For additional credits (generally to people who reported problems)
+see CREDITS file.
+
+Apache Commons Compress
+Copyright 2002-2012 The Apache Software Foundation
+
+This product includes software developed by
+The Apache Software Foundation (http://www.apache.org/).
+
+Apache Avro Mapred API
+Copyright 2009-2014 The Apache Software Foundation
+
+Apache Avro IPC
+Copyright 2009-2014 The Apache Software Foundation
+
+Objenesis
+Copyright 2006-2013 Joe Walnes, Henri Tremblay, Leonardo Mesquita
+
+Apache XBean :: ASM 5 shaded (repackaged)
+Copyright 2005-2015 The Apache Software Foundation
+
+--------------------------------------
+
+This product includes software developed at
+OW2 Consortium (http://asm.ow2.org/)
+
+This product includes software developed by The Apache Software
+Foundation (http://www.apache.org/).
+
+The binary distribution of this product bundles binaries of
+org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the
+following notices:
+* Copyright 2011 Dain Sundstrom <dain@iq80.com>
+* Copyright 2011 FuseSource Corp. http://fusesource.com
+
+The binary distribution of this product bundles binaries of
+org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni),
+which has the following notices:
+* This product includes software developed by FuseSource Corp.
+  http://fusesource.com
+* This product includes software developed at
+  Progress Software Corporation and/or its  subsidiaries or affiliates.
+* This product includes software developed by IBM Corporation and others.
+
+The binary distribution of this product bundles binaries of
+Gson 2.2.4,
+which has the following notices:
+
+                            The Netty Project
+                            =================
+
+Please visit the Netty web site for more information:
+
+  * http://netty.io/
+
+Copyright 2014 The Netty Project
+
+The Netty Project licenses this file to you under the Apache License,
+version 2.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at:
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+License for the specific language governing permissions and limitations
+under the License.
+
+Also, please refer to each LICENSE.<component>.txt file, which is located in
+the 'license' directory of the distribution file, for the license terms of the
+components that this product depends on.
+
+-------------------------------------------------------------------------------
+This product contains the extensions to Java Collections Framework which has
+been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene:
+
+  * LICENSE:
+    * license/LICENSE.jsr166y.txt (Public Domain)
+  * HOMEPAGE:
+    * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/
+    * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/
+
+This product contains a modified version of Robert Harder's Public Domain
+Base64 Encoder and Decoder, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.base64.txt (Public Domain)
+  * HOMEPAGE:
+    * http://iharder.sourceforge.net/current/java/base64/
+
+This product contains a modified portion of 'Webbit', an event based
+WebSocket and HTTP server, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.webbit.txt (BSD License)
+  * HOMEPAGE:
+    * https://github.com/joewalnes/webbit
+
+This product contains a modified portion of 'SLF4J', a simple logging
+facade for Java, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.slf4j.txt (MIT License)
+  * HOMEPAGE:
+    * http://www.slf4j.org/
+
+This product contains a modified portion of 'ArrayDeque', written by Josh
+Bloch of Google, Inc:
+
+  * LICENSE:
+    * license/LICENSE.deque.txt (Public Domain)
+
+This product contains a modified portion of 'Apache Harmony', an open source
+Java SE, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.harmony.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://archive.apache.org/dist/harmony/
+
+This product contains a modified version of Roland Kuhn's ASL2
+AbstractNodeQueue, which is based on Dmitriy Vyukov's non-intrusive MPSC queue.
+It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.abstractnodequeue.txt (Public Domain)
+  * HOMEPAGE:
+    * https://github.com/akka/akka/blob/wip-2.2.3-for-scala-2.11/akka-actor/src/main/java/akka/dispatch/AbstractNodeQueue.java
+
+This product contains a modified portion of 'jbzip2', a Java bzip2 compression
+and decompression library written by Matthew J. Francis. It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.jbzip2.txt (MIT License)
+  * HOMEPAGE:
+    * https://code.google.com/p/jbzip2/
+
+This product contains a modified portion of 'libdivsufsort', a C API library to construct
+the suffix array and the Burrows-Wheeler transformed string for any input string of
+a constant-size alphabet written by Yuta Mori. It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.libdivsufsort.txt (MIT License)
+  * HOMEPAGE:
+    * https://code.google.com/p/libdivsufsort/
+
+This product contains a modified portion of Nitsan Wakart's 'JCTools', Java Concurrency Tools for the JVM,
+ which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.jctools.txt (ASL2 License)
+  * HOMEPAGE:
+    * https://github.com/JCTools/JCTools
+
+This product optionally depends on 'JZlib', a re-implementation of zlib in
+pure Java, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.jzlib.txt (BSD style License)
+  * HOMEPAGE:
+    * http://www.jcraft.com/jzlib/
+
+This product optionally depends on 'Compress-LZF', a Java library for encoding and
+decoding data in LZF format, written by Tatu Saloranta. It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.compress-lzf.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * https://github.com/ning/compress
+
+This product optionally depends on 'lz4', a LZ4 Java compression
+and decompression library written by Adrien Grand. It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.lz4.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * https://github.com/jpountz/lz4-java
+
+This product optionally depends on 'lzma-java', a LZMA Java compression
+and decompression library, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.lzma-java.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * https://github.com/jponge/lzma-java
+
+This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression
+and decompression library written by William Kinney. It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.jfastlz.txt (MIT License)
+  * HOMEPAGE:
+    * https://code.google.com/p/jfastlz/
+
+This product contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data
+interchange format, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.protobuf.txt (New BSD License)
+  * HOMEPAGE:
+    * http://code.google.com/p/protobuf/
+
+This product optionally depends on 'Bouncy Castle Crypto APIs' to generate
+a temporary self-signed X.509 certificate when the JVM does not provide the
+equivalent functionality.  It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.bouncycastle.txt (MIT License)
+  * HOMEPAGE:
+    * http://www.bouncycastle.org/
+
+This product optionally depends on 'Snappy', a compression library produced
+by Google Inc, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.snappy.txt (New BSD License)
+  * HOMEPAGE:
+    * http://code.google.com/p/snappy/
+
+This product optionally depends on 'JBoss Marshalling', an alternative Java
+serialization API, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.jboss-marshalling.txt (GNU LGPL 2.1)
+  * HOMEPAGE:
+    * http://www.jboss.org/jbossmarshalling
+
+This product optionally depends on 'Caliper', Google's micro-
+benchmarking framework, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.caliper.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://code.google.com/p/caliper/
+
+This product optionally depends on 'Apache Commons Logging', a logging
+framework, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.commons-logging.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://commons.apache.org/logging/
+
+This product optionally depends on 'Apache Log4J', a logging framework, which
+can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.log4j.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://logging.apache.org/log4j/
+
+This product optionally depends on 'Aalto XML', an ultra-high performance
+non-blocking XML processor, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.aalto-xml.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://wiki.fasterxml.com/AaltoHome
+
+This product contains a modified version of 'HPACK', a Java implementation of
+the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.hpack.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * https://github.com/twitter/hpack
+
+This product contains a modified portion of 'Apache Commons Lang', a Java library
+that provides utilities for the java.lang API, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.commons-lang.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * https://commons.apache.org/proper/commons-lang/
+
+The binary distribution of this product bundles binaries of
+Commons Codec 1.4,
+which has the following notices:
+ * src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java contains test data from http://aspell.net/test/orig/batch0.tab. Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
+  ===============================================================================
+  The content of package org.apache.commons.codec.language.bm has been translated
+  from the original php source code available at http://stevemorse.org/phoneticinfo.htm
+  with permission from the original authors.
+  Original source copyright: Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
+
+The binary distribution of this product bundles binaries of
+Commons Lang 2.6,
+which has the following notices:
+ * This product includes software from the Spring Framework, under the Apache License 2.0 (see: StringUtils.containsWhitespace())
+
+The binary distribution of this product bundles binaries of
+Apache Log4j 1.2.17,
+which has the following notices:
+ * ResolverUtil.java
+    Copyright 2005-2006 Tim Fennell
+  Dumbster SMTP test server
+    Copyright 2004 Jason Paul Kitchen
+  TypeUtil.java
+    Copyright 2002-2012 Ramnivas Laddad, Juergen Hoeller, Chris Beams
+
+The binary distribution of this product bundles binaries of
+Jetty 6.1.26,
+which has the following notices:
+ * ==============================================================
+    Jetty Web Container
+    Copyright 1995-2016 Mort Bay Consulting Pty Ltd.
+   ==============================================================
+
+   The Jetty Web Container is Copyright Mort Bay Consulting Pty Ltd
+   unless otherwise noted.
+
+   Jetty is dual licensed under both
+
+     * The Apache 2.0 License
+       http://www.apache.org/licenses/LICENSE-2.0.html
+
+         and
+
+     * The Eclipse Public 1.0 License
+       http://www.eclipse.org/legal/epl-v10.html
+
+   Jetty may be distributed under either license.
+
+   ------
+   Eclipse
+
+   The following artifacts are EPL.
+    * org.eclipse.jetty.orbit:org.eclipse.jdt.core
+
+   The following artifacts are EPL and ASL2.
+    * org.eclipse.jetty.orbit:javax.security.auth.message
+
+   The following artifacts are EPL and CDDL 1.0.
+    * org.eclipse.jetty.orbit:javax.mail.glassfish
+
+   ------
+   Oracle
+
+   The following artifacts are CDDL + GPLv2 with classpath exception.
+   https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html
+
+    * javax.servlet:javax.servlet-api
+    * javax.annotation:javax.annotation-api
+    * javax.transaction:javax.transaction-api
+    * javax.websocket:javax.websocket-api
+
+   ------
+   Oracle OpenJDK
+
+   If ALPN is used to negotiate HTTP/2 connections, then the following
+   artifacts may be included in the distribution or downloaded when ALPN
+   module is selected.
+
+    * java.sun.security.ssl
+
+   These artifacts replace/modify OpenJDK classes.  The modifications
+   are hosted at github and both modified and original are under GPL v2 with
+   classpath exceptions.
+   http://openjdk.java.net/legal/gplv2+ce.html
+
+   ------
+   OW2
+
+   The following artifacts are licensed by the OW2 Foundation according to the
+   terms of http://asm.ow2.org/license.html
+
+   org.ow2.asm:asm-commons
+   org.ow2.asm:asm
+
+   ------
+   Apache
+
+   The following artifacts are ASL2 licensed.
+
+   org.apache.taglibs:taglibs-standard-spec
+   org.apache.taglibs:taglibs-standard-impl
+
+   ------
+   MortBay
+
+   The following artifacts are ASL2 licensed.  Based on selected classes from
+   following Apache Tomcat jars, all ASL2 licensed.
+
+   org.mortbay.jasper:apache-jsp
+     org.apache.tomcat:tomcat-jasper
+     org.apache.tomcat:tomcat-juli
+     org.apache.tomcat:tomcat-jsp-api
+     org.apache.tomcat:tomcat-el-api
+     org.apache.tomcat:tomcat-jasper-el
+     org.apache.tomcat:tomcat-api
+     org.apache.tomcat:tomcat-util-scan
+     org.apache.tomcat:tomcat-util
+
+   org.mortbay.jasper:apache-el
+     org.apache.tomcat:tomcat-jasper-el
+     org.apache.tomcat:tomcat-el-api
+
+   ------
+   Mortbay
+
+   The following artifacts are CDDL + GPLv2 with classpath exception.
+
+   https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html
+
+   org.eclipse.jetty.toolchain:jetty-schemas
+
+   ------
+   Assorted
+
+   The UnixCrypt.java code implements the one way cryptography used by
+   Unix systems for simple password protection.  Copyright 1996 Aki Yoshida,
+   modified April 2001  by Iris Van den Broeke, Daniel Deville.
+   Permission to use, copy, modify and distribute UnixCrypt
+   for non-commercial or commercial purposes and without fee is
+   granted provided that the copyright notice appears in all copies.
+
+The binary distribution of this product bundles binaries of
+Snappy for Java 1.0.4.1,
+which has the following notices:
+ * This product includes software developed by Google
+    Snappy: http://code.google.com/p/snappy/ (New BSD License)
+
+   This product includes software developed by Apache
+    PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/
+    (Apache 2.0 license)
+
+   This library contains statically linked libstdc++. This inclusion is allowed by
+   "GCC Runtime Library Exception"
+   http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html
+
+   == Contributors ==
+     * Tatu Saloranta
+       * Providing benchmark suite
+     * Alec Wysoker
+       * Performance and memory usage improvement
+
+The binary distribution of this product bundles binaries of
+Xerces2 Java Parser 2.9.1,
+which has the following notices:
+ * =========================================================================
+   ==  NOTICE file corresponding to section 4(d) of the Apache License,   ==
+   ==  Version 2.0, in this case for the Apache Xerces Java distribution. ==
+   =========================================================================
+
+   Apache Xerces Java
+   Copyright 1999-2007 The Apache Software Foundation
+
+   This product includes software developed at
+   The Apache Software Foundation (http://www.apache.org/).
+
+   Portions of this software were originally based on the following:
+     - software copyright (c) 1999, IBM Corporation., http://www.ibm.com.
+     - software copyright (c) 1999, Sun Microsystems., http://www.sun.com.
+     - voluntary contributions made by Paul Eng on behalf of the
+       Apache Software Foundation that were originally developed at iClick, Inc.,
+       software copyright (c) 1999.
+
+Apache Commons Collections
+Copyright 2001-2015 The Apache Software Foundation
+
+Apache Commons Configuration
+Copyright 2001-2008 The Apache Software Foundation
+
+Apache Jakarta Commons Digester
+Copyright 2001-2006 The Apache Software Foundation
+
+Apache Commons BeanUtils
+Copyright 2000-2008 The Apache Software Foundation
+
+ApacheDS Protocol Kerberos Codec
+Copyright 2003-2013 The Apache Software Foundation
+
+ApacheDS I18n
+Copyright 2003-2013 The Apache Software Foundation
+
+Apache Directory API ASN.1 API
+Copyright 2003-2013 The Apache Software Foundation
+
+Apache Directory LDAP API Utilities
+Copyright 2003-2013 The Apache Software Foundation
+
+Curator Client
+Copyright 2011-2015 The Apache Software Foundation
+
+htrace-core
+Copyright 2015 The Apache Software Foundation
+
+   =========================================================================
+   ==  NOTICE file corresponding to section 4(d) of the Apache License,   ==
+   ==  Version 2.0, in this case for the Apache Xerces Java distribution. ==
+   =========================================================================
+
+   Portions of this software were originally based on the following:
+     - software copyright (c) 1999, IBM Corporation., http://www.ibm.com.
+     - software copyright (c) 1999, Sun Microsystems., http://www.sun.com.
+     - voluntary contributions made by Paul Eng on behalf of the
+       Apache Software Foundation that were originally developed at iClick, Inc.,
+       software copyright (c) 1999.
+
+# Jackson JSON processor
+
+Jackson is a high-performance, Free/Open Source JSON processing library.
+It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
+been in development since 2007.
+It is currently developed by a community of developers, as well as supported
+commercially by FasterXML.com.
+
+## Licensing
+
+Jackson core and extension components may be licensed under different licenses.
+To find the details that apply to this artifact see the accompanying LICENSE file.
+For more information, including possible other licensing options, contact
+FasterXML.com (http://fasterxml.com).
+
+## Credits
+
+A list of contributors may be found from CREDITS file, which is included
+in some artifacts (usually source distributions); but is always available
+from the source code management (SCM) system the project uses.
+
+Apache HttpCore
+Copyright 2005-2017 The Apache Software Foundation
+
+Curator Recipes
+Copyright 2011-2015 The Apache Software Foundation
+
+Curator Framework
+Copyright 2011-2015 The Apache Software Foundation
+
+Apache Commons Lang
+Copyright 2001-2016 The Apache Software Foundation
+
+This product includes software from the Spring Framework,
+under the Apache License 2.0 (see: StringUtils.containsWhitespace())
+
+Apache Commons Math
+Copyright 2001-2015 The Apache Software Foundation
+
+This product includes software developed for Orekit by
+CS Systèmes d'Information (http://www.c-s.fr/)
+Copyright 2010-2012 CS Systèmes d'Information
+
+Apache log4j
+Copyright 2007 The Apache Software Foundation
+
+# Compress LZF
+
+This library contains efficient implementation of LZF compression format,
+as well as additional helper classes that build on JDK-provided gzip (deflate)
+codec.
+
+Library is licensed under Apache License 2.0, as per accompanying LICENSE file.
+
+## Credit
+
+Library has been written by Tatu Saloranta (tatu.saloranta@iki.fi).
+It was started at Ning, inc., as an official Open Source process used by
+platform backend, but after initial versions has been developed outside of
+Ning by supporting community.
+
+Other contributors include:
+
+* Jon Hartlaub (first versions of streaming reader/writer; unit tests)
+* Cedrik Lime: parallel LZF implementation
+
+Various community members have contributed bug reports, and suggested minor
+fixes; these can be found from file "VERSION.txt" in SCM.
+
+Apache Commons Net
+Copyright 2001-2012 The Apache Software Foundation
+
+Copyright 2011 The Netty Project
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+This product contains a modified version of 'JZlib', a re-implementation of
+zlib in pure Java, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.jzlib.txt (BSD Style License)
+  * HOMEPAGE:
+    * http://www.jcraft.com/jzlib/
+
+This product contains a modified version of 'Webbit', a Java event based
+WebSocket and HTTP server:
+
+This product optionally depends on 'Protocol Buffers', Google's data
+interchange format, which can be obtained at:
+
+This product optionally depends on 'SLF4J', a simple logging facade for Java,
+which can be obtained at:
+
+This product optionally depends on 'Apache Log4J', a logging framework,
+which can be obtained at:
+
+This product optionally depends on 'JBoss Logging', a logging framework,
+which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.jboss-logging.txt (GNU LGPL 2.1)
+  * HOMEPAGE:
+    * http://anonsvn.jboss.org/repos/common/common-logging-spi/
+
+This product optionally depends on 'Apache Felix', an open source OSGi
+framework implementation, which can be obtained at:
+
+  * LICENSE:
+    * license/LICENSE.felix.txt (Apache License 2.0)
+  * HOMEPAGE:
+    * http://felix.apache.org/
+
+Jackson core and extension components may be licensed under different licenses.
+To find the details that apply to this artifact see the accompanying LICENSE file.
+For more information, including possible other licensing options, contact
+FasterXML.com (http://fasterxml.com).
+
+Apache Ivy (TM)
+Copyright 2007-2014 The Apache Software Foundation
+
+Portions of Ivy were originally developed at
+Jayasoft SARL (http://www.jayasoft.fr/)
+and are licensed to the Apache Software Foundation under the
+"Software Grant License Agreement"
+
+SSH and SFTP support is provided by the JCraft JSch package,
+which is open source software, available under
+the terms of a BSD style license.
+The original software and related information is available
+at http://www.jcraft.com/jsch/.
+
+
+ORC Core
+Copyright 2013-2018 The Apache Software Foundation
+
+Apache Commons Lang
+Copyright 2001-2011 The Apache Software Foundation
+
+ORC MapReduce
+Copyright 2013-2018 The Apache Software Foundation
+
+Apache Parquet Format
+Copyright 2017 The Apache Software Foundation
+
+Arrow Vectors
+Copyright 2017 The Apache Software Foundation
+
+Arrow Format
+Copyright 2017 The Apache Software Foundation
+
+Arrow Memory
+Copyright 2017 The Apache Software Foundation
+
+Apache Commons CLI
+Copyright 2001-2009 The Apache Software Foundation
+
+Google Guice - Extensions - Servlet
+Copyright 2006-2011 Google, Inc.
+
+Apache Commons IO
+Copyright 2002-2012 The Apache Software Foundation
+
+Google Guice - Core Library
+Copyright 2006-2011 Google, Inc.
+
+mesos
+Copyright 2017 The Apache Software Foundation
+
+Apache Parquet Hadoop Bundle (Incubating)
+Copyright 2015 The Apache Software Foundation
+
+Hive Query Language
+Copyright 2016 The Apache Software Foundation
+
+Apache Extras Companion for log4j 1.2.
+Copyright 2007 The Apache Software Foundation
+
+Hive Metastore
+Copyright 2016 The Apache Software Foundation
+
+Apache Commons Logging
+Copyright 2003-2013 The Apache Software Foundation
+
+=========================================================================
+==  NOTICE file corresponding to section 4(d) of the Apache License,   ==
+==  Version 2.0, in this case for the DataNucleus distribution.        ==
+=========================================================================
+
+===================================================================
+This product includes software developed by many individuals,
+including the following:
+===================================================================
+Erik Bengtson
+Andy Jefferson
+
+===================================================================
+This product has included contributions from some individuals,
+including the following:
+===================================================================
+
+===================================================================
+This product includes software developed by many individuals,
+including the following:
+===================================================================
+Andy Jefferson
+Erik Bengtson
+Joerg von Frantzius
+Marco Schulze
+
+===================================================================
+This product has included contributions from some individuals,
+including the following:
+===================================================================
+Barry Haddow
+Ralph Ullrich
+David Ezzio
+Brendan de Beer
+David Eaves
+Martin Taal
+Tony Lai
+Roland Szabo
+Anton Troshin (Timesten)
+
+===================================================================
+This product also includes software developed by the TJDO project
+(http://tjdo.sourceforge.net/).
+===================================================================
+
+===================================================================
+This product also includes software developed by the Apache Commons project
+(http://commons.apache.org/).
+===================================================================
+
+Apache Commons Pool
+Copyright 1999-2009 The Apache Software Foundation
+
+Apache Commons DBCP
+Copyright 2001-2010 The Apache Software Foundation
+
+Apache Java Data Objects (JDO)
+Copyright 2005-2006 The Apache Software Foundation
+
+Apache Jakarta HttpClient
+Copyright 1999-2007 The Apache Software Foundation
+
+Calcite Avatica
+Copyright 2012-2015 The Apache Software Foundation
+
+Calcite Core
+Copyright 2012-2015 The Apache Software Foundation
+
+Calcite Linq4j
+Copyright 2012-2015 The Apache Software Foundation
+
+Apache HttpClient
+Copyright 1999-2017 The Apache Software Foundation
+
+Apache Commons Codec
+Copyright 2002-2014 The Apache Software Foundation
+
+src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
+contains test data from http://aspell.net/test/orig/batch0.tab.
+Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)
+
+===============================================================================
+
+The content of package org.apache.commons.codec.language.bm has been translated
+from the original php source code available at http://stevemorse.org/phoneticinfo.htm
+with permission from the original authors.
+Original source copyright:
+Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
+
+=============================================================================
+= NOTICE file corresponding to section 4d of the Apache License Version 2.0 =
+=============================================================================
+This product includes software developed by
+Joda.org (http://www.joda.org/).
+
+===================================================================
+This product has included contributions from some individuals,
+including the following:
+===================================================================
+Joerg von Frantzius
+Thomas Marti
+Barry Haddow
+Marco Schulze
+Ralph Ullrich
+David Ezzio
+Brendan de Beer
+David Eaves
+Martin Taal
+Tony Lai
+Roland Szabo
+Marcus Mennemeier
+Xuan Baldauf
+Eric Sultan
+
+Apache Thrift
+Copyright 2006-2010 The Apache Software Foundation.
+
+=========================================================================
+==  NOTICE file corresponding to section 4(d) of the Apache License,
+==  Version 2.0, in this case for the Apache Derby distribution.
+==
+==  DO NOT EDIT THIS FILE DIRECTLY. IT IS GENERATED
+==  BY THE buildnotice TARGET IN THE TOP LEVEL build.xml FILE.
+==
+=========================================================================
+
+Apache Derby
+Copyright 2004-2015 The Apache Software Foundation
+
+=========================================================================
+
+Portions of Derby were originally developed by
+International Business Machines Corporation and are
+licensed to the Apache Software Foundation under the
+"Software Grant and Corporate Contribution License Agreement",
+informally known as the "Derby CLA".
+The following copyright notice(s) were affixed to portions of the code
+with which this file is now or was at one time distributed
+and are placed here unaltered.
+
+(C) Copyright 1997,2004 International Business Machines Corporation.  All rights reserved.
+
+(C) Copyright IBM Corp. 2003.
+
+The portion of the functionTests under 'nist' was originally
+developed by the National Institute of Standards and Technology (NIST),
+an agency of the United States Department of Commerce, and adapted by
+International Business Machines Corporation in accordance with the NIST
+Software Acknowledgment and Redistribution document at
+http://www.itl.nist.gov/div897/ctg/sql_form.htm
+
+The JDBC apis for small devices and JDBC3 (under java/stubs/jsr169 and
+java/stubs/jdbc3) were produced by trimming sources supplied by the
+Apache Harmony project. In addition, the Harmony SerialBlob and
+SerialClob implementations are used. The following notice covers the Harmony sources:
+
+Portions of Harmony were originally developed by
+Intel Corporation and are licensed to the Apache Software
+Foundation under the "Software Grant and Corporate Contribution
+License Agreement", informally known as the "Intel Harmony CLA".
+
+The Derby build relies on source files supplied by the Apache Felix
+project. The following notice covers the Felix files:
+
+  Apache Felix Main
+  Copyright 2008 The Apache Software Foundation
+
+  I. Included Software
+
+  This product includes software developed at
+  The Apache Software Foundation (http://www.apache.org/).
+  Licensed under the Apache License 2.0.
+
+  This product includes software developed at
+  The OSGi Alliance (http://www.osgi.org/).
+  Copyright (c) OSGi Alliance (2000, 2007).
+  Licensed under the Apache License 2.0.
+
+  This product includes software from http://kxml.sourceforge.net.
+  Copyright (c) 2002,2003, Stefan Haustein, Oberhausen, Rhld., Germany.
+  Licensed under BSD License.
+
+  II. Used Software
+
+  This product uses software developed at
+  The OSGi Alliance (http://www.osgi.org/).
+  Copyright (c) OSGi Alliance (2000, 2007).
+  Licensed under the Apache License 2.0.
+
+  III. License Summary
+  - Apache License 2.0
+  - BSD License
+
+The Derby build relies on jar files supplied by the Apache Lucene
+project. The following notice covers the Lucene files:
+
+Apache Lucene
+Copyright 2013 The Apache Software Foundation
+
+Includes software from other Apache Software Foundation projects,
+including, but not limited to:
+ - Apache Ant
+ - Apache Jakarta Regexp
+ - Apache Commons
+ - Apache Xerces
+
+ICU4J (under analysis/icu) is licensed under an MIT style license
+and Copyright (c) 1995-2008 International Business Machines Corporation and others
+
+Some data files (under analysis/icu/src/data) are derived from Unicode data such
+as the Unicode Character Database. See http://unicode.org/copyright.html for more
+details.
+
+Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
+BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/
+
+The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were
+automatically generated with the moman/finenight FSA library, created by
+Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
+see http://sites.google.com/site/rrettesite/moman and
+http://bitbucket.org/jpbarrette/moman/overview/
+
+The class org.apache.lucene.util.WeakIdentityMap was derived from
+the Apache CXF project and is Apache License 2.0.
+
+The Google Code Prettify is Apache License 2.0.
+See http://code.google.com/p/google-code-prettify/
+
+JUnit (junit-4.10) is licensed under the Common Public License v. 1.0
+See http://junit.sourceforge.net/cpl-v10.html
+
+This product includes code (JaspellTernarySearchTrie) from Java Spelling
+Checking Package (jaspell): http://jaspell.sourceforge.net/
+License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)
+
+The snowball stemmers in
+  analysis/common/src/java/net/sf/snowball
+were developed by Martin Porter and Richard Boulton.
+The snowball stopword lists in
+  analysis/common/src/resources/org/apache/lucene/analysis/snowball
+were developed by Martin Porter and Richard Boulton.
+The full snowball package is available from
+  http://snowball.tartarus.org/
+
+The KStem stemmer in
+  analysis/common/src/org/apache/lucene/analysis/en
+was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
+under the BSD-license.
+
+The Arabic, Persian, Romanian, Bulgarian, and Hindi analyzers (common) come with
+a default stopword list that is BSD-licensed and was created by Jacques Savoy.
+These files reside in:
+analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
+analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
+analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
+analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
+analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt
+See http://members.unine.ch/jacques.savoy/clef/index.html.
+
+The German, Spanish, Finnish, French, Hungarian, Italian, Portuguese, Russian,
+and Swedish light stemmers (common) are based on BSD-licensed reference
+implementations created by Jacques Savoy and Ljiljana Dolamic. These files reside in:
+analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
+analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
+
+The Stempel analyzer (stempel) includes BSD-licensed software developed
+by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
+and Edmond Nolan.
+
+The Polish analyzer (stempel) comes with a default stopword list that is
+BSD-licensed and was created by the Carrot2 project. The file resides
+in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
+See http://project.carrot2.org/license.html.
+
+The SmartChineseAnalyzer source code (smartcn) was
+provided by Xiaoping Gao and copyright 2009 by www.imdict.net.
+
+WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
+is derived from Unicode data such as the Unicode Character Database.
+See http://unicode.org/copyright.html for more details.
+
+The Morfologik analyzer (morfologik) includes BSD-licensed software
+developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/).
+
+Morfologik uses data from the Polish ispell/myspell dictionary
+(http://www.sjp.pl/slownik/en/), licensed under the terms of (inter alia)
+the LGPL and Creative Commons ShareAlike.
+
+Morfologik includes data from the BSD-licensed dictionary of Polish (SGJP)
+(http://sgjp.pl/morfeusz/).
+
+Servlet-api.jar and javax.servlet-*.jar are under the CDDL license; the original
+source code for these can be found at http://www.eclipse.org/jetty/downloads.php
+
+===========================================================================
+Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration
+===========================================================================
+
+This software includes a binary and/or source version of data from
+
+  mecab-ipadic-2.7.0-20070801
+
+which can be obtained from
+
+  http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz
+
+or
+
+  http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz
+
+===========================================================================
+mecab-ipadic-2.7.0-20070801 Notice
+===========================================================================
+
+Nara Institute of Science and Technology (NAIST), the copyright holder,
+disclaims all warranties with regard to this software, including all
+implied warranties of merchantability and fitness. In no event shall
+NAIST be liable for any special, indirect or consequential damages or
+any damages whatsoever resulting from loss of use, data or profits,
+whether in an action of contract, negligence or other tortious action,
+arising out of or in connection with the use or performance of this
+software.
+
+A large portion of the dictionary entries originates from ICOT Free
+Software.  The following conditions for ICOT Free Software apply to the
+current dictionary as well.
+
+Each User may also freely distribute the Program, whether in its
+original form or modified, to any third party or parties, PROVIDED
+that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
+on, or be attached to, the Program, which is distributed substantially
+in the same form as set out herein and that such intended
+distribution, if actually made, will neither violate nor otherwise
+contravene any of the laws and regulations of the countries having
+jurisdiction over the User or the intended distribution itself.
+
+NO WARRANTY
+
+The program was produced on an experimental basis in the course of the
+research and development conducted during the project and is provided
+to users as so produced on an experimental basis.  Accordingly, the
+program is provided without any warranty whatsoever, whether express,
+implied, statutory or otherwise.  The term "warranty" used herein
+includes, but is not limited to, any warranty of the quality,
+performance, merchantability and fitness for a particular purpose of
+the program and the nonexistence of any infringement or violation of
+any right of any third party.
+
+Each user of the program will agree and understand, and be deemed to
+have agreed and understood, that there is no warranty whatsoever for
+the program and, accordingly, the entire risk arising from or
+otherwise connected with the program is assumed by the user.
+
+Therefore, neither ICOT, the copyright holder, nor any other
+organization that participated in or was otherwise related to the
+development of the program and their respective officials, directors,
+officers and other employees shall be held liable for any and all
+damages, including, without limitation, general, special, incidental
+and consequential damages, arising out of or otherwise in connection
+with the use or inability to use the program or any product, material
+or result produced or otherwise obtained by using the program,
+regardless of whether they have been advised of, or otherwise had
+knowledge of, the possibility of such damages at any time during the
+project or thereafter.  Each user will be deemed to have agreed to the
+foregoing by his or her commencement of use of the program.  The term
+"use" as used herein includes, but is not limited to, the use,
+modification, copying and distribution of the program and the
+production of secondary products from the program.
+
+In the case where the program, whether in its original form or
+modified, was distributed or delivered to or received by a user from
+any person, organization or entity other than ICOT, unless it makes or
+grants independently of ICOT any specific warranty to the user in
+writing, such person, organization or entity, will also be exempted
+from and not be held liable to the user for any such damages as noted
+above as far as the program is concerned.
+
+The Derby build relies on a jar file supplied by the JSON Simple
+project, hosted at https://code.google.com/p/json-simple/.
+The JSON Simple jar file is licensed under the Apache 2.0 License.
+
+Hive CLI
+Copyright 2016 The Apache Software Foundation
+
+Hive JDBC
+Copyright 2016 The Apache Software Foundation
+
+
+Chill is a set of Scala extensions for Kryo.
+Copyright 2012 Twitter, Inc.
+
+Third Party Dependencies:
+
+Kryo 2.17
+BSD 3-Clause License
+http://code.google.com/p/kryo
+
+Commons-Codec 1.7
+Apache Public License 2.0
+http://hadoop.apache.org
+
+
+
+Breeze is distributed under an Apache License V2.0 (See LICENSE)
+
+===============================================================================
+
+Proximal algorithms outlined in Proximal.scala (package breeze.optimize.proximal)
+are based on https://github.com/cvxgrp/proximal (see LICENSE for details) and distributed with
+Copyright (c) 2014 by Debasish Das (Verizon), all rights reserved.
+
+===============================================================================
+
+QuadraticMinimizer class in package breeze.optimize.proximal is distributed with Copyright (c)
+2014, Debasish Das (Verizon), all rights reserved.
+
+===============================================================================
+
+NonlinearMinimizer class in package breeze.optimize.proximal is distributed with Copyright (c)
+2015, Debasish Das (Verizon), all rights reserved.
+
+
+stream-lib
+Copyright 2016 AddThis
+
+This product includes software developed by AddThis.
+
+This product also includes code adapted from:
+
+Apache Solr (http://lucene.apache.org/solr/)
+Copyright 2014 The Apache Software Foundation
+
+Apache Mahout (http://mahout.apache.org/)
+Copyright 2014 The Apache Software Foundation
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/DESCRIPTION b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/DESCRIPTION
new file mode 100644
index 0000000000000000000000000000000000000000..a809ece4ae1e1ec6591a4d5a52203ff1c95fadba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/DESCRIPTION
@@ -0,0 +1,36 @@
+Package: SparkR
+Type: Package
+Version: 2.4.0
+Title: R Frontend for Apache Spark
+Description: Provides an R Frontend for Apache Spark.
+Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
+                    email = "shivaram@cs.berkeley.edu"),
+             person("Xiangrui", "Meng", role = "aut",
+                    email = "meng@databricks.com"),
+             person("Felix", "Cheung", role = "aut",
+                    email = "felixcheung@apache.org"),
+             person(family = "The Apache Software Foundation", role = c("aut", "cph")))
+License: Apache License (== 2.0)
+URL: http://www.apache.org/ http://spark.apache.org/
+BugReports: http://spark.apache.org/contributing.html
+SystemRequirements: Java (== 8)
+Depends: R (>= 3.0), methods
+Suggests: knitr, rmarkdown, testthat, e1071, survival
+Collate: 'schema.R' 'generics.R' 'jobj.R' 'column.R' 'group.R' 'RDD.R'
+        'pairRDD.R' 'DataFrame.R' 'SQLContext.R' 'WindowSpec.R'
+        'backend.R' 'broadcast.R' 'catalog.R' 'client.R' 'context.R'
+        'deserialize.R' 'functions.R' 'install.R' 'jvm.R'
+        'mllib_classification.R' 'mllib_clustering.R' 'mllib_fpm.R'
+        'mllib_recommendation.R' 'mllib_regression.R' 'mllib_stat.R'
+        'mllib_tree.R' 'mllib_utils.R' 'serialize.R' 'sparkR.R'
+        'stats.R' 'streaming.R' 'types.R' 'utils.R' 'window.R'
+RoxygenNote: 6.1.0
+VignetteBuilder: knitr
+NeedsCompilation: no
+Packaged: 2018-10-29 06:33:58 UTC; spark-rm
+Author: Shivaram Venkataraman [aut, cre],
+  Xiangrui Meng [aut],
+  Felix Cheung [aut],
+  The Apache Software Foundation [aut, cph]
+Maintainer: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
+Built: R 3.4.4; ; 2018-10-29 06:36:15 UTC; unix
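(For orientation, a minimal sketch of using the package described above; this is
an editorial example, not part of the shipped files. It assumes SPARK_HOME points
at this spark-2.4.0-bin-hadoop2.7 directory and that Java 8 is installed, per the
SystemRequirements field.)

    # Load SparkR from the distribution's bundled R library and start a local session.
    library(SparkR, lib.loc = file.path(Sys.getenv("SPARK_HOME"), "R", "lib"))
    sparkR.session(master = "local[*]", appName = "sparkr-smoke-test")
    df <- createDataFrame(mtcars)    # lift a local data.frame into a SparkDataFrame
    head(summarize(groupBy(df, df$cyl), count = n(df$cyl)))
    sparkR.session.stop()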
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/INDEX b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/INDEX
new file mode 100644
index 0000000000000000000000000000000000000000..63e37ade976523844a71518e2d020312cb0f1841
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/INDEX
@@ -0,0 +1,318 @@
+%<=>%                   %<=>%
+%in%                    Match a column with given values.
+AFTSurvivalRegressionModel-class
+                        S4 class that represents a
+                        AFTSurvivalRegressionModel
+ALSModel-class          S4 class that represents an ALSModel
+BisectingKMeansModel-class
+                        S4 class that represents a BisectingKMeansModel
+DecisionTreeClassificationModel-class
+                        S4 class that represents a
+                        DecisionTreeClassificationModel
+DecisionTreeRegressionModel-class
+                        S4 class that represents a
+                        DecisionTreeRegressionModel
+FPGrowthModel-class     S4 class that represents a FPGrowthModel
+GBTClassificationModel-class
+                        S4 class that represents a
+                        GBTClassificationModel
+GBTRegressionModel-class
+                        S4 class that represents a GBTRegressionModel
+GaussianMixtureModel-class
+                        S4 class that represents a GaussianMixtureModel
+GeneralizedLinearRegressionModel-class
+                        S4 class that represents a generalized linear
+                        model
+GroupedData-class       S4 class that represents a GroupedData
+IsotonicRegressionModel-class
+                        S4 class that represents an
+                        IsotonicRegressionModel
+KMeansModel-class       S4 class that represents a KMeansModel
+KSTest-class            S4 class that represents a KSTest
+LDAModel-class          S4 class that represents an LDAModel
+LinearSVCModel-class    S4 class that represents a LinearSVCModel
+LogisticRegressionModel-class
+                        S4 class that represents a
+                        LogisticRegressionModel
+MultilayerPerceptronClassificationModel-class
+                        S4 class that represents a
+                        MultilayerPerceptronClassificationModel
+NaiveBayesModel-class   S4 class that represents a NaiveBayesModel
+RandomForestClassificationModel-class
+                        S4 class that represents a
+                        RandomForestClassificationModel
+RandomForestRegressionModel-class
+                        S4 class that represents a
+                        RandomForestRegressionModel
+SparkDataFrame-class    S4 class that represents a SparkDataFrame
+StreamingQuery-class    S4 class that represents a StreamingQuery
+WindowSpec-class        S4 class that represents a WindowSpec
+agg                     summarize
+alias                   alias
+approxQuantile          Calculates the approximate quantiles of
+                        numerical columns of a SparkDataFrame
+arrange                 Arrange Rows by Variables
+as.data.frame           Download data from a SparkDataFrame into an R
+                        data.frame
+asc                     A set of operations working with SparkDataFrame
+                        columns
+attach,SparkDataFrame-method
+                        Attach SparkDataFrame to R search path
+avg                     avg
+awaitTermination        awaitTermination
+between                 between
+broadcast               broadcast
+cache                   Cache
+cacheTable              Cache Table
+cancelJobGroup          Cancel active jobs for the specified group
+cast                    Casts the column to a different data type.
+checkpoint              checkpoint
+clearCache              Clear Cache
+clearJobGroup           Clear current job group ID and its description
+coalesce                Coalesce
+collect                 Collects all the elements of a SparkDataFrame
+                        and coerces them into an R data.frame.
+colnames                Column Names of SparkDataFrame
+coltypes                coltypes
+column                  S4 class that represents a SparkDataFrame
+                        column
+column_aggregate_functions
+                        Aggregate functions for Column operations
+column_collection_functions
+                        Collection functions for Column operations
+column_datetime_diff_functions
+                        Date time arithmetic functions for Column
+                        operations
+column_datetime_functions
+                        Date time functions for Column operations
+column_math_functions   Math functions for Column operations
+column_misc_functions   Miscellaneous functions for Column operations
+column_nonaggregate_functions
+                        Non-aggregate functions for Column operations
+column_string_functions
+                        String functions for Column operations
+column_window_functions
+                        Window functions for Column operations
+corr                    corr
+count                   Count
+cov                     cov
+createDataFrame         Create a SparkDataFrame
+createExternalTable     (Deprecated) Create an external table
+createOrReplaceTempView
+                        Creates a temporary view using the given name.
+createTable             Creates a table based on the dataset in a data
+                        source
+crossJoin               CrossJoin
+crosstab                Computes a pair-wise frequency table of the
+                        given columns
+cube                    cube
+currentDatabase         Returns the current default database
+dapply                  dapply
+dapplyCollect           dapplyCollect
+describe                describe
+dim                     Returns the dimensions of SparkDataFrame
+distinct                Distinct
+drop                    drop
+dropDuplicates          dropDuplicates
+dropTempTable           (Deprecated) Drop Temporary Table
+dropTempView            Drops the temporary view with the given view
+                        name in the catalog.
+dropna                  A set of SparkDataFrame functions working with
+                        NA values
+dtypes                  DataTypes
+endsWith                endsWith
+except                  except
+exceptAll               exceptAll
+explain                 Explain
+filter                  Filter
+first                   Return the first row of a SparkDataFrame
+fitted                  Get fitted result from a k-means model
+freqItems               Finding frequent items for columns, possibly
+                        with false positives
+gapply                  gapply
+gapplyCollect           gapplyCollect
+getLocalProperty        Get a local property set in this thread, or
+                        'NULL' if it is missing. See
+                        'setLocalProperty'.
+getNumPartitions        getNumPartitions
+glm,formula,ANY,SparkDataFrame-method
+                        Generalized Linear Models (R-compliant)
+group_by                GroupBy
+hashCode                Compute the hashCode of an object
+head                    Head
+hint                    hint
+histogram               Compute histogram statistics for given column
+insertInto              insertInto
+install.spark           Download and Install Apache Spark to a Local
+                        Directory
+intersect               Intersect
+intersectAll            intersectAll
+isActive                isActive
+isLocal                 isLocal
+isStreaming             isStreaming
+join                    Join
+last                    last
+lastProgress            lastProgress
+limit                   Limit
+listColumns             Returns a list of columns for the given
+                        table/view in the specified database
+listDatabases           Returns a list of databases available
+listFunctions           Returns a list of functions registered in the
+                        specified database
+listTables              Returns a list of tables or views in the
+                        specified database
+localCheckpoint         localCheckpoint
+merge                   Merges two data frames
+mutate                  Mutate
+ncol                    Returns the number of columns in a
+                        SparkDataFrame
+not                     !
+nrow                    Returns the number of rows in a SparkDataFrame
+orderBy                 Ordering Columns in a WindowSpec
+otherwise               otherwise
+over                    over
+partitionBy             partitionBy
+persist                 Persist
+pivot                   Pivot a column of the GroupedData and perform
+                        the specified aggregation.
+predict                 Makes predictions from a MLlib model
+print.jobj              Print a JVM object reference.
+print.structField       Print a Spark StructField.
+print.structType        Print a Spark StructType.
+printSchema             Print Schema of a SparkDataFrame
+queryName               queryName
+randomSplit             randomSplit
+rangeBetween            rangeBetween
+rbind                   Union two or more SparkDataFrames
+read.df                 Load a SparkDataFrame
+read.jdbc               Create a SparkDataFrame representing the
+                        database table accessible via JDBC URL
+read.json               Create a SparkDataFrame from a JSON file.
+read.ml                 Load a fitted MLlib model from the input path.
+read.orc                Create a SparkDataFrame from an ORC file.
+read.parquet            Create a SparkDataFrame from a Parquet file.
+read.stream             Load a streaming SparkDataFrame
+read.text               Create a SparkDataFrame from a text file.
+recoverPartitions       Recovers all the partitions in the directory of
+                        a table and updates the catalog
+refreshByPath           Invalidates and refreshes all the cached data
+                        and metadata for SparkDataFrame containing path
+refreshTable            Invalidates and refreshes all the cached data
+                        and metadata of the given table
+registerTempTable       (Deprecated) Register Temporary Table
+rename                  rename
+repartition             Repartition
+repartitionByRange      Repartition by range
+rollup                  rollup
+rowsBetween             rowsBetween
+sample                  Sample
+sampleBy                Returns a stratified sample without replacement
+saveAsTable             Save the contents of the SparkDataFrame to a
+                        data source as a table
+schema                  Get schema object
+select                  Select
+selectExpr              SelectExpr
+setCheckpointDir        Set checkpoint directory
+setCurrentDatabase      Sets the current default database
+setJobDescription       Set a human readable description of the current
+                        job.
+setJobGroup             Assigns a group ID to all the jobs started by
+                        this thread until the group ID is set to a
+                        different value or cleared.
+setLocalProperty        Set a local property that affects jobs
+                        submitted from this thread, such as the Spark
+                        fair scheduler pool.
+setLogLevel             Set new log level
+show                    show
+showDF                  showDF
+spark.addFile           Add a file or directory to be downloaded with
+                        this Spark job on every node.
+spark.als               Alternating Least Squares (ALS) for
+                        Collaborative Filtering
+spark.bisectingKmeans   Bisecting K-Means Clustering Model
+spark.decisionTree      Decision Tree Model for Regression and
+                        Classification
+spark.fpGrowth          FP-growth
+spark.gaussianMixture   Multivariate Gaussian Mixture Model (GMM)
+spark.gbt               Gradient Boosted Tree Model for Regression and
+                        Classification
+spark.getSparkFiles     Get the absolute path of a file added through
+                        spark.addFile.
+spark.getSparkFilesRootDirectory
+                        Get the root directory that contains files
+                        added through spark.addFile.
+spark.glm               Generalized Linear Models
+spark.isoreg            Isotonic Regression Model
+spark.kmeans            K-Means Clustering Model
+spark.kstest            (One-Sample) Kolmogorov-Smirnov Test
+spark.lapply            Run a function over a list of elements,
+                        distributing the computations with Spark
+spark.lda               Latent Dirichlet Allocation
+spark.logit             Logistic Regression Model
+spark.mlp               Multilayer Perceptron Classification Model
+spark.naiveBayes        Naive Bayes Models
+spark.randomForest      Random Forest Model for Regression and
+                        Classification
+spark.survreg           Accelerated Failure Time (AFT) Survival
+                        Regression Model
+spark.svmLinear         Linear SVM Model
+sparkR.callJMethod      Call Java Methods
+sparkR.callJStatic      Call Static Java Methods
+sparkR.conf             Get Runtime Config from the current active
+                        SparkSession
+sparkR.init             (Deprecated) Initialize a new Spark Context
+sparkR.newJObject       Create Java Objects
+sparkR.session          Get the existing SparkSession or initialize a
+                        new SparkSession.
+sparkR.session.stop     Stop the Spark Session and Spark Context
+sparkR.uiWebUrl         Get the URL of the SparkUI instance for the
+                        current active SparkSession
+sparkR.version          Get version of Spark on which this application
+                        is running
+sparkRHive.init         (Deprecated) Initialize a new HiveContext
+sparkRSQL.init          (Deprecated) Initialize a new SQLContext
+sql                     SQL Query
+startsWith              startsWith
+status                  status
+stopQuery               stopQuery
+storageLevel            StorageLevel
+str                     Compactly display the structure of a dataset
+structField             structField
+structType              structType
+subset                  Subset
+substr                  substr
+summary                 summary
+tableNames              Table Names
+tableToDF               Create a SparkDataFrame from a SparkSQL table
+                        or view
+tables                  Tables
+take                    Take the first NUM rows of a SparkDataFrame and
+                        return the results as an R data.frame
+toJSON                  toJSON
+uncacheTable            Uncache Table
+union                   Return a new SparkDataFrame containing the
+                        union of rows
+unionByName             Return a new SparkDataFrame containing the
+                        union of rows, matched by column names
+unpersist               Unpersist
+windowOrderBy           windowOrderBy
+windowPartitionBy       windowPartitionBy
+with                    Evaluate an R expression in an environment
+                        constructed from a SparkDataFrame
+withColumn              WithColumn
+withWatermark           withWatermark
+write.df                Save the contents of SparkDataFrame to a data
+                        source.
+write.jdbc              Save the content of SparkDataFrame to an
+                        external database table via JDBC.
+write.json              Save the contents of SparkDataFrame as a JSON
+                        file
+write.ml                Saves the MLlib model to the input path
+write.orc               Save the contents of SparkDataFrame as an ORC
+                        file, preserving the schema.
+write.parquet           Save the contents of SparkDataFrame as a
+                        Parquet file, preserving the schema.
+write.stream            Write the streaming SparkDataFrame to a data
+                        source.
+write.text              Save the content of SparkDataFrame in a text
+                        file at the specified path.
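(The structured-streaming entries above -- read.stream, write.stream, group_by,
awaitTermination, stopQuery -- are easiest to see together. A hedged sketch, not
part of the shipped files, assuming an active SparkR session and something such
as 'nc -lk 9999' serving text on localhost:9999:)

    # Read a text socket stream, count occurrences of each line, print to console.
    lines <- read.stream("socket", host = "localhost", port = 9999)
    counts <- count(group_by(lines, "value"))
    query <- write.stream(counts, "console", outputMode = "complete")
    awaitTermination(query, 10000)   # block for up to 10 seconds
    stopQuery(query)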
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/Rd.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/Rd.rds
new file mode 100644
index 0000000000000000000000000000000000000000..a4e0988aad1bc4c9ee3dff1320aba857f517d739
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/Rd.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/features.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/features.rds
new file mode 100644
index 0000000000000000000000000000000000000000..4400f982fd42a7ba25fafd6595825ff962e9c3c7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/features.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/hsearch.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/hsearch.rds
new file mode 100644
index 0000000000000000000000000000000000000000..40405fd5e1827e51d4f3c8eed5b995011369af3f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/hsearch.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/links.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/links.rds
new file mode 100644
index 0000000000000000000000000000000000000000..cbbb8579d85c9e42ce3a5282bef70bd5ddfe9e4e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/links.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/nsInfo.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/nsInfo.rds
new file mode 100644
index 0000000000000000000000000000000000000000..04cb65a7731e2d514bd8da1a5f45e1f356d96bde
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/nsInfo.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/package.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/package.rds
new file mode 100644
index 0000000000000000000000000000000000000000..d7133e3bd747a92478d90035834a95113e4ec782
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/package.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/vignette.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/vignette.rds
new file mode 100644
index 0000000000000000000000000000000000000000..7768286adaa328adeeef20ebff5bb01ad7455768
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/Meta/vignette.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/NAMESPACE b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/NAMESPACE
new file mode 100644
index 0000000000000000000000000000000000000000..d77c62a0ae5dab1ff9045250343d30208d569784
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/NAMESPACE
@@ -0,0 +1,503 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Imports from base R
+# Do not include stats:: "rpois", "runif" - causes error at runtime
+importFrom("methods", "setGeneric", "setMethod", "setOldClass")
+importFrom("methods", "is", "new", "signature", "show")
+importFrom("stats", "gaussian", "setNames")
+importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "untar")
+
+# Disable native libraries till we figure out how to package it
+# See SPARKR-7839
+#useDynLib(SparkR, stringHashCode)
+
+# S3 methods exported
+export("sparkR.session")
+export("sparkR.init")
+export("sparkR.stop")
+export("sparkR.session.stop")
+export("sparkR.conf")
+export("sparkR.version")
+export("sparkR.uiWebUrl")
+export("print.jobj")
+
+export("sparkR.newJObject")
+export("sparkR.callJMethod")
+export("sparkR.callJStatic")
+
+export("install.spark")
+
+export("sparkRSQL.init",
+       "sparkRHive.init")
+
+# MLlib integration
+exportMethods("glm",
+              "spark.glm",
+              "predict",
+              "summary",
+              "spark.kmeans",
+              "fitted",
+              "spark.mlp",
+              "spark.naiveBayes",
+              "spark.survreg",
+              "spark.lda",
+              "spark.posterior",
+              "spark.perplexity",
+              "spark.isoreg",
+              "spark.gaussianMixture",
+              "spark.als",
+              "spark.kstest",
+              "spark.logit",
+              "spark.decisionTree",
+              "spark.randomForest",
+              "spark.gbt",
+              "spark.bisectingKmeans",
+              "spark.svmLinear",
+              "spark.fpGrowth",
+              "spark.freqItemsets",
+              "spark.associationRules")
+
+# Job group lifecycle management methods
+export("setJobGroup",
+       "clearJobGroup",
+       "cancelJobGroup",
+       "setJobDescription",
+       "setLocalProperty",
+       "getLocalProperty")
+
+# Export Utility methods
+export("setLogLevel")
+
+exportClasses("SparkDataFrame")
+
+exportMethods("arrange",
+              "as.data.frame",
+              "attach",
+              "broadcast",
+              "cache",
+              "checkpoint",
+              "coalesce",
+              "collect",
+              "colnames",
+              "colnames<-",
+              "coltypes",
+              "coltypes<-",
+              "columns",
+              "count",
+              "cov",
+              "corr",
+              "covar_samp",
+              "covar_pop",
+              "createOrReplaceTempView",
+              "crossJoin",
+              "crosstab",
+              "cube",
+              "dapply",
+              "dapplyCollect",
+              "describe",
+              "dim",
+              "distinct",
+              "drop",
+              "dropDuplicates",
+              "dropna",
+              "dtypes",
+              "except",
+              "exceptAll",
+              "explain",
+              "fillna",
+              "filter",
+              "first",
+              "freqItems",
+              "gapply",
+              "gapplyCollect",
+              "getNumPartitions",
+              "group_by",
+              "groupBy",
+              "head",
+              "hint",
+              "insertInto",
+              "intersect",
+              "intersectAll",
+              "isLocal",
+              "isStreaming",
+              "join",
+              "limit",
+              "localCheckpoint",
+              "merge",
+              "mutate",
+              "na.omit",
+              "names",
+              "names<-",
+              "ncol",
+              "nrow",
+              "orderBy",
+              "persist",
+              "printSchema",
+              "randomSplit",
+              "rbind",
+              "registerTempTable",
+              "rename",
+              "repartition",
+              "repartitionByRange",
+              "rollup",
+              "sample",
+              "sample_frac",
+              "sampleBy",
+              "saveAsParquetFile",
+              "saveAsTable",
+              "saveDF",
+              "schema",
+              "select",
+              "selectExpr",
+              "show",
+              "showDF",
+              "storageLevel",
+              "subset",
+              "summarize",
+              "summary",
+              "take",
+              "toJSON",
+              "transform",
+              "union",
+              "unionAll",
+              "unionByName",
+              "unique",
+              "unpersist",
+              "where",
+              "with",
+              "withColumn",
+              "withColumnRenamed",
+              "withWatermark",
+              "write.df",
+              "write.jdbc",
+              "write.json",
+              "write.orc",
+              "write.parquet",
+              "write.stream",
+              "write.text",
+              "write.ml")
+
+exportClasses("Column")
+
+exportMethods("%<=>%",
+              "%in%",
+              "abs",
+              "acos",
+              "add_months",
+              "alias",
+              "approxCountDistinct",
+              "approxQuantile",
+              "array_contains",
+              "array_distinct",
+              "array_except",
+              "array_intersect",
+              "array_join",
+              "array_max",
+              "array_min",
+              "array_position",
+              "array_remove",
+              "array_repeat",
+              "array_sort",
+              "arrays_overlap",
+              "array_union",
+              "arrays_zip",
+              "asc",
+              "ascii",
+              "asin",
+              "atan",
+              "atan2",
+              "avg",
+              "base64",
+              "between",
+              "bin",
+              "bitwiseNOT",
+              "bround",
+              "cast",
+              "cbrt",
+              "ceil",
+              "ceiling",
+              "collect_list",
+              "collect_set",
+              "column",
+              "concat",
+              "concat_ws",
+              "contains",
+              "conv",
+              "cos",
+              "cosh",
+              "count",
+              "countDistinct",
+              "crc32",
+              "create_array",
+              "create_map",
+              "current_date",
+              "current_timestamp",
+              "hash",
+              "cume_dist",
+              "date_add",
+              "date_format",
+              "date_sub",
+              "date_trunc",
+              "datediff",
+              "dayofmonth",
+              "dayofweek",
+              "dayofyear",
+              "decode",
+              "dense_rank",
+              "desc",
+              "element_at",
+              "encode",
+              "endsWith",
+              "exp",
+              "explode",
+              "explode_outer",
+              "expm1",
+              "expr",
+              "factorial",
+              "first",
+              "flatten",
+              "floor",
+              "format_number",
+              "format_string",
+              "from_json",
+              "from_unixtime",
+              "from_utc_timestamp",
+              "getField",
+              "getItem",
+              "greatest",
+              "grouping_bit",
+              "grouping_id",
+              "hex",
+              "histogram",
+              "hour",
+              "hypot",
+              "ifelse",
+              "initcap",
+              "input_file_name",
+              "instr",
+              "isNaN",
+              "isNotNull",
+              "isNull",
+              "is.nan",
+              "isnan",
+              "kurtosis",
+              "lag",
+              "last",
+              "last_day",
+              "lead",
+              "least",
+              "length",
+              "levenshtein",
+              "like",
+              "lit",
+              "locate",
+              "log",
+              "log10",
+              "log1p",
+              "log2",
+              "lower",
+              "lpad",
+              "ltrim",
+              "map_from_arrays",
+              "map_keys",
+              "map_values",
+              "max",
+              "md5",
+              "mean",
+              "min",
+              "minute",
+              "monotonically_increasing_id",
+              "month",
+              "months_between",
+              "n",
+              "n_distinct",
+              "nanvl",
+              "negate",
+              "next_day",
+              "not",
+              "ntile",
+              "otherwise",
+              "over",
+              "percent_rank",
+              "pmod",
+              "posexplode",
+              "posexplode_outer",
+              "quarter",
+              "rand",
+              "randn",
+              "rank",
+              "regexp_extract",
+              "regexp_replace",
+              "repeat_string",
+              "reverse",
+              "rint",
+              "rlike",
+              "round",
+              "row_number",
+              "rpad",
+              "rtrim",
+              "second",
+              "sha1",
+              "sha2",
+              "shiftLeft",
+              "shiftRight",
+              "shiftRightUnsigned",
+              "shuffle",
+              "sd",
+              "sign",
+              "signum",
+              "sin",
+              "sinh",
+              "size",
+              "skewness",
+              "slice",
+              "sort_array",
+              "soundex",
+              "spark_partition_id",
+              "split_string",
+              "stddev",
+              "stddev_pop",
+              "stddev_samp",
+              "struct",
+              "sqrt",
+              "startsWith",
+              "substr",
+              "substring_index",
+              "sum",
+              "sumDistinct",
+              "tan",
+              "tanh",
+              "toDegrees",
+              "toRadians",
+              "to_date",
+              "to_json",
+              "to_timestamp",
+              "to_utc_timestamp",
+              "translate",
+              "trim",
+              "trunc",
+              "unbase64",
+              "unhex",
+              "unix_timestamp",
+              "upper",
+              "var",
+              "variance",
+              "var_pop",
+              "var_samp",
+              "weekofyear",
+              "when",
+              "window",
+              "year")
+
+exportClasses("GroupedData")
+exportMethods("agg")
+exportMethods("pivot")
+
+export("as.DataFrame",
+       "cacheTable",
+       "clearCache",
+       "createDataFrame",
+       "createExternalTable",
+       "createTable",
+       "currentDatabase",
+       "dropTempTable",
+       "dropTempView",
+       "jsonFile",
+       "listColumns",
+       "listDatabases",
+       "listFunctions",
+       "listTables",
+       "loadDF",
+       "parquetFile",
+       "read.df",
+       "read.jdbc",
+       "read.json",
+       "read.orc",
+       "read.parquet",
+       "read.stream",
+       "read.text",
+       "recoverPartitions",
+       "refreshByPath",
+       "refreshTable",
+       "setCheckpointDir",
+       "setCurrentDatabase",
+       "spark.lapply",
+       "spark.addFile",
+       "spark.getSparkFilesRootDirectory",
+       "spark.getSparkFiles",
+       "sql",
+       "str",
+       "tableToDF",
+       "tableNames",
+       "tables",
+       "uncacheTable",
+       "print.summary.GeneralizedLinearRegressionModel",
+       "read.ml",
+       "print.summary.KSTest",
+       "print.summary.DecisionTreeRegressionModel",
+       "print.summary.DecisionTreeClassificationModel",
+       "print.summary.RandomForestRegressionModel",
+       "print.summary.RandomForestClassificationModel",
+       "print.summary.GBTRegressionModel",
+       "print.summary.GBTClassificationModel")
+
+export("structField",
+       "structField.jobj",
+       "structField.character",
+       "print.structField",
+       "structType",
+       "structType.character",
+       "structType.jobj",
+       "structType.structField",
+       "print.structType")
+
+exportClasses("WindowSpec")
+
+export("partitionBy",
+       "rowsBetween",
+       "rangeBetween")
+
+export("windowPartitionBy",
+       "windowOrderBy")
+
+exportClasses("StreamingQuery")
+
+export("awaitTermination",
+       "isActive",
+       "lastProgress",
+       "queryName",
+       "status",
+       "stopQuery")
+
+
+S3method(print, jobj)
+S3method(print, structField)
+S3method(print, structType)
+S3method(print, summary.GeneralizedLinearRegressionModel)
+S3method(print, summary.KSTest)
+S3method(print, summary.DecisionTreeRegressionModel)
+S3method(print, summary.DecisionTreeClassificationModel)
+S3method(print, summary.RandomForestRegressionModel)
+S3method(print, summary.RandomForestClassificationModel)
+S3method(print, summary.GBTRegressionModel)
+S3method(print, summary.GBTClassificationModel)
+S3method(structField, character)
+S3method(structField, jobj)
+S3method(structType, character)
+S3method(structType, jobj)
+S3method(structType, structField)
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR
new file mode 100644
index 0000000000000000000000000000000000000000..3b65e3cbb872050f5dbcfacd3f2057d52d28ba0e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR
@@ -0,0 +1,27 @@
+#  File share/R/nspackloader.R
+#  Part of the R package, http://www.R-project.org
+#
+#  Copyright (C) 1995-2012 The R Core Team
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  A copy of the GNU General Public License is available at
+#  http://www.r-project.org/Licenses/
+
+local({
+    info <- loadingNamespaceInfo()
+    pkg <- info$pkgname
+    ns <- .getNamespace(as.name(pkg))
+    if (is.null(ns))
+        stop("cannot find namespace environment for ", pkg, domain = NA);
+    dbbase <- file.path(info$libname, pkg, "R", pkg)
+    lazyLoad(dbbase, ns, filter = function(n) n != ".__NAMESPACE__.")
+})
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR.rdb b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..9755b8ef44f52145c67ffeb03beafab6a22323ad
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR.rdb differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR.rdx b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..c6d87487a1d7921784fa043334d4b81b64da54c9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/R/SparkR.rdx differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/index.html b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..fcb704964af1aefb2a0c0c022e847ca6081c23c3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/index.html
@@ -0,0 +1,28 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>R: Vignettes and other documentation</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<link rel="stylesheet" type="text/css" href="/doc/html/R.css" />
+</head><body>
+<h1> Vignettes and other documentation
+<img class="toplogo" src="/doc/html/Rlogo.svg" alt="[R logo]" />
+</h1>
+<hr/>
+<div style="text-align: center;">
+<a href="/doc/html/index.html"><img class="arrow" src="/doc/html/up.jpg" alt="[Top]" /></a>
+</div>
+<h2>Vignettes from package 'SparkR'</h2>
+<table width="100%">
+<col style="width: 22%;" />
+<col style="width:  2%;" />
+<col style="width: 50%;" />
+<col style="width:  8%;" />
+<col style="width:  8%;" />
+<col style="width:  8%;" />
+<tr><td style="text-align: right; vertical-align: top;"><a href="../../../library/SparkR/doc/sparkr-vignettes.html">SparkR::sparkr-vignettes</a></td>
+<td></td><td valign="top">SparkR - Practical Guide</td>
+<td valign="top"><a href="../../../library/SparkR/doc/sparkr-vignettes.html">HTML</a></td>
+<td valign="top"><a href="../../../library/SparkR/doc/sparkr-vignettes.Rmd">source</a></td>
+<td valign="top" style="white-space: nowrap"><a href="../../../library/SparkR/doc/sparkr-vignettes.R">R code</a></td></tr>
+</table>
+</body></html>
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.R b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.R
new file mode 100644
index 0000000000000000000000000000000000000000..996cbee0491f7e49619a14cfcf9406bf7db63db7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.R
@@ -0,0 +1,522 @@
+## ----setup, include=FALSE------------------------------------------------
+library(knitr)
+opts_hooks$set(eval = function(options) {
+  # override eval to FALSE only on windows
+  if (.Platform$OS.type == "windows") {
+    options$eval = FALSE
+  }
+  options
+})
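+# Route the JVM's java.io.tmpdir (for both driver and executors) to R's session
+# temp directory, and disable the JVM's hsperfdata files via _JAVA_OPTIONS.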
+r_tmp_dir <- tempdir()
+tmp_arg <- paste0("-Djava.io.tmpdir=", r_tmp_dir)
+sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg,
+                           spark.executor.extraJavaOptions = tmp_arg)
+old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
+Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " "))
+
+## ---- message=FALSE------------------------------------------------------
+library(SparkR)
+
+## ---- include=FALSE------------------------------------------------------
+install.spark()
+sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE)
+
+## ---- eval=FALSE---------------------------------------------------------
+#  sparkR.session()
+
+## ------------------------------------------------------------------------
+cars <- cbind(model = rownames(mtcars), mtcars)
+carsDF <- createDataFrame(cars)
+
+## ------------------------------------------------------------------------
+head(carsDF)
+
+## ------------------------------------------------------------------------
+carsSubDF <- select(carsDF, "model", "mpg", "hp")
+carsSubDF <- filter(carsSubDF, carsSubDF$hp >= 200)
+head(carsSubDF)
+
+## ------------------------------------------------------------------------
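+# Group by 'gear' and count rows per group; the result is another SparkDataFrame.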
+carsGPDF <- summarize(groupBy(carsDF, carsDF$gear), count = n(carsDF$gear))
+head(carsGPDF)
+
+## ------------------------------------------------------------------------
+carsGP <- collect(carsGPDF)
+class(carsGP)
+
+## ------------------------------------------------------------------------
+model <- spark.glm(carsDF, mpg ~ wt + cyl)
+
+## ------------------------------------------------------------------------
+summary(model)
+
+## ---- eval=FALSE---------------------------------------------------------
+#  write.ml(model, path = "/HOME/tmp/mlModel/glmModel")
+
+## ---- eval=FALSE---------------------------------------------------------
+#  sparkR.session.stop()
+
+## ---- eval=FALSE---------------------------------------------------------
+#  install.spark()
+
+## ---- eval=FALSE---------------------------------------------------------
+#  sparkR.session(sparkHome = "/HOME/spark")
+
+## ---- eval=FALSE---------------------------------------------------------
+#  spark_warehouse_path <- file.path(path.expand('~'), "spark-warehouse")
+#  sparkR.session(spark.sql.warehouse.dir = spark_warehouse_path)
+
+## ---- echo=FALSE, tidy = TRUE--------------------------------------------
+paste("Spark", packageVersion("SparkR"))
+
+## ---- eval=FALSE---------------------------------------------------------
+#  sparkR.session(master = "spark://local:7077")
+
+## ---- eval=FALSE---------------------------------------------------------
+#  sparkR.session(master = "yarn")
+
+## ------------------------------------------------------------------------
+df <- as.DataFrame(faithful)
+head(df)
+
+## ---- eval=FALSE---------------------------------------------------------
+#  sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0")
+
+## ---- eval=FALSE---------------------------------------------------------
+#  df <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "NA")
+
+## ------------------------------------------------------------------------
+filePath <- paste0(sparkR.conf("spark.home"),
+                         "/examples/src/main/resources/people.json")
+readLines(filePath, n = 2L)
+
+## ------------------------------------------------------------------------
+people <- read.df(filePath, "json")
+count(people)
+head(people)
+
+## ------------------------------------------------------------------------
+printSchema(people)
+
+## ------------------------------------------------------------------------
+people <- read.json(paste0(Sys.getenv("SPARK_HOME"),
+                           c("/examples/src/main/resources/people.json",
+                             "/examples/src/main/resources/people.json")))
+count(people)
+
+## ---- eval=FALSE---------------------------------------------------------
+#  write.df(people, path = "people.parquet", source = "parquet", mode = "overwrite")
+
+## ---- eval=FALSE---------------------------------------------------------
+#  sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+#  
+#  txtPath <- paste0(sparkR.conf("spark.home"), "/examples/src/main/resources/kv1.txt")
+#  sqlCMD <- sprintf("LOAD DATA LOCAL INPATH '%s' INTO TABLE src", txtPath)
+#  sql(sqlCMD)
+#  
+#  results <- sql("FROM src SELECT key, value")
+#  
+#  # results is now a SparkDataFrame
+#  head(results)
+
+## ------------------------------------------------------------------------
+carsDF
+
+## ------------------------------------------------------------------------
+printSchema(carsDF)
+
+## ------------------------------------------------------------------------
+head(select(carsDF, "mpg"))
+
+## ------------------------------------------------------------------------
+head(filter(carsDF, carsDF$mpg < 20))
+
+## ------------------------------------------------------------------------
+numCyl <- summarize(groupBy(carsDF, carsDF$cyl), count = n(carsDF$cyl))
+head(numCyl)
+
+## ------------------------------------------------------------------------
+mean(cube(carsDF, "cyl", "gear", "am"), "mpg")
+
+## ------------------------------------------------------------------------
+mean(rollup(carsDF, "cyl", "gear", "am"), "mpg")
+
+## ------------------------------------------------------------------------
+carsDF_km <- carsDF
+carsDF_km$kmpg <- carsDF_km$mpg * 1.61
+head(select(carsDF_km, "model", "mpg", "kmpg"))
+
+## ------------------------------------------------------------------------
+carsSubDF <- select(carsDF, "model", "mpg", "cyl")
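+# Define a window partitioned by 'cyl' and ordered by 'mpg', then rank each row
+# within its partition.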
+ws <- orderBy(windowPartitionBy("cyl"), "mpg")
+carsRank <- withColumn(carsSubDF, "rank", over(rank(), ws))
+head(carsRank, n = 20L)
+
+## ------------------------------------------------------------------------
+carsSubDF <- select(carsDF, "model", "mpg")
+schema <- "model STRING, mpg DOUBLE, kmpg DOUBLE"
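+# dapply applies the R function to each partition; 'schema' declares the columns
+# of the data.frame the function returns.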
+out <- dapply(carsSubDF, function(x) { x <- cbind(x, x$mpg * 1.61) }, schema)
+head(collect(out))
+
+## ------------------------------------------------------------------------
+out <- dapplyCollect(
+         carsSubDF,
+         function(x) {
+           x <- cbind(x, "kmpg" = x$mpg * 1.61)
+         })
+head(out, 3)
+
+## ------------------------------------------------------------------------
+schema <- structType(structField("cyl", "double"), structField("max_mpg", "double"))
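+# gapply groups carsDF by 'cyl' and applies the function to each group;
+# each returned data.frame must conform to 'schema'.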
+result <- gapply(
+    carsDF,
+    "cyl",
+    function(key, x) {
+        y <- data.frame(key, max(x$mpg))
+    },
+    schema)
+head(arrange(result, "max_mpg", decreasing = TRUE))
+
+## ------------------------------------------------------------------------
+result <- gapplyCollect(
+    carsDF,
+    "cyl",
+    function(key, x) {
+         y <- data.frame(key, max(x$mpg))
+        colnames(y) <- c("cyl", "max_mpg")
+        y
+    })
+head(result[order(result$max_mpg, decreasing = TRUE), ])
+
+## ------------------------------------------------------------------------
+costs <- exp(seq(from = log(1), to = log(1000), length.out = 5))
+train <- function(cost) {
+  stopifnot(requireNamespace("e1071", quietly = TRUE))
+  model <- e1071::svm(Species ~ ., data = iris, cost = cost)
+  summary(model)
+}
+
+## ------------------------------------------------------------------------
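+# spark.lapply evaluates 'train' for each cost on Spark workers and collects
+# the results back as a local R list.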
+model.summaries <- spark.lapply(costs, train)
+
+## ------------------------------------------------------------------------
+class(model.summaries)
+
+## ---- include=FALSE------------------------------------------------------
+ops <- options()
+options(max.print=40)
+
+## ------------------------------------------------------------------------
+print(model.summaries[[2]])
+
+## ---- include=FALSE------------------------------------------------------
+options(ops)
+
+## ------------------------------------------------------------------------
+people <- read.df(paste0(sparkR.conf("spark.home"),
+                         "/examples/src/main/resources/people.json"), "json")
+
+## ------------------------------------------------------------------------
+createOrReplaceTempView(people, "people")
+
+## ------------------------------------------------------------------------
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+head(teenagers)
+
+## ------------------------------------------------------------------------
+splitDF_list <- randomSplit(carsDF, c(0.7, 0.3), seed = 0)
+carsDF_train <- splitDF_list[[1]]
+carsDF_test <- splitDF_list[[2]]
+
+## ------------------------------------------------------------------------
+count(carsDF_train)
+head(carsDF_train)
+
+## ------------------------------------------------------------------------
+count(carsDF_test)
+head(carsDF_test)
+
+## ------------------------------------------------------------------------
+# load training data and create a DataFrame
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+# fit a Linear SVM classifier model
+model <- spark.svmLinear(training,  Survived ~ ., regParam = 0.01, maxIter = 10)
+summary(model)
+
+## ------------------------------------------------------------------------
+prediction <- predict(model, training)
+head(select(prediction, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+model <- spark.logit(training, Survived ~ ., regParam = 0.04741301)
+summary(model)
+
+## ------------------------------------------------------------------------
+fitted <- predict(model, training)
+head(select(fitted, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+# Note in this case, Spark infers it is multinomial logistic regression, so family = "multinomial" is optional.
+model <- spark.logit(training, Class ~ ., regParam = 0.07815179)
+summary(model)
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+# fit a Multilayer Perceptron Classification Model
+model <- spark.mlp(training, Survived ~ Age + Sex, blockSize = 128, layers = c(2, 2), solver = "l-bfgs", maxIter = 100, tol = 0.5, stepSize = 1, seed = 1, initialWeights = c( 0, 0, 5, 5, 9, 9))
+
+## ---- include=FALSE------------------------------------------------------
+ops <- options()
+options(max.print=5)
+
+## ------------------------------------------------------------------------
+# check the summary of the fitted model
+summary(model)
+
+## ---- include=FALSE------------------------------------------------------
+options(ops)
+
+## ------------------------------------------------------------------------
+# make predictions using the fitted model
+predictions <- predict(model, training)
+head(select(predictions, predictions$prediction))
+
+## ------------------------------------------------------------------------
+titanic <- as.data.frame(Titanic)
+titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5])
+naiveBayesModel <- spark.naiveBayes(titanicDF, Survived ~ Class + Sex + Age)
+summary(naiveBayesModel)
+naiveBayesPrediction <- predict(naiveBayesModel, titanicDF)
+head(select(naiveBayesPrediction, "Class", "Sex", "Age", "Survived", "prediction"))
+
+## ---- warning=FALSE------------------------------------------------------
+library(survival)
+ovarianDF <- createDataFrame(ovarian)
+aftModel <- spark.survreg(ovarianDF, Surv(futime, fustat) ~ ecog_ps + rx)
+summary(aftModel)
+aftPredictions <- predict(aftModel, ovarianDF)
+head(aftPredictions)
+
+## ------------------------------------------------------------------------
+gaussianGLM <- spark.glm(carsDF, mpg ~ wt + hp)
+summary(gaussianGLM)
+
+## ------------------------------------------------------------------------
+gaussianFitted <- predict(gaussianGLM, carsDF)
+head(select(gaussianFitted, "model", "prediction", "mpg", "wt", "hp"))
+
+## ------------------------------------------------------------------------
+tweedieGLM1 <- spark.glm(carsDF, mpg ~ wt + hp, family = "tweedie", var.power = 0.0)
+summary(tweedieGLM1)
+
+## ------------------------------------------------------------------------
+tweedieGLM2 <- spark.glm(carsDF, mpg ~ wt + hp, family = "tweedie",
+                         var.power = 1.2, link.power = 0.0)
+summary(tweedieGLM2)
+
+## ------------------------------------------------------------------------
+y <- c(3.0, 6.0, 8.0, 5.0, 7.0)
+x <- c(1.0, 2.0, 3.5, 3.0, 4.0)
+w <- rep(1.0, 5)
+data <- data.frame(y = y, x = x, w = w)
+df <- createDataFrame(data)
+isoregModel <- spark.isoreg(df, y ~ x, weightCol = "w")
+isoregFitted <- predict(isoregModel, df)
+head(select(isoregFitted, "x", "y", "prediction"))
+
+## ------------------------------------------------------------------------
+newDF <- createDataFrame(data.frame(x = c(1.5, 3.2)))
+head(predict(isoregModel, newDF))
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+dtModel <- spark.decisionTree(df, Survived ~ ., type = "classification", maxDepth = 2)
+summary(dtModel)
+predictions <- predict(dtModel, df)
+head(select(predictions, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+gbtModel <- spark.gbt(df, Survived ~ ., type = "classification", maxDepth = 2, maxIter = 2)
+summary(gbtModel)
+predictions <- predict(gbtModel, df)
+head(select(predictions, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+rfModel <- spark.randomForest(df, Survived ~ ., type = "classification", maxDepth = 2, numTrees = 2)
+summary(rfModel)
+predictions <- predict(rfModel, df)
+head(select(predictions, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+model <- spark.bisectingKmeans(training, Class ~ Survived, k = 4)
+summary(model)
+fitted <- predict(model, training)
+head(select(fitted, "Class", "prediction"))
+
+## ------------------------------------------------------------------------
+X1 <- data.frame(V1 = rnorm(4), V2 = rnorm(4))
+X2 <- data.frame(V1 = rnorm(6, 3), V2 = rnorm(6, 4))
+data <- rbind(X1, X2)
+df <- createDataFrame(data)
+gmmModel <- spark.gaussianMixture(df, ~ V1 + V2, k = 2)
+summary(gmmModel)
+gmmFitted <- predict(gmmModel, df)
+head(select(gmmFitted, "V1", "V2", "prediction"))
+
+## ------------------------------------------------------------------------
+kmeansModel <- spark.kmeans(carsDF, ~ mpg + hp + wt, k = 3)
+summary(kmeansModel)
+kmeansPredictions <- predict(kmeansModel, carsDF)
+head(select(kmeansPredictions, "model", "mpg", "hp", "wt", "prediction"), n = 20L)
+
+## ------------------------------------------------------------------------
+corpus <- data.frame(features = c(
+  "1 2 6 0 2 3 1 1 0 0 3",
+  "1 3 0 1 3 0 0 2 0 0 1",
+  "1 4 1 0 0 4 9 0 1 2 0",
+  "2 1 0 3 0 0 5 0 2 3 9",
+  "3 1 1 9 3 0 2 0 0 1 3",
+  "4 2 0 3 4 5 1 1 1 4 0",
+  "2 1 0 3 0 0 5 0 2 2 9",
+  "1 1 1 9 2 1 2 0 0 1 3",
+  "4 4 0 3 4 2 1 3 0 0 0",
+  "2 8 2 0 3 0 2 0 2 7 2",
+  "1 1 1 9 0 2 2 0 0 3 3",
+  "4 1 0 0 4 5 1 3 0 1 0"))
+corpusDF <- createDataFrame(corpus)
+model <- spark.lda(data = corpusDF, k = 5, optimizer = "em")
+summary(model)
+
+## ------------------------------------------------------------------------
+posterior <- spark.posterior(model, corpusDF)
+head(posterior)
+
+## ------------------------------------------------------------------------
+perplexity <- spark.perplexity(model, corpusDF)
+perplexity
+
+## ---- eval=FALSE---------------------------------------------------------
+#  ratings <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0), list(1, 2, 4.0),
+#                  list(2, 1, 1.0), list(2, 2, 5.0))
+#  df <- createDataFrame(ratings, c("user", "item", "rating"))
+#  model <- spark.als(df, "rating", "user", "item", rank = 10, reg = 0.1, nonnegative = TRUE)
+
+## ---- eval=FALSE---------------------------------------------------------
+#  stats <- summary(model)
+#  userFactors <- stats$userFactors
+#  itemFactors <- stats$itemFactors
+#  head(userFactors)
+#  head(itemFactors)
+
+## ---- eval=FALSE---------------------------------------------------------
+#  predicted <- predict(model, df)
+#  head(predicted)
+
+## ------------------------------------------------------------------------
+df <- selectExpr(createDataFrame(data.frame(rawItems = c(
+  "T,R,U", "T,S", "V,R", "R,U,T,V", "R,S", "V,S,U", "U,R", "S,T", "V,R", "V,U,S",
+  "T,V,U", "R,V", "T,S", "T,S", "S,T", "S,U", "T,R", "V,R", "S,V", "T,S,U"
+))), "split(rawItems, ',') AS items")
+
+fpm <- spark.fpGrowth(df, minSupport = 0.2, minConfidence = 0.5)
+
+## ------------------------------------------------------------------------
+head(spark.freqItemsets(fpm))
+
+## ------------------------------------------------------------------------
+head(spark.associationRules(fpm))
+
+## ------------------------------------------------------------------------
+head(predict(fpm, df))
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+freqStats <- head(select(df, mean(df$Freq), sd(df$Freq)))
+freqMean <- freqStats[1]
+freqStd <- freqStats[2]
+
+test <- spark.kstest(df, "Freq", "norm", c(freqMean, freqStd))
+testSummary <- summary(test)
+testSummary
+
+## ------------------------------------------------------------------------
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+gaussianGLM <- spark.glm(training, Freq ~ Sex + Age, family = "gaussian")
+
+# Save and then load a fitted MLlib model
+modelPath <- tempfile(pattern = "ml", fileext = ".tmp")
+write.ml(gaussianGLM, modelPath)
+gaussianGLM2 <- read.ml(modelPath)
+
+# Check model summary
+summary(gaussianGLM2)
+
+# Check model prediction
+gaussianPredictions <- predict(gaussianGLM2, training)
+head(gaussianPredictions)
+
+unlink(modelPath)
+
+## ---- eval=FALSE---------------------------------------------------------
+#  # Create DataFrame representing the stream of input lines from connection
+#  lines <- read.stream("socket", host = hostname, port = port)
+#  
+#  # Split the lines into words
+#  words <- selectExpr(lines, "explode(split(value, ' ')) as word")
+#  
+#  # Generate running word count
+#  wordCounts <- count(groupBy(words, "word"))
+#  
+#  # Start running the query that prints the running counts to the console
+#  query <- write.stream(wordCounts, "console", outputMode = "complete")
+
+## ---- eval=FALSE---------------------------------------------------------
+#  topic <- read.stream("kafka",
+#                       kafka.bootstrap.servers = "host1:port1,host2:port2",
+#                       subscribe = "topic1")
+#  keyvalue <- selectExpr(topic, "CAST(key AS STRING)", "CAST(value AS STRING)")
+
+## ---- eval=FALSE---------------------------------------------------------
+#  noAggDF <- select(where(deviceDataStreamingDf, "signal > 10"), "device")
+#  
+#  # Print new data to console
+#  write.stream(noAggDF, "console")
+#  
+#  # Write new data to Parquet files
+#  write.stream(noAggDF,
+#               "parquet",
+#               path = "path/to/destination/dir",
+#               checkpointLocation = "path/to/checkpoint/dir")
+#  
+#  # Aggregate
+#  aggDF <- count(groupBy(noAggDF, "device"))
+#  
+#  # Print updated aggregations to console
+#  write.stream(aggDF, "console", outputMode = "complete")
+#  
+#  # Have all the aggregates in an in memory table. The query name will be the table name
+#  write.stream(aggDF, "memory", queryName = "aggregates", outputMode = "complete")
+#  
+#  head(sql("select * from aggregates"))
+
+## ---- echo=FALSE---------------------------------------------------------
+sparkR.session.stop()
+
+## ----cleanup, include=FALSE----------------------------------------------
+SparkR:::uninstallDownloadedSpark()
+
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.Rmd b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..090363c5f8a3e815dd0e44faefdcb2906989a817
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.Rmd
@@ -0,0 +1,1193 @@
+---
+title: "SparkR - Practical Guide"
+output:
+  rmarkdown::html_vignette:
+    toc: true
+    toc_depth: 4
+vignette: >
+  %\VignetteIndexEntry{SparkR - Practical Guide}
+  %\VignetteEngine{knitr::rmarkdown}
+  \usepackage[utf8]{inputenc}
+---
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+```{r setup, include=FALSE}
+library(knitr)
+opts_hooks$set(eval = function(options) {
+  # override eval to FALSE only on windows
+  if (.Platform$OS.type == "windows") {
+    options$eval = FALSE
+  }
+  options
+})
+r_tmp_dir <- tempdir()
+tmp_arg <- paste0("-Djava.io.tmpdir=", r_tmp_dir)
+sparkSessionConfig <- list(spark.driver.extraJavaOptions = tmp_arg,
+                           spark.executor.extraJavaOptions = tmp_arg)
+old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
+Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " "))
+```
+
+## Overview
+
+SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations such as selection, filtering, and aggregation, as well as distributed machine learning using [MLlib](https://spark.apache.org/mllib/).
+
+## Getting Started
+
+We begin with an example running on the local machine and provide an overview of the use of SparkR: data ingestion, data processing and machine learning.
+
+First, let's load and attach the package.
+```{r, message=FALSE}
+library(SparkR)
+```
+
+`SparkSession` is the entry point into SparkR which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages depended on, etc.
+
+We use default settings here, so Spark runs in local mode. If no previous Spark installation is found, the Spark package is downloaded automatically in the background. For more details about setup, see [Spark Session](#SetupSparkSession).
+
+```{r, include=FALSE}
+install.spark()
+sparkR.session(master = "local[1]", sparkConfig = sparkSessionConfig, enableHiveSupport = FALSE)
+```
+```{r, eval=FALSE}
+sparkR.session()
+```
+
+The operations in SparkR are centered around an R class called `SparkDataFrame`. It is a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R, but with richer optimizations under the hood.
+
+A `SparkDataFrame` can be constructed from a wide array of sources such as structured data files, tables in Hive, external databases, or existing local R data frames. For example, we create a `SparkDataFrame` from a local R data frame:
+
+```{r}
+cars <- cbind(model = rownames(mtcars), mtcars)
+carsDF <- createDataFrame(cars)
+```
+
+We can view the first few rows of the `SparkDataFrame` with the `head` or `showDF` function.
+```{r}
+head(carsDF)
+```
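+
+`showDF` prints rows in a tabular form; a minimal sketch (not evaluated here, and the console layout may differ):
+```{r, eval=FALSE}
+# show the first three rows in tabular form
+showDF(carsDF, numRows = 3)
+```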
+
+Common data processing operations such as `filter` and `select` are supported on the `SparkDataFrame`.
+```{r}
+carsSubDF <- select(carsDF, "model", "mpg", "hp")
+carsSubDF <- filter(carsSubDF, carsSubDF$hp >= 200)
+head(carsSubDF)
+```
+
+SparkR can use many common aggregation functions after grouping.
+
+```{r}
+carsGPDF <- summarize(groupBy(carsDF, carsDF$gear), count = n(carsDF$gear))
+head(carsGPDF)
+```
+
+The results `carsSubDF` and `carsGPDF` are `SparkDataFrame` objects. To convert them back to R `data.frame`s, we can use `collect`. **Caution**: this can cause your interactive environment to run out of memory, because `collect()` fetches the entire distributed `DataFrame` to your client, which acts as the Spark driver.
+```{r}
+carsGP <- collect(carsGPDF)
+class(carsGP)
+```
+
+SparkR supports a number of commonly used machine learning algorithms. Under the hood, SparkR uses MLlib to train the model. Users can call `summary` to print a summary of the fitted model, `predict` to make predictions on new data, and `write.ml`/`read.ml` to save/load fitted models.
+
+SparkR supports a subset of R formula operators for model fitting, including `~`, `.`, `:`, `+`, and `-`. We use linear regression as an example.
+```{r}
+model <- spark.glm(carsDF, mpg ~ wt + cyl)
+```
+
+The result matches that returned by the R `glm` function applied to `mtcars`, the `data.frame` corresponding to `carsDF`. In fact, for the Generalized Linear Model, we also expose `glm` for `SparkDataFrame`, so the above is equivalent to `model <- glm(mpg ~ wt + cyl, data = carsDF)`.
+
+```{r}
+summary(model)
+```
+
+The model can be saved by `write.ml` and loaded back using `read.ml`.
+```{r, eval=FALSE}
+write.ml(model, path = "/HOME/tmp/mlModel/glmModel")
+```
+
+Finally, we can stop the Spark session by running
+```{r, eval=FALSE}
+sparkR.session.stop()
+```
+
+## Setup
+
+### Installation
+
+Unlike many other R packages, SparkR requires an additional installation of Apache Spark. The Spark installation is used to run a backend process that compiles and executes SparkR programs.
+
+After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start SparkR, and it will check for a Spark installation. If you are working with SparkR from an interactive shell (e.g. R, RStudio), Spark is downloaded and cached automatically if it is not found. Alternatively, we provide the easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from the [Apache Spark website](https://spark.apache.org/downloads.html).
+
+```{r, eval=FALSE}
+install.spark()
+```
+
+If you already have Spark installed, you don't have to install it again; instead, pass the `sparkHome` argument to `sparkR.session` to let SparkR know where the existing Spark installation is.
+
+```{r, eval=FALSE}
+sparkR.session(sparkHome = "/HOME/spark")
+```
+
+### Spark Session {#SetupSparkSession}
+
+
+In addition to `sparkHome`, many other options can be specified in `sparkR.session`. For a complete list, see [Starting up: SparkSession](https://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession) and [SparkR API doc](https://spark.apache.org/docs/latest/api/R/sparkR.session.html).
+
+In particular, the following Spark driver properties can be set in `sparkConfig`.
+
+Property Name | Property group | spark-submit equivalent
+---------------- | ------------------ | ----------------------
+`spark.driver.memory` | Application Properties | `--driver-memory`
+`spark.driver.extraClassPath` | Runtime Environment | `--driver-class-path`
+`spark.driver.extraJavaOptions` | Runtime Environment | `--driver-java-options`
+`spark.driver.extraLibraryPath` | Runtime Environment | `--driver-library-path`
+`spark.yarn.keytab` | Application Properties | `--keytab`
+`spark.yarn.principal` | Application Properties | `--principal`
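+
+For example, a minimal sketch of setting `spark.driver.memory` through `sparkConfig` (the `"2g"` value is only illustrative):
+```{r, eval=FALSE}
+# start a session with 2g of driver memory (illustrative value)
+sparkR.session(sparkConfig = list(spark.driver.memory = "2g"))
+```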
+
+**For Windows users**: Because file path prefixes differ across operating systems, a current workaround to avoid a potentially wrong prefix is to specify `spark.sql.warehouse.dir` when starting the `SparkSession`.
+
+```{r, eval=FALSE}
+spark_warehouse_path <- file.path(path.expand('~'), "spark-warehouse")
+sparkR.session(spark.sql.warehouse.dir = spark_warehouse_path)
+```
+
+
+#### Cluster Mode
+SparkR can connect to remote Spark clusters. [Cluster Mode Overview](https://spark.apache.org/docs/latest/cluster-overview.html) is a good introduction to different Spark cluster modes.
+
+When connecting SparkR to a remote Spark cluster, make sure that the Spark version and Hadoop version on the machine match the corresponding versions on the cluster. The current SparkR package is compatible with
+```{r, echo=FALSE, tidy = TRUE}
+paste("Spark", packageVersion("SparkR"))
+```
+This version should be used on both the local computer and the remote cluster.
+
+To connect, pass the URL of the master node to `sparkR.session`. A complete list can be seen in [Spark Master URLs](https://spark.apache.org/docs/latest/submitting-applications.html#master-urls).
+For example, to connect to a local standalone Spark master, we can call
+
+```{r, eval=FALSE}
+sparkR.session(master = "spark://local:7077")
+```
+
+For a YARN cluster, SparkR supports client mode with the master set to "yarn".
+```{r, eval=FALSE}
+sparkR.session(master = "yarn")
+```
+YARN cluster mode is not supported in the current version.
+
+## Data Import
+
+### Local Data Frame
+The simplest way is to convert a local R data frame into a `SparkDataFrame`. Specifically, we can use `as.DataFrame` or `createDataFrame` and pass in the local R data frame to create a `SparkDataFrame`. As an example, the following creates a `SparkDataFrame` using the `faithful` dataset from R.
+```{r}
+df <- as.DataFrame(faithful)
+head(df)
+```
+
+### Data Sources
+SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. You can check the Spark SQL Programming Guide for more [specific options](https://spark.apache.org/docs/latest/sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources.
+
+The general method for creating a `SparkDataFrame` from a data source is `read.df`. This method takes the path of the file to load and the type of data source, and the currently active Spark session is used automatically. SparkR supports reading CSV, JSON and Parquet files natively, and through Spark Packages you can find data source connectors for popular file formats such as Avro. These packages can be added with the `sparkPackages` parameter when initializing a SparkSession with `sparkR.session`.
+
+```{r, eval=FALSE}
+sparkR.session(sparkPackages = "com.databricks:spark-avro_2.11:3.0.0")
+```
+
+The following example shows how to use data sources with a CSV input file. For more information, please refer to the SparkR [read.df](https://spark.apache.org/docs/latest/api/R/read.df.html) API documentation.
+```{r, eval=FALSE}
+df <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "NA")
+```
+
+The data sources API natively supports JSON formatted input files. Note that the file that is used here is not a typical JSON file. Each line in the file must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail.
+
+Let's take a look at the first two lines of the raw JSON file used here.
+
+```{r}
+filePath <- paste0(sparkR.conf("spark.home"),
+                         "/examples/src/main/resources/people.json")
+readLines(filePath, n = 2L)
+```
+
+We use `read.df` to read that into a `SparkDataFrame`.
+
+```{r}
+people <- read.df(filePath, "json")
+count(people)
+head(people)
+```
+
+SparkR automatically infers the schema from the JSON file.
+```{r}
+printSchema(people)
+```
+
+If we want to read multiple JSON files, `read.json` can be used.
+```{r}
+people <- read.json(paste0(Sys.getenv("SPARK_HOME"),
+                           c("/examples/src/main/resources/people.json",
+                             "/examples/src/main/resources/people.json")))
+count(people)
+```
+
+The data sources API can also be used to save `SparkDataFrame`s in multiple file formats. For example, we can save the `SparkDataFrame` from the previous example to a Parquet file using `write.df`.
+```{r, eval=FALSE}
+write.df(people, path = "people.parquet", source = "parquet", mode = "overwrite")
+```
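+
+The saved Parquet data can be loaded back with `read.df`; a minimal sketch, assuming the path used above:
+```{r, eval=FALSE}
+# read the Parquet output written above back into a SparkDataFrame
+peopleParquet <- read.df("people.parquet", "parquet")
+head(peopleParquet)
+```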
+
+### Hive Tables
+You can also create `SparkDataFrame`s from Hive tables. To do this, we need to create a SparkSession with Hive support, which can access tables in the Hive MetaStore. Note that Spark should have been built with Hive support; more details can be found in the [SQL Programming Guide](https://spark.apache.org/docs/latest/sql-programming-guide.html). By default, SparkR attempts to create a SparkSession with Hive support enabled (`enableHiveSupport = TRUE`).
+
+```{r, eval=FALSE}
+sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+
+txtPath <- paste0(sparkR.conf("spark.home"), "/examples/src/main/resources/kv1.txt")
+sqlCMD <- sprintf("LOAD DATA LOCAL INPATH '%s' INTO TABLE src", txtPath)
+sql(sqlCMD)
+
+results <- sql("FROM src SELECT key, value")
+
+# results is now a SparkDataFrame
+head(results)
+```
+
+
+## Data Processing
+
+**To dplyr users**: SparkR has a similar interface to dplyr for data processing. However, some notable differences are worth mentioning up front. We use `df` to represent a `SparkDataFrame` and `col` to represent the name of a column here.
+
+1. Indicating columns. SparkR uses either a character string with the column name or a Column object constructed with `$` to indicate a column. For example, to select `col` in `df`, we can write `select(df, "col")` or `select(df, df$col)`.
+
+2. Describing conditions. In SparkR, a Column object can be used in the condition directly, or a character string can describe the condition without referring to the `SparkDataFrame`. For example, to select rows with values greater than 1, we can write `filter(df, df$col > 1)` or `filter(df, "col > 1")`.
+
+Here are more concrete examples.
+
+dplyr | SparkR
+-------- | ---------
+`select(mtcars, mpg, hp)` | `select(carsDF, "mpg", "hp")`
+`filter(mtcars, mpg > 20, hp > 100)` | `filter(carsDF, carsDF$mpg > 20, carsDF$hp > 100)`
+
+Other differences are noted alongside the specific methods below.
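+
+As a brief sketch (not in the original text), both styles of condition are accepted and are equivalent:
+```{r, eval=FALSE}
+# Column objects combined with & ...
+head(filter(carsDF, carsDF$mpg > 20 & carsDF$hp > 100))
+# ... or a SQL expression string
+head(filter(carsDF, "mpg > 20 AND hp > 100"))
+```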
+
+We use the `SparkDataFrame` `carsDF` created above. We can get basic information about the `SparkDataFrame`.
+```{r}
+carsDF
+```
+
+Print out the schema in tree format.
+```{r}
+printSchema(carsDF)
+```
+
+### SparkDataFrame Operations
+
+#### Selecting rows, columns
+
+`SparkDataFrame`s support a number of functions for structured data processing. Here we include some basic examples; a complete list can be found in the [API](https://spark.apache.org/docs/latest/api/R/index.html) docs.
+
+You can pass in column names as strings to select columns.
+```{r}
+head(select(carsDF, "mpg"))
+```
+
+Filter the SparkDataFrame to only retain rows with mpg less than 20 miles/gallon.
+```{r}
+head(filter(carsDF, carsDF$mpg < 20))
+```
+
+#### Grouping, Aggregation
+
+A common flow of grouping and aggregation is
+
+1. Use `groupBy` or `group_by` with respect to some grouping variables to create a `GroupedData` object
+
+2. Feed the `GroupedData` object to `agg` or `summarize` functions, with some provided aggregation functions to compute a number within each group.
+
+A number of widely used functions are supported to aggregate data after grouping, including `avg`, `countDistinct`, `count`, `first`, `kurtosis`, `last`, `max`, `mean`, `min`, `sd`, `skewness`, `stddev_pop`, `stddev_samp`, `sumDistinct`, `sum`, `var_pop`, `var_samp`, and `var`. See the [API doc for aggregate functions](https://spark.apache.org/docs/latest/api/R/column_aggregate_functions.html) for details.
+
+For example, we can compute a histogram of the number of cylinders in the `mtcars` dataset, as shown below.
+
+```{r}
+numCyl <- summarize(groupBy(carsDF, carsDF$cyl), count = n(carsDF$cyl))
+head(numCyl)
+```
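+
+Several aggregates can be combined in one `summarize` call; a minimal sketch using functions from the list above (the column names `avg_mpg` and `max_hp` are illustrative):
+```{r, eval=FALSE}
+# average mpg and maximum hp per cylinder count
+head(summarize(groupBy(carsDF, carsDF$cyl),
+               avg_mpg = avg(carsDF$mpg),
+               max_hp = max(carsDF$hp)))
+```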
+
+Use `cube` or `rollup` to compute subtotals across multiple dimensions.
+
+```{r}
+mean(cube(carsDF, "cyl", "gear", "am"), "mpg")
+```
+
+generates groupings for all possible combinations of the grouping columns, while
+
+```{r}
+mean(rollup(carsDF, "cyl", "gear", "am"), "mpg")
+```
+
+generates the hierarchical groupings {(`cyl`, `gear`, `am`), (`cyl`, `gear`), (`cyl`), ()}.
+
+
+#### Operating on Columns
+
+SparkR also provides a number of functions that can be applied directly to columns for data processing and aggregation. The example below shows the use of basic arithmetic functions.
+
+```{r}
+carsDF_km <- carsDF
+carsDF_km$kmpg <- carsDF_km$mpg * 1.61
+head(select(carsDF_km, "model", "mpg", "kmpg"))
+```
+
+
+### Window Functions
+A window function is a variation of an aggregation function. In simple terms,
+
+* aggregation function: `n` to `1` mapping - returns a single value for a group of entries. Examples include `sum`, `count`, `max`.
+
+* window function: `n` to `n` mapping - returns one value for each entry in the group, but the value may depend on all the entries of the *group*. Examples include `rank`, `lead`, `lag`.
+
+Formally, the *group* mentioned above is called the *frame*. Every input row can have a unique frame associated with it, and the output of the window function on that row is based on the rows contained in that frame.
+
+Window functions are often used in conjunction with the following functions: `windowPartitionBy`, `windowOrderBy`, `partitionBy`, `orderBy`, and `over`. To illustrate this, we next look at an example.
+
+We still use the `mtcars` dataset. The corresponding `SparkDataFrame` is `carsDF`. Suppose that, for each number of cylinders, we want to calculate the rank of each car by `mpg` within the group.
+```{r}
+carsSubDF <- select(carsDF, "model", "mpg", "cyl")
+ws <- orderBy(windowPartitionBy("cyl"), "mpg")
+carsRank <- withColumn(carsSubDF, "rank", over(rank(), ws))
+head(carsRank, n = 20L)
+```
+
+We explain the above steps in detail.
+
+* `windowPartitionBy` creates a window specification object `WindowSpec` that defines the partition. It controls which rows will be in the same partition as a given row: here, rows with the same value of `cyl` are put in the same partition. `orderBy` further defines the ordering, that is, the position of a given row within the partition. The resulting `WindowSpec` is returned as `ws`.
+
+More window specification methods include `rangeBetween`, which can define the boundaries of the frame by value, and `rowsBetween`, which can define the boundaries by row indices (see the sketch after this list).
+
+* `withColumn` appends a Column called `rank` to the `SparkDataFrame`. `over` returns a windowing column: its first argument is usually a Column returned by a window function such as `rank()` or `lead(carsDF$wt)`, and its values are calculated according to the partitioned-and-ordered table.
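+
+As a hedged sketch (not part of the original example), `rowsBetween` can restrict the frame to the current row and the two preceding rows, giving a moving average of `mpg` within each `cyl` partition; the names `wsMoving` and `mpg_moving_avg` are illustrative:
+```{r, eval=FALSE}
+# frame: two preceding rows through the current row, within each cyl partition
+wsMoving <- rowsBetween(orderBy(windowPartitionBy("cyl"), "mpg"), -2, 0)
+carsMoving <- withColumn(carsSubDF, "mpg_moving_avg", over(avg(carsSubDF$mpg), wsMoving))
+head(carsMoving)
+```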
+
+### User-Defined Function
+
+In SparkR, we support several kinds of user-defined functions (UDFs).
+
+#### Apply by Partition
+
+`dapply` applies a function to each partition of a `SparkDataFrame`. The function should have only one parameter, a `data.frame` corresponding to a partition, and its output should be a `data.frame` as well. The schema specifies the row format of the resulting `SparkDataFrame` and must match the data types of the returned value. See [here](#DataTypes) for the mapping between R and Spark data types.
+
+We convert `mpg` to `kmpg` (kilometers per gallon). `carsSubDF` is a `SparkDataFrame` with a subset of `carsDF` columns.
+
+```{r}
+carsSubDF <- select(carsDF, "model", "mpg")
+schema <- "model STRING, mpg DOUBLE, kmpg DOUBLE"
+out <- dapply(carsSubDF, function(x) { x <- cbind(x, x$mpg * 1.61) }, schema)
+head(collect(out))
+```
+
+Like `dapply`, `dapplyCollect` can apply a function to each partition of a `SparkDataFrame` and collect the result back. The output of the function should be a `data.frame`, but no schema is required in this case. Note that `dapplyCollect` can fail if the output of the UDF on all partitions cannot be pulled into the driver's memory.
+
+```{r}
+out <- dapplyCollect(
+         carsSubDF,
+         function(x) {
+           x <- cbind(x, "kmpg" = x$mpg * 1.61)
+         })
+head(out, 3)
+```
+
+#### Apply by Group
+`gapply` applies a function to each group of a `SparkDataFrame`. The function takes two parameters: a grouping key and an R `data.frame` corresponding to that key. The groups are chosen from one or more columns of the `SparkDataFrame`. The output of the function should be a `data.frame`, and the schema specifies the row format of the resulting `SparkDataFrame`; it must represent the R function's output schema in terms of Spark data types. The column names of the returned `data.frame` are set by the user. See [here](#DataTypes) for the mapping between R and Spark data types.
+
+```{r}
+schema <- structType(structField("cyl", "double"), structField("max_mpg", "double"))
+result <- gapply(
+    carsDF,
+    "cyl",
+    function(key, x) {
+        y <- data.frame(key, max(x$mpg))
+    },
+    schema)
+head(arrange(result, "max_mpg", decreasing = TRUE))
+```
+
+Like `gapply`, `gapplyCollect` applies a function to each group of a `SparkDataFrame` and collects the result back as an R `data.frame`. The output of the function should be a `data.frame`, but no schema is required in this case. Note that `gapplyCollect` can fail if the output of the UDF on all groups cannot be pulled into the driver's memory.
+
+```{r}
+result <- gapplyCollect(
+    carsDF,
+    "cyl",
+    function(key, x) {
+         y <- data.frame(key, max(x$mpg))
+        colnames(y) <- c("cyl", "max_mpg")
+        y
+    })
+head(result[order(result$max_mpg, decreasing = TRUE), ])
+```
+
+#### Distribute Local Functions
+
+Similar to `lapply` in native R, `spark.lapply` runs a function over a list of elements and distributes the computation with Spark. It works in a manner similar to `doParallel` or `lapply` applied to the elements of a list. The results of all the computations should fit on a single machine. If that is not the case, you can do something like `df <- createDataFrame(list)` and then use `dapply`.
+
+We use `svm` from the `e1071` package as an example, with all default settings except for varying the cost of constraint violation. `spark.lapply` can train these different models in parallel.
+
+```{r}
+costs <- exp(seq(from = log(1), to = log(1000), length.out = 5))
+train <- function(cost) {
+  stopifnot(requireNamespace("e1071", quietly = TRUE))
+  model <- e1071::svm(Species ~ ., data = iris, cost = cost)
+  summary(model)
+}
+```
+
+This returns a list of the models' summaries.
+```{r}
+model.summaries <- spark.lapply(costs, train)
+```
+
+```{r}
+class(model.summaries)
+```
+
+
+To avoid a lengthy display, we only present a partial result for the second fitted model. You are free to inspect the other models as well.
+```{r, include=FALSE}
+ops <- options()
+options(max.print=40)
+```
+```{r}
+print(model.summaries[[2]])
+```
+```{r, include=FALSE}
+options(ops)
+```
+
+
+### SQL Queries
+A `SparkDataFrame` can also be registered as a temporary view in Spark SQL so that SQL queries can be run over its data. The `sql` function enables applications to run SQL queries programmatically and returns the result as a `SparkDataFrame`.
+
+```{r}
+people <- read.df(paste0(sparkR.conf("spark.home"),
+                         "/examples/src/main/resources/people.json"), "json")
+```
+
+Register this `SparkDataFrame` as a temporary view.
+
+```{r}
+createOrReplaceTempView(people, "people")
+```
+
+SQL statements can be run using the `sql` function.
+```{r}
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+head(teenagers)
+```
+
+
+## Machine Learning
+
+SparkR supports the following machine learning models and algorithms.
+
+#### Classification
+
+* Linear Support Vector Machine (SVM) Classifier
+
+* Logistic Regression
+
+* Multilayer Perceptron (MLP)
+
+* Naive Bayes
+
+#### Regression
+
+* Accelerated Failure Time (AFT) Survival Model
+
+* Generalized Linear Model (GLM)
+
+* Isotonic Regression
+
+#### Tree - Classification and Regression
+
+* Decision Tree
+
+* Gradient-Boosted Trees (GBT)
+
+* Random Forest
+
+#### Clustering
+
+* Bisecting $k$-means
+
+* Gaussian Mixture Model (GMM)
+
+* $k$-means Clustering
+
+* Latent Dirichlet Allocation (LDA)
+
+#### Collaborative Filtering
+
+* Alternating Least Squares (ALS)
+
+#### Frequent Pattern Mining
+
+* FP-growth
+
+#### Statistics
+
+* Kolmogorov-Smirnov Test
+
+### R Formula
+
+For most of the models above, SparkR supports **R formula operators**, including `~`, `.`, `:`, `+`, and `-`, for model fitting. This makes for a similar experience to using R's modeling functions.
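+
+As a hedged illustration (not from the original text), `:` adds an interaction term and `+` combines terms, just as in base R formulas; the name `interactionModel` is illustrative:
+```{r, eval=FALSE}
+# linear model with an interaction between weight and horsepower
+interactionModel <- spark.glm(carsDF, mpg ~ wt + hp + wt:hp)
+summary(interactionModel)
+```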
+
+### Training and Test Sets
+
+We can easily split a `SparkDataFrame` into random training and test sets with the `randomSplit` function. It returns a list of split `SparkDataFrame`s with the provided `weights`. We use `carsDF` as an example, with about 70% training data and 30% test data.
+```{r}
+splitDF_list <- randomSplit(carsDF, c(0.7, 0.3), seed = 0)
+carsDF_train <- splitDF_list[[1]]
+carsDF_test <- splitDF_list[[2]]
+```
+
+```{r}
+count(carsDF_train)
+head(carsDF_train)
+```
+
+```{r}
+count(carsDF_test)
+head(carsDF_test)
+```
+
+### Models and Algorithms
+
+#### Linear Support Vector Machine (SVM) Classifier
+
+A [Linear Support Vector Machine (SVM)](https://en.wikipedia.org/wiki/Support_vector_machine#Linear_SVM) classifier is an SVM classifier with a linear kernel.
+It is a binary classifier. We use a simple example to show how to use `spark.svmLinear`
+for binary classification.
+
+```{r}
+# load training data and create a DataFrame
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+# fit a Linear SVM classifier model
+model <- spark.svmLinear(training,  Survived ~ ., regParam = 0.01, maxIter = 10)
+summary(model)
+```
+
+Predict values on training data
+```{r}
+prediction <- predict(model, training)
+head(select(prediction, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+```
+
+#### Logistic Regression
+
+[Logistic regression](https://en.wikipedia.org/wiki/Logistic_regression) is a widely used model when the response is categorical. It can be seen as a special case of the [Generalized Linear Model](https://en.wikipedia.org/wiki/Generalized_linear_model).
+We provide `spark.logit` on top of `spark.glm` to support logistic regression with advanced hyper-parameters.
+It supports both binary and multiclass classification with elastic-net regularization and feature standardization, similar to `glmnet`.
+
+We use a simple example to demonstrate `spark.logit` usage. In general, using `spark.logit` involves three steps:
+1) create a DataFrame from a proper data source; 2) fit a logistic regression model using `spark.logit` with a proper parameter setting;
+and 3) obtain the coefficient matrix of the fitted model using `summary` and use the model for prediction with `predict`.
+
+Binomial logistic regression
+```{r}
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+model <- spark.logit(training, Survived ~ ., regParam = 0.04741301)
+summary(model)
+```
+
+Predict values on training data
+```{r}
+fitted <- predict(model, training)
+head(select(fitted, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+```
+
+Multinomial logistic regression against three classes
+```{r}
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+# Note in this case, Spark infers it is multinomial logistic regression, so family = "multinomial" is optional.
+model <- spark.logit(training, Class ~ ., regParam = 0.07815179)
+summary(model)
+```
+
+#### Multilayer Perceptron
+
+Multilayer perceptron classifier (MLPC) is a classifier based on the [feedforward artificial neural network](https://en.wikipedia.org/wiki/Feedforward_neural_network). MLPC consists of multiple layers of nodes. Each layer is fully connected to the next layer in the network. Nodes in the input layer represent the input data. All other nodes map inputs to outputs by taking a linear combination of the inputs with the node's weights $w$ and bias $b$ and applying an activation function. This can be written in matrix form for an MLPC with $K+1$ layers as follows:
+$$
+y(x)=f_K(\ldots f_2(w_2^T f_1(w_1^T x + b_1) + b_2) \ldots + b_K).
+$$
+
+Nodes in intermediate layers use sigmoid (logistic) function:
+$$
+f(z_i) = \frac{1}{1+e^{-z_i}}.
+$$
+
+Nodes in the output layer use softmax function:
+$$
+f(z_i) = \frac{e^{z_i}}{\sum_{k=1}^N e^{z_k}}.
+$$
+
+The number of nodes $N$ in the output layer corresponds to the number of classes.
+
+MLPC employs backpropagation for learning the model. We use the logistic loss function for optimization and L-BFGS as an optimization routine.
+
+`spark.mlp` requires at least two columns in `data`: one named `"label"` and the other one `"features"`. The `"features"` column should be in libSVM-format.
+
+We use the `Titanic` dataset to show how to use `spark.mlp` for classification.
+```{r}
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+# fit a Multilayer Perceptron Classification Model
+model <- spark.mlp(training, Survived ~ Age + Sex, blockSize = 128, layers = c(2, 2), solver = "l-bfgs", maxIter = 100, tol = 0.5, stepSize = 1, seed = 1, initialWeights = c( 0, 0, 5, 5, 9, 9))
+```
+
+To avoid a lengthy display, we only present partial results of the model summary. You can check the full result from your SparkR shell.
+```{r, include=FALSE}
+ops <- options()
+options(max.print=5)
+```
+```{r}
+# check the summary of the fitted model
+summary(model)
+```
+```{r, include=FALSE}
+options(ops)
+```
+```{r}
+# make predictions use the fitted model
+predictions <- predict(model, training)
+head(select(predictions, predictions$prediction))
+```
+
+#### Naive Bayes
+
+The naive Bayes model assumes independence among the features. `spark.naiveBayes` fits a [Bernoulli naive Bayes model](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Bernoulli_naive_Bayes) against a SparkDataFrame. All of the data should be categorical. These models are often used for document classification.
+
+```{r}
+titanic <- as.data.frame(Titanic)
+titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5])
+naiveBayesModel <- spark.naiveBayes(titanicDF, Survived ~ Class + Sex + Age)
+summary(naiveBayesModel)
+naiveBayesPrediction <- predict(naiveBayesModel, titanicDF)
+head(select(naiveBayesPrediction, "Class", "Sex", "Age", "Survived", "prediction"))
+```
+
+#### Accelerated Failure Time Survival Model
+
+Survival analysis studies the expected duration of time until an event happens, and often the relationship with risk factors or treatment taken on the subject. In contrast to standard regression analysis, survival modeling has to deal with special characteristics in the data including non-negative survival time and censoring.
+
+Accelerated Failure Time (AFT) model is a parametric survival model for censored data that assumes the effect of a covariate is to accelerate or decelerate the life course of an event by some constant. For more information, refer to the Wikipedia page [AFT Model](https://en.wikipedia.org/wiki/Accelerated_failure_time_model) and the references there. Different from a [Proportional Hazards Model](https://en.wikipedia.org/wiki/Proportional_hazards_model) designed for the same purpose, the AFT model is easier to parallelize because each instance contributes to the objective function independently.
+
+```{r, warning=FALSE}
+library(survival)
+ovarianDF <- createDataFrame(ovarian)
+aftModel <- spark.survreg(ovarianDF, Surv(futime, fustat) ~ ecog_ps + rx)
+summary(aftModel)
+aftPredictions <- predict(aftModel, ovarianDF)
+head(aftPredictions)
+```
+
+#### Generalized Linear Model
+
+The main function is `spark.glm`. The following families and link functions are supported. The default is gaussian.
+
+Family | Link Function
+------ | ---------
+gaussian | identity, log, inverse
+binomial | logit, probit, cloglog (complementary log-log)
+poisson | log, identity, sqrt
+gamma | inverse, identity, log
+tweedie | power link function
+
+There are three ways to specify the `family` argument.
+
+* Family name as a character string, e.g. `family = "gaussian"`.
+
+* Family function, e.g. `family = binomial`.
+
+* Result returned by a family function, e.g. `family = poisson(link = log)`.
+
+* Note that there are two ways to specify the tweedie family:
+  a) set `family = "tweedie"` and specify `var.power` and `link.power`;
+  b) when the package `statmod` is loaded, the tweedie family is specified using the family definition therein, i.e., `tweedie()`.
+
+For more information regarding the families and their link functions, see the Wikipedia page [Generalized Linear Model](https://en.wikipedia.org/wiki/Generalized_linear_model).
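+
+For instance, a minimal sketch of the family-function form, fitting a binomial GLM on the binary `am` column of `carsDF` (the name `binomialGLM` is illustrative):
+```{r, eval=FALSE}
+# logistic (binomial) GLM on the 0/1 transmission column
+binomialGLM <- spark.glm(carsDF, am ~ wt + hp, family = binomial)
+summary(binomialGLM)
+```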
+
+We use the `mtcars` dataset as an illustration. The corresponding `SparkDataFrame` is `carsDF`. After fitting the model, we print out a summary and see the fitted values by making predictions on the original dataset. We can also pass in a new `SparkDataFrame` with the same schema to predict on new data.
+
+```{r}
+gaussianGLM <- spark.glm(carsDF, mpg ~ wt + hp)
+summary(gaussianGLM)
+```
+When making predictions, a new column called `prediction` is appended. Let's look at only a subset of the columns here.
+```{r}
+gaussianFitted <- predict(gaussianGLM, carsDF)
+head(select(gaussianFitted, "model", "prediction", "mpg", "wt", "hp"))
+```
+
+The following is the same fit using the tweedie family:
+```{r}
+tweedieGLM1 <- spark.glm(carsDF, mpg ~ wt + hp, family = "tweedie", var.power = 0.0)
+summary(tweedieGLM1)
+```
+We can try other distributions in the tweedie family, for example, a compound Poisson distribution with a log link:
+```{r}
+tweedieGLM2 <- spark.glm(carsDF, mpg ~ wt + hp, family = "tweedie",
+                         var.power = 1.2, link.power = 0.0)
+summary(tweedieGLM2)
+```
+
+#### Isotonic Regression
+
+`spark.isoreg` fits an [Isotonic Regression](https://en.wikipedia.org/wiki/Isotonic_regression) model against a `SparkDataFrame`. It solves a weighted univariate regression problem under a complete order constraint. Specifically, given a set of real observed responses $y_1, \ldots, y_n$, corresponding real features $x_1, \ldots, x_n$, and optionally positive weights $w_1, \ldots, w_n$, we want to find a monotone (piecewise linear) function $f$ to minimize
+$$
+\ell(f) = \sum_{i=1}^n w_i (y_i - f(x_i))^2.
+$$
+
+There are a few more arguments that may be useful.
+
+* `weightCol`: a character string specifying the weight column.
+
+* `isotonic`: logical value indicating whether the output sequence should be isotonic/increasing (`TRUE`) or antitonic/decreasing (`FALSE`).
+
+* `featureIndex`: the index of the feature on the right-hand side of the formula if it is a vector column (default: 0); it has no effect otherwise.
+
+We use an artificial example to show its use.
+
+```{r}
+y <- c(3.0, 6.0, 8.0, 5.0, 7.0)
+x <- c(1.0, 2.0, 3.5, 3.0, 4.0)
+w <- rep(1.0, 5)
+data <- data.frame(y = y, x = x, w = w)
+df <- createDataFrame(data)
+isoregModel <- spark.isoreg(df, y ~ x, weightCol = "w")
+isoregFitted <- predict(isoregModel, df)
+head(select(isoregFitted, "x", "y", "prediction"))
+```
+
+In the prediction stage, based on the fitted monotone piecewise function, the rules are:
+
+* If the prediction input exactly matches a training feature, the associated prediction is returned. If there are multiple predictions with the same feature, one of them is returned; which one is undefined.
+
+* If the prediction input is lower or higher than all training features, the prediction for the lowest or highest feature is returned, respectively. If there are multiple predictions with the same feature, the lowest or highest is returned, respectively.
+
+* If the prediction input falls between two training features, the prediction is treated as a piecewise linear function and an interpolated value is calculated from the predictions of the two closest features. If there are multiple values with the same feature, the same rules as in the previous point are used.
+
+For example, when the input is $3.2$, the two closest feature values are $3.0$ and $3.5$, so the predicted value is a linear interpolation between the predicted values at $3.0$ and $3.5$.
+
+```{r}
+newDF <- createDataFrame(data.frame(x = c(1.5, 3.2)))
+head(predict(isoregModel, newDF))
+```
+
+#### Decision Tree
+
+`spark.decisionTree` fits a [decision tree](https://en.wikipedia.org/wiki/Decision_tree_learning) classification or regression model on a `SparkDataFrame`.
+Users can call `summary` to get a summary of the fitted model, `predict` to make predictions, and `write.ml`/`read.ml` to save/load fitted models.
+
+We use the `Titanic` dataset to train a decision tree and make predictions:
+
+```{r}
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+dtModel <- spark.decisionTree(df, Survived ~ ., type = "classification", maxDepth = 2)
+summary(dtModel)
+predictions <- predict(dtModel, df)
+head(select(predictions, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+```
+
+#### Gradient-Boosted Trees
+
+`spark.gbt` fits a [gradient-boosted tree](https://en.wikipedia.org/wiki/Gradient_boosting) classification or regression model on a `SparkDataFrame`.
+Users can call `summary` to get a summary of the fitted model, `predict` to make predictions, and `write.ml`/`read.ml` to save/load fitted models.
+
+We use the `Titanic` dataset to train a gradient-boosted tree and make predictions:
+
+```{r}
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+gbtModel <- spark.gbt(df, Survived ~ ., type = "classification", maxDepth = 2, maxIter = 2)
+summary(gbtModel)
+predictions <- predict(gbtModel, df)
+head(select(predictions, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+```
+
+#### Random Forest
+
+`spark.randomForest` fits a [random forest](https://en.wikipedia.org/wiki/Random_forest) classification or regression model on a `SparkDataFrame`.
+Users can call `summary` to get a summary of the fitted model, `predict` to make predictions, and `write.ml`/`read.ml` to save/load fitted models.
+
+In the following example, we use the `Titanic` dataset to train a random forest and make predictions:
+
+```{r}
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+rfModel <- spark.randomForest(df, Survived ~ ., type = "classification", maxDepth = 2, numTrees = 2)
+summary(rfModel)
+predictions <- predict(rfModel, df)
+head(select(predictions, "Class", "Sex", "Age", "Freq", "Survived", "prediction"))
+```
+
+#### Bisecting k-Means
+
+`spark.bisectingKmeans` is a kind of [hierarchical clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) using a divisive (or "top-down") approach: all observations start in one cluster, and splits are performed recursively as one moves down the hierarchy.
+
+```{r}
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+model <- spark.bisectingKmeans(training, Class ~ Survived, k = 4)
+summary(model)
+fitted <- predict(model, training)
+head(select(fitted, "Class", "prediction"))
+```
+
+#### Gaussian Mixture Model
+
+`spark.gaussianMixture` fits a multivariate [Gaussian Mixture Model](https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model) (GMM) against a `SparkDataFrame`. [Expectation-Maximization](https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) (EM) is used to approximate the maximum likelihood estimator (MLE) of the model.
+
+We use a simulated example to demonstrate the usage.
+```{r}
+X1 <- data.frame(V1 = rnorm(4), V2 = rnorm(4))
+X2 <- data.frame(V1 = rnorm(6, 3), V2 = rnorm(6, 4))
+data <- rbind(X1, X2)
+df <- createDataFrame(data)
+gmmModel <- spark.gaussianMixture(df, ~ V1 + V2, k = 2)
+summary(gmmModel)
+gmmFitted <- predict(gmmModel, df)
+head(select(gmmFitted, "V1", "V2", "prediction"))
+```
+
+#### k-Means Clustering
+
+`spark.kmeans` fits a $k$-means clustering model against a `SparkDataFrame`. Since it is an unsupervised learning method, no response variable is needed. Hence, the left-hand side of the R formula should be left blank. The clustering is based only on the variables on the right-hand side.
+
+```{r}
+kmeansModel <- spark.kmeans(carsDF, ~ mpg + hp + wt, k = 3)
+summary(kmeansModel)
+kmeansPredictions <- predict(kmeansModel, carsDF)
+head(select(kmeansPredictions, "model", "mpg", "hp", "wt", "prediction"), n = 20L)
+```
+
+#### Latent Dirichlet Allocation
+
+`spark.lda` fits a [Latent Dirichlet Allocation](https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation) model on a `SparkDataFrame`. It is often used in topic modeling in which topics are inferred from a collection of text documents. LDA can be thought of as a clustering algorithm as follows:
+
+* Topics correspond to cluster centers, and documents correspond to examples (rows) in a dataset.
+
+* Topics and documents both exist in a feature space, where feature vectors are vectors of word counts (bag of words).
+
+* Rather than clustering using a traditional distance, LDA uses a function based on a statistical model of how text documents are generated.
+
+To use LDA, we need to specify a `features` column in `data` where each entry represents a document. There are two options for the column:
+
+* character string: This can be a string of the whole document. It will be parsed automatically. Additional stop words can be added in `customizedStopWords`.
+
+* libSVM: Each entry is a collection of words and will be processed directly.
+
+Two more functions are provided for the fitted model.
+
+* `spark.posterior` returns a `SparkDataFrame` containing a column of posterior probability vectors named `"topicDistribution"`.
+
+* `spark.perplexity` returns the log perplexity of the given `SparkDataFrame`, or the log perplexity of the training data if the `data` argument is missing.
+
+For more information, see the help document `?spark.lda`.
+
+Let's look at an artificial example.
+```{r}
+corpus <- data.frame(features = c(
+  "1 2 6 0 2 3 1 1 0 0 3",
+  "1 3 0 1 3 0 0 2 0 0 1",
+  "1 4 1 0 0 4 9 0 1 2 0",
+  "2 1 0 3 0 0 5 0 2 3 9",
+  "3 1 1 9 3 0 2 0 0 1 3",
+  "4 2 0 3 4 5 1 1 1 4 0",
+  "2 1 0 3 0 0 5 0 2 2 9",
+  "1 1 1 9 2 1 2 0 0 1 3",
+  "4 4 0 3 4 2 1 3 0 0 0",
+  "2 8 2 0 3 0 2 0 2 7 2",
+  "1 1 1 9 0 2 2 0 0 3 3",
+  "4 1 0 0 4 5 1 3 0 1 0"))
+corpusDF <- createDataFrame(corpus)
+model <- spark.lda(data = corpusDF, k = 5, optimizer = "em")
+summary(model)
+```
+
+```{r}
+posterior <- spark.posterior(model, corpusDF)
+head(posterior)
+```
+
+```{r}
+perplexity <- spark.perplexity(model, corpusDF)
+perplexity
+```
+
+#### Alternating Least Squares
+
+`spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](https://dl.acm.org/citation.cfm?id=1608614).
+
+There are multiple options that can be configured in `spark.als`, including `rank`, `reg`, and `nonnegative`. For a complete list, refer to the help file.
+
+```{r, eval=FALSE}
+ratings <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0), list(1, 2, 4.0),
+                list(2, 1, 1.0), list(2, 2, 5.0))
+df <- createDataFrame(ratings, c("user", "item", "rating"))
+model <- spark.als(df, "rating", "user", "item", rank = 10, reg = 0.1, nonnegative = TRUE)
+```
+
+Extract latent factors.
+```{r, eval=FALSE}
+stats <- summary(model)
+userFactors <- stats$userFactors
+itemFactors <- stats$itemFactors
+head(userFactors)
+head(itemFactors)
+```
+
+Make predictions.
+
+```{r, eval=FALSE}
+predicted <- predict(model, df)
+head(predicted)
+```
+
+#### FP-growth
+
+`spark.fpGrowth` executes the FP-growth algorithm to mine frequent itemsets on a `SparkDataFrame`. The `itemsCol` column should contain arrays of values.
+
+```{r}
+df <- selectExpr(createDataFrame(data.frame(rawItems = c(
+  "T,R,U", "T,S", "V,R", "R,U,T,V", "R,S", "V,S,U", "U,R", "S,T", "V,R", "V,U,S",
+  "T,V,U", "R,V", "T,S", "T,S", "S,T", "S,U", "T,R", "V,R", "S,V", "T,S,U"
+))), "split(rawItems, ',') AS items")
+
+fpm <- spark.fpGrowth(df, minSupport = 0.2, minConfidence = 0.5)
+```
+
+The `spark.freqItemsets` method can be used to retrieve a `SparkDataFrame` with the frequent itemsets.
+
+```{r}
+head(spark.freqItemsets(fpm))
+```
+
+`spark.associationRules` returns a `SparkDataFrame` with the association rules.
+
+```{r}
+head(spark.associationRules(fpm))
+```
+
+We can make predictions based on the `antecedent`.
+
+```{r}
+head(predict(fpm, df))
+```
+
+#### Kolmogorov-Smirnov Test
+
+`spark.kstest` runs a two-sided, one-sample [Kolmogorov-Smirnov (KS) test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test).
+Given a `SparkDataFrame`, the test compares continuous data in a given column `testCol` with the theoretical distribution
+specified by parameter `nullHypothesis`.
+Users can call `summary` to get a summary of the test results.
+
+In the following example, we test whether the `Titanic` dataset's `Freq` column
+follows a normal distribution.  We set the parameters of the normal distribution using
+the mean and standard deviation of the sample.
+
+```{r}
+t <- as.data.frame(Titanic)
+df <- createDataFrame(t)
+freqStats <- head(select(df, mean(df$Freq), sd(df$Freq)))
+freqMean <- freqStats[1]
+freqStd <- freqStats[2]
+
+test <- spark.kstest(df, "Freq", "norm", c(freqMean, freqStd))
+testSummary <- summary(test)
+testSummary
+```
+
+
+### Model Persistence
+The following example shows how to save/load an ML model in SparkR.
+```{r}
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+gaussianGLM <- spark.glm(training, Freq ~ Sex + Age, family = "gaussian")
+
+# Save and then load a fitted MLlib model
+modelPath <- tempfile(pattern = "ml", fileext = ".tmp")
+write.ml(gaussianGLM, modelPath)
+gaussianGLM2 <- read.ml(modelPath)
+
+# Check model summary
+summary(gaussianGLM2)
+
+# Check model prediction
+gaussianPredictions <- predict(gaussianGLM2, training)
+head(gaussianPredictions)
+
+unlink(modelPath)
+```
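+
+By default `write.ml` will not overwrite an existing path. A minimal sketch (reusing `gaussianGLM` and `modelPath` from the example above) of saving over a previously written model:
+
+```{r, eval=FALSE}
+# Replace the model previously saved at modelPath.
+write.ml(gaussianGLM, modelPath, overwrite = TRUE)
+```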
+
+
+## Structured Streaming
+
+SparkR supports the Structured Streaming API.
+
+You can check the Structured Streaming Programming Guide for [an introduction](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#programming-model) to its programming model and basic concepts.
+
+### Simple Source and Sink
+
+Spark has a few built-in input sources. As an example, to test with a socket source reading text into words and displaying the computed word counts:
+
+```{r, eval=FALSE}
+# Create DataFrame representing the stream of input lines from connection
+lines <- read.stream("socket", host = hostname, port = port)
+
+# Split the lines into words
+words <- selectExpr(lines, "explode(split(value, ' ')) as word")
+
+# Generate running word count
+wordCounts <- count(groupBy(words, "word"))
+
+# Start running the query that prints the running counts to the console
+query <- write.stream(wordCounts, "console", outputMode = "complete")
+```
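+
+The returned `query` is a `StreamingQuery` object. A brief sketch of managing it (assuming the query started above):
+
+```{r, eval=FALSE}
+isActive(query)                  # is the query still running?
+awaitTermination(query, 10000)   # block for up to 10 seconds (timeout in ms)
+stopQuery(query)                 # stop the streaming query
+```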
+
+### Kafka Source
+
+It is simple to read data from Kafka. For more information, see [Input Sources](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#input-sources) supported by Structured Streaming.
+
+```{r, eval=FALSE}
+topic <- read.stream("kafka",
+                     kafka.bootstrap.servers = "host1:port1,host2:port2",
+                     subscribe = "topic1")
+keyvalue <- selectExpr(topic, "CAST(key AS STRING)", "CAST(value AS STRING)")
+```
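+
+A hedged continuation, starting a console query on the `keyvalue` stream defined above:
+
+```{r, eval=FALSE}
+# "append" output mode writes each new Kafka record to the console once.
+query <- write.stream(keyvalue, "console", outputMode = "append")
+```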
+
+### Operations and Sinks
+
+Most of the common operations on `SparkDataFrame` are supported for streaming, including selection, projection, and aggregation. Once you have defined the final result, to start the streaming computation, you will call the `write.stream` method setting a sink and `outputMode`.
+
+For debugging, a streaming `SparkDataFrame` can be written to the console or to a temporary in-memory table; for further processing, it can be written in a fault-tolerant manner to a file sink in different formats.
+
+```{r, eval=FALSE}
+noAggDF <- select(where(deviceDataStreamingDf, "signal > 10"), "device")
+
+# Print new data to console
+write.stream(noAggDF, "console")
+
+# Write new data to Parquet files
+write.stream(noAggDF,
+             "parquet",
+             path = "path/to/destination/dir",
+             checkpointLocation = "path/to/checkpoint/dir")
+
+# Aggregate
+aggDF <- count(groupBy(noAggDF, "device"))
+
+# Print updated aggregations to console
+write.stream(aggDF, "console", outputMode = "complete")
+
+# Keep all the aggregates in an in-memory table; the query name will be the table name
+write.stream(aggDF, "memory", queryName = "aggregates", outputMode = "complete")
+
+head(sql("select * from aggregates"))
+```
+
+
+## Advanced Topics
+
+### SparkR Object Classes
+
+There are three main object classes in SparkR you may be working with; a short example after this list shows how they typically arise.
+
+* `SparkDataFrame`: the central component of SparkR. It is an S4 class representing a distributed collection of data organized into named columns, conceptually equivalent to a table in a relational database or a data frame in R. It has two slots, `sdf` and `env`.
+    + `sdf` stores a reference to the corresponding Spark Dataset in the Spark JVM backend.
+    + `env` saves the meta-information of the object such as `isCached`.
+
+    It can be created by data import methods or by transforming an existing `SparkDataFrame`. We can manipulate a `SparkDataFrame` with numerous data processing functions and feed the result into machine learning algorithms.
+
+* `Column`: an S4 class representing a column of `SparkDataFrame`. The slot `jc` saves a reference to the corresponding `Column` object in the Spark JVM backend.
+
+    It can be obtained from a `SparkDataFrame` with the `$` operator, e.g., `df$col`. More often, it is used together with other functions: for example, with `select` to select particular columns, with `filter` and constructed conditions to select rows, and with aggregation functions to compute aggregate statistics for each group.
+
+* `GroupedData`: an S4 class representing grouped data created by `groupBy` or by transforming other `GroupedData`. Its `sgd` slot saves a reference to a `RelationalGroupedDataset` object in the backend.
+
+    This is often an intermediate object that carries group information and is followed by aggregation operations.
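+
+A minimal sketch (assuming an active Spark session) showing how the three classes typically arise:
+
+```{r, eval=FALSE}
+df <- createDataFrame(mtcars)                 # SparkDataFrame
+class(df)
+col <- df$mpg                                 # Column, extracted with `$`
+class(col)
+gd <- groupBy(df, df$cyl)                     # GroupedData
+class(gd)
+head(summarize(gd, avg_mpg = avg(df$mpg)))    # aggregation consumes the GroupedData
+```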
+
+### Architecture
+
+A complete description of the architecture can be found in the references, in particular the paper *SparkR: Scaling R Programs with Spark*.
+
+Under the hood, SparkR relies on the Spark SQL engine. This avoids the overhead of running interpreted R code, and the optimized SQL execution engine in Spark uses structural information about the data and the computation flow to apply a range of optimizations that speed up the computation.
+
+The main method calls that perform the actual computation happen in the Spark JVM of the driver. A socket-based SparkR API allows functions on the JVM to be invoked from R, through a SparkR JVM backend that listens on a Netty-based socket server.
+
+Two kinds of RPCs are supported in the SparkR JVM backend: method invocation and creating new objects. Method invocation can be done in two ways.
+
+* `sparkR.callJMethod` takes a reference to an existing Java object and a list of arguments to be passed on to the method.
+
+* `sparkR.callJStatic` takes a class name and a static method name, together with a list of arguments to be passed on to the method.
+
+The arguments are serialized using a custom wire format and deserialized on the JVM side, where Java reflection is used to invoke the appropriate method.
+
+To create objects, `sparkR.newJObject` is used; it similarly invokes the appropriate constructor with the provided arguments.
+
+Finally, we use a new R class `jobj` that refers to a Java object existing in the backend. These references are tracked on the Java side and are automatically garbage collected when they go out of scope on the R side.
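+
+A hedged sketch of these low-level calls (they require an active Spark session so that the JVM backend is running):
+
+```{r, eval=FALSE}
+jlist <- sparkR.newJObject("java.util.ArrayList")    # create a JVM object, returns a jobj
+sparkR.callJMethod(jlist, "add", "hello")            # invoke a method on the jobj
+sparkR.callJMethod(jlist, "size")                    # returns 1
+sparkR.callJStatic("java.lang.Math", "max", 1L, 2L)  # invoke a static method
+```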
+
+## Appendix
+
+### R and Spark Data Types {#DataTypes}
+
+R | Spark
+----------- | -------------
+byte | byte
+integer | integer
+float | float
+double | double
+numeric | double
+character | string
+string | string
+binary | binary
+raw | binary
+logical | boolean
+POSIXct | timestamp
+POSIXlt | timestamp
+Date | date
+array | array
+list | array
+env | map
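+
+A small sketch illustrating the mapping: POSIXct, logical, and numeric columns of a local data frame become timestamp, boolean, and double columns in Spark.
+
+```{r, eval=FALSE}
+rdf <- data.frame(when = Sys.time(), flag = TRUE, value = 1.5)
+sdf <- createDataFrame(rdf)
+printSchema(sdf)
+# expected to print something like:
+# root
+#  |-- when: timestamp (nullable = true)
+#  |-- flag: boolean (nullable = true)
+#  |-- value: double (nullable = true)
+```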
+
+## References
+
+* [Spark Cluster Mode Overview](https://spark.apache.org/docs/latest/cluster-overview.html)
+
+* [Submitting Spark Applications](https://spark.apache.org/docs/latest/submitting-applications.html)
+
+* [Machine Learning Library Guide (MLlib)](https://spark.apache.org/docs/latest/ml-guide.html)
+
+* [SparkR: Scaling R Programs with Spark](https://people.csail.mit.edu/matei/papers/2016/sigmod_sparkr.pdf), Shivaram Venkataraman, Zongheng Yang, Davies Liu, Eric Liang, Hossein Falaki, Xiangrui Meng, Reynold Xin, Ali Ghodsi, Michael Franklin, Ion Stoica, and Matei Zaharia. SIGMOD 2016. June 2016.
+
+```{r, echo=FALSE}
+sparkR.session.stop()
+```
+
+```{r cleanup, include=FALSE}
+SparkR:::uninstallDownloadedSpark()
+```
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.html b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.html
new file mode 100644
index 0000000000000000000000000000000000000000..9c82f1d5a053ad487fd89c6da08f40265d80e1c0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/doc/sparkr-vignettes.html
@@ -0,0 +1,1913 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8" />
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+
+
+<title>SparkR - Practical Guide</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+  margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0Aborder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apaddin
g%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%23800080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0Aa%5Bhref%5E%3D%22https%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0A%0Acode%20%3E%20span%2Ekw%20%7B%20color%3A%20%23555%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Edt%20%7B%20color%3A%20%23902000%3B%20%7D%20%0Acode%20%3E%20span%2Edv%20%7B%20color%3A%20%2340a070%3B%20%7D%20%0Acode%20%3E%20span%2Ebn%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Efl%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Ech%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Est%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Eco%20%7B%20color%3A%20%23888888%3B%20font%2Dstyle%3A%20italic%3B%20%7D%20%0Acode%20%3E%20span%2Eot%20%7B%20color%3A%20%23007020%3B%20%7D%20%0Acode%20%3E%20span%2Eal%20%7B%20color%3A%20%23ff0000%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Efu%20%7B%20color%3A%20%23900%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%20code%20%3E%20span%2Eer%20%7B%20color%3A%20%23a61717%3B%20background%2Dcolor%3A%20%23e3d2d2%3B%20%7D%20%0A" rel="stylesheet" type="text/css" />
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">SparkR - Practical Guide</h1>
+
+
+<div id="TOC">
+<ul>
+<li><a href="#overview">Overview</a></li>
+<li><a href="#getting-started">Getting Started</a></li>
+<li><a href="#setup">Setup</a><ul>
+<li><a href="#installation">Installation</a></li>
+<li><a href="#SetupSparkSession">Spark Session</a><ul>
+<li><a href="#cluster-mode">Cluster Mode</a></li>
+</ul></li>
+</ul></li>
+<li><a href="#data-import">Data Import</a><ul>
+<li><a href="#local-data-frame">Local Data Frame</a></li>
+<li><a href="#data-sources">Data Sources</a></li>
+<li><a href="#hive-tables">Hive Tables</a></li>
+</ul></li>
+<li><a href="#data-processing">Data Processing</a><ul>
+<li><a href="#sparkdataframe-operations">SparkDataFrame Operations</a><ul>
+<li><a href="#selecting-rows-columns">Selecting rows, columns</a></li>
+<li><a href="#grouping-aggregation">Grouping, Aggregation</a></li>
+<li><a href="#operating-on-columns">Operating on Columns</a></li>
+</ul></li>
+<li><a href="#window-functions">Window Functions</a></li>
+<li><a href="#user-defined-function">User-Defined Function</a><ul>
+<li><a href="#apply-by-partition">Apply by Partition</a></li>
+<li><a href="#apply-by-group">Apply by Group</a></li>
+<li><a href="#distribute-local-functions">Distribute Local Functions</a></li>
+</ul></li>
+<li><a href="#sql-queries">SQL Queries</a></li>
+</ul></li>
+<li><a href="#machine-learning">Machine Learning</a><ul>
+<li><a href="#classification">Classification</a></li>
+<li><a href="#regression">Regression</a></li>
+<li><a href="#tree---classification-and-regression">Tree - Classification and Regression</a></li>
+<li><a href="#clustering">Clustering</a></li>
+<li><a href="#collaborative-filtering">Collaborative Filtering</a></li>
+<li><a href="#frequent-pattern-mining">Frequent Pattern Mining</a></li>
+<li><a href="#statistics">Statistics</a></li>
+<li><a href="#r-formula">R Formula</a></li>
+<li><a href="#training-and-test-sets">Training and Test Sets</a></li>
+<li><a href="#models-and-algorithms">Models and Algorithms</a><ul>
+<li><a href="#linear-support-vector-machine-svm-classifier">Linear Support Vector Machine (SVM) Classifier</a></li>
+<li><a href="#logistic-regression">Logistic Regression</a></li>
+<li><a href="#multilayer-perceptron">Multilayer Perceptron</a></li>
+<li><a href="#naive-bayes">Naive Bayes</a></li>
+<li><a href="#accelerated-failure-time-survival-model">Accelerated Failure Time Survival Model</a></li>
+<li><a href="#generalized-linear-model">Generalized Linear Model</a></li>
+<li><a href="#isotonic-regression">Isotonic Regression</a></li>
+<li><a href="#decision-tree">Decision Tree</a></li>
+<li><a href="#gradient-boosted-trees">Gradient-Boosted Trees</a></li>
+<li><a href="#random-forest">Random Forest</a></li>
+<li><a href="#bisecting-k-means">Bisecting k-Means</a></li>
+<li><a href="#gaussian-mixture-model">Gaussian Mixture Model</a></li>
+<li><a href="#k-means-clustering">k-Means Clustering</a></li>
+<li><a href="#latent-dirichlet-allocation">Latent Dirichlet Allocation</a></li>
+<li><a href="#alternating-least-squares">Alternating Least Squares</a></li>
+<li><a href="#fp-growth">FP-growth</a></li>
+<li><a href="#kolmogorov-smirnov-test">Kolmogorov-Smirnov Test</a></li>
+</ul></li>
+<li><a href="#model-persistence">Model Persistence</a></li>
+</ul></li>
+<li><a href="#structured-streaming">Structured Streaming</a><ul>
+<li><a href="#simple-source-and-sink">Simple Source and Sink</a></li>
+<li><a href="#kafka-source">Kafka Source</a></li>
+<li><a href="#operations-and-sinks">Operations and Sinks</a></li>
+</ul></li>
+<li><a href="#advanced-topics">Advanced Topics</a><ul>
+<li><a href="#sparkr-object-classes">SparkR Object Classes</a></li>
+<li><a href="#architecture">Architecture</a></li>
+</ul></li>
+<li><a href="#appendix">Appendix</a><ul>
+<li><a href="#DataTypes">R and Spark Data Types</a></li>
+</ul></li>
+<li><a href="#references">References</a></li>
+</ul>
+</div>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<div id="overview" class="section level2">
+<h2>Overview</h2>
+<p>SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark 2.4.0, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using <a href="https://spark.apache.org/mllib/">MLlib</a>.</p>
+</div>
+<div id="getting-started" class="section level2">
+<h2>Getting Started</h2>
+<p>We begin with an example running on the local machine and provide an overview of the use of SparkR: data ingestion, data processing and machine learning.</p>
+<p>First, let’s load and attach the package.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(SparkR)</code></pre></div>
+<p><code>SparkSession</code> is the entry point into SparkR which connects your R program to a Spark cluster. You can create a <code>SparkSession</code> using <code>sparkR.session</code> and pass in options such as the application name, any Spark packages depended on, etc.</p>
+<p>We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see <a href="#SetupSparkSession">Spark Session</a>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sparkR.session</span>()</code></pre></div>
+<p>The operations in SparkR are centered around an R class called <code>SparkDataFrame</code>. It is a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R, but with richer optimizations under the hood.</p>
+<p><code>SparkDataFrame</code> can be constructed from a wide array of sources such as: structured data files, tables in Hive, external databases, or existing local R data frames. For example, we create a <code>SparkDataFrame</code> from a local R data frame,</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cars &lt;-<span class="st"> </span><span class="kw">cbind</span>(<span class="dt">model =</span> <span class="kw">rownames</span>(mtcars), mtcars)
+carsDF &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(cars)</code></pre></div>
+<p>We can view the first few rows of the <code>SparkDataFrame</code> by <code>head</code> or <code>showDF</code> function.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(carsDF)</code></pre></div>
+<pre><code>##               model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
+## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
+## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
+## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
+## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
+## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
+## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1</code></pre>
+<p>Common data processing operations such as <code>filter</code> and <code>select</code> are supported on the <code>SparkDataFrame</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">carsSubDF &lt;-<span class="st"> </span><span class="kw">select</span>(carsDF, <span class="st">&quot;model&quot;</span>, <span class="st">&quot;mpg&quot;</span>, <span class="st">&quot;hp&quot;</span>)
+carsSubDF &lt;-<span class="st"> </span><span class="kw">filter</span>(carsSubDF, carsSubDF$hp &gt;=<span class="st"> </span><span class="dv">200</span>)
+<span class="kw">head</span>(carsSubDF)</code></pre></div>
+<pre><code>##                 model  mpg  hp
+## 1          Duster 360 14.3 245
+## 2  Cadillac Fleetwood 10.4 205
+## 3 Lincoln Continental 10.4 215
+## 4   Chrysler Imperial 14.7 230
+## 5          Camaro Z28 13.3 245
+## 6      Ford Pantera L 15.8 264</code></pre>
+<p>SparkR can use many common aggregation functions after grouping.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">carsGPDF &lt;-<span class="st"> </span><span class="kw">summarize</span>(<span class="kw">groupBy</span>(carsDF, carsDF$gear), <span class="dt">count =</span> <span class="kw">n</span>(carsDF$gear))
+<span class="kw">head</span>(carsGPDF)</code></pre></div>
+<pre><code>##   gear count
+## 1    4    12
+## 2    3    15
+## 3    5     5</code></pre>
+<p>The results <code>carsDF</code> and <code>carsSubDF</code> are <code>SparkDataFrame</code> objects. To convert back to R <code>data.frame</code>, we can use <code>collect</code>. <strong>Caution</strong>: This can cause your interactive environment to run out of memory, though, because <code>collect()</code> fetches the entire distributed <code>DataFrame</code> to your client, which is acting as a Spark driver.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">carsGP &lt;-<span class="st"> </span><span class="kw">collect</span>(carsGPDF)
+<span class="kw">class</span>(carsGP)</code></pre></div>
+<pre><code>## [1] &quot;data.frame&quot;</code></pre>
+<p>SparkR supports a number of commonly used machine learning algorithms. Under the hood, SparkR uses MLlib to train the model. Users can call <code>summary</code> to print a summary of the fitted model, <code>predict</code> to make predictions on new data, and <code>write.ml</code>/<code>read.ml</code> to save/load fitted models.</p>
+<p>SparkR supports a subset of R formula operators for model fitting, including ‘~’, ‘.’, ‘:’, ‘+’, and ‘-‘. We use linear regression as an example.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">model &lt;-<span class="st"> </span><span class="kw">spark.glm</span>(carsDF, mpg ~<span class="st"> </span>wt +<span class="st"> </span>cyl)</code></pre></div>
+<p>The result matches that returned by R <code>glm</code> function applied to the corresponding <code>data.frame</code> <code>mtcars</code> of <code>carsDF</code>. In fact, for Generalized Linear Model, we specifically expose <code>glm</code> for <code>SparkDataFrame</code> as well so that the above is equivalent to <code>model &lt;- glm(mpg ~ wt + cyl, data = carsDF)</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">summary</span>(model)</code></pre></div>
+<pre><code>## 
+## Deviance Residuals: 
+## (Note: These are approximate quantiles with relative error &lt;= 0.01)
+##     Min       1Q   Median       3Q      Max  
+## -4.2893  -1.7085  -0.4713   1.5729   6.1004  
+## 
+## Coefficients:
+##              Estimate  Std. Error  t value    Pr(&gt;|t|)
+## (Intercept)   39.6863     1.71498  23.1409  0.00000000
+## wt            -3.1910     0.75691  -4.2158  0.00022202
+## cyl           -1.5078     0.41469  -3.6360  0.00106428
+## 
+## (Dispersion parameter for gaussian family taken to be 6.592137)
+## 
+##     Null deviance: 1126.05  on 31  degrees of freedom
+## Residual deviance:  191.17  on 29  degrees of freedom
+## AIC: 156
+## 
+## Number of Fisher Scoring iterations: 1</code></pre>
+<p>The model can be saved by <code>write.ml</code> and loaded back using <code>read.ml</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">write.ml</span>(model, <span class="dt">path =</span> <span class="st">&quot;/HOME/tmp/mlModel/glmModel&quot;</span>)</code></pre></div>
+<p>In the end, we can stop Spark Session by running</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sparkR.session.stop</span>()</code></pre></div>
+</div>
+<div id="setup" class="section level2">
+<h2>Setup</h2>
+<div id="installation" class="section level3">
+<h3>Installation</h3>
+<p>Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs.</p>
+<p>After installing the SparkR package, you can call <code>sparkR.session</code> as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function <code>install.spark</code> for running this manually. If you don’t have Spark installed on the computer, you may download it from <a href="https://spark.apache.org/downloads.html">Apache Spark Website</a>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">install.spark</span>()</code></pre></div>
+<p>If you already have Spark installed, you don’t have to install again and can pass the <code>sparkHome</code> argument to <code>sparkR.session</code> to let SparkR know where the existing Spark installation is.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sparkR.session</span>(<span class="dt">sparkHome =</span> <span class="st">&quot;/HOME/spark&quot;</span>)</code></pre></div>
+</div>
+<div id="SetupSparkSession" class="section level3">
+<h3>Spark Session</h3>
+<p>In addition to <code>sparkHome</code>, many other options can be specified in <code>sparkR.session</code>. For a complete list, see <a href="https://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession">Starting up: SparkSession</a> and <a href="https://spark.apache.org/docs/latest/api/R/sparkR.session.html">SparkR API doc</a>.</p>
+<p>In particular, the following Spark driver properties can be set in <code>sparkConfig</code>.</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">Property Name</th>
+<th align="left">Property group</th>
+<th align="left">spark-submit equivalent</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><code>spark.driver.memory</code></td>
+<td align="left">Application Properties</td>
+<td align="left"><code>--driver-memory</code></td>
+</tr>
+<tr class="even">
+<td align="left"><code>spark.driver.extraClassPath</code></td>
+<td align="left">Runtime Environment</td>
+<td align="left"><code>--driver-class-path</code></td>
+</tr>
+<tr class="odd">
+<td align="left"><code>spark.driver.extraJavaOptions</code></td>
+<td align="left">Runtime Environment</td>
+<td align="left"><code>--driver-java-options</code></td>
+</tr>
+<tr class="even">
+<td align="left"><code>spark.driver.extraLibraryPath</code></td>
+<td align="left">Runtime Environment</td>
+<td align="left"><code>--driver-library-path</code></td>
+</tr>
+<tr class="odd">
+<td align="left"><code>spark.yarn.keytab</code></td>
+<td align="left">Application Properties</td>
+<td align="left"><code>--keytab</code></td>
+</tr>
+<tr class="even">
+<td align="left"><code>spark.yarn.principal</code></td>
+<td align="left">Application Properties</td>
+<td align="left"><code>--principal</code></td>
+</tr>
+</tbody>
+</table>
+<p><strong>For Windows users</strong>: Due to different file prefixes across operating systems, to avoid the issue of potential wrong prefix, a current workaround is to specify <code>spark.sql.warehouse.dir</code> when starting the <code>SparkSession</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">spark_warehouse_path &lt;-<span class="st"> </span><span class="kw">file.path</span>(<span class="kw">path.expand</span>(<span class="st">'~'</span>), <span class="st">&quot;spark-warehouse&quot;</span>)
+<span class="kw">sparkR.session</span>(<span class="dt">spark.sql.warehouse.dir =</span> spark_warehouse_path)</code></pre></div>
+<div id="cluster-mode" class="section level4">
+<h4>Cluster Mode</h4>
+<p>SparkR can connect to remote Spark clusters. <a href="https://spark.apache.org/docs/latest/cluster-overview.html">Cluster Mode Overview</a> is a good introduction to different Spark cluster modes.</p>
+<p>When connecting SparkR to a remote Spark cluster, make sure that the Spark version and Hadoop version on the machine match the corresponding versions on the cluster. Current SparkR package is compatible with</p>
+<pre><code>## [1] &quot;Spark 2.4.0&quot;</code></pre>
+<p>It should be used both on the local computer and on the remote cluster.</p>
+<p>To connect, pass the URL of the master node to <code>sparkR.session</code>. A complete list can be seen in <a href="https://spark.apache.org/docs/latest/submitting-applications.html#master-urls">Spark Master URLs</a>. For example, to connect to a local standalone Spark master, we can call</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sparkR.session</span>(<span class="dt">master =</span> <span class="st">&quot;spark://local:7077&quot;</span>)</code></pre></div>
+<p>For YARN cluster, SparkR supports the client mode with the master set as “yarn”.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sparkR.session</span>(<span class="dt">master =</span> <span class="st">&quot;yarn&quot;</span>)</code></pre></div>
+<p>Yarn cluster mode is not supported in the current version.</p>
+</div>
+</div>
+</div>
+<div id="data-import" class="section level2">
+<h2>Data Import</h2>
+<div id="local-data-frame" class="section level3">
+<h3>Local Data Frame</h3>
+<p>The simplest way is to convert a local R data frame into a <code>SparkDataFrame</code>. Specifically, we can use <code>as.DataFrame</code> or <code>createDataFrame</code> and pass in the local R data frame to create a <code>SparkDataFrame</code>. As an example, the following creates a <code>SparkDataFrame</code> based on the <code>faithful</code> dataset from R.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df &lt;-<span class="st"> </span><span class="kw">as.DataFrame</span>(faithful)
+<span class="kw">head</span>(df)</code></pre></div>
+<pre><code>##   eruptions waiting
+## 1     3.600      79
+## 2     1.800      54
+## 3     3.333      74
+## 4     2.283      62
+## 5     4.533      85
+## 6     2.883      55</code></pre>
+</div>
+<div id="data-sources" class="section level3">
+<h3>Data Sources</h3>
+<p>SparkR supports operating on a variety of data sources through the <code>SparkDataFrame</code> interface. You can check the Spark SQL Programming Guide for more <a href="https://spark.apache.org/docs/latest/sql-programming-guide.html#manually-specifying-options">specific options</a> that are available for the built-in data sources.</p>
+<p>The general method for creating <code>SparkDataFrame</code> from data sources is <code>read.df</code>. This method takes in the path for the file to load and the type of data source, and the currently active Spark Session will be used automatically. SparkR supports reading CSV, JSON and Parquet files natively and through Spark Packages you can find data source connectors for popular file formats like Avro. These packages can be added with <code>sparkPackages</code> parameter when initializing SparkSession using <code>sparkR.session</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sparkR.session</span>(<span class="dt">sparkPackages =</span> <span class="st">&quot;com.databricks:spark-avro_2.11:3.0.0&quot;</span>)</code></pre></div>
+<p>We can see how to use data sources using an example CSV input file. For more information please refer to SparkR <a href="https://spark.apache.org/docs/latest/api/R/read.df.html">read.df</a> API documentation.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df &lt;-<span class="st"> </span><span class="kw">read.df</span>(csvPath, <span class="st">&quot;csv&quot;</span>, <span class="dt">header =</span> <span class="st">&quot;true&quot;</span>, <span class="dt">inferSchema =</span> <span class="st">&quot;true&quot;</span>, <span class="dt">na.strings =</span> <span class="st">&quot;NA&quot;</span>)</code></pre></div>
+<p>The data sources API natively supports JSON formatted input files. Note that the file that is used here is not a typical JSON file. Each line in the file must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail.</p>
+<p>Let’s take a look at the first two lines of the raw JSON file used here.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">filePath &lt;-<span class="st"> </span><span class="kw">paste0</span>(<span class="kw">sparkR.conf</span>(<span class="st">&quot;spark.home&quot;</span>),
+                         <span class="st">&quot;/examples/src/main/resources/people.json&quot;</span>)
+<span class="kw">readLines</span>(filePath, <span class="dt">n =</span> 2L)</code></pre></div>
+<pre><code>## [1] &quot;{\&quot;name\&quot;:\&quot;Michael\&quot;}&quot;          &quot;{\&quot;name\&quot;:\&quot;Andy\&quot;, \&quot;age\&quot;:30}&quot;</code></pre>
+<p>We use <code>read.df</code> to read that into a <code>SparkDataFrame</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">people &lt;-<span class="st"> </span><span class="kw">read.df</span>(filePath, <span class="st">&quot;json&quot;</span>)
+<span class="kw">count</span>(people)</code></pre></div>
+<pre><code>## [1] 3</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(people)</code></pre></div>
+<pre><code>##   age    name
+## 1  NA Michael
+## 2  30    Andy
+## 3  19  Justin</code></pre>
+<p>SparkR automatically infers the schema from the JSON file.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">printSchema</span>(people)</code></pre></div>
+<pre><code>## root
+##  |-- age: long (nullable = true)
+##  |-- name: string (nullable = true)</code></pre>
+<p>If we want to read multiple JSON files, <code>read.json</code> can be used.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">people &lt;-<span class="st"> </span><span class="kw">read.json</span>(<span class="kw">paste0</span>(<span class="kw">Sys.getenv</span>(<span class="st">&quot;SPARK_HOME&quot;</span>),
+                           <span class="kw">c</span>(<span class="st">&quot;/examples/src/main/resources/people.json&quot;</span>,
+                             <span class="st">&quot;/examples/src/main/resources/people.json&quot;</span>)))
+<span class="kw">count</span>(people)</code></pre></div>
+<pre><code>## [1] 6</code></pre>
+<p>The data sources API can also be used to save out <code>SparkDataFrames</code> into multiple file formats. For example we can save the <code>SparkDataFrame</code> from the previous example to a Parquet file using <code>write.df</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">write.df</span>(people, <span class="dt">path =</span> <span class="st">&quot;people.parquet&quot;</span>, <span class="dt">source =</span> <span class="st">&quot;parquet&quot;</span>, <span class="dt">mode =</span> <span class="st">&quot;overwrite&quot;</span>)</code></pre></div>
+</div>
+<div id="hive-tables" class="section level3">
+<h3>Hive Tables</h3>
+<p>You can also create SparkDataFrames from Hive tables. To do this we will need to create a SparkSession with Hive support which can access tables in the Hive MetaStore. Note that Spark should have been built with Hive support and more details can be found in the <a href="https://spark.apache.org/docs/latest/sql-programming-guide.html">SQL Programming Guide</a>. In SparkR, by default it will attempt to create a SparkSession with Hive support enabled (<code>enableHiveSupport = TRUE</code>).</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sql</span>(<span class="st">&quot;CREATE TABLE IF NOT EXISTS src (key INT, value STRING)&quot;</span>)
+
+txtPath &lt;-<span class="st"> </span><span class="kw">paste0</span>(<span class="kw">sparkR.conf</span>(<span class="st">&quot;spark.home&quot;</span>), <span class="st">&quot;/examples/src/main/resources/kv1.txt&quot;</span>)
+sqlCMD &lt;-<span class="st"> </span><span class="kw">sprintf</span>(<span class="st">&quot;LOAD DATA LOCAL INPATH '%s' INTO TABLE src&quot;</span>, txtPath)
+<span class="kw">sql</span>(sqlCMD)
+
+results &lt;-<span class="st"> </span><span class="kw">sql</span>(<span class="st">&quot;FROM src SELECT key, value&quot;</span>)
+
+<span class="co"># results is now a SparkDataFrame</span>
+<span class="kw">head</span>(results)</code></pre></div>
+</div>
+</div>
+<div id="data-processing" class="section level2">
+<h2>Data Processing</h2>
+<p><strong>To dplyr users</strong>: SparkR has a similar interface to dplyr for data processing. However, some noticeable differences are worth mentioning up front. We use <code>df</code> to represent a <code>SparkDataFrame</code> and <code>col</code> to represent the name of a column here.</p>
+<ol style="list-style-type: decimal">
+<li><p>indicate columns. SparkR uses either a character string of the column name or a Column object constructed with <code>$</code> to indicate a column. For example, to select <code>col</code> in <code>df</code>, we can write <code>select(df, &quot;col&quot;)</code> or <code>select(df, df$col)</code>.</p></li>
+<li><p>describe conditions. In SparkR, the Column object representation can be inserted into the condition directly, or we can use a character string to describe the condition, without referring to the <code>SparkDataFrame</code> used. For example, to select rows with value &gt; 1, we can write <code>filter(df, df$col &gt; 1)</code> or <code>filter(df, &quot;col &gt; 1&quot;)</code>.</p></li>
+</ol>
+<p>Here are more concrete examples.</p>
+<table style="width:26%;">
+<colgroup>
+<col width="12%"></col>
+<col width="13%"></col>
+</colgroup>
+<thead>
+<tr class="header">
+<th align="left">dplyr</th>
+<th align="left">SparkR</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left"><code>select(mtcars, mpg, hp)</code></td>
+<td align="left"><code>select(carsDF, &quot;mpg&quot;, &quot;hp&quot;)</code></td>
+</tr>
+<tr class="even">
+<td align="left"><code>filter(mtcars, mpg &gt; 20, hp &gt; 100)</code></td>
+<td align="left"><code>filter(carsDF, carsDF$mpg &gt; 20, carsDF$hp &gt; 100)</code></td>
+</tr>
+</tbody>
+</table>
+<p>Other differences will be mentioned in the specific methods.</p>
+<p>We use the <code>SparkDataFrame</code> <code>carsDF</code> created above. We can get basic information about the <code>SparkDataFrame</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">carsDF</code></pre></div>
+<pre><code>## SparkDataFrame[model:string, mpg:double, cyl:double, disp:double, hp:double, drat:double, wt:double, qsec:double, vs:double, am:double, gear:double, carb:double]</code></pre>
+<p>Print out the schema in tree format.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">printSchema</span>(carsDF)</code></pre></div>
+<pre><code>## root
+##  |-- model: string (nullable = true)
+##  |-- mpg: double (nullable = true)
+##  |-- cyl: double (nullable = true)
+##  |-- disp: double (nullable = true)
+##  |-- hp: double (nullable = true)
+##  |-- drat: double (nullable = true)
+##  |-- wt: double (nullable = true)
+##  |-- qsec: double (nullable = true)
+##  |-- vs: double (nullable = true)
+##  |-- am: double (nullable = true)
+##  |-- gear: double (nullable = true)
+##  |-- carb: double (nullable = true)</code></pre>
+<div id="sparkdataframe-operations" class="section level3">
+<h3>SparkDataFrame Operations</h3>
+<div id="selecting-rows-columns" class="section level4">
+<h4>Selecting rows, columns</h4>
+<p>SparkDataFrames support a number of functions to do structured data processing. Here we include some basic examples and a complete list can be found in the <a href="https://spark.apache.org/docs/latest/api/R/index.html">API</a> docs:</p>
+<p>You can also pass in column name as strings.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">select</span>(carsDF, <span class="st">&quot;mpg&quot;</span>))</code></pre></div>
+<pre><code>##    mpg
+## 1 21.0
+## 2 21.0
+## 3 22.8
+## 4 21.4
+## 5 18.7
+## 6 18.1</code></pre>
+<p>Filter the SparkDataFrame to only retain rows with mpg less than 20 miles/gallon.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">filter</span>(carsDF, carsDF$mpg &lt;<span class="st"> </span><span class="dv">20</span>))</code></pre></div>
+<pre><code>##               model  mpg cyl  disp  hp drat   wt  qsec vs am gear carb
+## 1 Hornet Sportabout 18.7   8 360.0 175 3.15 3.44 17.02  0  0    3    2
+## 2           Valiant 18.1   6 225.0 105 2.76 3.46 20.22  1  0    3    1
+## 3        Duster 360 14.3   8 360.0 245 3.21 3.57 15.84  0  0    3    4
+## 4          Merc 280 19.2   6 167.6 123 3.92 3.44 18.30  1  0    4    4
+## 5         Merc 280C 17.8   6 167.6 123 3.92 3.44 18.90  1  0    4    4
+## 6        Merc 450SE 16.4   8 275.8 180 3.07 4.07 17.40  0  0    3    3</code></pre>
+</div>
+<div id="grouping-aggregation" class="section level4">
+<h4>Grouping, Aggregation</h4>
+<p>A common flow of grouping and aggregation is</p>
+<ol style="list-style-type: decimal">
+<li><p>Use <code>groupBy</code> or <code>group_by</code> with respect to some grouping variables to create a <code>GroupedData</code> object</p></li>
+<li><p>Feed the <code>GroupedData</code> object to <code>agg</code> or <code>summarize</code> functions, with some provided aggregation functions to compute a number within each group.</p></li>
+</ol>
+<p>A number of widely used functions are supported to aggregate data after grouping, including <code>avg</code>, <code>countDistinct</code>, <code>count</code>, <code>first</code>, <code>kurtosis</code>, <code>last</code>, <code>max</code>, <code>mean</code>, <code>min</code>, <code>sd</code>, <code>skewness</code>, <code>stddev_pop</code>, <code>stddev_samp</code>, <code>sumDistinct</code>, <code>sum</code>, <code>var_pop</code>, <code>var_samp</code>, <code>var</code>. See the <a href="https://spark.apache.org/docs/latest/api/R/column_aggregate_functions.html">API doc for aggregate functions</a> linked there.</p>
+<p>For example we can compute a histogram of the number of cylinders in the <code>mtcars</code> dataset as shown below.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">numCyl &lt;-<span class="st"> </span><span class="kw">summarize</span>(<span class="kw">groupBy</span>(carsDF, carsDF$cyl), <span class="dt">count =</span> <span class="kw">n</span>(carsDF$cyl))
+<span class="kw">head</span>(numCyl)</code></pre></div>
+<pre><code>##   cyl count
+## 1   8    14
+## 2   4    11
+## 3   6     7</code></pre>
+<p>Use <code>cube</code> or <code>rollup</code> to compute subtotals across multiple dimensions.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">mean</span>(<span class="kw">cube</span>(carsDF, <span class="st">&quot;cyl&quot;</span>, <span class="st">&quot;gear&quot;</span>, <span class="st">&quot;am&quot;</span>), <span class="st">&quot;mpg&quot;</span>)</code></pre></div>
+<pre><code>## SparkDataFrame[cyl:double, gear:double, am:double, avg(mpg):double]</code></pre>
+<p>generates groupings for {(<code>cyl</code>, <code>gear</code>, <code>am</code>), (<code>cyl</code>, <code>gear</code>), (<code>cyl</code>), ()}, while</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">mean</span>(<span class="kw">rollup</span>(carsDF, <span class="st">&quot;cyl&quot;</span>, <span class="st">&quot;gear&quot;</span>, <span class="st">&quot;am&quot;</span>), <span class="st">&quot;mpg&quot;</span>)</code></pre></div>
+<pre><code>## SparkDataFrame[cyl:double, gear:double, am:double, avg(mpg):double]</code></pre>
+<p>generates groupings for all possible combinations of grouping columns.</p>
+</div>
+<div id="operating-on-columns" class="section level4">
+<h4>Operating on Columns</h4>
+<p>SparkR also provides a number of functions that can be applied directly to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">carsDF_km &lt;-<span class="st"> </span>carsDF
+carsDF_km$kmpg &lt;-<span class="st"> </span>carsDF_km$mpg *<span class="st"> </span><span class="fl">1.61</span>
+<span class="kw">head</span>(<span class="kw">select</span>(carsDF_km, <span class="st">&quot;model&quot;</span>, <span class="st">&quot;mpg&quot;</span>, <span class="st">&quot;kmpg&quot;</span>))</code></pre></div>
+<pre><code>##               model  mpg   kmpg
+## 1         Mazda RX4 21.0 33.810
+## 2     Mazda RX4 Wag 21.0 33.810
+## 3        Datsun 710 22.8 36.708
+## 4    Hornet 4 Drive 21.4 34.454
+## 5 Hornet Sportabout 18.7 30.107
+## 6           Valiant 18.1 29.141</code></pre>
+</div>
+</div>
+<div id="window-functions" class="section level3">
+<h3>Window Functions</h3>
+<p>A window function is a variation of an aggregation function. In simple terms:</p>
+<ul>
+<li><p>aggregation function: <code>n</code> to <code>1</code> mapping - returns a single value for a group of entries. Examples include <code>sum</code>, <code>count</code>, <code>max</code>.</p></li>
+<li><p>window function: <code>n</code> to <code>n</code> mapping - returns one value for each entry in the group, but the value may depend on all the entries of the <em>group</em>. Examples include <code>rank</code>, <code>lead</code>, <code>lag</code>.</p></li>
+</ul>
+<p>Formally, the <em>group</em> mentioned above is called the <em>frame</em>. Every input row can have a unique frame associated with it and the output of the window function on that row is based on the rows confined in that frame.</p>
+<p>Window functions are often used in conjunction with the following functions: <code>windowPartitionBy</code>, <code>windowOrderBy</code>, <code>partitionBy</code>, <code>orderBy</code>, <code>over</code>. To illustrate this we next look at an example.</p>
+<p>We still use the <code>mtcars</code> dataset. The corresponding <code>SparkDataFrame</code> is <code>carsDF</code>. Suppose for each number of cylinders, we want to calculate the rank of each car in <code>mpg</code> within the group.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">carsSubDF &lt;-<span class="st"> </span><span class="kw">select</span>(carsDF, <span class="st">&quot;model&quot;</span>, <span class="st">&quot;mpg&quot;</span>, <span class="st">&quot;cyl&quot;</span>)
+ws &lt;-<span class="st"> </span><span class="kw">orderBy</span>(<span class="kw">windowPartitionBy</span>(<span class="st">&quot;cyl&quot;</span>), <span class="st">&quot;mpg&quot;</span>)
+carsRank &lt;-<span class="st"> </span><span class="kw">withColumn</span>(carsSubDF, <span class="st">&quot;rank&quot;</span>, <span class="kw">over</span>(<span class="kw">rank</span>(), ws))
+<span class="kw">head</span>(carsRank, <span class="dt">n =</span> 20L)</code></pre></div>
+<pre><code>##                  model  mpg cyl rank
+## 1   Cadillac Fleetwood 10.4   8    1
+## 2  Lincoln Continental 10.4   8    1
+## 3           Camaro Z28 13.3   8    3
+## 4           Duster 360 14.3   8    4
+## 5    Chrysler Imperial 14.7   8    5
+## 6        Maserati Bora 15.0   8    6
+## 7          Merc 450SLC 15.2   8    7
+## 8          AMC Javelin 15.2   8    7
+## 9     Dodge Challenger 15.5   8    9
+## 10      Ford Pantera L 15.8   8   10
+## 11          Merc 450SE 16.4   8   11
+## 12          Merc 450SL 17.3   8   12
+## 13   Hornet Sportabout 18.7   8   13
+## 14    Pontiac Firebird 19.2   8   14
+## 15          Volvo 142E 21.4   4    1
+## 16       Toyota Corona 21.5   4    2
+## 17          Datsun 710 22.8   4    3
+## 18            Merc 230 22.8   4    3
+## 19           Merc 240D 24.4   4    5
+## 20       Porsche 914-2 26.0   4    6</code></pre>
+<p>We explain in detail the above steps.</p>
+<ul>
+<li><code>windowPartitionBy</code> creates a window specification object <code>WindowSpec</code> that defines the partition. It controls which rows will be in the same partition as the given row. In this case, rows with the same value in <code>cyl</code> will be put in the same partition. <code>orderBy</code> further defines the ordering - the position a given row is in the partition. The resulting <code>WindowSpec</code> is returned as <code>ws</code>.</li>
+</ul>
+<p>More window specification methods include <code>rangeBetween</code>, which can define boundaries of the frame by value, and <code>rowsBetween</code>, which can define the boundaries by row indices.</p>
+<ul>
+<li><code>withColumn</code> appends a Column called <code>rank</code> to the <code>SparkDataFrame</code>. <code>over</code> returns a windowing column. The first argument is usually a Column returned by window function(s) such as <code>rank()</code>, <code>lead(carsDF$wt)</code>. That calculates the corresponding values according to the partitioned-and-ordered table.</li>
+</ul>
+</div>
+<div id="user-defined-function" class="section level3">
+<h3>User-Defined Function</h3>
+<p>In SparkR, we support several kinds of user-defined functions (UDFs).</p>
+<div id="apply-by-partition" class="section level4">
+<h4>Apply by Partition</h4>
+<p><code>dapply</code> can apply a function to each partition of a <code>SparkDataFrame</code>. The function to be applied to each partition of the <code>SparkDataFrame</code> should have only one parameter, a <code>data.frame</code> corresponding to a partition, and the output should be a <code>data.frame</code> as well. Schema specifies the row format of the resulting <code>SparkDataFrame</code>. It must match the data types of the returned value. See <a href="#DataTypes">here</a> for the mapping between R and Spark.</p>
+<p>We convert <code>mpg</code> to <code>kmpg</code> (kilometers per gallon). <code>carsSubDF</code> is a <code>SparkDataFrame</code> with a subset of <code>carsDF</code> columns.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">carsSubDF &lt;-<span class="st"> </span><span class="kw">select</span>(carsDF, <span class="st">&quot;model&quot;</span>, <span class="st">&quot;mpg&quot;</span>)
+schema &lt;-<span class="st"> &quot;model STRING, mpg DOUBLE, kmpg DOUBLE&quot;</span>
+out &lt;-<span class="st"> </span><span class="kw">dapply</span>(carsSubDF, function(x) { x &lt;-<span class="st"> </span><span class="kw">cbind</span>(x, x$mpg *<span class="st"> </span><span class="fl">1.61</span>) }, schema)
+<span class="kw">head</span>(<span class="kw">collect</span>(out))</code></pre></div>
+<pre><code>##               model  mpg   kmpg
+## 1         Mazda RX4 21.0 33.810
+## 2     Mazda RX4 Wag 21.0 33.810
+## 3        Datsun 710 22.8 36.708
+## 4    Hornet 4 Drive 21.4 34.454
+## 5 Hornet Sportabout 18.7 30.107
+## 6           Valiant 18.1 29.141</code></pre>
+<p>Like <code>dapply</code>, <code>dapplyCollect</code> can apply a function to each partition of a <code>SparkDataFrame</code> and collect the result back. The output of the function should be a <code>data.frame</code>, but no schema is required in this case. Note that <code>dapplyCollect</code> can fail if the output of the UDF on all partitions cannot be pulled into the driver’s memory.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">out &lt;-<span class="st"> </span><span class="kw">dapplyCollect</span>(
+         carsSubDF,
+         function(x) {
+           x &lt;-<span class="st"> </span><span class="kw">cbind</span>(x, <span class="st">&quot;kmpg&quot;</span> =<span class="st"> </span>x$mpg *<span class="st"> </span><span class="fl">1.61</span>)
+         })
+<span class="kw">head</span>(out, <span class="dv">3</span>)</code></pre></div>
+<pre><code>##           model  mpg   kmpg
+## 1     Mazda RX4 21.0 33.810
+## 2 Mazda RX4 Wag 21.0 33.810
+## 3    Datsun 710 22.8 36.708</code></pre>
+</div>
+<div id="apply-by-group" class="section level4">
+<h4>Apply by Group</h4>
+<p><code>gapply</code> can apply a function to each group of a <code>SparkDataFrame</code>. The function should have only two parameters: the grouping key and an R <code>data.frame</code> corresponding to that key. The groups are chosen from one or more columns of the <code>SparkDataFrame</code>. The output of the function should be a <code>data.frame</code>. The schema specifies the row format of the resulting <code>SparkDataFrame</code> and must represent the R function’s output schema in terms of Spark data types. The column names of the returned <code>data.frame</code> are set by the user. See <a href="#DataTypes">here</a> for the mapping between R and Spark data types.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">schema &lt;-<span class="st"> </span><span class="kw">structType</span>(<span class="kw">structField</span>(<span class="st">&quot;cyl&quot;</span>, <span class="st">&quot;double&quot;</span>), <span class="kw">structField</span>(<span class="st">&quot;max_mpg&quot;</span>, <span class="st">&quot;double&quot;</span>))
+result &lt;-<span class="st"> </span><span class="kw">gapply</span>(
+    carsDF,
+    <span class="st">&quot;cyl&quot;</span>,
+    function(key, x) {
+        y &lt;-<span class="st"> </span><span class="kw">data.frame</span>(key, <span class="kw">max</span>(x$mpg))
+    },
+    schema)
+<span class="kw">head</span>(<span class="kw">arrange</span>(result, <span class="st">&quot;max_mpg&quot;</span>, <span class="dt">decreasing =</span> <span class="ot">TRUE</span>))</code></pre></div>
+<pre><code>##   cyl max_mpg
+## 1   4    33.9
+## 2   6    21.4
+## 3   8    19.2</code></pre>
+<p>Like <code>gapply</code>, <code>gapplyCollect</code> can apply a function to each group of a <code>SparkDataFrame</code> and collect the result back as an R <code>data.frame</code>. The output of the function should be a <code>data.frame</code>, but no schema is required in this case. Note that <code>gapplyCollect</code> can fail if the output of the UDF on all groups cannot be pulled into the driver’s memory.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">result &lt;-<span class="st"> </span><span class="kw">gapplyCollect</span>(
+    carsDF,
+    <span class="st">&quot;cyl&quot;</span>,
+    function(key, x) {
+         y &lt;-<span class="st"> </span><span class="kw">data.frame</span>(key, <span class="kw">max</span>(x$mpg))
+        <span class="kw">colnames</span>(y) &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="st">&quot;cyl&quot;</span>, <span class="st">&quot;max_mpg&quot;</span>)
+        y
+    })
+<span class="kw">head</span>(result[<span class="kw">order</span>(result$max_mpg, <span class="dt">decreasing =</span> <span class="ot">TRUE</span>), ])</code></pre></div>
+<pre><code>##   cyl max_mpg
+## 2   4    33.9
+## 3   6    21.4
+## 1   8    19.2</code></pre>
+</div>
+<div id="distribute-local-functions" class="section level4">
+<h4>Distribute Local Functions</h4>
+<p>Similar to <code>lapply</code> in native R, <code>spark.lapply</code> runs a function over a list of elements and distributes the computations with Spark. It works in a manner similar to applying <code>doParallel</code> or <code>lapply</code> to the elements of a list. The results of all the computations should fit on a single machine. If that is not the case, you can do something like <code>df &lt;- createDataFrame(list)</code> and then use <code>dapply</code>, as sketched below.</p>
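+<p>To make the last point concrete, here is a minimal sketch (not from the original text) of that alternative: the inputs are distributed as a <code>SparkDataFrame</code> and processed with <code>dapply</code>, so the results stay distributed instead of being collected into a single R list. The column names <code>x</code> and <code>result</code> are hypothetical.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># hypothetical numeric inputs, one per row
+inputsDF &lt;- createDataFrame(data.frame(x = as.numeric(1:1000)))
+schema &lt;- &quot;x DOUBLE, result DOUBLE&quot;
+# the results remain in a distributed SparkDataFrame
+out &lt;- dapply(inputsDF, function(df) { cbind(df, result = sqrt(df$x)) }, schema)
+head(collect(out))</code></pre></div>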
+<p>We use <code>svm</code> from the package <code>e1071</code> as an example. We use all default settings except for varying the cost of constraints violation. <code>spark.lapply</code> can train these different models in parallel.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">costs &lt;-<span class="st"> </span><span class="kw">exp</span>(<span class="kw">seq</span>(<span class="dt">from =</span> <span class="kw">log</span>(<span class="dv">1</span>), <span class="dt">to =</span> <span class="kw">log</span>(<span class="dv">1000</span>), <span class="dt">length.out =</span> <span class="dv">5</span>))
+train &lt;-<span class="st"> </span>function(cost) {
+  <span class="kw">stopifnot</span>(<span class="kw">requireNamespace</span>(<span class="st">&quot;e1071&quot;</span>, <span class="dt">quietly =</span> <span class="ot">TRUE</span>))
+  model &lt;-<span class="st"> </span>e1071::<span class="kw">svm</span>(Species ~<span class="st"> </span>., <span class="dt">data =</span> iris, <span class="dt">cost =</span> cost)
+  <span class="kw">summary</span>(model)
+}</code></pre></div>
+<p>Return a list of the models’ summaries.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">model.summaries &lt;-<span class="st"> </span><span class="kw">spark.lapply</span>(costs, train)</code></pre></div>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">class</span>(model.summaries)</code></pre></div>
+<pre><code>## [1] &quot;list&quot;</code></pre>
+<p>To avoid a lengthy display, we present only a partial result for the second fitted model. You are free to inspect the other models as well.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">print</span>(model.summaries[[<span class="dv">2</span>]])</code></pre></div>
+<pre><code>## $call
+## svm(formula = Species ~ ., data = iris, cost = cost)
+## 
+## $type
+## [1] 0
+## 
+## $kernel
+## [1] 2
+## 
+## $cost
+## [1] 5.623413
+## 
+## $degree
+## [1] 3
+## 
+## $gamma
+## [1] 0.25
+## 
+## $coef0
+## [1] 0
+## 
+## $nu
+## [1] 0.5
+## 
+## $epsilon
+## [1] 0.1
+## 
+## $sparse
+## [1] FALSE
+## 
+## $scaled
+## [1] TRUE TRUE TRUE TRUE
+## 
+## $x.scale
+## $x.scale$`scaled:center`
+## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
+##     5.843333     3.057333     3.758000     1.199333 
+## 
+## $x.scale$`scaled:scale`
+## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
+##    0.8280661    0.4358663    1.7652982    0.7622377 
+## 
+## 
+## $y.scale
+## NULL
+## 
+## $nclasses
+## [1] 3
+## 
+## $levels
+## [1] &quot;setosa&quot;     &quot;versicolor&quot; &quot;virginica&quot; 
+## 
+## $tot.nSV
+## [1] 35
+## 
+## $nSV
+## [1]  6 15 14
+## 
+## $labels
+## [1] 1 2 3
+## 
+## $SV
+##     Sepal.Length Sepal.Width Petal.Length Petal.Width
+## 14   -1.86378030 -0.13153881   -1.5056946  -1.4422448
+## 16   -0.17309407  3.08045544   -1.2791040  -1.0486668
+## 21   -0.53538397  0.78617383   -1.1658087  -1.3110521
+## 23   -1.50149039  1.24503015   -1.5623422  -1.3110521
+## 24   -0.89767388  0.55674567   -1.1658087  -0.9174741
+## 42   -1.62225369 -1.73753594   -1.3923993  -1.1798595
+## 51    1.39682886  0.32731751    0.5336209   0.2632600
+## 53    1.27606556  0.09788935    0.6469162   0.3944526
+## 54   -0.41462067 -1.73753594    0.1370873   0.1320673
+## 55    0.79301235 -0.59039513    0.4769732   0.3944526
+##  [ reached getOption(&quot;max.print&quot;) -- omitted 25 rows ]
+## 
+## $index
+##  [1]  14  16  21  23  24  42  51  53  54  55  58  61  69  71  73  78  79
+## [18]  84  85  86  99 107 111 119 120 124 127 128 130 132 134 135 139 149
+## [35] 150
+## 
+## $rho
+## [1] -0.10346530  0.12160294 -0.09540346
+## 
+## $compprob
+## [1] FALSE
+## 
+## $probA
+## NULL
+## 
+## $probB
+## NULL
+## 
+## $sigma
+## NULL
+## 
+## $coefs
+##              [,1]        [,2]
+##  [1,]  0.00000000  0.06561739
+##  [2,]  0.76813720  0.93378721
+##  [3,]  0.00000000  0.12123270
+##  [4,]  0.00000000  0.31170741
+##  [5,]  1.11614066  0.46397392
+##  [6,]  1.88141600  1.10392128
+##  [7,] -0.55872622  0.00000000
+##  [8,]  0.00000000  5.62341325
+##  [9,]  0.00000000  0.27711792
+## [10,]  0.00000000  5.28440007
+## [11,] -1.06596713  0.00000000
+## [12,] -0.57076709  1.09019756
+## [13,] -0.03365904  5.62341325
+## [14,]  0.00000000  5.62341325
+## [15,]  0.00000000  5.62341325
+## [16,]  0.00000000  5.62341325
+## [17,]  0.00000000  4.70398738
+## [18,]  0.00000000  5.62341325
+## [19,]  0.00000000  4.97981371
+## [20,] -0.77497987  0.00000000
+##  [ reached getOption(&quot;max.print&quot;) -- omitted 15 rows ]
+## 
+## $na.action
+## NULL
+## 
+## $fitted
+##      1      2      3      4      5      6      7      8      9     10 
+## setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa 
+##     11     12     13     14     15     16     17     18     19     20 
+## setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa 
+##     21     22     23     24     25     26     27     28     29     30 
+## setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa 
+##     31     32     33     34     35     36     37     38     39     40 
+## setosa setosa setosa setosa setosa setosa setosa setosa setosa setosa 
+##  [ reached getOption(&quot;max.print&quot;) -- omitted 110 entries ]
+## Levels: setosa versicolor virginica
+## 
+## $decision.values
+##     setosa/versicolor setosa/virginica versicolor/virginica
+## 1           1.1911739        1.0908424            1.1275805
+## 2           1.1336557        1.0619543            1.3260964
+## 3           1.2085065        1.0698101            1.0511345
+## 4           1.1646153        1.0505915            1.0806874
+## 5           1.1880814        1.0950348            0.9542815
+## 6           1.0990761        1.0984626            0.9326361
+## 7           1.1573474        1.0343287            0.9726843
+## 8           1.1851598        1.0815750            1.2206802
+## 9           1.1673499        1.0406734            0.8837945
+## 10          1.1629911        1.0560925            1.2430067
+## 11          1.1339282        1.0803946            1.0338357
+## 12          1.1724182        1.0641469            1.1190423
+## 13          1.1827355        1.0667956            1.1414844
+##  [ reached getOption(&quot;max.print&quot;) -- omitted 137 rows ]
+## 
+## $terms
+## Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
+## attr(,&quot;variables&quot;)
+## list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
+## attr(,&quot;factors&quot;)
+##              Sepal.Length Sepal.Width Petal.Length Petal.Width
+## Species                 0           0            0           0
+## Sepal.Length            1           0            0           0
+## Sepal.Width             0           1            0           0
+## Petal.Length            0           0            1           0
+## Petal.Width             0           0            0           1
+## attr(,&quot;term.labels&quot;)
+## [1] &quot;Sepal.Length&quot; &quot;Sepal.Width&quot;  &quot;Petal.Length&quot; &quot;Petal.Width&quot; 
+## attr(,&quot;order&quot;)
+## [1] 1 1 1 1
+## attr(,&quot;intercept&quot;)
+## [1] 0
+## attr(,&quot;response&quot;)
+## [1] 1
+## attr(,&quot;.Environment&quot;)
+## &lt;environment: 0x5e8a588&gt;
+## attr(,&quot;predvars&quot;)
+## list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
+## attr(,&quot;dataClasses&quot;)
+##      Species Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
+##     &quot;factor&quot;    &quot;numeric&quot;    &quot;numeric&quot;    &quot;numeric&quot;    &quot;numeric&quot; 
+## 
+## attr(,&quot;class&quot;)
+## [1] &quot;summary.svm&quot;</code></pre>
+</div>
+</div>
+<div id="sql-queries" class="section level3">
+<h3>SQL Queries</h3>
+<p>A <code>SparkDataFrame</code> can also be registered as a temporary view in Spark SQL so that one can run SQL queries over its data. The sql function enables applications to run SQL queries programmatically and returns the result as a <code>SparkDataFrame</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">people &lt;-<span class="st"> </span><span class="kw">read.df</span>(<span class="kw">paste0</span>(<span class="kw">sparkR.conf</span>(<span class="st">&quot;spark.home&quot;</span>),
+                         <span class="st">&quot;/examples/src/main/resources/people.json&quot;</span>), <span class="st">&quot;json&quot;</span>)</code></pre></div>
+<p>Register this <code>SparkDataFrame</code> as a temporary view.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">createOrReplaceTempView</span>(people, <span class="st">&quot;people&quot;</span>)</code></pre></div>
+<p>SQL statements can be run using the sql method.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">teenagers &lt;-<span class="st"> </span><span class="kw">sql</span>(<span class="st">&quot;SELECT name FROM people WHERE age &gt;= 13 AND age &lt;= 19&quot;</span>)
+<span class="kw">head</span>(teenagers)</code></pre></div>
+<pre><code>##     name
+## 1 Justin</code></pre>
+</div>
+</div>
+<div id="machine-learning" class="section level2">
+<h2>Machine Learning</h2>
+<p>SparkR supports the following machine learning models and algorithms.</p>
+<div id="classification" class="section level4">
+<h4>Classification</h4>
+<ul>
+<li><p>Linear Support Vector Machine (SVM) Classifier</p></li>
+<li><p>Logistic Regression</p></li>
+<li><p>Multilayer Perceptron (MLP)</p></li>
+<li><p>Naive Bayes</p></li>
+</ul>
+</div>
+<div id="regression" class="section level4">
+<h4>Regression</h4>
+<ul>
+<li><p>Accelerated Failure Time (AFT) Survival Model</p></li>
+<li><p>Generalized Linear Model (GLM)</p></li>
+<li><p>Isotonic Regression</p></li>
+</ul>
+</div>
+<div id="tree---classification-and-regression" class="section level4">
+<h4>Tree - Classification and Regression</h4>
+<ul>
+<li><p>Decision Tree</p></li>
+<li><p>Gradient-Boosted Trees (GBT)</p></li>
+<li><p>Random Forest</p></li>
+</ul>
+</div>
+<div id="clustering" class="section level4">
+<h4>Clustering</h4>
+<ul>
+<li><p>Bisecting <span class="math inline">\(k\)</span>-means</p></li>
+<li><p>Gaussian Mixture Model (GMM)</p></li>
+<li><p><span class="math inline">\(k\)</span>-means Clustering</p></li>
+<li><p>Latent Dirichlet Allocation (LDA)</p></li>
+</ul>
+</div>
+<div id="collaborative-filtering" class="section level4">
+<h4>Collaborative Filtering</h4>
+<ul>
+<li>Alternating Least Squares (ALS)</li>
+</ul>
+</div>
+<div id="frequent-pattern-mining" class="section level4">
+<h4>Frequent Pattern Mining</h4>
+<ul>
+<li>FP-growth</li>
+</ul>
+</div>
+<div id="statistics" class="section level4">
+<h4>Statistics</h4>
+<ul>
+<li>Kolmogorov-Smirnov Test</li>
+</ul>
+</div>
+<div id="r-formula" class="section level3">
+<h3>R Formula</h3>
+<p>For most of the models above, SparkR supports <strong>R formula operators</strong>, including <code>~</code>, <code>.</code>, <code>:</code>, <code>+</code> and <code>-</code>, for model fitting. This makes fitting models in SparkR a similar experience to using R’s own modeling functions.</p>
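+<p>As a small sketch (not from the original text) using <code>carsDF</code>: <code>.</code> expands to all remaining columns, <code>-</code> removes a term, and <code>:</code> adds an interaction. The particular formulas are only illustrative.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># use every column except the car name as a predictor
+m1 &lt;- spark.glm(carsDF, mpg ~ . - model)
+# add an interaction between weight and horsepower
+m2 &lt;- spark.glm(carsDF, mpg ~ wt + hp + wt:hp)
+head(summary(m1)$coefficients)</code></pre></div>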
+</div>
+<div id="training-and-test-sets" class="section level3">
+<h3>Training and Test Sets</h3>
+<p>We can easily split a <code>SparkDataFrame</code> into random training and test sets with the <code>randomSplit</code> function. It returns a list of split <code>SparkDataFrames</code> with the provided <code>weights</code>. We use <code>carsDF</code> as an example and want about <span class="math inline">\(70\%\)</span> training data and <span class="math inline">\(30\%\)</span> test data.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">splitDF_list &lt;-<span class="st"> </span><span class="kw">randomSplit</span>(carsDF, <span class="kw">c</span>(<span class="fl">0.7</span>, <span class="fl">0.3</span>), <span class="dt">seed =</span> <span class="dv">0</span>)
+carsDF_train &lt;-<span class="st"> </span>splitDF_list[[<span class="dv">1</span>]]
+carsDF_test &lt;-<span class="st"> </span>splitDF_list[[<span class="dv">2</span>]]</code></pre></div>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">count</span>(carsDF_train)</code></pre></div>
+<pre><code>## [1] 21</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(carsDF_train)</code></pre></div>
+<pre><code>##                model  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
+## 1 Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
+## 2         Camaro Z28 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
+## 3         Duster 360 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
+## 4           Fiat 128 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
+## 5          Fiat X1-9 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
+## 6     Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">count</span>(carsDF_test)</code></pre></div>
+<pre><code>## [1] 11</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(carsDF_test)</code></pre></div>
+<pre><code>##               model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
+## 1       AMC Javelin 15.2   8  304 150 3.15 3.435 17.30  0  0    3    2
+## 2 Chrysler Imperial 14.7   8  440 230 3.23 5.345 17.42  0  0    3    4
+## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
+## 4  Dodge Challenger 15.5   8  318 150 2.76 3.520 16.87  0  0    3    2
+## 5      Ferrari Dino 19.7   6  145 175 3.62 2.770 15.50  0  1    5    6
+## 6     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4</code></pre>
+</div>
+<div id="models-and-algorithms" class="section level3">
+<h3>Models and Algorithms</h3>
+<div id="linear-support-vector-machine-svm-classifier" class="section level4">
+<h4>Linear Support Vector Machine (SVM) Classifier</h4>
+<p><a href="https://en.wikipedia.org/wiki/Support_vector_machine#Linear_SVM">Linear Support Vector Machine (SVM)</a> classifier is an SVM classifier with linear kernels. This is a binary classifier. We use a simple example to show how to use <code>spark.svmLinear</code> for binary classification.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># load training data and create a DataFrame</span>
+t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+training &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+<span class="co"># fit a Linear SVM classifier model</span>
+model &lt;-<span class="st"> </span><span class="kw">spark.svmLinear</span>(training,  Survived ~<span class="st"> </span>., <span class="dt">regParam =</span> <span class="fl">0.01</span>, <span class="dt">maxIter =</span> <span class="dv">10</span>)
+<span class="kw">summary</span>(model)</code></pre></div>
+<pre><code>## $coefficients
+##                 Estimate
+## (Intercept)  0.155016898
+## Class_Crew   0.616590877
+## Class_1st    0.000000000
+## Class_3rd    0.148622548
+## Sex_Male     0.210290445
+## Age_Adult    0.323480802
+## Freq        -0.004794042
+## 
+## $numClasses
+## [1] 2
+## 
+## $numFeatures
+## [1] 6</code></pre>
+<p>Predict values on training data</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">prediction &lt;-<span class="st"> </span><span class="kw">predict</span>(model, training)
+<span class="kw">head</span>(<span class="kw">select</span>(prediction, <span class="st">&quot;Class&quot;</span>, <span class="st">&quot;Sex&quot;</span>, <span class="st">&quot;Age&quot;</span>, <span class="st">&quot;Freq&quot;</span>, <span class="st">&quot;Survived&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##   Class    Sex   Age Freq Survived prediction
+## 1   1st   Male Child    0       No        Yes
+## 2   2nd   Male Child    0       No        Yes
+## 3   3rd   Male Child   35       No        Yes
+## 4  Crew   Male Child    0       No        Yes
+## 5   1st Female Child    0       No        Yes
+## 6   2nd Female Child    0       No        Yes</code></pre>
+</div>
+<div id="logistic-regression" class="section level4">
+<h4>Logistic Regression</h4>
+<p><a href="https://en.wikipedia.org/wiki/Logistic_regression">Logistic regression</a> is a widely-used model when the response is categorical. It can be seen as a special case of the <a href="https://en.wikipedia.org/wiki/Generalized_linear_model">Generalized Linear Predictive Model</a>. We provide <code>spark.logit</code> on top of <code>spark.glm</code> to support logistic regression with advanced hyper-parameters. It supports both binary and multiclass classification with elastic-net regularization and feature standardization, similar to <code>glmnet</code>.</p>
+<p>We use a simple example to demonstrate <code>spark.logit</code> usage. In general, there are three steps to using <code>spark.logit</code>: 1) create a <code>SparkDataFrame</code> from a proper data source; 2) fit a logistic regression model using <code>spark.logit</code> with a proper parameter setting; and 3) obtain the coefficient matrix of the fitted model using <code>summary</code> and use the model for prediction with <code>predict</code>.</p>
+<p>Binomial logistic regression</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+training &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+model &lt;-<span class="st"> </span><span class="kw">spark.logit</span>(training, Survived ~<span class="st"> </span>., <span class="dt">regParam =</span> <span class="fl">0.04741301</span>)
+<span class="kw">summary</span>(model)</code></pre></div>
+<pre><code>## $coefficients
+##                 Estimate
+## (Intercept) -0.108748837
+## Class_Crew   0.152970953
+## Class_1st   -0.018199615
+## Class_3rd    0.116011205
+## Sex_Male     0.201436637
+## Age_Adult    0.326629954
+## Freq        -0.003310102</code></pre>
+<p>Predict values on training data</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">fitted &lt;-<span class="st"> </span><span class="kw">predict</span>(model, training)
+<span class="kw">head</span>(<span class="kw">select</span>(fitted, <span class="st">&quot;Class&quot;</span>, <span class="st">&quot;Sex&quot;</span>, <span class="st">&quot;Age&quot;</span>, <span class="st">&quot;Freq&quot;</span>, <span class="st">&quot;Survived&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##   Class    Sex   Age Freq Survived prediction
+## 1   1st   Male Child    0       No        Yes
+## 2   2nd   Male Child    0       No        Yes
+## 3   3rd   Male Child   35       No        Yes
+## 4  Crew   Male Child    0       No        Yes
+## 5   1st Female Child    0       No         No
+## 6   2nd Female Child    0       No         No</code></pre>
+<p>Multinomial logistic regression against three classes</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+training &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+<span class="co"># Note in this case, Spark infers it is multinomial logistic regression, so family = &quot;multinomial&quot; is optional.</span>
+model &lt;-<span class="st"> </span><span class="kw">spark.logit</span>(training, Class ~<span class="st"> </span>., <span class="dt">regParam =</span> <span class="fl">0.07815179</span>)
+<span class="kw">summary</span>(model)</code></pre></div>
+<pre><code>## $coefficients
+##                     Crew          1st          3rd          2nd
+## (Intercept)  0.055749216 -0.036372587  0.020150962 -0.039527591
+## Sex_Male    -0.131326302  0.088032755 -0.059233656  0.102527203
+## Age_Adult   -0.208864050  0.141940085 -0.102563703  0.169487668
+## Survived_No -0.081293328  0.052721700 -0.029409091  0.057980720
+## Freq         0.002222453 -0.001555913  0.001303827 -0.001970366</code></pre>
+</div>
+<div id="multilayer-perceptron" class="section level4">
+<h4>Multilayer Perceptron</h4>
+<p>The multilayer perceptron classifier (MLPC) is a classifier based on the <a href="https://en.wikipedia.org/wiki/Feedforward_neural_network">feedforward artificial neural network</a>. MLPC consists of multiple layers of nodes. Each layer is fully connected to the next layer in the network. Nodes in the input layer represent the input data. All other nodes map inputs to outputs by taking a linear combination of the inputs with the node’s weights <span class="math inline">\(w\)</span> and bias <span class="math inline">\(b\)</span> and applying an activation function. This can be written in matrix form for an MLPC with <span class="math inline">\(K+1\)</span> layers as follows: <span class="math display">\[
+y(x)=f_K(\ldots f_2(w_2^T f_1(w_1^T x + b_1) + b_2) \ldots + b_K).
+\]</span></p>
+<p>Nodes in intermediate layers use the sigmoid (logistic) function: <span class="math display">\[
+f(z_i) = \frac{1}{1+e^{-z_i}}.
+\]</span></p>
+<p>Nodes in the output layer use the softmax function: <span class="math display">\[
+f(z_i) = \frac{e^{z_i}}{\sum_{k=1}^N e^{z_k}}.
+\]</span></p>
+<p>The number of nodes <span class="math inline">\(N\)</span> in the output layer corresponds to the number of classes.</p>
+<p>MLPC employs backpropagation for learning the model. We use the logistic loss function for optimization and L-BFGS as an optimization routine.</p>
+<p><code>spark.mlp</code> requires at least two columns in <code>data</code>: one named <code>&quot;label&quot;</code> and the other named <code>&quot;features&quot;</code>. The <code>&quot;features&quot;</code> column should be in libSVM format.</p>
+<p>We use the Titanic data set to show how to use <code>spark.mlp</code> for classification.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+training &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+<span class="co"># fit a Multilayer Perceptron Classification Model</span>
+model &lt;-<span class="st"> </span><span class="kw">spark.mlp</span>(training, Survived ~<span class="st"> </span>Age +<span class="st"> </span>Sex, <span class="dt">blockSize =</span> <span class="dv">128</span>, <span class="dt">layers =</span> <span class="kw">c</span>(<span class="dv">2</span>, <span class="dv">2</span>), <span class="dt">solver =</span> <span class="st">&quot;l-bfgs&quot;</span>, <span class="dt">maxIter =</span> <span class="dv">100</span>, <span class="dt">tol =</span> <span class="fl">0.5</span>, <span class="dt">stepSize =</span> <span class="dv">1</span>, <span class="dt">seed =</span> <span class="dv">1</span>, <span class="dt">initialWeights =</span> <span class="kw">c</span>( <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">5</span>, <span class="dv">5</span>, <span class="dv">9</span>, <span class="dv">9</span>))</code></pre></div>
+<p>To avoid a lengthy display, we present only partial results of the model summary. You can check the full result in your SparkR shell.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># check the summary of the fitted model</span>
+<span class="kw">summary</span>(model)</code></pre></div>
+<pre><code>## $numOfInputs
+## [1] 2
+## 
+## $numOfOutputs
+## [1] 2
+## 
+## $layers
+## [1] 2 2
+## 
+## $weights
+## $weights[[1]]
+## [1] 0
+## 
+## $weights[[2]]
+## [1] 0
+## 
+## $weights[[3]]
+## [1] 5
+## 
+## $weights[[4]]
+## [1] 5
+## 
+## $weights[[5]]
+## [1] 9
+## 
+## $weights[[6]]
+## [1] 9</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># make predictions use the fitted model</span>
+predictions &lt;-<span class="st"> </span><span class="kw">predict</span>(model, training)
+<span class="kw">head</span>(<span class="kw">select</span>(predictions, predictions$prediction))</code></pre></div>
+<pre><code>##   prediction
+## 1         No
+## 2         No
+## 3         No
+## 4         No
+## 5         No
+## 6         No</code></pre>
+</div>
+<div id="naive-bayes" class="section level4">
+<h4>Naive Bayes</h4>
+<p>The naive Bayes model assumes independence among the features. <code>spark.naiveBayes</code> fits a <a href="https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Bernoulli_naive_Bayes">Bernoulli naive Bayes model</a> against a <code>SparkDataFrame</code>. The data should be all categorical. These models are often used for document classification.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">titanic &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+titanicDF &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(titanic[titanic$Freq &gt;<span class="st"> </span><span class="dv">0</span>, -<span class="dv">5</span>])
+naiveBayesModel &lt;-<span class="st"> </span><span class="kw">spark.naiveBayes</span>(titanicDF, Survived ~<span class="st"> </span>Class +<span class="st"> </span>Sex +<span class="st"> </span>Age)
+<span class="kw">summary</span>(naiveBayesModel)</code></pre></div>
+<pre><code>## $apriori
+##            Yes        No
+## [1,] 0.5769231 0.4230769
+## 
+## $tables
+##     Class_3rd Class_1st Class_2nd Sex_Male Age_Adult
+## Yes 0.3125    0.3125    0.3125    0.5      0.5625   
+## No  0.4166667 0.25      0.25      0.5      0.75</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">naiveBayesPrediction &lt;-<span class="st"> </span><span class="kw">predict</span>(naiveBayesModel, titanicDF)
+<span class="kw">head</span>(<span class="kw">select</span>(naiveBayesPrediction, <span class="st">&quot;Class&quot;</span>, <span class="st">&quot;Sex&quot;</span>, <span class="st">&quot;Age&quot;</span>, <span class="st">&quot;Survived&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##   Class    Sex   Age Survived prediction
+## 1   3rd   Male Child       No        Yes
+## 2   3rd Female Child       No        Yes
+## 3   1st   Male Adult       No        Yes
+## 4   2nd   Male Adult       No        Yes
+## 5   3rd   Male Adult       No         No
+## 6  Crew   Male Adult       No        Yes</code></pre>
+</div>
+<div id="accelerated-failure-time-survival-model" class="section level4">
+<h4>Accelerated Failure Time Survival Model</h4>
+<p>Survival analysis studies the expected duration of time until an event happens, and often its relationship with risk factors or treatments applied to the subject. In contrast to standard regression analysis, survival modeling has to deal with special characteristics of the data, including non-negative survival times and censoring.</p>
+<p>The Accelerated Failure Time (AFT) model is a parametric survival model for censored data that assumes the effect of a covariate is to accelerate or decelerate the life course of an event by some constant. For more information, refer to the Wikipedia page <a href="https://en.wikipedia.org/wiki/Accelerated_failure_time_model">AFT Model</a> and the references there. Unlike a <a href="https://en.wikipedia.org/wiki/Proportional_hazards_model">Proportional Hazards Model</a> designed for the same purpose, the AFT model is easier to parallelize because each instance contributes to the objective function independently.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(survival)
+ovarianDF &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(ovarian)
+aftModel &lt;-<span class="st"> </span><span class="kw">spark.survreg</span>(ovarianDF, <span class="kw">Surv</span>(futime, fustat) ~<span class="st"> </span>ecog_ps +<span class="st"> </span>rx)
+<span class="kw">summary</span>(aftModel)</code></pre></div>
+<pre><code>## $coefficients
+##                  Value
+## (Intercept)  6.8966930
+## ecog_ps     -0.3850426
+## rx           0.5286457
+## Log(scale)  -0.1234418</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">aftPredictions &lt;-<span class="st"> </span><span class="kw">predict</span>(aftModel, ovarianDF)
+<span class="kw">head</span>(aftPredictions)</code></pre></div>
+<pre><code>##   futime fustat     age resid_ds rx ecog_ps label prediction
+## 1     59      1 72.3315        2  1       1    59  1141.7256
+## 2    115      1 74.4932        2  1       1   115  1141.7256
+## 3    156      1 66.4658        2  1       2   156   776.8548
+## 4    421      0 53.3644        2  2       1   421  1937.0893
+## 5    431      1 50.3397        2  1       1   431  1141.7256
+## 6    448      0 56.4301        1  1       2   448   776.8548</code></pre>
+</div>
+<div id="generalized-linear-model" class="section level4">
+<h4>Generalized Linear Model</h4>
+<p>The main function is <code>spark.glm</code>. The following families and link functions are supported. The default is gaussian.</p>
+<table>
+<thead>
+<tr class="header">
+<th align="left">Family</th>
+<th align="left">Link Function</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left">gaussian</td>
+<td align="left">identity, log, inverse</td>
+</tr>
+<tr class="even">
+<td align="left">binomial</td>
+<td align="left">logit, probit, cloglog (complementary log-log)</td>
+</tr>
+<tr class="odd">
+<td align="left">poisson</td>
+<td align="left">log, identity, sqrt</td>
+</tr>
+<tr class="even">
+<td align="left">gamma</td>
+<td align="left">inverse, identity, log</td>
+</tr>
+<tr class="odd">
+<td align="left">tweedie</td>
+<td align="left">power link function</td>
+</tr>
+</tbody>
+</table>
+<p>There are three ways to specify the <code>family</code> argument.</p>
+<ul>
+<li><p>Family name as a character string, e.g. <code>family = &quot;gaussian&quot;</code>.</p></li>
+<li><p>Family function, e.g. <code>family = binomial</code>.</p></li>
+<li><p>Result returned by a family function, e.g. <code>family = poisson(link = log)</code>.</p></li>
+</ul>
+<p>Note that there are two ways to specify the tweedie family:</p>
+<ol style="list-style-type: lower-alpha">
+<li>Set <code>family = &quot;tweedie&quot;</code> and specify the <code>var.power</code> and <code>link.power</code> parameters;</li>
+<li>When package <code>statmod</code> is loaded, the tweedie family is specified using the family definition therein, i.e., <code>tweedie()</code>.</li>
+</ol>
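+<p>To make these options concrete, here is a minimal sketch (not from the original text) using <code>carsDF</code>; the choices of response variables (<code>vs</code> as a binary outcome, <code>carb</code> as a count) are only illustrative.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># (1) family name as a character string
+m1 &lt;- spark.glm(carsDF, mpg ~ wt + hp, family = &quot;gaussian&quot;)
+# (2) family function
+m2 &lt;- spark.glm(carsDF, vs ~ wt + hp, family = binomial)
+# (3) result returned by a family function
+m3 &lt;- spark.glm(carsDF, carb ~ wt + hp, family = poisson(link = log))
+# tweedie family via var.power and link.power
+m4 &lt;- spark.glm(carsDF, mpg ~ wt + hp, family = &quot;tweedie&quot;,
+                var.power = 1.2, link.power = 0.0)</code></pre></div>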
+<p>For more information regarding the families and their link functions, see the Wikipedia page <a href="https://en.wikipedia.org/wiki/Generalized_linear_model">Generalized Linear Model</a>.</p>
+<p>We use the <code>mtcars</code> dataset as an illustration. The corresponding <code>SparkDataFrame</code> is <code>carsDF</code>. After fitting the model, we print out a summary and see the fitted values by making predictions on the original dataset. We can also pass in a new <code>SparkDataFrame</code> with the same schema to predict on new data, as sketched further below.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">gaussianGLM &lt;-<span class="st"> </span><span class="kw">spark.glm</span>(carsDF, mpg ~<span class="st"> </span>wt +<span class="st"> </span>hp)
+<span class="kw">summary</span>(gaussianGLM)</code></pre></div>
+<pre><code>## 
+## Deviance Residuals: 
+## (Note: These are approximate quantiles with relative error &lt;= 0.01)
+##     Min       1Q   Median       3Q      Max  
+## -3.9410  -1.6499  -0.3267   1.0373   5.8538  
+## 
+## Coefficients:
+##               Estimate  Std. Error  t value    Pr(&gt;|t|)
+## (Intercept)  37.227270   1.5987875  23.2847  0.0000e+00
+## wt           -3.877831   0.6327335  -6.1287  1.1196e-06
+## hp           -0.031773   0.0090297  -3.5187  1.4512e-03
+## 
+## (Dispersion parameter for gaussian family taken to be 6.725785)
+## 
+##     Null deviance: 1126.05  on 31  degrees of freedom
+## Residual deviance:  195.05  on 29  degrees of freedom
+## AIC: 156.7
+## 
+## Number of Fisher Scoring iterations: 1</code></pre>
+<p>When doing prediction, a new column called <code>prediction</code> will be appended. Let’s look at only a subset of columns here.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">gaussianFitted &lt;-<span class="st"> </span><span class="kw">predict</span>(gaussianGLM, carsDF)
+<span class="kw">head</span>(<span class="kw">select</span>(gaussianFitted, <span class="st">&quot;model&quot;</span>, <span class="st">&quot;prediction&quot;</span>, <span class="st">&quot;mpg&quot;</span>, <span class="st">&quot;wt&quot;</span>, <span class="st">&quot;hp&quot;</span>))</code></pre></div>
+<pre><code>##               model prediction  mpg    wt  hp
+## 1         Mazda RX4   23.57233 21.0 2.620 110
+## 2     Mazda RX4 Wag   22.58348 21.0 2.875 110
+## 3        Datsun 710   25.27582 22.8 2.320  93
+## 4    Hornet 4 Drive   21.26502 21.4 3.215 110
+## 5 Hornet Sportabout   18.32727 18.7 3.440 175
+## 6           Valiant   20.47382 18.1 3.460 105</code></pre>
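+<p>As mentioned above, we can also predict on new data. A minimal sketch follows (not part of the original output), assuming a hypothetical <code>newCarsDF</code> that contains the feature columns <code>wt</code> and <code>hp</code> referenced in the formula; the values are made up.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># hypothetical new observations with the feature columns used by the model
+newCarsDF &lt;- createDataFrame(data.frame(wt = c(2.5, 3.5), hp = c(100, 200)))
+head(select(predict(gaussianGLM, newCarsDF), &quot;wt&quot;, &quot;hp&quot;, &quot;prediction&quot;))</code></pre></div>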
+<p>The following is the same fit using the tweedie family:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tweedieGLM1 &lt;-<span class="st"> </span><span class="kw">spark.glm</span>(carsDF, mpg ~<span class="st"> </span>wt +<span class="st"> </span>hp, <span class="dt">family =</span> <span class="st">&quot;tweedie&quot;</span>, <span class="dt">var.power =</span> <span class="fl">0.0</span>)
+<span class="kw">summary</span>(tweedieGLM1)</code></pre></div>
+<pre><code>## 
+## Deviance Residuals: 
+## (Note: These are approximate quantiles with relative error &lt;= 0.01)
+##     Min       1Q   Median       3Q      Max  
+## -3.9410  -1.6499  -0.3267   1.0373   5.8538  
+## 
+## Coefficients:
+##               Estimate  Std. Error  t value    Pr(&gt;|t|)
+## (Intercept)  37.227270   1.5987875  23.2847  0.0000e+00
+## wt           -3.877831   0.6327335  -6.1287  1.1196e-06
+## hp           -0.031773   0.0090297  -3.5187  1.4512e-03
+## 
+## (Dispersion parameter for tweedie family taken to be 6.725785)
+## 
+##     Null deviance: 1126.05  on 31  degrees of freedom
+## Residual deviance:  195.05  on 29  degrees of freedom
+## AIC: 156.7
+## 
+## Number of Fisher Scoring iterations: 1</code></pre>
+<p>We can try other distributions in the tweedie family, for example, a compound Poisson distribution with a log link:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tweedieGLM2 &lt;-<span class="st"> </span><span class="kw">spark.glm</span>(carsDF, mpg ~<span class="st"> </span>wt +<span class="st"> </span>hp, <span class="dt">family =</span> <span class="st">&quot;tweedie&quot;</span>,
+                         <span class="dt">var.power =</span> <span class="fl">1.2</span>, <span class="dt">link.power =</span> <span class="fl">0.0</span>)
+<span class="kw">summary</span>(tweedieGLM2)</code></pre></div>
+<pre><code>## 
+## Deviance Residuals: 
+## (Note: These are approximate quantiles with relative error &lt;= 0.01)
+##      Min        1Q    Median        3Q       Max  
+## -0.58074  -0.25335  -0.09892   0.18608   0.82717  
+## 
+## Coefficients:
+##                Estimate  Std. Error  t value    Pr(&gt;|t|)
+## (Intercept)   3.8500849  0.06698272  57.4788  0.0000e+00
+## wt           -0.2018426  0.02897283  -6.9666  1.1691e-07
+## hp           -0.0016248  0.00041603  -3.9054  5.1697e-04
+## 
+## (Dispersion parameter for tweedie family taken to be 0.1340111)
+## 
+##     Null deviance: 29.8820  on 31  degrees of freedom
+## Residual deviance:  3.7739  on 29  degrees of freedom
+## AIC: NA
+## 
+## Number of Fisher Scoring iterations: 4</code></pre>
+</div>
+<div id="isotonic-regression" class="section level4">
+<h4>Isotonic Regression</h4>
+<p><code>spark.isoreg</code> fits an <a href="https://en.wikipedia.org/wiki/Isotonic_regression">Isotonic Regression</a> model against a <code>SparkDataFrame</code>. It solves a weighted univariate regression problem under a complete order constraint. Specifically, given a set of real observed responses <span class="math inline">\(y_1, \ldots, y_n\)</span>, corresponding real features <span class="math inline">\(x_1, \ldots, x_n\)</span>, and optionally positive weights <span class="math inline">\(w_1, \ldots, w_n\)</span>, we want to find a monotone (piecewise linear) function <span class="math inline">\(f\)</span> to minimize <span class="math display">\[
+\ell(f) = \sum_{i=1}^n w_i (y_i - f(x_i))^2.
+\]</span></p>
+<p>There are a few more arguments that may be useful.</p>
+<ul>
+<li><p><code>weightCol</code>: a character string specifying the weight column.</p></li>
+<li><p><code>isotonic</code>: a logical value indicating whether the output sequence should be isotonic/increasing (<code>TRUE</code>) or antitonic/decreasing (<code>FALSE</code>).</p></li>
+<li><p><code>featureIndex</code>: the index of the feature on the right-hand side of the formula if it is a vector column (default: 0); it has no effect otherwise.</p></li>
+</ul>
+<p>We use an artificial example to show its use.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">y &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="fl">3.0</span>, <span class="fl">6.0</span>, <span class="fl">8.0</span>, <span class="fl">5.0</span>, <span class="fl">7.0</span>)
+x &lt;-<span class="st"> </span><span class="kw">c</span>(<span class="fl">1.0</span>, <span class="fl">2.0</span>, <span class="fl">3.5</span>, <span class="fl">3.0</span>, <span class="fl">4.0</span>)
+w &lt;-<span class="st"> </span><span class="kw">rep</span>(<span class="fl">1.0</span>, <span class="dv">5</span>)
+data &lt;-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">y =</span> y, <span class="dt">x =</span> x, <span class="dt">w =</span> w)
+df &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(data)
+isoregModel &lt;-<span class="st"> </span><span class="kw">spark.isoreg</span>(df, y ~<span class="st"> </span>x, <span class="dt">weightCol =</span> <span class="st">&quot;w&quot;</span>)
+isoregFitted &lt;-<span class="st"> </span><span class="kw">predict</span>(isoregModel, df)
+<span class="kw">head</span>(<span class="kw">select</span>(isoregFitted, <span class="st">&quot;x&quot;</span>, <span class="st">&quot;y&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##     x y prediction
+## 1 1.0 3        3.0
+## 2 2.0 6        5.5
+## 3 3.5 8        7.5
+## 4 3.0 5        5.5
+## 5 4.0 7        7.5</code></pre>
+<p>In the prediction stage, based on the fitted monotone piecewise function, the rules are:</p>
+<ul>
+<li><p>If the prediction input exactly matches a training feature, then the associated prediction is returned. If there are multiple predictions with the same feature, one of them is returned; which one is undefined.</p></li>
+<li><p>If the prediction input is lower or higher than all training features, then the prediction for the lowest or highest feature is returned, respectively. If there are multiple predictions with the same feature, the lowest or highest one is returned, respectively.</p></li>
+<li><p>If the prediction input falls between two training features, then the prediction is treated as a piecewise linear function and the interpolated value is calculated from the predictions of the two closest features. If there are multiple values with the same feature, the same rules as in the previous point apply.</p></li>
+</ul>
+<p>For example, when the input is <span class="math inline">\(3.2\)</span>, the two closest feature values are <span class="math inline">\(3.0\)</span> and <span class="math inline">\(3.5\)</span>, so the predicted value is a linear interpolation between the predicted values at <span class="math inline">\(3.0\)</span> and <span class="math inline">\(3.5\)</span>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">newDF &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(<span class="kw">data.frame</span>(<span class="dt">x =</span> <span class="kw">c</span>(<span class="fl">1.5</span>, <span class="fl">3.2</span>)))
+<span class="kw">head</span>(<span class="kw">predict</span>(isoregModel, newDF))</code></pre></div>
+<pre><code>##     x prediction
+## 1 1.5       4.25
+## 2 3.2       6.30</code></pre>
+</div>
+<div id="decision-tree" class="section level4">
+<h4>Decision Tree</h4>
+<p><code>spark.decisionTree</code> fits a <a href="https://en.wikipedia.org/wiki/Decision_tree_learning">decision tree</a> classification or regression model on a <code>SparkDataFrame</code>. Users can call <code>summary</code> to get a summary of the fitted model, <code>predict</code> to make predictions, and <code>write.ml</code>/<code>read.ml</code> to save/load fitted models.</p>
+<p>We use the <code>Titanic</code> dataset to train a decision tree and make predictions:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+df &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+dtModel &lt;-<span class="st"> </span><span class="kw">spark.decisionTree</span>(df, Survived ~<span class="st"> </span>., <span class="dt">type =</span> <span class="st">&quot;classification&quot;</span>, <span class="dt">maxDepth =</span> <span class="dv">2</span>)
+<span class="kw">summary</span>(dtModel)</code></pre></div>
+<pre><code>## Formula:  Survived ~ .
+## Number of features:  6
+## Features:  Class_Crew Class_1st Class_3rd Sex_Male Age_Adult Freq
+## Feature importances:  (6,[5],[1.0])
+## Max Depth:  2
+##  DecisionTreeClassificationModel (uid=dtc_b96761304339) of depth 2 with 5 nodes
+##   If (feature 5 &lt;= 4.5)
+##    Predict: 0.0
+##   Else (feature 5 &gt; 4.5)
+##    If (feature 5 &lt;= 84.5)
+##     Predict: 1.0
+##    Else (feature 5 &gt; 84.5)
+##     Predict: 0.0
+## </code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">predictions &lt;-<span class="st"> </span><span class="kw">predict</span>(dtModel, df)
+<span class="kw">head</span>(<span class="kw">select</span>(predictions, <span class="st">&quot;Class&quot;</span>, <span class="st">&quot;Sex&quot;</span>, <span class="st">&quot;Age&quot;</span>, <span class="st">&quot;Freq&quot;</span>, <span class="st">&quot;Survived&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##   Class    Sex   Age Freq Survived prediction
+## 1   1st   Male Child    0       No         No
+## 2   2nd   Male Child    0       No         No
+## 3   3rd   Male Child   35       No        Yes
+## 4  Crew   Male Child    0       No         No
+## 5   1st Female Child    0       No         No
+## 6   2nd Female Child    0       No         No</code></pre>
+</div>
+<div id="gradient-boosted-trees" class="section level4">
+<h4>Gradient-Boosted Trees</h4>
+<p><code>spark.gbt</code> fits a <a href="https://en.wikipedia.org/wiki/Gradient_boosting">gradient-boosted tree</a> classification or regression model on a <code>SparkDataFrame</code>. Users can call <code>summary</code> to get a summary of the fitted model, <code>predict</code> to make predictions, and <code>write.ml</code>/<code>read.ml</code> to save/load fitted models.</p>
+<p>We use the <code>Titanic</code> dataset to train a gradient-boosted tree and make predictions:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+df &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+gbtModel &lt;-<span class="st"> </span><span class="kw">spark.gbt</span>(df, Survived ~<span class="st"> </span>., <span class="dt">type =</span> <span class="st">&quot;classification&quot;</span>, <span class="dt">maxDepth =</span> <span class="dv">2</span>, <span class="dt">maxIter =</span> <span class="dv">2</span>)
+<span class="kw">summary</span>(gbtModel)</code></pre></div>
+<pre><code>## Formula:  Survived ~ .
+## Number of features:  6
+## Features:  Class_Crew Class_1st Class_3rd Sex_Male Age_Adult Freq
+## Feature importances:  (6,[0,4,5],[0.13232868918568455,0.04105571847507324,0.8266155923392422])
+## Max Depth:  2
+## Number of trees:  2
+## Tree weights:  1 0.1
+##  GBTClassificationModel (uid=gbtc_a9f9e480f809) with 2 trees
+##   Tree 0 (weight 1.0):
+##     If (feature 5 &lt;= 4.5)
+##      If (feature 4 in {1.0})
+##       Predict: -1.0
+##      Else (feature 4 not in {1.0})
+##       Predict: -0.3333333333333333
+##     Else (feature 5 &gt; 4.5)
+##      If (feature 5 &lt;= 84.5)
+##       Predict: 0.5714285714285714
+##      Else (feature 5 &gt; 84.5)
+##       Predict: -0.42857142857142855
+##   Tree 1 (weight 0.1):
+##     If (feature 0 in {0.0})
+##      If (feature 5 &lt;= 0.5)
+##       Predict: -1.3569745249367313
+##      Else (feature 5 &gt; 0.5)
+##       Predict: 0.03904410748693565
+##     Else (feature 0 not in {0.0})
+##      If (feature 5 &lt;= 289.5)
+##       Predict: 0.8386754830442805
+##      Else (feature 5 &gt; 289.5)
+##       Predict: -1.1917465204842816
+## </code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">predictions &lt;-<span class="st"> </span><span class="kw">predict</span>(gbtModel, df)
+<span class="kw">head</span>(<span class="kw">select</span>(predictions, <span class="st">&quot;Class&quot;</span>, <span class="st">&quot;Sex&quot;</span>, <span class="st">&quot;Age&quot;</span>, <span class="st">&quot;Freq&quot;</span>, <span class="st">&quot;Survived&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##   Class    Sex   Age Freq Survived prediction
+## 1   1st   Male Child    0       No         No
+## 2   2nd   Male Child    0       No         No
+## 3   3rd   Male Child   35       No        Yes
+## 4  Crew   Male Child    0       No         No
+## 5   1st Female Child    0       No         No
+## 6   2nd Female Child    0       No         No</code></pre>
+</div>
+<div id="random-forest" class="section level4">
+<h4>Random Forest</h4>
+<p><code>spark.randomForest</code> fits a <a href="https://en.wikipedia.org/wiki/Random_forest">random forest</a> classification or regression model on a <code>SparkDataFrame</code>. Users can call <code>summary</code> to get a summary of the fitted model, <code>predict</code> to make predictions, and <code>write.ml</code>/<code>read.ml</code> to save/load fitted models.</p>
+<p>In the following example, we use the <code>Titanic</code> dataset to train a random forest and make predictions:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+df &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+rfModel &lt;-<span class="st"> </span><span class="kw">spark.randomForest</span>(df, Survived ~<span class="st"> </span>., <span class="dt">type =</span> <span class="st">&quot;classification&quot;</span>, <span class="dt">maxDepth =</span> <span class="dv">2</span>, <span class="dt">numTrees =</span> <span class="dv">2</span>)
+<span class="kw">summary</span>(rfModel)</code></pre></div>
+<pre><code>## Formula:  Survived ~ .
+## Number of features:  6
+## Features:  Class_Crew Class_1st Class_3rd Sex_Male Age_Adult Freq
+## Feature importances:  (6,[1,5],[0.3304780506160942,0.6695219493839059])
+## Max Depth:  2
+## Number of trees:  2
+## Tree weights:  1 1
+##  RandomForestClassificationModel (uid=rfc_0a2ee1631170) with 2 trees
+##   Tree 0 (weight 1.0):
+##     If (feature 5 &lt;= 0.5)
+##      Predict: 0.0
+##     Else (feature 5 &gt; 0.5)
+##      If (feature 1 in {1.0})
+##       Predict: 1.0
+##      Else (feature 1 not in {1.0})
+##       Predict: 0.0
+##   Tree 1 (weight 1.0):
+##     If (feature 5 &lt;= 84.5)
+##      If (feature 1 in {1.0})
+##       Predict: 0.0
+##      Else (feature 1 not in {1.0})
+##       Predict: 1.0
+##     Else (feature 5 &gt; 84.5)
+##      Predict: 0.0
+## </code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">predictions &lt;-<span class="st"> </span><span class="kw">predict</span>(rfModel, df)
+<span class="kw">head</span>(<span class="kw">select</span>(predictions, <span class="st">&quot;Class&quot;</span>, <span class="st">&quot;Sex&quot;</span>, <span class="st">&quot;Age&quot;</span>, <span class="st">&quot;Freq&quot;</span>, <span class="st">&quot;Survived&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##   Class    Sex   Age Freq Survived prediction
+## 1   1st   Male Child    0       No         No
+## 2   2nd   Male Child    0       No         No
+## 3   3rd   Male Child   35       No        Yes
+## 4  Crew   Male Child    0       No         No
+## 5   1st Female Child    0       No         No
+## 6   2nd Female Child    0       No         No</code></pre>
+</div>
+<div id="bisecting-k-means" class="section level4">
+<h4>Bisecting k-Means</h4>
+<p><code>spark.bisectingKmeans</code> is a kind of <a href="https://en.wikipedia.org/wiki/Hierarchical_clustering">hierarchical clustering</a> using a divisive (or “top-down”) approach: all observations start in one cluster, and splits are performed recursively as one moves down the hierarchy.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+training &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+model &lt;-<span class="st"> </span><span class="kw">spark.bisectingKmeans</span>(training, Class ~<span class="st"> </span>Survived, <span class="dt">k =</span> <span class="dv">4</span>)
+<span class="kw">summary</span>(model)</code></pre></div>
+<pre><code>## $k
+## [1] 4
+## 
+## $coefficients
+##   Survived_No
+## 1 0          
+## 2 1          
+## 3 0          
+## 4 1          
+## 
+## $size
+## $size[[1]]
+## [1] 16
+## 
+## $size[[2]]
+## [1] 16
+## 
+## $size[[3]]
+## [1] 0
+## 
+## $size[[4]]
+## [1] 0
+## 
+## 
+## $cluster
+## SparkDataFrame[prediction:int]
+## 
+## $is.loaded
+## [1] FALSE</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">fitted &lt;-<span class="st"> </span><span class="kw">predict</span>(model, training)
+<span class="kw">head</span>(<span class="kw">select</span>(fitted, <span class="st">&quot;Class&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##   Class prediction
+## 1   1st          1
+## 2   2nd          1
+## 3   3rd          1
+## 4  Crew          1
+## 5   1st          1
+## 6   2nd          1</code></pre>
+</div>
+<div id="gaussian-mixture-model" class="section level4">
+<h4>Gaussian Mixture Model</h4>
+<p><code>spark.gaussianMixture</code> fits a multivariate <a href="https://en.wikipedia.org/wiki/Mixture_model#Multivariate_Gaussian_mixture_model">Gaussian Mixture Model</a> (GMM) against a <code>SparkDataFrame</code>. <a href="https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm">Expectation-Maximization</a> (EM) is used to approximate the maximum likelihood estimator (MLE) of the model.</p>
+<p>We use a simulated example to demonstrate the usage.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">X1 &lt;-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">V1 =</span> <span class="kw">rnorm</span>(<span class="dv">4</span>), <span class="dt">V2 =</span> <span class="kw">rnorm</span>(<span class="dv">4</span>))
+X2 &lt;-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">V1 =</span> <span class="kw">rnorm</span>(<span class="dv">6</span>, <span class="dv">3</span>), <span class="dt">V2 =</span> <span class="kw">rnorm</span>(<span class="dv">6</span>, <span class="dv">4</span>))
+data &lt;-<span class="st"> </span><span class="kw">rbind</span>(X1, X2)
+df &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(data)
+gmmModel &lt;-<span class="st"> </span><span class="kw">spark.gaussianMixture</span>(df, ~<span class="st"> </span>V1 +<span class="st"> </span>V2, <span class="dt">k =</span> <span class="dv">2</span>)
+<span class="kw">summary</span>(gmmModel)</code></pre></div>
+<pre><code>## $lambda
+## [1] 0.6025806 0.3974194
+## 
+## $mu
+## $mu[[1]]
+## [1] 1.275377 1.890154
+## 
+## $mu[[2]]
+## [1] 1.925365 2.737117
+## 
+## 
+## $sigma
+## $sigma[[1]]
+##      [,1]     [,2]    
+## [1,] 1.134881 2.704181
+## [2,] 2.704181 6.637527
+## 
+## $sigma[[2]]
+##      [,1]     [,2]    
+## [1,] 4.056748 3.106571
+## [2,] 3.106571 2.419852
+## 
+## 
+## $loglik
+## [1] -26.84406
+## 
+## $posterior
+## SparkDataFrame[posterior:array&lt;double&gt;]
+## 
+## $is.loaded
+## [1] FALSE</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">gmmFitted &lt;-<span class="st"> </span><span class="kw">predict</span>(gmmModel, df)
+<span class="kw">head</span>(<span class="kw">select</span>(gmmFitted, <span class="st">&quot;V1&quot;</span>, <span class="st">&quot;V2&quot;</span>, <span class="st">&quot;prediction&quot;</span>))</code></pre></div>
+<pre><code>##            V1         V2 prediction
+## 1 -0.05211896 -1.1149494          0
+## 2  0.25918082 -0.2754023          0
+## 3 -1.30192531  0.2943053          1
+## 4  0.47806694 -0.5287418          0
+## 5  2.00437151  3.6517953          0
+## 6  2.42427427  3.3637719          1</code></pre>
+</div>
+<div id="k-means-clustering" class="section level4">
+<h4>k-Means Clustering</h4>
+<p><code>spark.kmeans</code> fits a <span class="math inline">\(k\)</span>-means clustering model against a <code>SparkDataFrame</code>. Since <span class="math inline">\(k\)</span>-means is an unsupervised learning method, no response variable is needed; hence, the left-hand side of the R formula should be left blank. The clustering is based only on the variables on the right-hand side.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">kmeansModel &lt;-<span class="st"> </span><span class="kw">spark.kmeans</span>(carsDF, ~<span class="st"> </span>mpg +<span class="st"> </span>hp +<span class="st"> </span>wt, <span class="dt">k =</span> <span class="dv">3</span>)
+<span class="kw">summary</span>(kmeansModel)</code></pre></div>
+<pre><code>## $k
+## [1] 3
+## 
+## $coefficients
+##        mpg        hp     wt
+## 1 23.28947  99.47368 2.6920
+## 2 15.45000 205.75000 4.0195
+## 3 15.00000 335.00000 3.5700
+## 
+## $size
+## $size[[1]]
+## [1] 19
+## 
+## $size[[2]]
+## [1] 12
+## 
+## $size[[3]]
+## [1] 1
+## 
+## 
+## $cluster
+## SparkDataFrame[prediction:int]
+## 
+## $is.loaded
+## [1] FALSE
+## 
+## $clusterSize
+## [1] 3</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">kmeansPredictions &lt;-<span class="st"> </span><span class="kw">predict</span>(kmeansModel, carsDF)
+<span class="kw">head</span>(<span class="kw">select</span>(kmeansPredictions, <span class="st">&quot;model&quot;</span>, <span class="st">&quot;mpg&quot;</span>, <span class="st">&quot;hp&quot;</span>, <span class="st">&quot;wt&quot;</span>, <span class="st">&quot;prediction&quot;</span>), <span class="dt">n =</span> 20L)</code></pre></div>
+<pre><code>##                  model  mpg  hp    wt prediction
+## 1            Mazda RX4 21.0 110 2.620          0
+## 2        Mazda RX4 Wag 21.0 110 2.875          0
+## 3           Datsun 710 22.8  93 2.320          0
+## 4       Hornet 4 Drive 21.4 110 3.215          0
+## 5    Hornet Sportabout 18.7 175 3.440          1
+## 6              Valiant 18.1 105 3.460          0
+## 7           Duster 360 14.3 245 3.570          1
+## 8            Merc 240D 24.4  62 3.190          0
+## 9             Merc 230 22.8  95 3.150          0
+## 10            Merc 280 19.2 123 3.440          0
+## 11           Merc 280C 17.8 123 3.440          0
+## 12          Merc 450SE 16.4 180 4.070          1
+## 13          Merc 450SL 17.3 180 3.730          1
+## 14         Merc 450SLC 15.2 180 3.780          1
+## 15  Cadillac Fleetwood 10.4 205 5.250          1
+## 16 Lincoln Continental 10.4 215 5.424          1
+## 17   Chrysler Imperial 14.7 230 5.345          1
+## 18            Fiat 128 32.4  66 2.200          0
+## 19         Honda Civic 30.4  52 1.615          0
+## 20      Toyota Corolla 33.9  65 1.835          0</code></pre>
+</div>
+<div id="latent-dirichlet-allocation" class="section level4">
+<h4>Latent Dirichlet Allocation</h4>
+<p><code>spark.lda</code> fits a <a href="https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation">Latent Dirichlet Allocation</a> model on a <code>SparkDataFrame</code>. It is often used in topic modeling in which topics are inferred from a collection of text documents. LDA can be thought of as a clustering algorithm as follows:</p>
+<ul>
+<li><p>Topics correspond to cluster centers, and documents correspond to examples (rows) in a dataset.</p></li>
+<li><p>Topics and documents both exist in a feature space, where feature vectors are vectors of word counts (bag of words).</p></li>
+<li><p>Rather than clustering using a traditional distance, LDA uses a function based on a statistical model of how text documents are generated.</p></li>
+</ul>
+<p>To use LDA, we need to specify a <code>features</code> column in <code>data</code> where each entry represents a document. There are two options for the column:</p>
+<ul>
+<li><p>character string: This can be a string of the whole document. It will be parsed automatically. Additional stop words can be added in <code>customizedStopWords</code>.</p></li>
+<li><p>libSVM: Each entry is a collection of words and will be processed directly.</p></li>
+</ul>
+<p>Two more functions are provided for the fitted model.</p>
+<ul>
+<li><p><code>spark.posterior</code> returns a <code>SparkDataFrame</code> containing a column of posterior probability vectors named “topicDistribution”.</p></li>
+<li><p><code>spark.perplexity</code> returns the log perplexity of the given <code>SparkDataFrame</code>, or the log perplexity of the training data if the argument <code>data</code> is missing.</p></li>
+</ul>
+<p>For more information, see the help document <code>?spark.lda</code>.</p>
+<p>Let’s look at an artificial example.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">corpus &lt;-<span class="st"> </span><span class="kw">data.frame</span>(<span class="dt">features =</span> <span class="kw">c</span>(
+  <span class="st">&quot;1 2 6 0 2 3 1 1 0 0 3&quot;</span>,
+  <span class="st">&quot;1 3 0 1 3 0 0 2 0 0 1&quot;</span>,
+  <span class="st">&quot;1 4 1 0 0 4 9 0 1 2 0&quot;</span>,
+  <span class="st">&quot;2 1 0 3 0 0 5 0 2 3 9&quot;</span>,
+  <span class="st">&quot;3 1 1 9 3 0 2 0 0 1 3&quot;</span>,
+  <span class="st">&quot;4 2 0 3 4 5 1 1 1 4 0&quot;</span>,
+  <span class="st">&quot;2 1 0 3 0 0 5 0 2 2 9&quot;</span>,
+  <span class="st">&quot;1 1 1 9 2 1 2 0 0 1 3&quot;</span>,
+  <span class="st">&quot;4 4 0 3 4 2 1 3 0 0 0&quot;</span>,
+  <span class="st">&quot;2 8 2 0 3 0 2 0 2 7 2&quot;</span>,
+  <span class="st">&quot;1 1 1 9 0 2 2 0 0 3 3&quot;</span>,
+  <span class="st">&quot;4 1 0 0 4 5 1 3 0 1 0&quot;</span>))
+corpusDF &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(corpus)
+model &lt;-<span class="st"> </span><span class="kw">spark.lda</span>(<span class="dt">data =</span> corpusDF, <span class="dt">k =</span> <span class="dv">5</span>, <span class="dt">optimizer =</span> <span class="st">&quot;em&quot;</span>)
+<span class="kw">summary</span>(model)</code></pre></div>
+<pre><code>## $docConcentration
+## [1] 11 11 11 11 11
+## 
+## $topicConcentration
+## [1] 1.1
+## 
+## $logLikelihood
+## [1] -353.2948
+## 
+## $logPerplexity
+## [1] 2.676476
+## 
+## $isDistributed
+## [1] TRUE
+## 
+## $vocabSize
+## [1] 10
+## 
+## $topics
+## SparkDataFrame[topic:int, term:array&lt;string&gt;, termWeights:array&lt;double&gt;]
+## 
+## $vocabulary
+##  [1] &quot;0&quot; &quot;1&quot; &quot;2&quot; &quot;3&quot; &quot;4&quot; &quot;9&quot; &quot;5&quot; &quot;8&quot; &quot;7&quot; &quot;6&quot;
+## 
+## $trainingLogLikelihood
+## [1] -239.5629
+## 
+## $logPrior
+## [1] -980.2974</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">posterior &lt;-<span class="st"> </span><span class="kw">spark.posterior</span>(model, corpusDF)
+<span class="kw">head</span>(posterior)</code></pre></div>
+<pre><code>##                features
+## 1 1 2 6 0 2 3 1 1 0 0 3
+## 2 1 3 0 1 3 0 0 2 0 0 1
+## 3 1 4 1 0 0 4 9 0 1 2 0
+## 4 2 1 0 3 0 0 5 0 2 3 9
+## 5 3 1 1 9 3 0 2 0 0 1 3
+## 6 4 2 0 3 4 5 1 1 1 4 0
+##                                       topicDistribution
+## 1 0.1972176, 0.1986634, 0.2022014, 0.2006583, 0.2012593
+## 2 0.1989982, 0.1988728, 0.2015967, 0.2006385, 0.1998938
+## 3 0.2020614, 0.2026068, 0.1968839, 0.1987298, 0.1997182
+## 4 0.2004077, 0.1981925, 0.2013013, 0.2006319, 0.1994665
+## 5 0.1971481, 0.1983945, 0.2023577, 0.2011591, 0.2009406
+## 6 0.2020244, 0.2041802, 0.1955394, 0.1997237, 0.1985322</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">perplexity &lt;-<span class="st"> </span><span class="kw">spark.perplexity</span>(model, corpusDF)
+perplexity</code></pre></div>
+<pre><code>## [1] 2.676476</code></pre>
+</div>
+<div id="alternating-least-squares" class="section level4">
+<h4>Alternating Least Squares</h4>
+<p><code>spark.als</code> learns latent factors in <a href="https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering">collaborative filtering</a> via <a href="https://dl.acm.org/citation.cfm?id=1608614">alternating least squares</a>.</p>
+<p>There are multiple options that can be configured in <code>spark.als</code>, including <code>rank</code>, <code>reg</code>, and <code>nonnegative</code>. For a complete list, refer to the help file.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">ratings &lt;-<span class="st"> </span><span class="kw">list</span>(<span class="kw">list</span>(<span class="dv">0</span>, <span class="dv">0</span>, <span class="fl">4.0</span>), <span class="kw">list</span>(<span class="dv">0</span>, <span class="dv">1</span>, <span class="fl">2.0</span>), <span class="kw">list</span>(<span class="dv">1</span>, <span class="dv">1</span>, <span class="fl">3.0</span>), <span class="kw">list</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="fl">4.0</span>),
+                <span class="kw">list</span>(<span class="dv">2</span>, <span class="dv">1</span>, <span class="fl">1.0</span>), <span class="kw">list</span>(<span class="dv">2</span>, <span class="dv">2</span>, <span class="fl">5.0</span>))
+df &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(ratings, <span class="kw">c</span>(<span class="st">&quot;user&quot;</span>, <span class="st">&quot;item&quot;</span>, <span class="st">&quot;rating&quot;</span>))
+model &lt;-<span class="st"> </span><span class="kw">spark.als</span>(df, <span class="st">&quot;rating&quot;</span>, <span class="st">&quot;user&quot;</span>, <span class="st">&quot;item&quot;</span>, <span class="dt">rank =</span> <span class="dv">10</span>, <span class="dt">reg =</span> <span class="fl">0.1</span>, <span class="dt">nonnegative =</span> <span class="ot">TRUE</span>)</code></pre></div>
+<p>Extract latent factors.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">stats &lt;-<span class="st"> </span><span class="kw">summary</span>(model)
+userFactors &lt;-<span class="st"> </span>stats$userFactors
+itemFactors &lt;-<span class="st"> </span>stats$itemFactors
+<span class="kw">head</span>(userFactors)
+<span class="kw">head</span>(itemFactors)</code></pre></div>
+<p>Make predictions.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">predicted &lt;-<span class="st"> </span><span class="kw">predict</span>(model, df)
+<span class="kw">head</span>(predicted)</code></pre></div>
+</div>
+<div id="fp-growth" class="section level4">
+<h4>FP-growth</h4>
+<p><code>spark.fpGrowth</code> executes the FP-growth algorithm to mine frequent itemsets on a <code>SparkDataFrame</code>. The <code>itemsCol</code> column should contain an array of values.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df &lt;-<span class="st"> </span><span class="kw">selectExpr</span>(<span class="kw">createDataFrame</span>(<span class="kw">data.frame</span>(<span class="dt">rawItems =</span> <span class="kw">c</span>(
+  <span class="st">&quot;T,R,U&quot;</span>, <span class="st">&quot;T,S&quot;</span>, <span class="st">&quot;V,R&quot;</span>, <span class="st">&quot;R,U,T,V&quot;</span>, <span class="st">&quot;R,S&quot;</span>, <span class="st">&quot;V,S,U&quot;</span>, <span class="st">&quot;U,R&quot;</span>, <span class="st">&quot;S,T&quot;</span>, <span class="st">&quot;V,R&quot;</span>, <span class="st">&quot;V,U,S&quot;</span>,
+  <span class="st">&quot;T,V,U&quot;</span>, <span class="st">&quot;R,V&quot;</span>, <span class="st">&quot;T,S&quot;</span>, <span class="st">&quot;T,S&quot;</span>, <span class="st">&quot;S,T&quot;</span>, <span class="st">&quot;S,U&quot;</span>, <span class="st">&quot;T,R&quot;</span>, <span class="st">&quot;V,R&quot;</span>, <span class="st">&quot;S,V&quot;</span>, <span class="st">&quot;T,S,U&quot;</span>
+))), <span class="st">&quot;split(rawItems, ',') AS items&quot;</span>)
+
+fpm &lt;-<span class="st"> </span><span class="kw">spark.fpGrowth</span>(df, <span class="dt">minSupport =</span> <span class="fl">0.2</span>, <span class="dt">minConfidence =</span> <span class="fl">0.5</span>)</code></pre></div>
+<p>The <code>spark.freqItemsets</code> method can be used to retrieve a <code>SparkDataFrame</code> with the frequent itemsets.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">spark.freqItemsets</span>(fpm))</code></pre></div>
+<pre><code>##   items freq
+## 1     R    9
+## 2     U    8
+## 3  U, T    4
+## 4  U, V    4
+## 5  U, S    4
+## 6     T   10</code></pre>
+<p><code>spark.associationRules</code> returns a <code>SparkDataFrame</code> with the association rules.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">spark.associationRules</span>(fpm))</code></pre></div>
+<pre><code>##   antecedent consequent confidence     lift
+## 1          V          R  0.5555556 1.234568
+## 2          S          T  0.5454545 1.090909
+## 3          T          S  0.6000000 1.090909
+## 4          R          V  0.5555556 1.234568
+## 5          U          T  0.5000000 1.000000
+## 6          U          V  0.5000000 1.111111</code></pre>
+<p>We can make predictions based on the <code>antecedent</code>.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">head</span>(<span class="kw">predict</span>(fpm, df))</code></pre></div>
+<pre><code>##        items prediction
+## 1    T, R, U       S, V
+## 2       T, S       NULL
+## 3       V, R       NULL
+## 4 R, U, T, V          S
+## 5       R, S       T, V
+## 6    V, S, U       R, T</code></pre>
+</div>
+<div id="kolmogorov-smirnov-test" class="section level4">
+<h4>Kolmogorov-Smirnov Test</h4>
+<p><code>spark.kstest</code> runs a two-sided, one-sample <a href="https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test">Kolmogorov-Smirnov (KS) test</a>. Given a <code>SparkDataFrame</code>, the test compares continuous data in a given column <code>testCol</code> with the theoretical distribution specified by parameter <code>nullHypothesis</code>. Users can call <code>summary</code> to get a summary of the test results.</p>
+<p>In the following example, we test whether the <code>Titanic</code> dataset’s <code>Freq</code> column follows a normal distribution. We set the parameters of the normal distribution using the mean and standard deviation of the sample.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+df &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+freqStats &lt;-<span class="st"> </span><span class="kw">head</span>(<span class="kw">select</span>(df, <span class="kw">mean</span>(df$Freq), <span class="kw">sd</span>(df$Freq)))
+freqMean &lt;-<span class="st"> </span>freqStats[<span class="dv">1</span>]
+freqStd &lt;-<span class="st"> </span>freqStats[<span class="dv">2</span>]
+
+test &lt;-<span class="st"> </span><span class="kw">spark.kstest</span>(df, <span class="st">&quot;Freq&quot;</span>, <span class="st">&quot;norm&quot;</span>, <span class="kw">c</span>(freqMean, freqStd))
+testSummary &lt;-<span class="st"> </span><span class="kw">summary</span>(test)
+testSummary</code></pre></div>
+<pre><code>## Kolmogorov-Smirnov test summary:
+## degrees of freedom = 0 
+## statistic = 0.3065126710255011 
+## pValue = 0.0036336792155329256 
+## Very strong presumption against null hypothesis: Sample follows theoretical distribution.</code></pre>
+</div>
+</div>
+<div id="model-persistence" class="section level3">
+<h3>Model Persistence</h3>
+<p>The following example shows how to save/load an ML model in SparkR.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t &lt;-<span class="st"> </span><span class="kw">as.data.frame</span>(Titanic)
+training &lt;-<span class="st"> </span><span class="kw">createDataFrame</span>(t)
+gaussianGLM &lt;-<span class="st"> </span><span class="kw">spark.glm</span>(training, Freq ~<span class="st"> </span>Sex +<span class="st"> </span>Age, <span class="dt">family =</span> <span class="st">&quot;gaussian&quot;</span>)
+
+<span class="co"># Save and then load a fitted MLlib model</span>
+modelPath &lt;-<span class="st"> </span><span class="kw">tempfile</span>(<span class="dt">pattern =</span> <span class="st">&quot;ml&quot;</span>, <span class="dt">fileext =</span> <span class="st">&quot;.tmp&quot;</span>)
+<span class="kw">write.ml</span>(gaussianGLM, modelPath)
+gaussianGLM2 &lt;-<span class="st"> </span><span class="kw">read.ml</span>(modelPath)
+
+<span class="co"># Check model summary</span>
+<span class="kw">summary</span>(gaussianGLM2)</code></pre></div>
+<pre><code>## 
+## Saved-loaded model does not support output 'Deviance Residuals'.
+## 
+## Coefficients:
+##              Estimate  Std. Error   t value   Pr(&gt;|t|)
+## (Intercept)   -32.594      35.994  -0.90553  0.3726477
+## Sex_Male       78.812      41.562   1.89624  0.0679311
+## Age_Adult     123.938      41.562   2.98196  0.0057522
+## 
+## (Dispersion parameter for gaussian family taken to be 13819.52)
+## 
+##     Null deviance: 573341  on 31  degrees of freedom
+## Residual deviance: 400766  on 29  degrees of freedom
+## AIC: 400.7
+## 
+## Number of Fisher Scoring iterations: 1</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Check model prediction</span>
+gaussianPredictions &lt;-<span class="st"> </span><span class="kw">predict</span>(gaussianGLM2, training)
+<span class="kw">head</span>(gaussianPredictions)</code></pre></div>
+<pre><code>##   Class    Sex   Age Survived Freq label prediction
+## 1   1st   Male Child       No    0     0   46.21875
+## 2   2nd   Male Child       No    0     0   46.21875
+## 3   3rd   Male Child       No   35    35   46.21875
+## 4  Crew   Male Child       No    0     0   46.21875
+## 5   1st Female Child       No    0     0  -32.59375
+## 6   2nd Female Child       No    0     0  -32.59375</code></pre>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">unlink</span>(modelPath)</code></pre></div>
+</div>
+</div>
+<div id="structured-streaming" class="section level2">
+<h2>Structured Streaming</h2>
+<p>SparkR supports the Structured Streaming API.</p>
+<p>You can check the Structured Streaming Programming Guide for <a href="https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#programming-model">an introduction</a> to its programming model and basic concepts.</p>
+<div id="simple-source-and-sink" class="section level3">
+<h3>Simple Source and Sink</h3>
+<p>Spark has a few built-in input sources. As an example, the following tests a socket source by reading text lines, splitting them into words, and displaying the computed word counts:</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Create DataFrame representing the stream of input lines from connection</span>
+lines &lt;-<span class="st"> </span><span class="kw">read.stream</span>(<span class="st">&quot;socket&quot;</span>, <span class="dt">host =</span> hostname, <span class="dt">port =</span> port)
+
+<span class="co"># Split the lines into words</span>
+words &lt;-<span class="st"> </span><span class="kw">selectExpr</span>(lines, <span class="st">&quot;explode(split(value, ' ')) as word&quot;</span>)
+
+<span class="co"># Generate running word count</span>
+wordCounts &lt;-<span class="st"> </span><span class="kw">count</span>(<span class="kw">groupBy</span>(words, <span class="st">&quot;word&quot;</span>))
+
+<span class="co"># Start running the query that prints the running counts to the console</span>
+query &lt;-<span class="st"> </span><span class="kw">write.stream</span>(wordCounts, <span class="st">&quot;console&quot;</span>, <span class="dt">outputMode =</span> <span class="st">&quot;complete&quot;</span>)</code></pre></div>
+</div>
+<div id="kafka-source" class="section level3">
+<h3>Kafka Source</h3>
+<p>It is simple to read data from Kafka. For more information, see <a href="https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#input-sources">Input Sources</a> supported by Structured Streaming.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">topic &lt;-<span class="st"> </span><span class="kw">read.stream</span>(<span class="st">&quot;kafka&quot;</span>,
+                     <span class="dt">kafka.bootstrap.servers =</span> <span class="st">&quot;host1:port1,host2:port2&quot;</span>,
+                     <span class="dt">subscribe =</span> <span class="st">&quot;topic1&quot;</span>)
+keyvalue &lt;-<span class="st"> </span><span class="kw">selectExpr</span>(topic, <span class="st">&quot;CAST(key AS STRING)&quot;</span>, <span class="st">&quot;CAST(value AS STRING)&quot;</span>)</code></pre></div>
+</div>
+<div id="operations-and-sinks" class="section level3">
+<h3>Operations and Sinks</h3>
+<p>Most of the common operations on a <code>SparkDataFrame</code> are supported for streaming, including selection, projection, and aggregation. Once you have defined the final result, you start the streaming computation by calling the <code>write.stream</code> method, setting a sink and an <code>outputMode</code>.</p>
+<p>A streaming <code>SparkDataFrame</code> can be written to the console for debugging, to a temporary in-memory table, or, for further processing in a fault-tolerant manner, to a file sink in different formats.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">noAggDF &lt;-<span class="st"> </span><span class="kw">select</span>(<span class="kw">where</span>(deviceDataStreamingDf, <span class="st">&quot;signal &gt; 10&quot;</span>), <span class="st">&quot;device&quot;</span>)
+
+<span class="co"># Print new data to console</span>
+<span class="kw">write.stream</span>(noAggDF, <span class="st">&quot;console&quot;</span>)
+
+<span class="co"># Write new data to Parquet files</span>
+<span class="kw">write.stream</span>(noAggDF,
+             <span class="st">&quot;parquet&quot;</span>,
+             <span class="dt">path =</span> <span class="st">&quot;path/to/destination/dir&quot;</span>,
+             <span class="dt">checkpointLocation =</span> <span class="st">&quot;path/to/checkpoint/dir&quot;</span>)
+
+<span class="co"># Aggregate</span>
+aggDF &lt;-<span class="st"> </span><span class="kw">count</span>(<span class="kw">groupBy</span>(noAggDF, <span class="st">&quot;device&quot;</span>))
+
+<span class="co"># Print updated aggregations to console</span>
+<span class="kw">write.stream</span>(aggDF, <span class="st">&quot;console&quot;</span>, <span class="dt">outputMode =</span> <span class="st">&quot;complete&quot;</span>)
+
+<span class="co"># Have all the aggregates in an in memory table. The query name will be the table name</span>
+<span class="kw">write.stream</span>(aggDF, <span class="st">&quot;memory&quot;</span>, <span class="dt">queryName =</span> <span class="st">&quot;aggregates&quot;</span>, <span class="dt">outputMode =</span> <span class="st">&quot;complete&quot;</span>)
+
+<span class="kw">head</span>(<span class="kw">sql</span>(<span class="st">&quot;select * from aggregates&quot;</span>))</code></pre></div>
+</div>
+</div>
+<div id="advanced-topics" class="section level2">
+<h2>Advanced Topics</h2>
+<div id="sparkr-object-classes" class="section level3">
+<h3>SparkR Object Classes</h3>
+<p>There are three main object classes in SparkR you may be working with; a short sketch illustrating them follows this list.</p>
+<ul>
+<li><code>SparkDataFrame</code>: the central component of SparkR. It is an S4 class representing a distributed collection of data organized into named columns, which is conceptually equivalent to a table in a relational database or a data frame in R. It has two slots, <code>sdf</code> and <code>env</code>.
+<ul>
+<li><code>sdf</code> stores a reference to the corresponding Spark Dataset in the Spark JVM backend.</li>
+<li><code>env</code> saves the meta-information of the object such as <code>isCached</code>.</li>
+</ul>
+<p>It can be created by data import methods or by transforming an existing <code>SparkDataFrame</code>. We can manipulate a <code>SparkDataFrame</code> with numerous data processing functions and feed it into machine learning algorithms.</p></li>
+<li><p><code>Column</code>: an S4 class representing a column of a <code>SparkDataFrame</code>. The slot <code>jc</code> saves a reference to the corresponding <code>Column</code> object in the Spark JVM backend.</p>
+<p>It can be obtained from a <code>SparkDataFrame</code> with the <code>$</code> operator, e.g., <code>df$col</code>. More often, it is used together with other functions: for example, with <code>select</code> to select particular columns, with <code>filter</code> and constructed conditions to select rows, or with aggregation functions to compute aggregate statistics for each group.</p></li>
+<li><p><code>GroupedData</code>: an S4 class representing grouped data created by <code>groupBy</code> or by transforming other <code>GroupedData</code>. Its <code>sgd</code> slot saves a reference to a <code>RelationalGroupedDataset</code> object in the backend.</p>
+<p>This is often an intermediate object that carries group information and is followed by aggregation operations.</p></li>
+</ul>
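+<p>As a minimal sketch (assuming an active SparkR session and using the built-in <code>faithful</code> dataset purely for illustration), the following shows how these classes appear in practice.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df &lt;- createDataFrame(faithful)       # a SparkDataFrame (S4 class with slots sdf and env)
+class(df)                             # &quot;SparkDataFrame&quot;
+
+col &lt;- df$waiting                     # a Column (S4 class with slot jc)
+class(col)                            # &quot;Column&quot;
+
+gd &lt;- groupBy(df, &quot;waiting&quot;)          # a GroupedData (S4 class with slot sgd)
+class(gd)                             # &quot;GroupedData&quot;
+
+# Aggregating GroupedData yields a SparkDataFrame again
+head(agg(gd, count = n(df$waiting)))</code></pre></div>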
+</div>
+<div id="architecture" class="section level3">
+<h3>Architecture</h3>
+<p>A complete description of the architecture can be found in the references, in particular the paper <em>SparkR: Scaling R Programs with Spark</em>.</p>
+<p>Under the hood of SparkR is the Spark SQL engine. This avoids the overhead of running interpreted R code, and the optimized SQL execution engine in Spark uses structural information about the data and the computation flow to perform a number of optimizations that speed up the computation.</p>
+<p>The main method calls of the actual computation happen in the Spark JVM of the driver. We have a socket-based SparkR API that allows us to invoke functions on the JVM from R. We use a SparkR JVM backend that listens on a Netty-based socket server.</p>
+<p>Two kinds of RPCs are supported in the SparkR JVM backend: method invocation and creating new objects. Method invocation can be done in two ways.</p>
+<ul>
+<li><p><code>sparkR.callJMethod</code> takes a reference to an existing Java object and a list of arguments to be passed on to the method.</p></li>
+<li><p><code>sparkR.callJStatic</code> takes a class name for a static method and a list of arguments to be passed on to the method.</p></li>
+</ul>
+<p>The arguments are serialized using our custom wire format and deserialized on the JVM side. We then use Java reflection to invoke the appropriate method.</p>
+<p>To create objects, <code>sparkR.newJObject</code> is used; the appropriate constructor is then similarly invoked with the provided arguments.</p>
+<p>Finally, we use a new R class <code>jobj</code> that refers to a Java object existing in the backend. These references are tracked on the Java side and are automatically garbage collected when they go out of scope on the R side.</p>
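+<p>For illustration, these backend RPCs are exposed to R users as <code>sparkR.callJMethod</code>, <code>sparkR.callJStatic</code>, and <code>sparkR.newJObject</code>. The following is a minimal sketch (assuming an active SparkR session) that exercises only the RPC mechanism; the created Java object is returned to R as a <code>jobj</code> reference.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># Invoke a static method on a JVM class through the backend
+sparkR.callJStatic(&quot;java.lang.Math&quot;, &quot;random&quot;)
+
+# Create a Java object in the backend; the result is a jobj reference
+rng &lt;- sparkR.newJObject(&quot;java.util.Random&quot;)
+
+# Invoke a method on the referenced Java object
+sparkR.callJMethod(rng, &quot;nextInt&quot;, 10L)</code></pre></div>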
+</div>
+</div>
+<div id="appendix" class="section level2">
+<h2>Appendix</h2>
+<div id="DataTypes" class="section level3">
+<h3>R and Spark Data Types</h3>
+<table>
+<thead>
+<tr class="header">
+<th align="left">R</th>
+<th align="left">Spark</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td align="left">byte</td>
+<td align="left">byte</td>
+</tr>
+<tr class="even">
+<td align="left">integer</td>
+<td align="left">integer</td>
+</tr>
+<tr class="odd">
+<td align="left">float</td>
+<td align="left">float</td>
+</tr>
+<tr class="even">
+<td align="left">double</td>
+<td align="left">double</td>
+</tr>
+<tr class="odd">
+<td align="left">numeric</td>
+<td align="left">double</td>
+</tr>
+<tr class="even">
+<td align="left">character</td>
+<td align="left">string</td>
+</tr>
+<tr class="odd">
+<td align="left">string</td>
+<td align="left">string</td>
+</tr>
+<tr class="even">
+<td align="left">binary</td>
+<td align="left">binary</td>
+</tr>
+<tr class="odd">
+<td align="left">raw</td>
+<td align="left">binary</td>
+</tr>
+<tr class="even">
+<td align="left">logical</td>
+<td align="left">boolean</td>
+</tr>
+<tr class="odd">
+<td align="left">POSIXct</td>
+<td align="left">timestamp</td>
+</tr>
+<tr class="even">
+<td align="left">POSIXlt</td>
+<td align="left">timestamp</td>
+</tr>
+<tr class="odd">
+<td align="left">Date</td>
+<td align="left">date</td>
+</tr>
+<tr class="even">
+<td align="left">array</td>
+<td align="left">array</td>
+</tr>
+<tr class="odd">
+<td align="left">list</td>
+<td align="left">array</td>
+</tr>
+<tr class="even">
+<td align="left">env</td>
+<td align="left">map</td>
+</tr>
+</tbody>
+</table>
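+<p>As a quick check of this mapping (a minimal sketch, assuming an active SparkR session), <code>dtypes</code> reports the Spark-side type of each column of a <code>SparkDataFrame</code> created from local R data.</p>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">localDF &lt;- data.frame(i = 1L, d = 2.5, s = &quot;a&quot;, b = TRUE,
+                      ts = Sys.time(), dt = Sys.Date(),
+                      stringsAsFactors = FALSE)
+df &lt;- createDataFrame(localDF)
+dtypes(df)   # e.g. integer -&gt; &quot;int&quot;, numeric -&gt; &quot;double&quot;, character -&gt; &quot;string&quot;,
+             #      logical -&gt; &quot;boolean&quot;, POSIXct -&gt; &quot;timestamp&quot;, Date -&gt; &quot;date&quot;</code></pre></div>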
+</div>
+</div>
+<div id="references" class="section level2">
+<h2>References</h2>
+<ul>
+<li><p><a href="https://spark.apache.org/docs/latest/cluster-overview.html">Spark Cluster Mode Overview</a></p></li>
+<li><p><a href="https://spark.apache.org/docs/latest/submitting-applications.html">Submitting Spark Applications</a></p></li>
+<li><p><a href="https://spark.apache.org/docs/latest/ml-guide.html">Machine Learning Library Guide (MLlib)</a></p></li>
+<li><p><a href="https://people.csail.mit.edu/matei/papers/2016/sigmod_sparkr.pdf">SparkR: Scaling R Programs with Spark</a>, Shivaram Venkataraman, Zongheng Yang, Davies Liu, Eric Liang, Hossein Falaki, Xiangrui Meng, Reynold Xin, Ali Ghodsi, Michael Franklin, Ion Stoica, and Matei Zaharia. SIGMOD 2016. June 2016.</p></li>
+</ul>
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+  (function () {
+    var script = document.createElement("script");
+    script.type = "text/javascript";
+    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+    document.getElementsByTagName("head")[0].appendChild(script);
+  })();
+</script>
+
+</body>
+</html>
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/AnIndex b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/AnIndex
new file mode 100644
index 0000000000000000000000000000000000000000..8e7daf86fe48de19c7aff039a2b447bff645e96c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/AnIndex
@@ -0,0 +1,943 @@
+!,Column-method	not
+$	select
+$,SparkDataFrame-method	select
+$<-	select
+$<-,SparkDataFrame-method	select
+%<=>%	eq_null_safe
+%<=>%,Column-method	eq_null_safe
+%in%	match
+%in%,Column-method	match
+abs	column_math_functions
+abs,Column-method	column_math_functions
+acos	column_math_functions
+acos,Column-method	column_math_functions
+add_months	column_datetime_diff_functions
+add_months,Column,numeric-method	column_datetime_diff_functions
+AFTSurvivalRegressionModel-class	AFTSurvivalRegressionModel-class
+agg	summarize
+agg,GroupedData-method	summarize
+agg,SparkDataFrame-method	summarize
+alias	alias
+alias,Column-method	alias
+alias,SparkDataFrame-method	alias
+ALSModel-class	ALSModel-class
+approxCountDistinct	column_aggregate_functions
+approxCountDistinct,Column-method	column_aggregate_functions
+approxQuantile	approxQuantile
+approxQuantile,SparkDataFrame,character,numeric,numeric-method	approxQuantile
+arrange	arrange
+arrange,SparkDataFrame,character-method	arrange
+arrange,SparkDataFrame,Column-method	arrange
+arrays_overlap	column_collection_functions
+arrays_overlap,Column,Column-method	column_collection_functions
+arrays_overlap,Column-method	column_collection_functions
+arrays_zip	column_collection_functions
+arrays_zip,Column-method	column_collection_functions
+array_contains	column_collection_functions
+array_contains,Column-method	column_collection_functions
+array_distinct	column_collection_functions
+array_distinct,Column-method	column_collection_functions
+array_except	column_collection_functions
+array_except,Column,Column-method	column_collection_functions
+array_except,Column-method	column_collection_functions
+array_intersect	column_collection_functions
+array_intersect,Column,Column-method	column_collection_functions
+array_intersect,Column-method	column_collection_functions
+array_join	column_collection_functions
+array_join,Column,character-method	column_collection_functions
+array_join,Column-method	column_collection_functions
+array_max	column_collection_functions
+array_max,Column-method	column_collection_functions
+array_min	column_collection_functions
+array_min,Column-method	column_collection_functions
+array_position	column_collection_functions
+array_position,Column-method	column_collection_functions
+array_remove	column_collection_functions
+array_remove,Column-method	column_collection_functions
+array_repeat	column_collection_functions
+array_repeat,Column,numericOrColumn-method	column_collection_functions
+array_sort	column_collection_functions
+array_sort,Column-method	column_collection_functions
+array_union	column_collection_functions
+array_union,Column,Column-method	column_collection_functions
+array_union,Column-method	column_collection_functions
+as.data.frame	as.data.frame
+as.data.frame,SparkDataFrame-method	as.data.frame
+as.DataFrame	createDataFrame
+as.DataFrame.default	createDataFrame
+asc	columnfunctions
+ascii	column_string_functions
+ascii,Column-method	column_string_functions
+asin	column_math_functions
+asin,Column-method	column_math_functions
+associationRules,FPGrowthModel-method	spark.fpGrowth
+atan	column_math_functions
+atan,Column-method	column_math_functions
+atan2	column_math_functions
+atan2,Column-method	column_math_functions
+attach	attach
+attach,SparkDataFrame-method	attach
+avg	avg
+avg,Column-method	avg
+awaitTermination	awaitTermination
+awaitTermination,StreamingQuery-method	awaitTermination
+base64	column_string_functions
+base64,Column-method	column_string_functions
+between	between
+between,Column-method	between
+bin	column_math_functions
+bin,Column-method	column_math_functions
+BisectingKMeansModel-class	BisectingKMeansModel-class
+bitwiseNOT	column_nonaggregate_functions
+bitwiseNOT,Column-method	column_nonaggregate_functions
+broadcast	broadcast
+broadcast,SparkDataFrame-method	broadcast
+bround	column_math_functions
+bround,Column-method	column_math_functions
+cache	cache
+cache,SparkDataFrame-method	cache
+cacheTable	cacheTable
+cacheTable.default	cacheTable
+cancelJobGroup	cancelJobGroup
+cancelJobGroup.default	cancelJobGroup
+cast	cast
+cast,Column-method	cast
+cbrt	column_math_functions
+cbrt,Column-method	column_math_functions
+ceil	column_math_functions
+ceil,Column-method	column_math_functions
+ceiling	column_math_functions
+ceiling,Column-method	column_math_functions
+checkpoint	checkpoint
+checkpoint,SparkDataFrame-method	checkpoint
+clearCache	clearCache
+clearCache.default	clearCache
+clearJobGroup	clearJobGroup
+clearJobGroup.default	clearJobGroup
+coalesce	coalesce
+coalesce,Column-method	column_nonaggregate_functions
+coalesce,SparkDataFrame-method	coalesce
+collect	collect
+collect,SparkDataFrame-method	collect
+collect_list	column_aggregate_functions
+collect_list,Column-method	column_aggregate_functions
+collect_set	column_aggregate_functions
+collect_set,Column-method	column_aggregate_functions
+colnames	columns
+colnames,SparkDataFrame-method	columns
+colnames<-	columns
+colnames<-,SparkDataFrame-method	columns
+coltypes	coltypes
+coltypes,SparkDataFrame-method	coltypes
+coltypes<-	coltypes
+coltypes<-,SparkDataFrame,character-method	coltypes
+column	column
+column,character-method	column
+column,jobj-method	column
+Column-class	column
+columnfunctions	columnfunctions
+columns	columns
+columns,SparkDataFrame-method	columns
+column_aggregate_functions	column_aggregate_functions
+column_collection_functions	column_collection_functions
+column_datetime_diff_functions	column_datetime_diff_functions
+column_datetime_functions	column_datetime_functions
+column_math_functions	column_math_functions
+column_misc_functions	column_misc_functions
+column_nonaggregate_functions	column_nonaggregate_functions
+column_string_functions	column_string_functions
+column_window_functions	column_window_functions
+concat	column_collection_functions
+concat,Column-method	column_collection_functions
+concat_ws	column_string_functions
+concat_ws,character,Column-method	column_string_functions
+contains	columnfunctions
+conv	column_math_functions
+conv,Column,numeric,numeric-method	column_math_functions
+corr	corr
+corr,Column-method	corr
+corr,SparkDataFrame-method	corr
+cos	column_math_functions
+cos,Column-method	column_math_functions
+cosh	column_math_functions
+cosh,Column-method	column_math_functions
+count	count
+count,Column-method	count
+count,GroupedData-method	count
+count,SparkDataFrame-method	nrow
+countDistinct	column_aggregate_functions
+countDistinct,Column-method	column_aggregate_functions
+cov	cov
+cov,characterOrColumn-method	cov
+cov,SparkDataFrame-method	cov
+covar_pop	cov
+covar_pop,characterOrColumn,characterOrColumn-method	cov
+covar_samp	cov
+covar_samp,characterOrColumn,characterOrColumn-method	cov
+crc32	column_misc_functions
+crc32,Column-method	column_misc_functions
+createDataFrame	createDataFrame
+createDataFrame.default	createDataFrame
+createExternalTable	createExternalTable-deprecated
+createExternalTable.default	createExternalTable-deprecated
+createOrReplaceTempView	createOrReplaceTempView
+createOrReplaceTempView,SparkDataFrame,character-method	createOrReplaceTempView
+createTable	createTable
+create_array	column_nonaggregate_functions
+create_array,Column-method	column_nonaggregate_functions
+create_map	column_nonaggregate_functions
+create_map,Column-method	column_nonaggregate_functions
+crossJoin	crossJoin
+crossJoin,SparkDataFrame,SparkDataFrame-method	crossJoin
+crosstab	crosstab
+crosstab,SparkDataFrame,character,character-method	crosstab
+cube	cube
+cube,SparkDataFrame-method	cube
+cume_dist	column_window_functions
+cume_dist,missing-method	column_window_functions
+currentDatabase	currentDatabase
+current_date	column_datetime_functions
+current_date,missing-method	column_datetime_functions
+current_timestamp	column_datetime_functions
+current_timestamp,missing-method	column_datetime_functions
+dapply	dapply
+dapply,SparkDataFrame,function,characterOrstructType-method	dapply
+dapplyCollect	dapplyCollect
+dapplyCollect,SparkDataFrame,function-method	dapplyCollect
+datediff	column_datetime_diff_functions
+datediff,Column-method	column_datetime_diff_functions
+date_add	column_datetime_diff_functions
+date_add,Column,numeric-method	column_datetime_diff_functions
+date_format	column_datetime_diff_functions
+date_format,Column,character-method	column_datetime_diff_functions
+date_sub	column_datetime_diff_functions
+date_sub,Column,numeric-method	column_datetime_diff_functions
+date_trunc	column_datetime_functions
+date_trunc,character,Column-method	column_datetime_functions
+dayofmonth	column_datetime_functions
+dayofmonth,Column-method	column_datetime_functions
+dayofweek	column_datetime_functions
+dayofweek,Column-method	column_datetime_functions
+dayofyear	column_datetime_functions
+dayofyear,Column-method	column_datetime_functions
+DecisionTreeClassificationModel-class	DecisionTreeClassificationModel-class
+DecisionTreeRegressionModel-class	DecisionTreeRegressionModel-class
+decode	column_string_functions
+decode,Column,character-method	column_string_functions
+dense_rank	column_window_functions
+dense_rank,missing-method	column_window_functions
+desc	columnfunctions
+describe	describe
+describe,SparkDataFrame,ANY-method	describe
+describe,SparkDataFrame,character-method	describe
+describe,SparkDataFrame-method	describe
+dim	dim
+dim,SparkDataFrame-method	dim
+distinct	distinct
+distinct,SparkDataFrame-method	distinct
+drop	drop
+drop,ANY-method	drop
+drop,SparkDataFrame-method	drop
+dropDuplicates	dropDuplicates
+dropDuplicates,SparkDataFrame-method	dropDuplicates
+dropna	nafunctions
+dropna,SparkDataFrame-method	nafunctions
+dropTempTable	dropTempTable-deprecated
+dropTempTable.default	dropTempTable-deprecated
+dropTempView	dropTempView
+dtypes	dtypes
+dtypes,SparkDataFrame-method	dtypes
+element_at	column_collection_functions
+element_at,Column-method	column_collection_functions
+encode	column_string_functions
+encode,Column,character-method	column_string_functions
+endsWith	endsWith
+endsWith,Column-method	endsWith
+except	except
+except,SparkDataFrame,SparkDataFrame-method	except
+exceptAll	exceptAll
+exceptAll,SparkDataFrame,SparkDataFrame-method	exceptAll
+exp	column_math_functions
+exp,Column-method	column_math_functions
+explain	explain
+explain,SparkDataFrame-method	explain
+explain,StreamingQuery-method	explain
+explode	column_collection_functions
+explode,Column-method	column_collection_functions
+explode_outer	column_collection_functions
+explode_outer,Column-method	column_collection_functions
+expm1	column_math_functions
+expm1,Column-method	column_math_functions
+expr	column_nonaggregate_functions
+expr,character-method	column_nonaggregate_functions
+factorial	column_math_functions
+factorial,Column-method	column_math_functions
+fillna	nafunctions
+fillna,SparkDataFrame-method	nafunctions
+filter	filter
+filter,SparkDataFrame,characterOrColumn-method	filter
+first	first
+first,characterOrColumn-method	first
+first,SparkDataFrame-method	first
+fitted	fitted
+fitted,BisectingKMeansModel-method	spark.bisectingKmeans
+fitted,KMeansModel-method	fitted
+flatten	column_collection_functions
+flatten,Column-method	column_collection_functions
+floor	column_math_functions
+floor,Column-method	column_math_functions
+format_number	column_string_functions
+format_number,Column,numeric-method	column_string_functions
+format_string	column_string_functions
+format_string,character,Column-method	column_string_functions
+FPGrowthModel-class	FPGrowthModel-class
+freqItems	freqItems
+freqItems,SparkDataFrame,character-method	freqItems
+freqItemsets,FPGrowthModel-method	spark.fpGrowth
+from_json	column_collection_functions
+from_json,Column,characterOrstructType-method	column_collection_functions
+from_unixtime	column_datetime_functions
+from_unixtime,Column-method	column_datetime_functions
+from_utc_timestamp	column_datetime_diff_functions
+from_utc_timestamp,Column,character-method	column_datetime_diff_functions
+gapply	gapply
+gapply,GroupedData-method	gapply
+gapply,SparkDataFrame-method	gapply
+gapplyCollect	gapplyCollect
+gapplyCollect,GroupedData-method	gapplyCollect
+gapplyCollect,SparkDataFrame-method	gapplyCollect
+GaussianMixtureModel-class	GaussianMixtureModel-class
+GBTClassificationModel-class	GBTClassificationModel-class
+GBTRegressionModel-class	GBTRegressionModel-class
+GeneralizedLinearRegressionModel-class	GeneralizedLinearRegressionModel-class
+getField	columnfunctions
+getItem	columnfunctions
+getLocalProperty	getLocalProperty
+getNumPartitions	getNumPartitions
+getNumPartitions,SparkDataFrame-method	getNumPartitions
+glm	glm
+glm,formula,ANY,SparkDataFrame-method	glm
+greatest	column_nonaggregate_functions
+greatest,Column-method	column_nonaggregate_functions
+groupBy	groupBy
+groupBy,SparkDataFrame-method	groupBy
+groupedData	GroupedData
+GroupedData-class	GroupedData
+grouping_bit	column_aggregate_functions
+grouping_bit,Column-method	column_aggregate_functions
+grouping_id	column_aggregate_functions
+grouping_id,Column-method	column_aggregate_functions
+group_by	groupBy
+group_by,SparkDataFrame-method	groupBy
+hash	column_misc_functions
+hash,Column-method	column_misc_functions
+hashCode	hashCode
+head	head
+head,SparkDataFrame-method	head
+hex	column_math_functions
+hex,Column-method	column_math_functions
+hint	hint
+hint,SparkDataFrame,character-method	hint
+histogram	histogram
+histogram,SparkDataFrame,characterOrColumn-method	histogram
+hour	column_datetime_functions
+hour,Column-method	column_datetime_functions
+hypot	column_math_functions
+hypot,Column-method	column_math_functions
+ifelse	column_nonaggregate_functions
+ifelse,Column-method	column_nonaggregate_functions
+initcap	column_string_functions
+initcap,Column-method	column_string_functions
+input_file_name	column_nonaggregate_functions
+input_file_name,missing-method	column_nonaggregate_functions
+insertInto	insertInto
+insertInto,SparkDataFrame,character-method	insertInto
+install.spark	install.spark
+instr	column_string_functions
+instr,Column,character-method	column_string_functions
+intersect	intersect
+intersect,SparkDataFrame,SparkDataFrame-method	intersect
+intersectAll	intersectAll
+intersectAll,SparkDataFrame,SparkDataFrame-method	intersectAll
+is.nan	column_nonaggregate_functions
+is.nan,Column-method	column_nonaggregate_functions
+isActive	isActive
+isActive,StreamingQuery-method	isActive
+isLocal	isLocal
+isLocal,SparkDataFrame-method	isLocal
+isNaN	columnfunctions
+isnan	column_nonaggregate_functions
+isnan,Column-method	column_nonaggregate_functions
+isNotNull	columnfunctions
+isNull	columnfunctions
+IsotonicRegressionModel-class	IsotonicRegressionModel-class
+isStreaming	isStreaming
+isStreaming,SparkDataFrame-method	isStreaming
+join	join
+join,SparkDataFrame,SparkDataFrame-method	join
+jsonFile	read.json
+jsonFile.default	read.json
+KMeansModel-class	KMeansModel-class
+KSTest-class	KSTest-class
+kurtosis	column_aggregate_functions
+kurtosis,Column-method	column_aggregate_functions
+lag	column_window_functions
+lag,characterOrColumn-method	column_window_functions
+last	last
+last,characterOrColumn-method	last
+lastProgress	lastProgress
+lastProgress,StreamingQuery-method	lastProgress
+last_day	column_datetime_functions
+last_day,Column-method	column_datetime_functions
+LDAModel-class	LDAModel-class
+lead	column_window_functions
+lead,characterOrColumn,numeric-method	column_window_functions
+least	column_nonaggregate_functions
+least,Column-method	column_nonaggregate_functions
+length	column_string_functions
+length,Column-method	column_string_functions
+levenshtein	column_string_functions
+levenshtein,Column-method	column_string_functions
+like	columnfunctions
+limit	limit
+limit,SparkDataFrame,numeric-method	limit
+LinearSVCModel-class	LinearSVCModel-class
+listColumns	listColumns
+listDatabases	listDatabases
+listFunctions	listFunctions
+listTables	listTables
+lit	column_nonaggregate_functions
+lit,ANY-method	column_nonaggregate_functions
+loadDF	read.df
+loadDF.default	read.df
+localCheckpoint	localCheckpoint
+localCheckpoint,SparkDataFrame-method	localCheckpoint
+locate	column_string_functions
+locate,character,Column-method	column_string_functions
+log	column_math_functions
+log,Column-method	column_math_functions
+log10	column_math_functions
+log10,Column-method	column_math_functions
+log1p	column_math_functions
+log1p,Column-method	column_math_functions
+log2	column_math_functions
+log2,Column-method	column_math_functions
+LogisticRegressionModel-class	LogisticRegressionModel-class
+lower	column_string_functions
+lower,Column-method	column_string_functions
+lpad	column_string_functions
+lpad,Column,numeric,character-method	column_string_functions
+ltrim	column_string_functions
+ltrim,Column,character-method	column_string_functions
+ltrim,Column,missing-method	column_string_functions
+map_from_arrays	column_collection_functions
+map_from_arrays,Column,Column-method	column_collection_functions
+map_from_arrays,Column-method	column_collection_functions
+map_keys	column_collection_functions
+map_keys,Column-method	column_collection_functions
+map_values	column_collection_functions
+map_values,Column-method	column_collection_functions
+max	column_aggregate_functions
+max,Column-method	column_aggregate_functions
+md5	column_misc_functions
+md5,Column-method	column_misc_functions
+mean	column_aggregate_functions
+mean,Column-method	column_aggregate_functions
+merge	merge
+merge,SparkDataFrame,SparkDataFrame-method	merge
+min	column_aggregate_functions
+min,Column-method	column_aggregate_functions
+minute	column_datetime_functions
+minute,Column-method	column_datetime_functions
+monotonically_increasing_id	column_nonaggregate_functions
+monotonically_increasing_id,missing-method	column_nonaggregate_functions
+month	column_datetime_functions
+month,Column-method	column_datetime_functions
+months_between	column_datetime_diff_functions
+months_between,Column-method	column_datetime_diff_functions
+MultilayerPerceptronClassificationModel-class	MultilayerPerceptronClassificationModel-class
+mutate	mutate
+mutate,SparkDataFrame-method	mutate
+n	count
+n,Column-method	count
+na.omit	nafunctions
+na.omit,SparkDataFrame-method	nafunctions
+NaiveBayesModel-class	NaiveBayesModel-class
+names	columns
+names,SparkDataFrame-method	columns
+names<-	columns
+names<-,SparkDataFrame-method	columns
+nanvl	column_nonaggregate_functions
+nanvl,Column-method	column_nonaggregate_functions
+ncol	ncol
+ncol,SparkDataFrame-method	ncol
+negate	column_nonaggregate_functions
+negate,Column-method	column_nonaggregate_functions
+next_day	column_datetime_diff_functions
+next_day,Column,character-method	column_datetime_diff_functions
+not	not
+not,Column-method	not
+nrow	nrow
+nrow,SparkDataFrame-method	nrow
+ntile	column_window_functions
+ntile,numeric-method	column_window_functions
+n_distinct	column_aggregate_functions
+n_distinct,Column-method	column_aggregate_functions
+orderBy	orderBy
+orderBy,SparkDataFrame,characterOrColumn-method	arrange
+orderBy,WindowSpec,character-method	orderBy
+orderBy,WindowSpec,Column-method	orderBy
+otherwise	otherwise
+otherwise,Column-method	otherwise
+over	over
+over,Column,WindowSpec-method	over
+parquetFile	read.parquet
+parquetFile.default	read.parquet
+partitionBy	partitionBy
+partitionBy,WindowSpec-method	partitionBy
+percent_rank	column_window_functions
+percent_rank,missing-method	column_window_functions
+persist	persist
+persist,SparkDataFrame,character-method	persist
+pivot	pivot
+pivot,GroupedData,character-method	pivot
+pmod	column_math_functions
+pmod,Column-method	column_math_functions
+posexplode	column_collection_functions
+posexplode,Column-method	column_collection_functions
+posexplode_outer	column_collection_functions
+posexplode_outer,Column-method	column_collection_functions
+predict	predict
+predict,AFTSurvivalRegressionModel-method	spark.survreg
+predict,ALSModel-method	spark.als
+predict,BisectingKMeansModel-method	spark.bisectingKmeans
+predict,DecisionTreeClassificationModel-method	spark.decisionTree
+predict,DecisionTreeRegressionModel-method	spark.decisionTree
+predict,FPGrowthModel-method	spark.fpGrowth
+predict,GaussianMixtureModel,SparkDataFrame-method	spark.gaussianMixture
+predict,GaussianMixtureModel-method	spark.gaussianMixture
+predict,GBTClassificationModel-method	spark.gbt
+predict,GBTRegressionModel-method	spark.gbt
+predict,GeneralizedLinearRegressionModel-method	spark.glm
+predict,IsotonicRegressionModel,SparkDataFrame-method	spark.isoreg
+predict,IsotonicRegressionModel-method	spark.isoreg
+predict,KMeansModel-method	spark.kmeans
+predict,LinearSVCModel,SparkDataFrame-method	spark.svmLinear
+predict,LinearSVCModel-method	spark.svmLinear
+predict,LogisticRegressionModel,SparkDataFrame-method	spark.logit
+predict,LogisticRegressionModel-method	spark.logit
+predict,MultilayerPerceptronClassificationModel-method	spark.mlp
+predict,NaiveBayesModel-method	spark.naiveBayes
+predict,RandomForestClassificationModel-method	spark.randomForest
+predict,RandomForestRegressionModel-method	spark.randomForest
+print.jobj	print.jobj
+print.structField	print.structField
+print.structType	print.structType
+print.summary.DecisionTreeClassificationModel	spark.decisionTree
+print.summary.DecisionTreeRegressionModel	spark.decisionTree
+print.summary.GBTClassificationModel	spark.gbt
+print.summary.GBTRegressionModel	spark.gbt
+print.summary.GeneralizedLinearRegressionModel	spark.glm
+print.summary.KSTest	spark.kstest
+print.summary.RandomForestClassificationModel	spark.randomForest
+print.summary.RandomForestRegressionModel	spark.randomForest
+printSchema	printSchema
+printSchema,SparkDataFrame-method	printSchema
+quarter	column_datetime_functions
+quarter,Column-method	column_datetime_functions
+queryName	queryName
+queryName,StreamingQuery-method	queryName
+rand	column_nonaggregate_functions
+rand,missing-method	column_nonaggregate_functions
+rand,numeric-method	column_nonaggregate_functions
+randn	column_nonaggregate_functions
+randn,missing-method	column_nonaggregate_functions
+randn,numeric-method	column_nonaggregate_functions
+RandomForestClassificationModel-class	RandomForestClassificationModel-class
+RandomForestRegressionModel-class	RandomForestRegressionModel-class
+randomSplit	randomSplit
+randomSplit,SparkDataFrame,numeric-method	randomSplit
+rangeBetween	rangeBetween
+rangeBetween,WindowSpec,numeric,numeric-method	rangeBetween
+rank	column_window_functions
+rank,ANY-method	column_window_functions
+rank,missing-method	column_window_functions
+rbind	rbind
+rbind,SparkDataFrame-method	rbind
+read.df	read.df
+read.df.default	read.df
+read.jdbc	read.jdbc
+read.json	read.json
+read.json.default	read.json
+read.ml	read.ml
+read.orc	read.orc
+read.parquet	read.parquet
+read.parquet.default	read.parquet
+read.stream	read.stream
+read.text	read.text
+read.text.default	read.text
+recoverPartitions	recoverPartitions
+refreshByPath	refreshByPath
+refreshTable	refreshTable
+regexp_extract	column_string_functions
+regexp_extract,Column,character,numeric-method	column_string_functions
+regexp_replace	column_string_functions
+regexp_replace,Column,character,character-method	column_string_functions
+registerTempTable	registerTempTable-deprecated
+registerTempTable,SparkDataFrame,character-method	registerTempTable-deprecated
+rename	rename
+rename,SparkDataFrame-method	rename
+repartition	repartition
+repartition,SparkDataFrame-method	repartition
+repartitionByRange	repartitionByRange
+repartitionByRange,SparkDataFrame-method	repartitionByRange
+repeat_string	column_string_functions
+repeat_string,Column,numeric-method	column_string_functions
+repeat_string,Column-method	column_string_functions
+reverse	column_collection_functions
+reverse,Column-method	column_collection_functions
+rint	column_math_functions
+rint,Column-method	column_math_functions
+rlike	columnfunctions
+rollup	rollup
+rollup,SparkDataFrame-method	rollup
+round	column_math_functions
+round,Column-method	column_math_functions
+rowsBetween	rowsBetween
+rowsBetween,WindowSpec,numeric,numeric-method	rowsBetween
+row_number	column_window_functions
+row_number,missing-method	column_window_functions
+rpad	column_string_functions
+rpad,Column,numeric,character-method	column_string_functions
+rtrim	column_string_functions
+rtrim,Column,character-method	column_string_functions
+rtrim,Column,missing-method	column_string_functions
+sample	sample
+sample,SparkDataFrame-method	sample
+sampleBy	sampleBy
+sampleBy,SparkDataFrame,character,list,numeric-method	sampleBy
+sample_frac	sample
+sample_frac,SparkDataFrame-method	sample
+saveAsParquetFile	write.parquet
+saveAsParquetFile,SparkDataFrame,character-method	write.parquet
+saveAsTable	saveAsTable
+saveAsTable,SparkDataFrame,character-method	saveAsTable
+saveDF	write.df
+saveDF,SparkDataFrame,character-method	write.df
+schema	schema
+schema,SparkDataFrame-method	schema
+sd	column_aggregate_functions
+sd,Column-method	column_aggregate_functions
+second	column_datetime_functions
+second,Column-method	column_datetime_functions
+select	select
+select,SparkDataFrame,character-method	select
+select,SparkDataFrame,Column-method	select
+select,SparkDataFrame,list-method	select
+selectExpr	selectExpr
+selectExpr,SparkDataFrame,character-method	selectExpr
+setCheckpointDir	setCheckpointDir
+setCurrentDatabase	setCurrentDatabase
+setJobDescription	setJobDescription
+setJobGroup	setJobGroup
+setJobGroup.default	setJobGroup
+setLocalProperty	setLocalProperty
+setLogLevel	setLogLevel
+sha1	column_misc_functions
+sha1,Column-method	column_misc_functions
+sha2	column_misc_functions
+sha2,Column,numeric-method	column_misc_functions
+shiftLeft	column_math_functions
+shiftLeft,Column,numeric-method	column_math_functions
+shiftRight	column_math_functions
+shiftRight,Column,numeric-method	column_math_functions
+shiftRightUnsigned	column_math_functions
+shiftRightUnsigned,Column,numeric-method	column_math_functions
+show	show
+show,Column-method	show
+show,GroupedData-method	show
+show,SparkDataFrame-method	show
+show,StreamingQuery-method	show
+show,WindowSpec-method	show
+showDF	showDF
+showDF,SparkDataFrame-method	showDF
+shuffle	column_collection_functions
+shuffle,Column-method	column_collection_functions
+sign	column_math_functions
+sign,Column-method	column_math_functions
+signum	column_math_functions
+signum,Column-method	column_math_functions
+sin	column_math_functions
+sin,Column-method	column_math_functions
+sinh	column_math_functions
+sinh,Column-method	column_math_functions
+size	column_collection_functions
+size,Column-method	column_collection_functions
+skewness	column_aggregate_functions
+skewness,Column-method	column_aggregate_functions
+slice	column_collection_functions
+slice,Column-method	column_collection_functions
+sort_array	column_collection_functions
+sort_array,Column-method	column_collection_functions
+soundex	column_string_functions
+soundex,Column-method	column_string_functions
+spark.addFile	spark.addFile
+spark.als	spark.als
+spark.als,SparkDataFrame-method	spark.als
+spark.associationRules	spark.fpGrowth
+spark.associationRules,FPGrowthModel-method	spark.fpGrowth
+spark.bisectingKmeans	spark.bisectingKmeans
+spark.bisectingKmeans,SparkDataFrame,formula-method	spark.bisectingKmeans
+spark.decisionTree	spark.decisionTree
+spark.decisionTree,SparkDataFrame,formula-method	spark.decisionTree
+spark.fpGrowth	spark.fpGrowth
+spark.fpGrowth,SparkDataFrame-method	spark.fpGrowth
+spark.freqItemsets	spark.fpGrowth
+spark.freqItemsets,FPGrowthModel-method	spark.fpGrowth
+spark.gaussianMixture	spark.gaussianMixture
+spark.gaussianMixture,SparkDataFrame,formula-method	spark.gaussianMixture
+spark.gbt	spark.gbt
+spark.gbt,SparkDataFrame,formula-method	spark.gbt
+spark.getSparkFiles	spark.getSparkFiles
+spark.getSparkFilesRootDirectory	spark.getSparkFilesRootDirectory
+spark.glm	spark.glm
+spark.glm,SparkDataFrame,formula-method	spark.glm
+spark.isoreg	spark.isoreg
+spark.isoreg,SparkDataFrame,formula-method	spark.isoreg
+spark.kmeans	spark.kmeans
+spark.kmeans,SparkDataFrame,formula-method	spark.kmeans
+spark.kstest	spark.kstest
+spark.kstest,SparkDataFrame-method	spark.kstest
+spark.lapply	spark.lapply
+spark.lda	spark.lda
+spark.lda,SparkDataFrame-method	spark.lda
+spark.logit	spark.logit
+spark.logit,SparkDataFrame,formula-method	spark.logit
+spark.mlp	spark.mlp
+spark.mlp,SparkDataFrame,formula-method	spark.mlp
+spark.naiveBayes	spark.naiveBayes
+spark.naiveBayes,SparkDataFrame,formula-method	spark.naiveBayes
+spark.perplexity	spark.lda
+spark.perplexity,LDAModel,SparkDataFrame-method	spark.lda
+spark.perplexity,LDAModel-method	spark.lda
+spark.posterior	spark.lda
+spark.posterior,LDAModel,SparkDataFrame-method	spark.lda
+spark.randomForest	spark.randomForest
+spark.randomForest,SparkDataFrame,formula-method	spark.randomForest
+spark.survreg	spark.survreg
+spark.survreg,SparkDataFrame,formula-method	spark.survreg
+spark.svmLinear	spark.svmLinear
+spark.svmLinear,SparkDataFrame,formula-method	spark.svmLinear
+SparkDataFrame-class	SparkDataFrame
+sparkR.callJMethod	sparkR.callJMethod
+sparkR.callJStatic	sparkR.callJStatic
+sparkR.conf	sparkR.conf
+sparkR.init	sparkR.init-deprecated
+sparkR.newJObject	sparkR.newJObject
+sparkR.session	sparkR.session
+sparkR.session.stop	sparkR.session.stop
+sparkR.stop	sparkR.session.stop
+sparkR.uiWebUrl	sparkR.uiWebUrl
+sparkR.version	sparkR.version
+sparkRHive.init	sparkRHive.init-deprecated
+sparkRSQL.init	sparkRSQL.init-deprecated
+spark_partition_id	column_nonaggregate_functions
+spark_partition_id,missing-method	column_nonaggregate_functions
+split_string	column_string_functions
+split_string,Column,character-method	column_string_functions
+split_string,Column-method	column_string_functions
+sql	sql
+sql.default	sql
+sqrt	column_math_functions
+sqrt,Column-method	column_math_functions
+startsWith	startsWith
+startsWith,Column-method	startsWith
+status	status
+status,StreamingQuery-method	status
+stddev	column_aggregate_functions
+stddev,Column-method	column_aggregate_functions
+stddev_pop	column_aggregate_functions
+stddev_pop,Column-method	column_aggregate_functions
+stddev_samp	column_aggregate_functions
+stddev_samp,Column-method	column_aggregate_functions
+stopQuery	stopQuery
+stopQuery,StreamingQuery-method	stopQuery
+storageLevel	storageLevel
+storageLevel,SparkDataFrame-method	storageLevel
+str	str
+str,SparkDataFrame-method	str
+StreamingQuery-class	StreamingQuery
+struct	column_nonaggregate_functions
+struct,characterOrColumn-method	column_nonaggregate_functions
+structField	structField
+structField.character	structField
+structField.jobj	structField
+structType	structType
+structType.character	structType
+structType.jobj	structType
+structType.structField	structType
+subset	subset
+subset,SparkDataFrame-method	subset
+substr	substr
+substr,Column-method	substr
+substring_index	column_string_functions
+substring_index,Column,character,numeric-method	column_string_functions
+sum	column_aggregate_functions
+sum,Column-method	column_aggregate_functions
+sumDistinct	column_aggregate_functions
+sumDistinct,Column-method	column_aggregate_functions
+summarize	summarize
+summarize,GroupedData-method	summarize
+summarize,SparkDataFrame-method	summarize
+summary	summary
+summary,AFTSurvivalRegressionModel-method	spark.survreg
+summary,ALSModel-method	spark.als
+summary,BisectingKMeansModel-method	spark.bisectingKmeans
+summary,DecisionTreeClassificationModel-method	spark.decisionTree
+summary,DecisionTreeRegressionModel-method	spark.decisionTree
+summary,GaussianMixtureModel-method	spark.gaussianMixture
+summary,GBTClassificationModel-method	spark.gbt
+summary,GBTRegressionModel-method	spark.gbt
+summary,GeneralizedLinearRegressionModel-method	spark.glm
+summary,IsotonicRegressionModel-method	spark.isoreg
+summary,KMeansModel-method	spark.kmeans
+summary,KSTest-method	spark.kstest
+summary,LDAModel-method	spark.lda
+summary,LinearSVCModel-method	spark.svmLinear
+summary,LogisticRegressionModel-method	spark.logit
+summary,MultilayerPerceptronClassificationModel-method	spark.mlp
+summary,NaiveBayesModel-method	spark.naiveBayes
+summary,RandomForestClassificationModel-method	spark.randomForest
+summary,RandomForestRegressionModel-method	spark.randomForest
+summary,SparkDataFrame-method	summary
+tableNames	tableNames
+tableNames.default	tableNames
+tables	tables
+tables.default	tables
+tableToDF	tableToDF
+take	take
+take,SparkDataFrame,numeric-method	take
+tan	column_math_functions
+tan,Column-method	column_math_functions
+tanh	column_math_functions
+tanh,Column-method	column_math_functions
+toDegrees	column_math_functions
+toDegrees,Column-method	column_math_functions
+toJSON	toJSON
+toJSON,SparkDataFrame-method	toJSON
+toRadians	column_math_functions
+toRadians,Column-method	column_math_functions
+to_date	column_datetime_functions
+to_date,Column,character-method	column_datetime_functions
+to_date,Column,missing-method	column_datetime_functions
+to_json	column_collection_functions
+to_json,Column-method	column_collection_functions
+to_timestamp	column_datetime_functions
+to_timestamp,Column,character-method	column_datetime_functions
+to_timestamp,Column,missing-method	column_datetime_functions
+to_utc_timestamp	column_datetime_diff_functions
+to_utc_timestamp,Column,character-method	column_datetime_diff_functions
+transform	mutate
+transform,SparkDataFrame-method	mutate
+translate	column_string_functions
+translate,Column,character,character-method	column_string_functions
+trim	column_string_functions
+trim,Column,character-method	column_string_functions
+trim,Column,missing-method	column_string_functions
+trunc	column_datetime_functions
+trunc,Column-method	column_datetime_functions
+unbase64	column_string_functions
+unbase64,Column-method	column_string_functions
+uncacheTable	uncacheTable
+uncacheTable.default	uncacheTable
+unhex	column_math_functions
+unhex,Column-method	column_math_functions
+union	union
+union,SparkDataFrame,SparkDataFrame-method	union
+unionAll	union
+unionAll,SparkDataFrame,SparkDataFrame-method	union
+unionByName	unionByName
+unionByName,SparkDataFrame,SparkDataFrame-method	unionByName
+unique	distinct
+unique,SparkDataFrame-method	distinct
+unix_timestamp	column_datetime_functions
+unix_timestamp,Column,character-method	column_datetime_functions
+unix_timestamp,Column,missing-method	column_datetime_functions
+unix_timestamp,missing,missing-method	column_datetime_functions
+unpersist	unpersist
+unpersist,SparkDataFrame-method	unpersist
+upper	column_string_functions
+upper,Column-method	column_string_functions
+var	column_aggregate_functions
+var,Column-method	column_aggregate_functions
+variance	column_aggregate_functions
+variance,Column-method	column_aggregate_functions
+var_pop	column_aggregate_functions
+var_pop,Column-method	column_aggregate_functions
+var_samp	column_aggregate_functions
+var_samp,Column-method	column_aggregate_functions
+weekofyear	column_datetime_functions
+weekofyear,Column-method	column_datetime_functions
+when	column_nonaggregate_functions
+when,Column-method	column_nonaggregate_functions
+where	filter
+where,SparkDataFrame,characterOrColumn-method	filter
+window	column_datetime_functions
+window,Column-method	column_datetime_functions
+windowOrderBy	windowOrderBy
+windowOrderBy,character-method	windowOrderBy
+windowOrderBy,Column-method	windowOrderBy
+windowPartitionBy	windowPartitionBy
+windowPartitionBy,character-method	windowPartitionBy
+windowPartitionBy,Column-method	windowPartitionBy
+WindowSpec-class	WindowSpec
+with	with
+with,SparkDataFrame-method	with
+withColumn	withColumn
+withColumn,SparkDataFrame,character-method	withColumn
+withColumnRenamed	rename
+withColumnRenamed,SparkDataFrame,character,character-method	rename
+withWatermark	withWatermark
+withWatermark,SparkDataFrame,character,character-method	withWatermark
+write.df	write.df
+write.df,SparkDataFrame-method	write.df
+write.jdbc	write.jdbc
+write.jdbc,SparkDataFrame,character,character-method	write.jdbc
+write.json	write.json
+write.json,SparkDataFrame,character-method	write.json
+write.ml	write.ml
+write.ml,AFTSurvivalRegressionModel,character-method	spark.survreg
+write.ml,ALSModel,character-method	spark.als
+write.ml,BisectingKMeansModel,character-method	spark.bisectingKmeans
+write.ml,DecisionTreeClassificationModel,character-method	spark.decisionTree
+write.ml,DecisionTreeRegressionModel,character-method	spark.decisionTree
+write.ml,FPGrowthModel,character-method	spark.fpGrowth
+write.ml,GaussianMixtureModel,character-method	spark.gaussianMixture
+write.ml,GBTClassificationModel,character-method	spark.gbt
+write.ml,GBTRegressionModel,character-method	spark.gbt
+write.ml,GeneralizedLinearRegressionModel,character-method	spark.glm
+write.ml,IsotonicRegressionModel,character-method	spark.isoreg
+write.ml,KMeansModel,character-method	spark.kmeans
+write.ml,LDAModel,character-method	spark.lda
+write.ml,LinearSVCModel,character-method	spark.svmLinear
+write.ml,LogisticRegressionModel,character-method	spark.logit
+write.ml,MultilayerPerceptronClassificationModel,character-method	spark.mlp
+write.ml,NaiveBayesModel,character-method	spark.naiveBayes
+write.ml,RandomForestClassificationModel,character-method	spark.randomForest
+write.ml,RandomForestRegressionModel,character-method	spark.randomForest
+write.orc	write.orc
+write.orc,SparkDataFrame,character-method	write.orc
+write.parquet	write.parquet
+write.parquet,SparkDataFrame,character-method	write.parquet
+write.stream	write.stream
+write.stream,SparkDataFrame-method	write.stream
+write.text	write.text
+write.text,SparkDataFrame,character-method	write.text
+year	column_datetime_functions
+year,Column-method	column_datetime_functions
+[	subset
+[,SparkDataFrame-method	subset
+[[	subset
+[[,SparkDataFrame,numericOrcharacter-method	subset
+[[<-	subset
+[[<-,SparkDataFrame,numericOrcharacter-method	subset
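
Editor's note (not part of the distribution files): the generated index above maps each exported SparkR generic or S4 method signature to the help topic that documents it, e.g. `predict,NaiveBayesModel-method` resolves to the `spark.naiveBayes` page and `write.parquet,SparkDataFrame,character-method` to `write.parquet`. As a minimal, hedged sketch of a few of the listed generics in use (assuming SparkR 2.4.0 is installed and SPARK_HOME points at a local Spark installation; the output path below is purely hypothetical):

    # Start a local Spark session (sparkR.session is listed in the index above).
    library(SparkR)
    sparkR.session(master = "local[1]", appName = "index-example")

    # createDataFrame / filter / groupBy / summarize are all listed generics.
    df    <- createDataFrame(faithful)                      # base R dataset
    short <- filter(df, df$waiting < 50)
    agg   <- summarize(groupBy(df, df$waiting),
                       cnt = count(df$waiting))

    head(select(short, "eruptions"))                        # select by name
    head(arrange(agg, desc(agg$cnt)))                       # desc: columnfunctions

    # write.parquet / read.parquet round trip, then stop the session.
    write.parquet(df, "/tmp/faithful_parquet")              # hypothetical path
    nrow(read.parquet("/tmp/faithful_parquet"))

    sparkR.session.stop()
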
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/SparkR.rdb b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/SparkR.rdb
new file mode 100644
index 0000000000000000000000000000000000000000..1862ef005e3ef26ccdad05ed15aeee9897e730e8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/SparkR.rdb differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/SparkR.rdx b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/SparkR.rdx
new file mode 100644
index 0000000000000000000000000000000000000000..e05522fdb616a18cdebf4efc07de5d1330b81144
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/SparkR.rdx differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/aliases.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/aliases.rds
new file mode 100644
index 0000000000000000000000000000000000000000..44be2a4cf556f4350521e8f7270a4af5418a15ea
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/aliases.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/paths.rds b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/paths.rds
new file mode 100644
index 0000000000000000000000000000000000000000..549c0a3d09a0a26cc1f603c3476eb271d4b30ed6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/help/paths.rds differ
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/html/00Index.html b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/html/00Index.html
new file mode 100644
index 0000000000000000000000000000000000000000..0e4a16a8ad81491884c828457d7ff867e9a9318d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/html/00Index.html
@@ -0,0 +1,1950 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>R: R Frontend for Apache Spark</title>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<link rel="stylesheet" type="text/css" href="R.css" />
+</head><body>
+<h1> R Frontend for Apache Spark
+<img class="toplogo" src="../../../doc/html/Rlogo.svg" alt="[R logo]" />
+</h1>
+<hr/>
+<div style="text-align: center;">
+<a href="../../../doc/html/packages.html"><img class="arrow" src="../../../doc/html/left.jpg" alt="[Up]" /></a>
+<a href="../../../doc/html/index.html"><img class="arrow" src="../../../doc/html/up.jpg" alt="[Top]" /></a>
+</div><h2>Documentation for package &lsquo;SparkR&rsquo; version 2.4.0</h2>
+
+<ul><li><a href="../DESCRIPTION">DESCRIPTION file</a>.</li>
+<li><a href="../doc/index.html">User guides, package vignettes and other documentation.</a></li>
+</ul>
+
+<h2>Help Pages</h2>
+
+
+<p style="text-align: center;">
+<a href="#A">A</a>
+<a href="#B">B</a>
+<a href="#C">C</a>
+<a href="#D">D</a>
+<a href="#E">E</a>
+<a href="#F">F</a>
+<a href="#G">G</a>
+<a href="#H">H</a>
+<a href="#I">I</a>
+<a href="#J">J</a>
+<a href="#K">K</a>
+<a href="#L">L</a>
+<a href="#M">M</a>
+<a href="#N">N</a>
+<a href="#O">O</a>
+<a href="#P">P</a>
+<a href="#Q">Q</a>
+<a href="#R">R</a>
+<a href="#S">S</a>
+<a href="#T">T</a>
+<a href="#U">U</a>
+<a href="#V">V</a>
+<a href="#W">W</a>
+<a href="#Y">Y</a>
+<a href="#misc">misc</a>
+</p>
+
+
+<h2><a name="A">-- A --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_math_functions.html">abs</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">abs-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">acos</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">acos-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">add_months</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">add_months-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="AFTSurvivalRegressionModel-class.html">AFTSurvivalRegressionModel-class</a></td>
+<td>S4 class that represents a AFTSurvivalRegressionModel</td></tr>
+<tr><td style="width: 25%;"><a href="summarize.html">agg</a></td>
+<td>summarize</td></tr>
+<tr><td style="width: 25%;"><a href="summarize.html">agg-method</a></td>
+<td>summarize</td></tr>
+<tr><td style="width: 25%;"><a href="alias.html">alias</a></td>
+<td>alias</td></tr>
+<tr><td style="width: 25%;"><a href="alias.html">alias-method</a></td>
+<td>alias</td></tr>
+<tr><td style="width: 25%;"><a href="ALSModel-class.html">ALSModel-class</a></td>
+<td>S4 class that represents an ALSModel</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">approxCountDistinct</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">approxCountDistinct-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="approxQuantile.html">approxQuantile</a></td>
+<td>Calculates the approximate quantiles of numerical columns of a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="approxQuantile.html">approxQuantile-method</a></td>
+<td>Calculates the approximate quantiles of numerical columns of a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="arrange.html">arrange</a></td>
+<td>Arrange Rows by Variables</td></tr>
+<tr><td style="width: 25%;"><a href="arrange.html">arrange-method</a></td>
+<td>Arrange Rows by Variables</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">arrays_overlap</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">arrays_overlap-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">arrays_zip</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">arrays_zip-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_contains</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_contains-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_distinct</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_distinct-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_except</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_except-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_intersect</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_intersect-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_join</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_join-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_max</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_max-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_min</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_min-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_position</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_position-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_remove</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_remove-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_repeat</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_repeat-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_sort</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_sort-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_union</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">array_union-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="as.data.frame.html">as.data.frame</a></td>
+<td>Download data from a SparkDataFrame into a R data.frame</td></tr>
+<tr><td style="width: 25%;"><a href="as.data.frame.html">as.data.frame-method</a></td>
+<td>Download data from a SparkDataFrame into a R data.frame</td></tr>
+<tr><td style="width: 25%;"><a href="createDataFrame.html">as.DataFrame</a></td>
+<td>Create a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="createDataFrame.html">as.DataFrame.default</a></td>
+<td>Create a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">asc</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">ascii</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">ascii-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">asin</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">asin-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">associationRules-method</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">atan</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">atan-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">atan2</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">atan2-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="attach.html">attach</a></td>
+<td>Attach SparkDataFrame to R search path</td></tr>
+<tr><td style="width: 25%;"><a href="attach.html">attach-method</a></td>
+<td>Attach SparkDataFrame to R search path</td></tr>
+<tr><td style="width: 25%;"><a href="avg.html">avg</a></td>
+<td>avg</td></tr>
+<tr><td style="width: 25%;"><a href="avg.html">avg-method</a></td>
+<td>avg</td></tr>
+<tr><td style="width: 25%;"><a href="awaitTermination.html">awaitTermination</a></td>
+<td>awaitTermination</td></tr>
+<tr><td style="width: 25%;"><a href="awaitTermination.html">awaitTermination-method</a></td>
+<td>awaitTermination</td></tr>
+</table>
+
+<h2><a name="B">-- B --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_string_functions.html">base64</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">base64-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="between.html">between</a></td>
+<td>between</td></tr>
+<tr><td style="width: 25%;"><a href="between.html">between-method</a></td>
+<td>between</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">bin</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">bin-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="BisectingKMeansModel-class.html">BisectingKMeansModel-class</a></td>
+<td>S4 class that represents a BisectingKMeansModel</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">bitwiseNOT</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">bitwiseNOT-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="broadcast.html">broadcast</a></td>
+<td>broadcast</td></tr>
+<tr><td style="width: 25%;"><a href="broadcast.html">broadcast-method</a></td>
+<td>broadcast</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">bround</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">bround-method</a></td>
+<td>Math functions for Column operations</td></tr>
+</table>
+
+<h2><a name="C">-- C --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="cache.html">cache</a></td>
+<td>Cache</td></tr>
+<tr><td style="width: 25%;"><a href="cache.html">cache-method</a></td>
+<td>Cache</td></tr>
+<tr><td style="width: 25%;"><a href="cacheTable.html">cacheTable</a></td>
+<td>Cache Table</td></tr>
+<tr><td style="width: 25%;"><a href="cacheTable.html">cacheTable.default</a></td>
+<td>Cache Table</td></tr>
+<tr><td style="width: 25%;"><a href="cancelJobGroup.html">cancelJobGroup</a></td>
+<td>Cancel active jobs for the specified group</td></tr>
+<tr><td style="width: 25%;"><a href="cancelJobGroup.html">cancelJobGroup.default</a></td>
+<td>Cancel active jobs for the specified group</td></tr>
+<tr><td style="width: 25%;"><a href="cast.html">cast</a></td>
+<td>Casts the column to a different data type.</td></tr>
+<tr><td style="width: 25%;"><a href="cast.html">cast-method</a></td>
+<td>Casts the column to a different data type.</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">cbrt</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">cbrt-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">ceil</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">ceil-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">ceiling</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">ceiling-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="checkpoint.html">checkpoint</a></td>
+<td>checkpoint</td></tr>
+<tr><td style="width: 25%;"><a href="checkpoint.html">checkpoint-method</a></td>
+<td>checkpoint</td></tr>
+<tr><td style="width: 25%;"><a href="clearCache.html">clearCache</a></td>
+<td>Clear Cache</td></tr>
+<tr><td style="width: 25%;"><a href="clearCache.html">clearCache.default</a></td>
+<td>Clear Cache</td></tr>
+<tr><td style="width: 25%;"><a href="clearJobGroup.html">clearJobGroup</a></td>
+<td>Clear current job group ID and its description</td></tr>
+<tr><td style="width: 25%;"><a href="clearJobGroup.html">clearJobGroup.default</a></td>
+<td>Clear current job group ID and its description</td></tr>
+<tr><td style="width: 25%;"><a href="coalesce.html">coalesce</a></td>
+<td>Coalesce</td></tr>
+<tr><td style="width: 25%;"><a href="coalesce.html">coalesce-method</a></td>
+<td>Coalesce</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">coalesce-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="collect.html">collect</a></td>
+<td>Collects all the elements of a SparkDataFrame and coerces them into an R data.frame.</td></tr>
+<tr><td style="width: 25%;"><a href="collect.html">collect-method</a></td>
+<td>Collects all the elements of a SparkDataFrame and coerces them into an R data.frame.</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">collect_list</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">collect_list-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">collect_set</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">collect_set-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">colnames</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">colnames-method</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">colnames&lt;-</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">colnames&lt;--method</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="coltypes.html">coltypes</a></td>
+<td>coltypes</td></tr>
+<tr><td style="width: 25%;"><a href="coltypes.html">coltypes-method</a></td>
+<td>coltypes</td></tr>
+<tr><td style="width: 25%;"><a href="coltypes.html">coltypes&lt;-</a></td>
+<td>coltypes</td></tr>
+<tr><td style="width: 25%;"><a href="coltypes.html">coltypes&lt;--method</a></td>
+<td>coltypes</td></tr>
+<tr><td style="width: 25%;"><a href="column.html">column</a></td>
+<td>S4 class that represents a SparkDataFrame column</td></tr>
+<tr><td style="width: 25%;"><a href="column.html">Column-class</a></td>
+<td>S4 class that represents a SparkDataFrame column</td></tr>
+<tr><td style="width: 25%;"><a href="column.html">column-method</a></td>
+<td>S4 class that represents a SparkDataFrame column</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">columnfunctions</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">columns</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">columns-method</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">column_aggregate_functions</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">column_collection_functions</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">column_datetime_diff_functions</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">column_datetime_functions</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">column_math_functions</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">column_misc_functions</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">column_nonaggregate_functions</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">column_string_functions</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">column_window_functions</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">concat</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">concat-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">concat_ws</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">concat_ws-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">contains</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">conv</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">conv-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="corr.html">corr</a></td>
+<td>corr</td></tr>
+<tr><td style="width: 25%;"><a href="corr.html">corr-method</a></td>
+<td>corr</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">cos</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">cos-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">cosh</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">cosh-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="count.html">count</a></td>
+<td>Count</td></tr>
+<tr><td style="width: 25%;"><a href="count.html">count-method</a></td>
+<td>Count</td></tr>
+<tr><td style="width: 25%;"><a href="nrow.html">count-method</a></td>
+<td>Returns the number of rows in a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">countDistinct</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">countDistinct-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="cov.html">cov</a></td>
+<td>cov</td></tr>
+<tr><td style="width: 25%;"><a href="cov.html">cov-method</a></td>
+<td>cov</td></tr>
+<tr><td style="width: 25%;"><a href="cov.html">covar_pop</a></td>
+<td>cov</td></tr>
+<tr><td style="width: 25%;"><a href="cov.html">covar_pop-method</a></td>
+<td>cov</td></tr>
+<tr><td style="width: 25%;"><a href="cov.html">covar_samp</a></td>
+<td>cov</td></tr>
+<tr><td style="width: 25%;"><a href="cov.html">covar_samp-method</a></td>
+<td>cov</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">crc32</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">crc32-method</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="createDataFrame.html">createDataFrame</a></td>
+<td>Create a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="createDataFrame.html">createDataFrame.default</a></td>
+<td>Create a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="createExternalTable-deprecated.html">createExternalTable</a></td>
+<td>(Deprecated) Create an external table</td></tr>
+<tr><td style="width: 25%;"><a href="createExternalTable-deprecated.html">createExternalTable.default</a></td>
+<td>(Deprecated) Create an external table</td></tr>
+<tr><td style="width: 25%;"><a href="createOrReplaceTempView.html">createOrReplaceTempView</a></td>
+<td>Creates a temporary view using the given name.</td></tr>
+<tr><td style="width: 25%;"><a href="createOrReplaceTempView.html">createOrReplaceTempView-method</a></td>
+<td>Creates a temporary view using the given name.</td></tr>
+<tr><td style="width: 25%;"><a href="createTable.html">createTable</a></td>
+<td>Creates a table based on the dataset in a data source</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">create_array</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">create_array-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">create_map</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">create_map-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="crossJoin.html">crossJoin</a></td>
+<td>CrossJoin</td></tr>
+<tr><td style="width: 25%;"><a href="crossJoin.html">crossJoin-method</a></td>
+<td>CrossJoin</td></tr>
+<tr><td style="width: 25%;"><a href="crosstab.html">crosstab</a></td>
+<td>Computes a pair-wise frequency table of the given columns</td></tr>
+<tr><td style="width: 25%;"><a href="crosstab.html">crosstab-method</a></td>
+<td>Computes a pair-wise frequency table of the given columns</td></tr>
+<tr><td style="width: 25%;"><a href="cube.html">cube</a></td>
+<td>cube</td></tr>
+<tr><td style="width: 25%;"><a href="cube.html">cube-method</a></td>
+<td>cube</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">cume_dist</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">cume_dist-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="currentDatabase.html">currentDatabase</a></td>
+<td>Returns the current default database</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">current_date</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">current_date-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">current_timestamp</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">current_timestamp-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+</table>
+
+<h2><a name="D">-- D --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="dapply.html">dapply</a></td>
+<td>dapply</td></tr>
+<tr><td style="width: 25%;"><a href="dapply.html">dapply-method</a></td>
+<td>dapply</td></tr>
+<tr><td style="width: 25%;"><a href="dapplyCollect.html">dapplyCollect</a></td>
+<td>dapplyCollect</td></tr>
+<tr><td style="width: 25%;"><a href="dapplyCollect.html">dapplyCollect-method</a></td>
+<td>dapplyCollect</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">datediff</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">datediff-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">date_add</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">date_add-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">date_format</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">date_format-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">date_sub</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">date_sub-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">date_trunc</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">date_trunc-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">dayofmonth</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">dayofmonth-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">dayofweek</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">dayofweek-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">dayofyear</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">dayofyear-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="DecisionTreeClassificationModel-class.html">DecisionTreeClassificationModel-class</a></td>
+<td>S4 class that represents a DecisionTreeClassificationModel</td></tr>
+<tr><td style="width: 25%;"><a href="DecisionTreeRegressionModel-class.html">DecisionTreeRegressionModel-class</a></td>
+<td>S4 class that represents a DecisionTreeRegressionModel</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">decode</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">decode-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">dense_rank</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">dense_rank-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">desc</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="describe.html">describe</a></td>
+<td>describe</td></tr>
+<tr><td style="width: 25%;"><a href="describe.html">describe-method</a></td>
+<td>describe</td></tr>
+<tr><td style="width: 25%;"><a href="dim.html">dim</a></td>
+<td>Returns the dimensions of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="dim.html">dim-method</a></td>
+<td>Returns the dimensions of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="distinct.html">distinct</a></td>
+<td>Distinct</td></tr>
+<tr><td style="width: 25%;"><a href="distinct.html">distinct-method</a></td>
+<td>Distinct</td></tr>
+<tr><td style="width: 25%;"><a href="drop.html">drop</a></td>
+<td>drop</td></tr>
+<tr><td style="width: 25%;"><a href="drop.html">drop-method</a></td>
+<td>drop</td></tr>
+<tr><td style="width: 25%;"><a href="dropDuplicates.html">dropDuplicates</a></td>
+<td>dropDuplicates</td></tr>
+<tr><td style="width: 25%;"><a href="dropDuplicates.html">dropDuplicates-method</a></td>
+<td>dropDuplicates</td></tr>
+<tr><td style="width: 25%;"><a href="nafunctions.html">dropna</a></td>
+<td>A set of SparkDataFrame functions working with NA values</td></tr>
+<tr><td style="width: 25%;"><a href="nafunctions.html">dropna-method</a></td>
+<td>A set of SparkDataFrame functions working with NA values</td></tr>
+<tr><td style="width: 25%;"><a href="dropTempTable-deprecated.html">dropTempTable</a></td>
+<td>(Deprecated) Drop Temporary Table</td></tr>
+<tr><td style="width: 25%;"><a href="dropTempTable-deprecated.html">dropTempTable.default</a></td>
+<td>(Deprecated) Drop Temporary Table</td></tr>
+<tr><td style="width: 25%;"><a href="dropTempView.html">dropTempView</a></td>
+<td>Drops the temporary view with the given view name in the catalog.</td></tr>
+<tr><td style="width: 25%;"><a href="dtypes.html">dtypes</a></td>
+<td>DataTypes</td></tr>
+<tr><td style="width: 25%;"><a href="dtypes.html">dtypes-method</a></td>
+<td>DataTypes</td></tr>
+</table>
+
+<h2><a name="E">-- E --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">element_at</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">element_at-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">encode</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">encode-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="endsWith.html">endsWith</a></td>
+<td>endsWith</td></tr>
+<tr><td style="width: 25%;"><a href="endsWith.html">endsWith-method</a></td>
+<td>endsWith</td></tr>
+<tr><td style="width: 25%;"><a href="except.html">except</a></td>
+<td>except</td></tr>
+<tr><td style="width: 25%;"><a href="except.html">except-method</a></td>
+<td>except</td></tr>
+<tr><td style="width: 25%;"><a href="exceptAll.html">exceptAll</a></td>
+<td>exceptAll</td></tr>
+<tr><td style="width: 25%;"><a href="exceptAll.html">exceptAll-method</a></td>
+<td>exceptAll</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">exp</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">exp-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="explain.html">explain</a></td>
+<td>Explain</td></tr>
+<tr><td style="width: 25%;"><a href="explain.html">explain-method</a></td>
+<td>Explain</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">explode</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">explode-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">explode_outer</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">explode_outer-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">expm1</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">expm1-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">expr</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">expr-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+</table>
+
+<h2><a name="F">-- F --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_math_functions.html">factorial</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">factorial-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="nafunctions.html">fillna</a></td>
+<td>A set of SparkDataFrame functions working with NA values</td></tr>
+<tr><td style="width: 25%;"><a href="nafunctions.html">fillna-method</a></td>
+<td>A set of SparkDataFrame functions working with NA values</td></tr>
+<tr><td style="width: 25%;"><a href="filter.html">filter</a></td>
+<td>Filter</td></tr>
+<tr><td style="width: 25%;"><a href="filter.html">filter-method</a></td>
+<td>Filter</td></tr>
+<tr><td style="width: 25%;"><a href="first.html">first</a></td>
+<td>Return the first row of a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="first.html">first-method</a></td>
+<td>Return the first row of a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="fitted.html">fitted</a></td>
+<td>Get fitted result from a k-means model</td></tr>
+<tr><td style="width: 25%;"><a href="fitted.html">fitted-method</a></td>
+<td>Get fitted result from a k-means model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.bisectingKmeans.html">fitted-method</a></td>
+<td>Bisecting K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">flatten</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">flatten-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">floor</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">floor-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">format_number</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">format_number-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">format_string</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">format_string-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="FPGrowthModel-class.html">FPGrowthModel-class</a></td>
+<td>S4 class that represents a FPGrowthModel</td></tr>
+<tr><td style="width: 25%;"><a href="freqItems.html">freqItems</a></td>
+<td>Finding frequent items for columns, possibly with false positives</td></tr>
+<tr><td style="width: 25%;"><a href="freqItems.html">freqItems-method</a></td>
+<td>Finding frequent items for columns, possibly with false positives</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">freqItemsets-method</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">from_json</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">from_json-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">from_unixtime</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">from_unixtime-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">from_utc_timestamp</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">from_utc_timestamp-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+</table>
+
+<h2><a name="G">-- G --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="gapply.html">gapply</a></td>
+<td>gapply</td></tr>
+<tr><td style="width: 25%;"><a href="gapply.html">gapply-method</a></td>
+<td>gapply</td></tr>
+<tr><td style="width: 25%;"><a href="gapplyCollect.html">gapplyCollect</a></td>
+<td>gapplyCollect</td></tr>
+<tr><td style="width: 25%;"><a href="gapplyCollect.html">gapplyCollect-method</a></td>
+<td>gapplyCollect</td></tr>
+<tr><td style="width: 25%;"><a href="GaussianMixtureModel-class.html">GaussianMixtureModel-class</a></td>
+<td>S4 class that represents a GaussianMixtureModel</td></tr>
+<tr><td style="width: 25%;"><a href="GBTClassificationModel-class.html">GBTClassificationModel-class</a></td>
+<td>S4 class that represents a GBTClassificationModel</td></tr>
+<tr><td style="width: 25%;"><a href="GBTRegressionModel-class.html">GBTRegressionModel-class</a></td>
+<td>S4 class that represents a GBTRegressionModel</td></tr>
+<tr><td style="width: 25%;"><a href="GeneralizedLinearRegressionModel-class.html">GeneralizedLinearRegressionModel-class</a></td>
+<td>S4 class that represents a generalized linear model</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">getField</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">getItem</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="getLocalProperty.html">getLocalProperty</a></td>
+<td>Get a local property set in this thread, or 'NULL' if it is missing. See 'setLocalProperty'.</td></tr>
+<tr><td style="width: 25%;"><a href="getNumPartitions.html">getNumPartitions</a></td>
+<td>getNumPartitions</td></tr>
+<tr><td style="width: 25%;"><a href="getNumPartitions.html">getNumPartitions-method</a></td>
+<td>getNumPartitions</td></tr>
+<tr><td style="width: 25%;"><a href="glm.html">glm</a></td>
+<td>Generalized Linear Models (R-compliant)</td></tr>
+<tr><td style="width: 25%;"><a href="glm.html">glm-method</a></td>
+<td>Generalized Linear Models (R-compliant)</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">greatest</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">greatest-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="groupBy.html">groupBy</a></td>
+<td>GroupBy</td></tr>
+<tr><td style="width: 25%;"><a href="groupBy.html">groupBy-method</a></td>
+<td>GroupBy</td></tr>
+<tr><td style="width: 25%;"><a href="GroupedData.html">groupedData</a></td>
+<td>S4 class that represents a GroupedData</td></tr>
+<tr><td style="width: 25%;"><a href="GroupedData.html">GroupedData-class</a></td>
+<td>S4 class that represents a GroupedData</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">grouping_bit</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">grouping_bit-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">grouping_id</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">grouping_id-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="groupBy.html">group_by</a></td>
+<td>GroupBy</td></tr>
+<tr><td style="width: 25%;"><a href="groupBy.html">group_by-method</a></td>
+<td>GroupBy</td></tr>
+</table>
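+
+<p>An illustrative SparkR sketch (not part of the generated index) tying together a few of the G entries above; it assumes an active session started with <code>sparkR.session()</code> and R's built-in <code>faithful</code> data set:</p>
+<pre>
+library(SparkR)
+sparkR.session()
+df <- createDataFrame(faithful)
+
+# groupBy + agg: average eruption length per waiting time
+head(agg(groupBy(df, df$waiting), mean(df$eruptions)))
+
+# glm (R-compliant interface): a Gaussian GLM fitted on Spark
+model <- glm(eruptions ~ waiting, data = df, family = "gaussian")
+summary(model)
+
+# gapply: apply an R function per group; the output schema must be declared
+schema <- structType(structField("waiting", "double"),
+                     structField("max_eruptions", "double"))
+head(gapply(df, "waiting",
+            function(key, x) { data.frame(key, max(x$eruptions)) },
+            schema))
+</pre>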
+
+<h2><a name="H">-- H --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">hash</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">hash-method</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="hashCode.html">hashCode</a></td>
+<td>Compute the hashCode of an object</td></tr>
+<tr><td style="width: 25%;"><a href="head.html">head</a></td>
+<td>Head</td></tr>
+<tr><td style="width: 25%;"><a href="head.html">head-method</a></td>
+<td>Head</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">hex</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">hex-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="hint.html">hint</a></td>
+<td>hint</td></tr>
+<tr><td style="width: 25%;"><a href="hint.html">hint-method</a></td>
+<td>hint</td></tr>
+<tr><td style="width: 25%;"><a href="histogram.html">histogram</a></td>
+<td>Compute histogram statistics for given column</td></tr>
+<tr><td style="width: 25%;"><a href="histogram.html">histogram-method</a></td>
+<td>Compute histogram statistics for given column</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">hour</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">hour-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">hypot</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">hypot-method</a></td>
+<td>Math functions for Column operations</td></tr>
+</table>
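+
+<p>A brief illustrative sketch for the <code>head</code> and <code>histogram</code> entries above, reusing the <code>faithful</code>-based SparkDataFrame:</p>
+<pre>
+df <- createDataFrame(faithful)
+head(df, 3)                            # first rows, returned as a local data.frame
+histogram(df, df$waiting, nbins = 10)  # histogram bin statistics computed by Spark
+</pre>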
+
+<h2><a name="I">-- I --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">ifelse</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">ifelse-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">initcap</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">initcap-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">input_file_name</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">input_file_name-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="insertInto.html">insertInto</a></td>
+<td>insertInto</td></tr>
+<tr><td style="width: 25%;"><a href="insertInto.html">insertInto-method</a></td>
+<td>insertInto</td></tr>
+<tr><td style="width: 25%;"><a href="install.spark.html">install.spark</a></td>
+<td>Download and Install Apache Spark to a Local Directory</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">instr</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">instr-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="intersect.html">intersect</a></td>
+<td>Intersect</td></tr>
+<tr><td style="width: 25%;"><a href="intersect.html">intersect-method</a></td>
+<td>Intersect</td></tr>
+<tr><td style="width: 25%;"><a href="intersectAll.html">intersectAll</a></td>
+<td>intersectAll</td></tr>
+<tr><td style="width: 25%;"><a href="intersectAll.html">intersectAll-method</a></td>
+<td>intersectAll</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">is.nan</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">is.nan-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="isActive.html">isActive</a></td>
+<td>isActive</td></tr>
+<tr><td style="width: 25%;"><a href="isActive.html">isActive-method</a></td>
+<td>isActive</td></tr>
+<tr><td style="width: 25%;"><a href="isLocal.html">isLocal</a></td>
+<td>isLocal</td></tr>
+<tr><td style="width: 25%;"><a href="isLocal.html">isLocal-method</a></td>
+<td>isLocal</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">isNaN</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">isnan</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">isnan-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">isNotNull</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">isNull</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="IsotonicRegressionModel-class.html">IsotonicRegressionModel-class</a></td>
+<td>S4 class that represents an IsotonicRegressionModel</td></tr>
+<tr><td style="width: 25%;"><a href="isStreaming.html">isStreaming</a></td>
+<td>isStreaming</td></tr>
+<tr><td style="width: 25%;"><a href="isStreaming.html">isStreaming-method</a></td>
+<td>isStreaming</td></tr>
+</table>
+
+<h2><a name="J">-- J --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="join.html">join</a></td>
+<td>Join</td></tr>
+<tr><td style="width: 25%;"><a href="join.html">join-method</a></td>
+<td>Join</td></tr>
+<tr><td style="width: 25%;"><a href="read.json.html">jsonFile</a></td>
+<td>Create a SparkDataFrame from a JSON file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.json.html">jsonFile.default</a></td>
+<td>Create a SparkDataFrame from a JSON file.</td></tr>
+</table>
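+
+<p>An illustrative sketch of <code>read.json</code> and <code>join</code> from the J entries; the path <code>"people.json"</code> and its <code>name</code>/<code>dept_id</code> columns are hypothetical:</p>
+<pre>
+people <- read.json("people.json")
+depts  <- createDataFrame(data.frame(dept_id = c(1, 2),
+                                     dept_name = c("eng", "sales"),
+                                     stringsAsFactors = FALSE))
+
+# Inner equi-join; other join types include "left_outer", "right_outer", "full_outer"
+joined <- join(people, depts, people$dept_id == depts$dept_id, "inner")
+head(select(joined, people$name, depts$dept_name))
+</pre>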
+
+<h2><a name="K">-- K --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="KMeansModel-class.html">KMeansModel-class</a></td>
+<td>S4 class that represents a KMeansModel</td></tr>
+<tr><td style="width: 25%;"><a href="KSTest-class.html">KSTest-class</a></td>
+<td>S4 class that represents a KSTest</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">kurtosis</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">kurtosis-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+</table>
+
+<h2><a name="L">-- L --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_window_functions.html">lag</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">lag-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="last.html">last</a></td>
+<td>last</td></tr>
+<tr><td style="width: 25%;"><a href="last.html">last-method</a></td>
+<td>last</td></tr>
+<tr><td style="width: 25%;"><a href="lastProgress.html">lastProgress</a></td>
+<td>lastProgress</td></tr>
+<tr><td style="width: 25%;"><a href="lastProgress.html">lastProgress-method</a></td>
+<td>lastProgress</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">last_day</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">last_day-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="LDAModel-class.html">LDAModel-class</a></td>
+<td>S4 class that represents an LDAModel</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">lead</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">lead-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">least</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">least-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">length</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">length-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">levenshtein</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">levenshtein-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">like</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="limit.html">limit</a></td>
+<td>Limit</td></tr>
+<tr><td style="width: 25%;"><a href="limit.html">limit-method</a></td>
+<td>Limit</td></tr>
+<tr><td style="width: 25%;"><a href="LinearSVCModel-class.html">LinearSVCModel-class</a></td>
+<td>S4 class that represents a LinearSVCModel</td></tr>
+<tr><td style="width: 25%;"><a href="listColumns.html">listColumns</a></td>
+<td>Returns a list of columns for the given table/view in the specified database</td></tr>
+<tr><td style="width: 25%;"><a href="listDatabases.html">listDatabases</a></td>
+<td>Returns a list of available databases</td></tr>
+<tr><td style="width: 25%;"><a href="listFunctions.html">listFunctions</a></td>
+<td>Returns a list of functions registered in the specified database</td></tr>
+<tr><td style="width: 25%;"><a href="listTables.html">listTables</a></td>
+<td>Returns a list of tables or views in the specified database</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">lit</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">lit-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="read.df.html">loadDF</a></td>
+<td>Load a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="read.df.html">loadDF.default</a></td>
+<td>Load a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="localCheckpoint.html">localCheckpoint</a></td>
+<td>localCheckpoint</td></tr>
+<tr><td style="width: 25%;"><a href="localCheckpoint.html">localCheckpoint-method</a></td>
+<td>localCheckpoint</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">locate</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">locate-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log10</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log10-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log1p</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log1p-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log2</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">log2-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="LogisticRegressionModel-class.html">LogisticRegressionModel-class</a></td>
+<td>S4 class that represents a LogisticRegressionModel</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">lower</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">lower-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">lpad</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">lpad-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">ltrim</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">ltrim-method</a></td>
+<td>String functions for Column operations</td></tr>
+</table>
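+
+<p>An illustrative sketch of a few of the string helpers and <code>limit</code> from the L entries; the <code>name</code> column is hypothetical:</p>
+<pre>
+people <- createDataFrame(data.frame(name = c("  Ada ", "Grace", "Linus"),
+                                     stringsAsFactors = FALSE))
+head(select(people, ltrim(people$name), lower(people$name), length(people$name)))
+head(limit(people, 2))
+</pre>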
+
+<h2><a name="M">-- M --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">map_from_arrays</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">map_from_arrays-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">map_keys</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">map_keys-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">map_values</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">map_values-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">max</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">max-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">md5</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">md5-method</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">mean</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">mean-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="merge.html">merge</a></td>
+<td>Merges two data frames</td></tr>
+<tr><td style="width: 25%;"><a href="merge.html">merge-method</a></td>
+<td>Merges two data frames</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">min</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">min-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">minute</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">minute-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">monotonically_increasing_id</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">monotonically_increasing_id-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">month</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">month-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">months_between</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">months_between-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="MultilayerPerceptronClassificationModel-class.html">MultilayerPerceptronClassificationModel-class</a></td>
+<td>S4 class that represents a MultilayerPerceptronClassificationModel</td></tr>
+<tr><td style="width: 25%;"><a href="mutate.html">mutate</a></td>
+<td>Mutate</td></tr>
+<tr><td style="width: 25%;"><a href="mutate.html">mutate-method</a></td>
+<td>Mutate</td></tr>
+</table>
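+
+<p>An illustrative sketch of <code>mutate</code>, the <code>max</code>/<code>mean</code>/<code>min</code> aggregates, and <code>merge</code> from the M entries:</p>
+<pre>
+df  <- createDataFrame(faithful)
+df2 <- mutate(df, long_wait = df$waiting > 70)   # add a derived boolean column
+head(agg(df2, max(df2$eruptions), mean(df2$eruptions), min(df2$eruptions)))
+
+# merge: data.frame-style join on common column names
+left  <- createDataFrame(data.frame(id = 1:3, x = c(10, 20, 30)))
+right <- createDataFrame(data.frame(id = 2:4, y = c(100, 200, 300)))
+head(merge(left, right, by = "id"))
+</pre>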
+
+<h2><a name="N">-- N --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="count.html">n</a></td>
+<td>Count</td></tr>
+<tr><td style="width: 25%;"><a href="count.html">n-method</a></td>
+<td>Count</td></tr>
+<tr><td style="width: 25%;"><a href="nafunctions.html">na.omit</a></td>
+<td>A set of SparkDataFrame functions working with NA values</td></tr>
+<tr><td style="width: 25%;"><a href="nafunctions.html">na.omit-method</a></td>
+<td>A set of SparkDataFrame functions working with NA values</td></tr>
+<tr><td style="width: 25%;"><a href="NaiveBayesModel-class.html">NaiveBayesModel-class</a></td>
+<td>S4 class that represents a NaiveBayesModel</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">names</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">names-method</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">names&lt;-</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="columns.html">names&lt;--method</a></td>
+<td>Column Names of SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">nanvl</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">nanvl-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="ncol.html">ncol</a></td>
+<td>Returns the number of columns in a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="ncol.html">ncol-method</a></td>
+<td>Returns the number of columns in a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">negate</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">negate-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">next_day</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">next_day-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="not.html">not</a></td>
+<td>!</td></tr>
+<tr><td style="width: 25%;"><a href="not.html">not-method</a></td>
+<td>!</td></tr>
+<tr><td style="width: 25%;"><a href="nrow.html">nrow</a></td>
+<td>Returns the number of rows in a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="nrow.html">nrow-method</a></td>
+<td>Returns the number of rows in a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">ntile</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">ntile-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">n_distinct</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">n_distinct-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+</table>
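+
+<p>An illustrative sketch of the shape and NA helpers from the N entries:</p>
+<pre>
+df <- createDataFrame(faithful)
+nrow(df)           # number of rows
+ncol(df)           # number of columns
+names(df)          # column names
+head(na.omit(df))  # drop rows containing NA values
+</pre>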
+
+<h2><a name="O">-- O --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="orderBy.html">orderBy</a></td>
+<td>Ordering Columns in a WindowSpec</td></tr>
+<tr><td style="width: 25%;"><a href="arrange.html">orderBy-method</a></td>
+<td>Arrange Rows by Variables</td></tr>
+<tr><td style="width: 25%;"><a href="orderBy.html">orderBy-method</a></td>
+<td>Ordering Columns in a WindowSpec</td></tr>
+<tr><td style="width: 25%;"><a href="otherwise.html">otherwise</a></td>
+<td>otherwise</td></tr>
+<tr><td style="width: 25%;"><a href="otherwise.html">otherwise-method</a></td>
+<td>otherwise</td></tr>
+<tr><td style="width: 25%;"><a href="over.html">over</a></td>
+<td>over</td></tr>
+<tr><td style="width: 25%;"><a href="over.html">over-method</a></td>
+<td>over</td></tr>
+</table>
+
+<h2><a name="P">-- P --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="read.parquet.html">parquetFile</a></td>
+<td>Create a SparkDataFrame from a Parquet file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.parquet.html">parquetFile.default</a></td>
+<td>Create a SparkDataFrame from a Parquet file.</td></tr>
+<tr><td style="width: 25%;"><a href="partitionBy.html">partitionBy</a></td>
+<td>partitionBy</td></tr>
+<tr><td style="width: 25%;"><a href="partitionBy.html">partitionBy-method</a></td>
+<td>partitionBy</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">percent_rank</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">percent_rank-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="persist.html">persist</a></td>
+<td>Persist</td></tr>
+<tr><td style="width: 25%;"><a href="persist.html">persist-method</a></td>
+<td>Persist</td></tr>
+<tr><td style="width: 25%;"><a href="pivot.html">pivot</a></td>
+<td>Pivot a column of the GroupedData and perform the specified aggregation.</td></tr>
+<tr><td style="width: 25%;"><a href="pivot.html">pivot-method</a></td>
+<td>Pivot a column of the GroupedData and perform the specified aggregation.</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">pmod</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">pmod-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">posexplode</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">posexplode-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">posexplode_outer</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">posexplode_outer-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="predict.html">predict</a></td>
+<td>Makes predictions from a MLlib model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.als.html">predict-method</a></td>
+<td>Alternating Least Squares (ALS) for Collaborative Filtering</td></tr>
+<tr><td style="width: 25%;"><a href="spark.bisectingKmeans.html">predict-method</a></td>
+<td>Bisecting K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.decisionTree.html">predict-method</a></td>
+<td>Decision Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">predict-method</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gaussianMixture.html">predict-method</a></td>
+<td>Multivariate Gaussian Mixture Model (GMM)</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gbt.html">predict-method</a></td>
+<td>Gradient Boosted Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.glm.html">predict-method</a></td>
+<td>Generalized Linear Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.isoreg.html">predict-method</a></td>
+<td>Isotonic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kmeans.html">predict-method</a></td>
+<td>K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.logit.html">predict-method</a></td>
+<td>Logistic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.mlp.html">predict-method</a></td>
+<td>Multilayer Perceptron Classification Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.naiveBayes.html">predict-method</a></td>
+<td>Naive Bayes Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.randomForest.html">predict-method</a></td>
+<td>Random Forest Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.survreg.html">predict-method</a></td>
+<td>Accelerated Failure Time (AFT) Survival Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.svmLinear.html">predict-method</a></td>
+<td>Linear SVM Model</td></tr>
+<tr><td style="width: 25%;"><a href="print.jobj.html">print.jobj</a></td>
+<td>Print a JVM object reference.</td></tr>
+<tr><td style="width: 25%;"><a href="print.structField.html">print.structField</a></td>
+<td>Print a Spark StructField.</td></tr>
+<tr><td style="width: 25%;"><a href="print.structType.html">print.structType</a></td>
+<td>Print a Spark StructType.</td></tr>
+<tr><td style="width: 25%;"><a href="spark.decisionTree.html">print.summary.DecisionTreeClassificationModel</a></td>
+<td>Decision Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.decisionTree.html">print.summary.DecisionTreeRegressionModel</a></td>
+<td>Decision Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gbt.html">print.summary.GBTClassificationModel</a></td>
+<td>Gradient Boosted Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gbt.html">print.summary.GBTRegressionModel</a></td>
+<td>Gradient Boosted Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.glm.html">print.summary.GeneralizedLinearRegressionModel</a></td>
+<td>Generalized Linear Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kstest.html">print.summary.KSTest</a></td>
+<td>(One-Sample) Kolmogorov-Smirnov Test</td></tr>
+<tr><td style="width: 25%;"><a href="spark.randomForest.html">print.summary.RandomForestClassificationModel</a></td>
+<td>Random Forest Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.randomForest.html">print.summary.RandomForestRegressionModel</a></td>
+<td>Random Forest Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="printSchema.html">printSchema</a></td>
+<td>Print Schema of a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="printSchema.html">printSchema-method</a></td>
+<td>Print Schema of a SparkDataFrame</td></tr>
+</table>
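+
+<p>An illustrative sketch of <code>printSchema</code>, <code>persist</code>, <code>pivot</code>, and <code>predict</code> from the P entries; <code>spark.glm</code> is indexed under S below:</p>
+<pre>
+df <- createDataFrame(faithful)
+printSchema(df)              # print the SparkDataFrame schema
+persist(df, "MEMORY_ONLY")   # cache with an explicit storage level
+
+# pivot a grouped column, then aggregate
+sales <- createDataFrame(data.frame(year   = c(2019, 2019, 2020, 2020),
+                                    region = c("east", "west", "east", "west"),
+                                    amount = c(10, 20, 30, 40),
+                                    stringsAsFactors = FALSE))
+head(agg(pivot(groupBy(sales, "year"), "region"), sum(sales$amount)))
+
+# predict on a fitted MLlib model
+model <- spark.glm(df, eruptions ~ waiting, family = "gaussian")
+head(select(predict(model, df), "eruptions", "prediction"))
+</pre>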
+
+<h2><a name="Q">-- Q --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">quarter</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">quarter-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="queryName.html">queryName</a></td>
+<td>queryName</td></tr>
+<tr><td style="width: 25%;"><a href="queryName.html">queryName-method</a></td>
+<td>queryName</td></tr>
+</table>
+
+<h2><a name="R">-- R --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">rand</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">rand-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">randn</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">randn-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="RandomForestClassificationModel-class.html">RandomForestClassificationModel-class</a></td>
+<td>S4 class that represents a RandomForestClassificationModel</td></tr>
+<tr><td style="width: 25%;"><a href="RandomForestRegressionModel-class.html">RandomForestRegressionModel-class</a></td>
+<td>S4 class that represents a RandomForestRegressionModel</td></tr>
+<tr><td style="width: 25%;"><a href="randomSplit.html">randomSplit</a></td>
+<td>randomSplit</td></tr>
+<tr><td style="width: 25%;"><a href="randomSplit.html">randomSplit-method</a></td>
+<td>randomSplit</td></tr>
+<tr><td style="width: 25%;"><a href="rangeBetween.html">rangeBetween</a></td>
+<td>rangeBetween</td></tr>
+<tr><td style="width: 25%;"><a href="rangeBetween.html">rangeBetween-method</a></td>
+<td>rangeBetween</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">rank</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">rank-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="rbind.html">rbind</a></td>
+<td>Union two or more SparkDataFrames</td></tr>
+<tr><td style="width: 25%;"><a href="rbind.html">rbind-method</a></td>
+<td>Union two or more SparkDataFrames</td></tr>
+<tr><td style="width: 25%;"><a href="read.df.html">read.df</a></td>
+<td>Load a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="read.df.html">read.df.default</a></td>
+<td>Load a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="read.jdbc.html">read.jdbc</a></td>
+<td>Create a SparkDataFrame representing the database table accessible via JDBC URL</td></tr>
+<tr><td style="width: 25%;"><a href="read.json.html">read.json</a></td>
+<td>Create a SparkDataFrame from a JSON file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.json.html">read.json.default</a></td>
+<td>Create a SparkDataFrame from a JSON file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.ml.html">read.ml</a></td>
+<td>Load a fitted MLlib model from the input path.</td></tr>
+<tr><td style="width: 25%;"><a href="read.orc.html">read.orc</a></td>
+<td>Create a SparkDataFrame from an ORC file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.parquet.html">read.parquet</a></td>
+<td>Create a SparkDataFrame from a Parquet file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.parquet.html">read.parquet.default</a></td>
+<td>Create a SparkDataFrame from a Parquet file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.stream.html">read.stream</a></td>
+<td>Load a streaming SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="read.text.html">read.text</a></td>
+<td>Create a SparkDataFrame from a text file.</td></tr>
+<tr><td style="width: 25%;"><a href="read.text.html">read.text.default</a></td>
+<td>Create a SparkDataFrame from a text file.</td></tr>
+<tr><td style="width: 25%;"><a href="recoverPartitions.html">recoverPartitions</a></td>
+<td>Recovers all the partitions in the directory of a table and updates the catalog</td></tr>
+<tr><td style="width: 25%;"><a href="refreshByPath.html">refreshByPath</a></td>
+<td>Invalidates and refreshes all the cached data and metadata for any SparkDataFrame that contains the given path</td></tr>
+<tr><td style="width: 25%;"><a href="refreshTable.html">refreshTable</a></td>
+<td>Invalidates and refreshes all the cached data and metadata of the given table</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">regexp_extract</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">regexp_extract-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">regexp_replace</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">regexp_replace-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="registerTempTable-deprecated.html">registerTempTable</a></td>
+<td>(Deprecated) Register Temporary Table</td></tr>
+<tr><td style="width: 25%;"><a href="registerTempTable-deprecated.html">registerTempTable-method</a></td>
+<td>(Deprecated) Register Temporary Table</td></tr>
+<tr><td style="width: 25%;"><a href="rename.html">rename</a></td>
+<td>rename</td></tr>
+<tr><td style="width: 25%;"><a href="rename.html">rename-method</a></td>
+<td>rename</td></tr>
+<tr><td style="width: 25%;"><a href="repartition.html">repartition</a></td>
+<td>Repartition</td></tr>
+<tr><td style="width: 25%;"><a href="repartition.html">repartition-method</a></td>
+<td>Repartition</td></tr>
+<tr><td style="width: 25%;"><a href="repartitionByRange.html">repartitionByRange</a></td>
+<td>Repartition by range</td></tr>
+<tr><td style="width: 25%;"><a href="repartitionByRange.html">repartitionByRange-method</a></td>
+<td>Repartition by range</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">repeat_string</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">repeat_string-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">reverse</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">reverse-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">rint</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">rint-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="columnfunctions.html">rlike</a></td>
+<td>A set of operations working with SparkDataFrame columns</td></tr>
+<tr><td style="width: 25%;"><a href="rollup.html">rollup</a></td>
+<td>rollup</td></tr>
+<tr><td style="width: 25%;"><a href="rollup.html">rollup-method</a></td>
+<td>rollup</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">round</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">round-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="rowsBetween.html">rowsBetween</a></td>
+<td>rowsBetween</td></tr>
+<tr><td style="width: 25%;"><a href="rowsBetween.html">rowsBetween-method</a></td>
+<td>rowsBetween</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">row_number</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_window_functions.html">row_number-method</a></td>
+<td>Window functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">rpad</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">rpad-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">rtrim</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">rtrim-method</a></td>
+<td>String functions for Column operations</td></tr>
+</table>
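+
+<p>An illustrative sketch of the generic read/write path plus <code>randomSplit</code> and <code>repartition</code> from the R entries; the output path is hypothetical and <code>write.df</code> is indexed under W:</p>
+<pre>
+df <- createDataFrame(faithful)
+write.df(df, path = "faithful.parquet", source = "parquet", mode = "overwrite")
+parquet_df <- read.df("faithful.parquet", source = "parquet")
+
+splits <- randomSplit(parquet_df, weights = c(0.8, 0.2), seed = 42)
+train  <- repartition(splits[[1]], numPartitions = 4)
+getNumPartitions(train)   # indexed under G above
+</pre>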
+
+<h2><a name="S">-- S --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="sample.html">sample</a></td>
+<td>Sample</td></tr>
+<tr><td style="width: 25%;"><a href="sample.html">sample-method</a></td>
+<td>Sample</td></tr>
+<tr><td style="width: 25%;"><a href="sampleBy.html">sampleBy</a></td>
+<td>Returns a stratified sample without replacement</td></tr>
+<tr><td style="width: 25%;"><a href="sampleBy.html">sampleBy-method</a></td>
+<td>Returns a stratified sample without replacement</td></tr>
+<tr><td style="width: 25%;"><a href="sample.html">sample_frac</a></td>
+<td>Sample</td></tr>
+<tr><td style="width: 25%;"><a href="sample.html">sample_frac-method</a></td>
+<td>Sample</td></tr>
+<tr><td style="width: 25%;"><a href="write.parquet.html">saveAsParquetFile</a></td>
+<td>Save the contents of SparkDataFrame as a Parquet file, preserving the schema.</td></tr>
+<tr><td style="width: 25%;"><a href="write.parquet.html">saveAsParquetFile-method</a></td>
+<td>Save the contents of SparkDataFrame as a Parquet file, preserving the schema.</td></tr>
+<tr><td style="width: 25%;"><a href="saveAsTable.html">saveAsTable</a></td>
+<td>Save the contents of the SparkDataFrame to a data source as a table</td></tr>
+<tr><td style="width: 25%;"><a href="saveAsTable.html">saveAsTable-method</a></td>
+<td>Save the contents of the SparkDataFrame to a data source as a table</td></tr>
+<tr><td style="width: 25%;"><a href="write.df.html">saveDF</a></td>
+<td>Save the contents of SparkDataFrame to a data source.</td></tr>
+<tr><td style="width: 25%;"><a href="write.df.html">saveDF-method</a></td>
+<td>Save the contents of SparkDataFrame to a data source.</td></tr>
+<tr><td style="width: 25%;"><a href="schema.html">schema</a></td>
+<td>Get schema object</td></tr>
+<tr><td style="width: 25%;"><a href="schema.html">schema-method</a></td>
+<td>Get schema object</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">sd</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">sd-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">second</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">second-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="select.html">select</a></td>
+<td>Select</td></tr>
+<tr><td style="width: 25%;"><a href="select.html">select-method</a></td>
+<td>Select</td></tr>
+<tr><td style="width: 25%;"><a href="selectExpr.html">selectExpr</a></td>
+<td>SelectExpr</td></tr>
+<tr><td style="width: 25%;"><a href="selectExpr.html">selectExpr-method</a></td>
+<td>SelectExpr</td></tr>
+<tr><td style="width: 25%;"><a href="setCheckpointDir.html">setCheckpointDir</a></td>
+<td>Set checkpoint directory</td></tr>
+<tr><td style="width: 25%;"><a href="setCurrentDatabase.html">setCurrentDatabase</a></td>
+<td>Sets the current default database</td></tr>
+<tr><td style="width: 25%;"><a href="setJobDescription.html">setJobDescription</a></td>
+<td>Set a human-readable description of the current job.</td></tr>
+<tr><td style="width: 25%;"><a href="setJobGroup.html">setJobGroup</a></td>
+<td>Assigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared.</td></tr>
+<tr><td style="width: 25%;"><a href="setJobGroup.html">setJobGroup.default</a></td>
+<td>Assigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared.</td></tr>
+<tr><td style="width: 25%;"><a href="setLocalProperty.html">setLocalProperty</a></td>
+<td>Set a local property that affects jobs submitted from this thread, such as the Spark fair scheduler pool.</td></tr>
+<tr><td style="width: 25%;"><a href="setLogLevel.html">setLogLevel</a></td>
+<td>Set new log level</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">sha1</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">sha1-method</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">sha2</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_misc_functions.html">sha2-method</a></td>
+<td>Miscellaneous functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">shiftLeft</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">shiftLeft-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">shiftRight</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">shiftRight-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">shiftRightUnsigned</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">shiftRightUnsigned-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="show.html">show</a></td>
+<td>show</td></tr>
+<tr><td style="width: 25%;"><a href="show.html">show-method</a></td>
+<td>show</td></tr>
+<tr><td style="width: 25%;"><a href="showDF.html">showDF</a></td>
+<td>showDF</td></tr>
+<tr><td style="width: 25%;"><a href="showDF.html">showDF-method</a></td>
+<td>showDF</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">shuffle</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">shuffle-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sign</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sign-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">signum</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">signum-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sin</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sin-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sinh</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sinh-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">size</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">size-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">skewness</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">skewness-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">slice</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">slice-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">sort_array</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">sort_array-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">soundex</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">soundex-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="spark.addFile.html">spark.addFile</a></td>
+<td>Add a file or directory to be downloaded with this Spark job on every node.</td></tr>
+<tr><td style="width: 25%;"><a href="spark.als.html">spark.als</a></td>
+<td>Alternating Least Squares (ALS) for Collaborative Filtering</td></tr>
+<tr><td style="width: 25%;"><a href="spark.als.html">spark.als-method</a></td>
+<td>Alternating Least Squares (ALS) for Collaborative Filtering</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">spark.associationRules</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">spark.associationRules-method</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.bisectingKmeans.html">spark.bisectingKmeans</a></td>
+<td>Bisecting K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.bisectingKmeans.html">spark.bisectingKmeans-method</a></td>
+<td>Bisecting K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.decisionTree.html">spark.decisionTree</a></td>
+<td>Decision Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.decisionTree.html">spark.decisionTree-method</a></td>
+<td>Decision Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">spark.fpGrowth</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">spark.fpGrowth-method</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">spark.freqItemsets</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">spark.freqItemsets-method</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gaussianMixture.html">spark.gaussianMixture</a></td>
+<td>Multivariate Gaussian Mixture Model (GMM)</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gaussianMixture.html">spark.gaussianMixture-method</a></td>
+<td>Multivariate Gaussian Mixture Model (GMM)</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gbt.html">spark.gbt</a></td>
+<td>Gradient Boosted Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gbt.html">spark.gbt-method</a></td>
+<td>Gradient Boosted Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.getSparkFiles.html">spark.getSparkFiles</a></td>
+<td>Get the absolute path of a file added through spark.addFile.</td></tr>
+<tr><td style="width: 25%;"><a href="spark.getSparkFilesRootDirectory.html">spark.getSparkFilesRootDirectory</a></td>
+<td>Get the root directory that contains files added through spark.addFile.</td></tr>
+<tr><td style="width: 25%;"><a href="spark.glm.html">spark.glm</a></td>
+<td>Generalized Linear Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.glm.html">spark.glm-method</a></td>
+<td>Generalized Linear Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.isoreg.html">spark.isoreg</a></td>
+<td>Isotonic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.isoreg.html">spark.isoreg-method</a></td>
+<td>Isotonic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kmeans.html">spark.kmeans</a></td>
+<td>K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kmeans.html">spark.kmeans-method</a></td>
+<td>K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kstest.html">spark.kstest</a></td>
+<td>(One-Sample) Kolmogorov-Smirnov Test</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kstest.html">spark.kstest-method</a></td>
+<td>(One-Sample) Kolmogorov-Smirnov Test</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lapply.html">spark.lapply</a></td>
+<td>Run a function over a list of elements, distributing the computations with Spark</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">spark.lda</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">spark.lda-method</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.logit.html">spark.logit</a></td>
+<td>Logistic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.logit.html">spark.logit-method</a></td>
+<td>Logistic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.mlp.html">spark.mlp</a></td>
+<td>Multilayer Perceptron Classification Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.mlp.html">spark.mlp-method</a></td>
+<td>Multilayer Perceptron Classification Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.naiveBayes.html">spark.naiveBayes</a></td>
+<td>Naive Bayes Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.naiveBayes.html">spark.naiveBayes-method</a></td>
+<td>Naive Bayes Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">spark.perplexity</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">spark.perplexity-method</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">spark.posterior</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">spark.posterior-method</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.randomForest.html">spark.randomForest</a></td>
+<td>Random Forest Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.randomForest.html">spark.randomForest-method</a></td>
+<td>Random Forest Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.survreg.html">spark.survreg</a></td>
+<td>Accelerated Failure Time (AFT) Survival Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.survreg.html">spark.survreg-method</a></td>
+<td>Accelerated Failure Time (AFT) Survival Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.svmLinear.html">spark.svmLinear</a></td>
+<td>Linear SVM Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.svmLinear.html">spark.svmLinear-method</a></td>
+<td>Linear SVM Model</td></tr>
+<tr><td style="width: 25%;"><a href="SparkDataFrame.html">SparkDataFrame-class</a></td>
+<td>S4 class that represents a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.callJMethod.html">sparkR.callJMethod</a></td>
+<td>Call Java Methods</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.callJStatic.html">sparkR.callJStatic</a></td>
+<td>Call Static Java Methods</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.conf.html">sparkR.conf</a></td>
+<td>Get Runtime Config from the currently active SparkSession</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.init-deprecated.html">sparkR.init</a></td>
+<td>(Deprecated) Initialize a new Spark Context</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.newJObject.html">sparkR.newJObject</a></td>
+<td>Create Java Objects</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.session.html">sparkR.session</a></td>
+<td>Get the existing SparkSession or initialize a new SparkSession.</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.session.stop.html">sparkR.session.stop</a></td>
+<td>Stop the Spark Session and Spark Context</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.session.stop.html">sparkR.stop</a></td>
+<td>Stop the Spark Session and Spark Context</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.uiWebUrl.html">sparkR.uiWebUrl</a></td>
+<td>Get the URL of the SparkUI instance for the currently active SparkSession</td></tr>
+<tr><td style="width: 25%;"><a href="sparkR.version.html">sparkR.version</a></td>
+<td>Get version of Spark on which this application is running</td></tr>
+<tr><td style="width: 25%;"><a href="sparkRHive.init-deprecated.html">sparkRHive.init</a></td>
+<td>(Deprecated) Initialize a new HiveContext</td></tr>
+<tr><td style="width: 25%;"><a href="sparkRSQL.init-deprecated.html">sparkRSQL.init</a></td>
+<td>(Deprecated) Initialize a new SQLContext</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">spark_partition_id</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">spark_partition_id-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">split_string</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">split_string-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="sql.html">sql</a></td>
+<td>SQL Query</td></tr>
+<tr><td style="width: 25%;"><a href="sql.html">sql.default</a></td>
+<td>SQL Query</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sqrt</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">sqrt-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="startsWith.html">startsWith</a></td>
+<td>startsWith</td></tr>
+<tr><td style="width: 25%;"><a href="startsWith.html">startsWith-method</a></td>
+<td>startsWith</td></tr>
+<tr><td style="width: 25%;"><a href="status.html">status</a></td>
+<td>status</td></tr>
+<tr><td style="width: 25%;"><a href="status.html">status-method</a></td>
+<td>status</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">stddev</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">stddev-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">stddev_pop</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">stddev_pop-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">stddev_samp</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">stddev_samp-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="stopQuery.html">stopQuery</a></td>
+<td>stopQuery</td></tr>
+<tr><td style="width: 25%;"><a href="stopQuery.html">stopQuery-method</a></td>
+<td>stopQuery</td></tr>
+<tr><td style="width: 25%;"><a href="storageLevel.html">storageLevel</a></td>
+<td>StorageLevel</td></tr>
+<tr><td style="width: 25%;"><a href="storageLevel.html">storageLevel-method</a></td>
+<td>StorageLevel</td></tr>
+<tr><td style="width: 25%;"><a href="str.html">str</a></td>
+<td>Compactly display the structure of a dataset</td></tr>
+<tr><td style="width: 25%;"><a href="str.html">str-method</a></td>
+<td>Compactly display the structure of a dataset</td></tr>
+<tr><td style="width: 25%;"><a href="StreamingQuery.html">StreamingQuery-class</a></td>
+<td>S4 class that represents a StreamingQuery</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">struct</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">struct-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="structField.html">structField</a></td>
+<td>structField</td></tr>
+<tr><td style="width: 25%;"><a href="structField.html">structField.character</a></td>
+<td>structField</td></tr>
+<tr><td style="width: 25%;"><a href="structField.html">structField.jobj</a></td>
+<td>structField</td></tr>
+<tr><td style="width: 25%;"><a href="structType.html">structType</a></td>
+<td>structType</td></tr>
+<tr><td style="width: 25%;"><a href="structType.html">structType.character</a></td>
+<td>structType</td></tr>
+<tr><td style="width: 25%;"><a href="structType.html">structType.jobj</a></td>
+<td>structType</td></tr>
+<tr><td style="width: 25%;"><a href="structType.html">structType.structField</a></td>
+<td>structType</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">subset</a></td>
+<td>Subset</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">subset-method</a></td>
+<td>Subset</td></tr>
+<tr><td style="width: 25%;"><a href="substr.html">substr</a></td>
+<td>substr</td></tr>
+<tr><td style="width: 25%;"><a href="substr.html">substr-method</a></td>
+<td>substr</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">substring_index</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">substring_index-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">sum</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">sum-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">sumDistinct</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">sumDistinct-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="summarize.html">summarize</a></td>
+<td>summarize</td></tr>
+<tr><td style="width: 25%;"><a href="summarize.html">summarize-method</a></td>
+<td>summarize</td></tr>
+<tr><td style="width: 25%;"><a href="summary.html">summary</a></td>
+<td>summary</td></tr>
+<tr><td style="width: 25%;"><a href="spark.als.html">summary-method</a></td>
+<td>Alternating Least Squares (ALS) for Collaborative Filtering</td></tr>
+<tr><td style="width: 25%;"><a href="spark.bisectingKmeans.html">summary-method</a></td>
+<td>Bisecting K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.decisionTree.html">summary-method</a></td>
+<td>Decision Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gaussianMixture.html">summary-method</a></td>
+<td>Multivariate Gaussian Mixture Model (GMM)</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gbt.html">summary-method</a></td>
+<td>Gradient Boosted Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.glm.html">summary-method</a></td>
+<td>Generalized Linear Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.isoreg.html">summary-method</a></td>
+<td>Isotonic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kmeans.html">summary-method</a></td>
+<td>K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kstest.html">summary-method</a></td>
+<td>(One-Sample) Kolmogorov-Smirnov Test</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">summary-method</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.logit.html">summary-method</a></td>
+<td>Logistic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.mlp.html">summary-method</a></td>
+<td>Multilayer Perceptron Classification Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.naiveBayes.html">summary-method</a></td>
+<td>Naive Bayes Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.randomForest.html">summary-method</a></td>
+<td>Random Forest Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.survreg.html">summary-method</a></td>
+<td>Accelerated Failure Time (AFT) Survival Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.svmLinear.html">summary-method</a></td>
+<td>Linear SVM Model</td></tr>
+<tr><td style="width: 25%;"><a href="summary.html">summary-method</a></td>
+<td>summary</td></tr>
+</table>
+
+<h2><a name="T">-- T --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="tableNames.html">tableNames</a></td>
+<td>Table Names</td></tr>
+<tr><td style="width: 25%;"><a href="tableNames.html">tableNames.default</a></td>
+<td>Table Names</td></tr>
+<tr><td style="width: 25%;"><a href="tables.html">tables</a></td>
+<td>Tables</td></tr>
+<tr><td style="width: 25%;"><a href="tables.html">tables.default</a></td>
+<td>Tables</td></tr>
+<tr><td style="width: 25%;"><a href="tableToDF.html">tableToDF</a></td>
+<td>Create a SparkDataFrame from a SparkSQL table or view</td></tr>
+<tr><td style="width: 25%;"><a href="take.html">take</a></td>
+<td>Take the first NUM rows of a SparkDataFrame and return the results as an R data.frame</td></tr>
+<tr><td style="width: 25%;"><a href="take.html">take-method</a></td>
+<td>Take the first NUM rows of a SparkDataFrame and return the results as an R data.frame</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">tan</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">tan-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">tanh</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">tanh-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">toDegrees</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">toDegrees-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="toJSON.html">toJSON</a></td>
+<td>toJSON</td></tr>
+<tr><td style="width: 25%;"><a href="toJSON.html">toJSON-method</a></td>
+<td>toJSON</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">toRadians</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">toRadians-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">to_date</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">to_date-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">to_json</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_collection_functions.html">to_json-method</a></td>
+<td>Collection functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">to_timestamp</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">to_timestamp-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">to_utc_timestamp</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_diff_functions.html">to_utc_timestamp-method</a></td>
+<td>Date time arithmetic functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="mutate.html">transform</a></td>
+<td>Mutate</td></tr>
+<tr><td style="width: 25%;"><a href="mutate.html">transform-method</a></td>
+<td>Mutate</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">translate</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">translate-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">trim</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">trim-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">trunc</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">trunc-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+</table>
+
+<h2><a name="U">-- U --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_string_functions.html">unbase64</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">unbase64-method</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="uncacheTable.html">uncacheTable</a></td>
+<td>Uncache Table</td></tr>
+<tr><td style="width: 25%;"><a href="uncacheTable.html">uncacheTable.default</a></td>
+<td>Uncache Table</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">unhex</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_math_functions.html">unhex-method</a></td>
+<td>Math functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="union.html">union</a></td>
+<td>Return a new SparkDataFrame containing the union of rows</td></tr>
+<tr><td style="width: 25%;"><a href="union.html">union-method</a></td>
+<td>Return a new SparkDataFrame containing the union of rows</td></tr>
+<tr><td style="width: 25%;"><a href="union.html">unionAll</a></td>
+<td>Return a new SparkDataFrame containing the union of rows</td></tr>
+<tr><td style="width: 25%;"><a href="union.html">unionAll-method</a></td>
+<td>Return a new SparkDataFrame containing the union of rows</td></tr>
+<tr><td style="width: 25%;"><a href="unionByName.html">unionByName</a></td>
+<td>Return a new SparkDataFrame containing the union of rows, matched by column names</td></tr>
+<tr><td style="width: 25%;"><a href="unionByName.html">unionByName-method</a></td>
+<td>Return a new SparkDataFrame containing the union of rows, matched by column names</td></tr>
+<tr><td style="width: 25%;"><a href="distinct.html">unique</a></td>
+<td>Distinct</td></tr>
+<tr><td style="width: 25%;"><a href="distinct.html">unique-method</a></td>
+<td>Distinct</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">unix_timestamp</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">unix_timestamp-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="unpersist.html">unpersist</a></td>
+<td>Unpersist</td></tr>
+<tr><td style="width: 25%;"><a href="unpersist.html">unpersist-method</a></td>
+<td>Unpersist</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">upper</a></td>
+<td>String functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_string_functions.html">upper-method</a></td>
+<td>String functions for Column operations</td></tr>
+</table>
+
+<h2><a name="V">-- V --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">var</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">var-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">variance</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">variance-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">var_pop</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">var_pop-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">var_samp</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_aggregate_functions.html">var_samp-method</a></td>
+<td>Aggregate functions for Column operations</td></tr>
+</table>
+
+<h2><a name="W">-- W --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">weekofyear</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">weekofyear-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">when</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_nonaggregate_functions.html">when-method</a></td>
+<td>Non-aggregate functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="filter.html">where</a></td>
+<td>Filter</td></tr>
+<tr><td style="width: 25%;"><a href="filter.html">where-method</a></td>
+<td>Filter</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">window</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">window-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="windowOrderBy.html">windowOrderBy</a></td>
+<td>windowOrderBy</td></tr>
+<tr><td style="width: 25%;"><a href="windowOrderBy.html">windowOrderBy-method</a></td>
+<td>windowOrderBy</td></tr>
+<tr><td style="width: 25%;"><a href="windowPartitionBy.html">windowPartitionBy</a></td>
+<td>windowPartitionBy</td></tr>
+<tr><td style="width: 25%;"><a href="windowPartitionBy.html">windowPartitionBy-method</a></td>
+<td>windowPartitionBy</td></tr>
+<tr><td style="width: 25%;"><a href="WindowSpec.html">WindowSpec-class</a></td>
+<td>S4 class that represents a WindowSpec</td></tr>
+<tr><td style="width: 25%;"><a href="with.html">with</a></td>
+<td>Evaluate an R expression in an environment constructed from a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="with.html">with-method</a></td>
+<td>Evaluate an R expression in an environment constructed from a SparkDataFrame</td></tr>
+<tr><td style="width: 25%;"><a href="withColumn.html">withColumn</a></td>
+<td>WithColumn</td></tr>
+<tr><td style="width: 25%;"><a href="withColumn.html">withColumn-method</a></td>
+<td>WithColumn</td></tr>
+<tr><td style="width: 25%;"><a href="rename.html">withColumnRenamed</a></td>
+<td>rename</td></tr>
+<tr><td style="width: 25%;"><a href="rename.html">withColumnRenamed-method</a></td>
+<td>rename</td></tr>
+<tr><td style="width: 25%;"><a href="withWatermark.html">withWatermark</a></td>
+<td>withWatermark</td></tr>
+<tr><td style="width: 25%;"><a href="withWatermark.html">withWatermark-method</a></td>
+<td>withWatermark</td></tr>
+<tr><td style="width: 25%;"><a href="write.df.html">write.df</a></td>
+<td>Save the contents of SparkDataFrame to a data source.</td></tr>
+<tr><td style="width: 25%;"><a href="write.df.html">write.df-method</a></td>
+<td>Save the contents of SparkDataFrame to a data source.</td></tr>
+<tr><td style="width: 25%;"><a href="write.jdbc.html">write.jdbc</a></td>
+<td>Save the content of SparkDataFrame to an external database table via JDBC.</td></tr>
+<tr><td style="width: 25%;"><a href="write.jdbc.html">write.jdbc-method</a></td>
+<td>Save the content of SparkDataFrame to an external database table via JDBC.</td></tr>
+<tr><td style="width: 25%;"><a href="write.json.html">write.json</a></td>
+<td>Save the contents of SparkDataFrame as a JSON file</td></tr>
+<tr><td style="width: 25%;"><a href="write.json.html">write.json-method</a></td>
+<td>Save the contents of SparkDataFrame as a JSON file</td></tr>
+<tr><td style="width: 25%;"><a href="write.ml.html">write.ml</a></td>
+<td>Saves the MLlib model to the input path</td></tr>
+<tr><td style="width: 25%;"><a href="spark.als.html">write.ml-method</a></td>
+<td>Alternating Least Squares (ALS) for Collaborative Filtering</td></tr>
+<tr><td style="width: 25%;"><a href="spark.bisectingKmeans.html">write.ml-method</a></td>
+<td>Bisecting K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.decisionTree.html">write.ml-method</a></td>
+<td>Decision Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.fpGrowth.html">write.ml-method</a></td>
+<td>FP-growth</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gaussianMixture.html">write.ml-method</a></td>
+<td>Multivariate Gaussian Mixture Model (GMM)</td></tr>
+<tr><td style="width: 25%;"><a href="spark.gbt.html">write.ml-method</a></td>
+<td>Gradient Boosted Tree Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.glm.html">write.ml-method</a></td>
+<td>Generalized Linear Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.isoreg.html">write.ml-method</a></td>
+<td>Isotonic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.kmeans.html">write.ml-method</a></td>
+<td>K-Means Clustering Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.lda.html">write.ml-method</a></td>
+<td>Latent Dirichlet Allocation</td></tr>
+<tr><td style="width: 25%;"><a href="spark.logit.html">write.ml-method</a></td>
+<td>Logistic Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.mlp.html">write.ml-method</a></td>
+<td>Multilayer Perceptron Classification Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.naiveBayes.html">write.ml-method</a></td>
+<td>Naive Bayes Models</td></tr>
+<tr><td style="width: 25%;"><a href="spark.randomForest.html">write.ml-method</a></td>
+<td>Random Forest Model for Regression and Classification</td></tr>
+<tr><td style="width: 25%;"><a href="spark.survreg.html">write.ml-method</a></td>
+<td>Accelerated Failure Time (AFT) Survival Regression Model</td></tr>
+<tr><td style="width: 25%;"><a href="spark.svmLinear.html">write.ml-method</a></td>
+<td>Linear SVM Model</td></tr>
+<tr><td style="width: 25%;"><a href="write.orc.html">write.orc</a></td>
+<td>Save the contents of SparkDataFrame as an ORC file, preserving the schema.</td></tr>
+<tr><td style="width: 25%;"><a href="write.orc.html">write.orc-method</a></td>
+<td>Save the contents of SparkDataFrame as an ORC file, preserving the schema.</td></tr>
+<tr><td style="width: 25%;"><a href="write.parquet.html">write.parquet</a></td>
+<td>Save the contents of SparkDataFrame as a Parquet file, preserving the schema.</td></tr>
+<tr><td style="width: 25%;"><a href="write.parquet.html">write.parquet-method</a></td>
+<td>Save the contents of SparkDataFrame as a Parquet file, preserving the schema.</td></tr>
+<tr><td style="width: 25%;"><a href="write.stream.html">write.stream</a></td>
+<td>Write the streaming SparkDataFrame to a data source.</td></tr>
+<tr><td style="width: 25%;"><a href="write.stream.html">write.stream-method</a></td>
+<td>Write the streaming SparkDataFrame to a data source.</td></tr>
+<tr><td style="width: 25%;"><a href="write.text.html">write.text</a></td>
+<td>Save the content of SparkDataFrame in a text file at the specified path.</td></tr>
+<tr><td style="width: 25%;"><a href="write.text.html">write.text-method</a></td>
+<td>Save the content of SparkDataFrame in a text file at the specified path.</td></tr>
+</table>
+
+<h2><a name="Y">-- Y --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">year</a></td>
+<td>Date time functions for Column operations</td></tr>
+<tr><td style="width: 25%;"><a href="column_datetime_functions.html">year-method</a></td>
+<td>Date time functions for Column operations</td></tr>
+</table>
+
+<h2><a name="misc">-- misc --</a></h2>
+
+<table width="100%">
+<tr><td style="width: 25%;"><a href="not.html">!-method</a></td>
+<td>!</td></tr>
+<tr><td style="width: 25%;"><a href="select.html">$</a></td>
+<td>Select</td></tr>
+<tr><td style="width: 25%;"><a href="select.html">$-method</a></td>
+<td>Select</td></tr>
+<tr><td style="width: 25%;"><a href="select.html">$&lt;-</a></td>
+<td>Select</td></tr>
+<tr><td style="width: 25%;"><a href="select.html">$&lt;--method</a></td>
+<td>Select</td></tr>
+<tr><td style="width: 25%;"><a href="eq_null_safe.html">%&lt;=&gt;%</a></td>
+<td>%&lt;=&gt;%</td></tr>
+<tr><td style="width: 25%;"><a href="eq_null_safe.html">%&lt;=&gt;%-method</a></td>
+<td>%&lt;=&gt;%</td></tr>
+<tr><td style="width: 25%;"><a href="match.html">%in%</a></td>
+<td>Match a column with given values.</td></tr>
+<tr><td style="width: 25%;"><a href="match.html">%in%-method</a></td>
+<td>Match a column with given values.</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">[</a></td>
+<td>Subset</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">[-method</a></td>
+<td>Subset</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">[[</a></td>
+<td>Subset</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">[[-method</a></td>
+<td>Subset</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">[[&lt;-</a></td>
+<td>Subset</td></tr>
+<tr><td style="width: 25%;"><a href="subset.html">[[&lt;--method</a></td>
+<td>Subset</td></tr>
+</table>
+</body></html>
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/html/R.css b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/html/R.css
new file mode 100644
index 0000000000000000000000000000000000000000..f10f5ea669d3de9e84a2ce5bf5752546b99cd02d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/html/R.css
@@ -0,0 +1,97 @@
+body {
+    background: white;
+    color: black;
+}
+
+a:link {
+    background: white;
+    color: blue;
+}
+
+a:visited {
+    background: white;
+    color: rgb(50%, 0%, 50%);
+}
+
+h1 {
+    background: white;
+    color: rgb(55%, 55%, 55%);
+    font-family: monospace;
+    font-size: x-large;
+    text-align: center;
+}
+
+h2 {
+    background: white;
+    color: rgb(40%, 40%, 40%);
+    font-family: monospace;
+    font-size: large;
+    text-align: center;
+}
+
+h3 {
+    background: white;
+    color: rgb(40%, 40%, 40%);
+    font-family: monospace;
+    font-size: large;
+}
+
+h4 {
+    background: white;
+    color: rgb(40%, 40%, 40%);
+    font-family: monospace;
+    font-style: italic;
+    font-size: large;
+}
+
+h5 {
+    background: white;
+    color: rgb(40%, 40%, 40%);
+    font-family: monospace;
+}
+
+h6 {
+    background: white;
+    color: rgb(40%, 40%, 40%);
+    font-family: monospace;
+    font-style: italic;
+}
+
+img.toplogo {
+    width: 4em;
+    vertical-align: middle;
+}
+
+img.arrow {
+    width: 30px;
+    height: 30px;
+    border: 0;
+}
+
+span.acronym {
+    font-size: small;
+}
+
+span.env {
+    font-family: monospace;
+}
+
+span.file {
+    font-family: monospace;
+}
+
+span.option{
+    font-family: monospace;
+}
+
+span.pkg {
+    font-weight: bold;
+}
+
+span.samp{
+    font-family: monospace;
+}
+
+div.vignettes a:hover {
+    background: rgb(85%, 85%, 85%);
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/profile/general.R b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/profile/general.R
new file mode 100644
index 0000000000000000000000000000000000000000..8c75c19ca7ac3756c6d17b0469464ed6ea29604a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/profile/general.R
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+.First <- function() {
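+  # SPARKR_PACKAGE_DIR may contain a comma-separated list of library directories,
+  # each of which is prepended to the R library search path below.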
+  packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR")
+  dirs <- strsplit(packageDir, ",")[[1]]
+  .libPaths(c(dirs, .libPaths()))
+  Sys.setenv(NOAWT = 1)
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/profile/shell.R b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/profile/shell.R
new file mode 100644
index 0000000000000000000000000000000000000000..8a8111a8c5419ab762eac635a113a45927f89d40
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/profile/shell.R
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+.First <- function() {
+  home <- Sys.getenv("SPARK_HOME")
+  .libPaths(c(file.path(home, "R", "lib"), .libPaths()))
+  Sys.setenv(NOAWT = 1)
+
+  # Make sure SparkR package is the last loaded one
+  old <- getOption("defaultPackages")
+  options(defaultPackages = c(old, "SparkR"))
+
+  spark <- SparkR::sparkR.session()
+  assign("spark", spark, envir = .GlobalEnv)
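+  # Also expose the underlying JavaSparkContext as 'sc' in the global environment.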
+  sc <- SparkR:::callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", spark)
+  assign("sc", sc, envir = .GlobalEnv)
+  sparkVer <- SparkR:::callJMethod(sc, "version")
+  cat("\n Welcome to")
+  cat("\n")
+  cat("    ____              __", "\n")
+  cat("   / __/__  ___ _____/ /__", "\n")
+  cat("  _\\ \\/ _ \\/ _ `/ __/  '_/", "\n")
+  cat(" /___/ .__/\\_,_/_/ /_/\\_\\")
+  if (nchar(sparkVer) == 0) {
+    cat("\n")
+  } else {
+    cat("   version ", sparkVer, "\n")
+  }
+  cat("    /_/", "\n")
+  cat("\n")
+
+  cat("\n SparkSession available as 'spark'.\n")
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/tests/testthat/test_basic.R b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/tests/testthat/test_basic.R
new file mode 100644
index 0000000000000000000000000000000000000000..80df3d8ce6e59e2cc5939ec1092f9359b9a71e85
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/tests/testthat/test_basic.R
@@ -0,0 +1,100 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+context("basic tests for CRAN")
+
+test_that("create DataFrame from list or data.frame", {
+  tryCatch(checkJavaVersion(),
+            error = function(e) { skip("error on Java check") },
+            warning = function(e) { skip("warning on Java check") })
+
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE,
+                 sparkConfig = sparkRTestConfig)
+
+  i <- 4
+  df <- createDataFrame(data.frame(dummy = 1:i))
+  expect_equal(count(df), i)
+
+  l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
+  df <- createDataFrame(l)
+  expect_equal(columns(df), c("a", "b"))
+
+  a <- 1:3
+  b <- c("a", "b", "c")
+  ldf <- data.frame(a, b)
+  df <- createDataFrame(ldf)
+  expect_equal(columns(df), c("a", "b"))
+  expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+  expect_equal(count(df), 3)
+  ldf2 <- collect(df)
+  expect_equal(ldf$a, ldf2$a)
+
+  mtcarsdf <- createDataFrame(mtcars)
+  expect_equivalent(collect(mtcarsdf), mtcars)
+
+  bytes <- as.raw(c(1, 2, 3))
+  df <- createDataFrame(list(list(bytes)))
+  expect_equal(collect(df)[[1]][[1]], bytes)
+
+  sparkR.session.stop()
+})
+
+test_that("spark.glm and predict", {
+  tryCatch(checkJavaVersion(),
+            error = function(e) { skip("error on Java check") },
+            warning = function(e) { skip("warning on Java check") })
+
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE,
+                 sparkConfig = sparkRTestConfig)
+
+  training <- suppressWarnings(createDataFrame(iris))
+  # gaussian family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
+  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+
+  # Gamma family
+  x <- runif(100, -1, 1)
+  y <- rgamma(100, rate = 10 / exp(0.5 + 1.2 * x), shape = 10)
+  df <- as.DataFrame(as.data.frame(list(x = x, y = y)))
+  model <- glm(y ~ x, family = Gamma, df)
+  out <- capture.output(print(summary(model)))
+  expect_true(any(grepl("Dispersion parameter for gamma family", out)))
+
+  # tweedie family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
+                     family = "tweedie", var.power = 1.2, link.power = 0.0)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+
+  # manual calculation of the R predicted values to avoid dependence on statmod
+  #' library(statmod)
+  #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
+  #'             family = tweedie(var.power = 1.2, link.power = 0.0))
+  #' print(coef(rModel))
+
+  rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
+  rVals <- exp(as.numeric(model.matrix(Sepal.Width ~ Sepal.Length + Species,
+                                       data = iris) %*% rCoef))
+  expect_true(all(abs(rVals - vals) < 1e-5), rVals - vals)
+
+  sparkR.session.stop()
+})
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/worker/daemon.R b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/worker/daemon.R
new file mode 100644
index 0000000000000000000000000000000000000000..fb9db63b07cd053c5f539663d8147dcbc423da61
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/worker/daemon.R
@@ -0,0 +1,102 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Worker daemon
+
+rLibDir <- Sys.getenv("SPARKR_RLIBDIR")
+connectionTimeout <- as.integer(Sys.getenv("SPARKR_BACKEND_CONNECTION_TIMEOUT", "6000"))
+dirs <- strsplit(rLibDir, ",")[[1]]
+script <- file.path(dirs[[1]], "SparkR", "worker", "worker.R")
+
+# preload SparkR package, speedup worker
+.libPaths(c(dirs, .libPaths()))
+suppressPackageStartupMessages(library(SparkR))
+
+port <- as.integer(Sys.getenv("SPARKR_WORKER_PORT"))
+inputCon <- socketConnection(
+    port = port, open = "wb", blocking = TRUE, timeout = connectionTimeout)
+
+SparkR:::doServerAuth(inputCon, Sys.getenv("SPARKR_WORKER_SECRET"))
+
+# Waits indefinitely for a socket connection by default.
+selectTimeout <- NULL
+
+while (TRUE) {
+  ready <- socketSelect(list(inputCon), timeout = selectTimeout)
+
+  # Note that the children should be terminated in the parent. If each child terminates
+  # itself, its resources do not appear to be released properly, which causes an unexpected
+  # termination of this daemon due to, for example, running out of file descriptors
+  # (see SPARK-21093). Therefore, the current implementation tries to retrieve children
+  # that have exited (but have not been terminated) and then sends a kill signal from the
+  # parent to terminate them properly.
+  #
+  # There are two paths by which the parent attempts to send a termination signal to the children.
+  #
+  #   1. Every second if any socket connection is not available and if there are child workers
+  #     running.
+  #   2. Right after a socket connection is available.
+  #
+  # In other words, the parent attempts to send the signal to the children every second while
+  # any worker is running, and again right before launching new worker children for an
+  # incoming socket connection.
+
+  # The process IDs of exited children are returned below.
+  children <- parallel:::selectChildren(timeout = 0)
+
+  if (is.integer(children)) {
+    lapply(children, function(child) {
+      # This should be the PID of the exited child. Otherwise, readChild returns raw bytes if
+      # any data was sent from this child; in that case, we discard it.
+      pid <- parallel:::readChild(child)
+      if (is.integer(pid)) {
+        # This checks if the value read from this child matches the PID of the selected child.
+        if (child == pid) {
+          # If so, we terminate this child.
+          tools::pskill(child, tools::SIGUSR1)
+        }
+      }
+    })
+  } else if (is.null(children)) {
+    # If it is NULL, there are no children. Waits indefinitely for a socket connection.
+    selectTimeout <- NULL
+  }
+
+  if (ready) {
+    port <- SparkR:::readInt(inputCon)
+    # There is a small chance that it could be interrupted by a signal; retry one time.
+    if (length(port) == 0) {
+      port <- SparkR:::readInt(inputCon)
+      if (length(port) == 0) {
+        cat("quitting daemon\n")
+        quit(save = "no")
+      }
+    }
+    p <- parallel:::mcfork()
+    if (inherits(p, "masterProcess")) {
+      # Reach here because this is a child process.
+      close(inputCon)
+      Sys.setenv(SPARKR_WORKER_PORT = port)
+      try(source(script))
+      # Note that this mcexit does not fully terminate this child.
+      parallel:::mcexit(0L)
+    } else {
+      # Forking succeeded and we need to check if they finished their jobs every second.
+      selectTimeout <- 1
+    }
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/worker/worker.R b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/worker/worker.R
new file mode 100644
index 0000000000000000000000000000000000000000..c2adf613acb02fc821199b7b4ba3014493978b95
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/R/lib/SparkR/worker/worker.R
@@ -0,0 +1,267 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Worker class
+
+# Get current system time
+currentTimeSecs <- function() {
+  as.numeric(Sys.time())
+}
+
+# Get elapsed time
+elapsedSecs <- function() {
+  proc.time()[3]
+}
+
+compute <- function(mode, partition, serializer, deserializer, key,
+             colNames, computeFunc, inputData) {
+  if (mode > 0) {
+    if (deserializer == "row") {
+      # Transform the list of rows into a data.frame
+      # Note that the optional argument stringsAsFactors for rbind is
+      # available since R 3.2.4. So we set the global option here.
+      oldOpt <- getOption("stringsAsFactors")
+      options(stringsAsFactors = FALSE)
+
+      # Handle binary data types
+      if ("raw" %in% sapply(inputData[[1]], class)) {
+        inputData <- SparkR:::rbindRaws(inputData)
+      } else {
+        inputData <- do.call(rbind.data.frame, inputData)
+      }
+
+      options(stringsAsFactors = oldOpt)
+
+      names(inputData) <- colNames
+    } else {
+      # Check to see if inputData is a valid data.frame
+      stopifnot(deserializer == "byte")
+      stopifnot(class(inputData) == "data.frame")
+    }
+
+    if (mode == 2) {
+      output <- computeFunc(key, inputData)
+    } else {
+      output <- computeFunc(inputData)
+    }
+    if (serializer == "row") {
+      # Transform the result data.frame back to a list of rows
+      output <- split(output, seq(nrow(output)))
+    } else {
+      # Serialize the output to a byte array
+      stopifnot(serializer == "byte")
+    }
+  } else {
+    output <- computeFunc(partition, inputData)
+  }
+  return(output)
+}
+
+outputResult <- function(serializer, output, outputCon) {
+  if (serializer == "byte") {
+    SparkR:::writeRawSerialize(outputCon, output)
+  } else if (serializer == "row") {
+    SparkR:::writeRowSerialize(outputCon, output)
+  } else {
+    # write lines one-by-one with flag
+    lapply(output, function(line) SparkR:::writeString(outputCon, line))
+  }
+}
+
+# Constants
+specialLengths <- list(END_OF_STREAM = 0L, TIMING_DATA = -1L)
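+# TIMING_DATA marks the start of the timing block written after processing, and
+# END_OF_STREAM marks the end of this worker's output.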
+
+# Timing R process boot
+bootTime <- currentTimeSecs()
+bootElap <- elapsedSecs()
+
+rLibDir <- Sys.getenv("SPARKR_RLIBDIR")
+connectionTimeout <- as.integer(Sys.getenv("SPARKR_BACKEND_CONNECTION_TIMEOUT", "6000"))
+dirs <- strsplit(rLibDir, ",")[[1]]
+# Set libPaths to include SparkR package as loadNamespace needs this
+# TODO: Figure out if we can avoid this by not loading any objects that require
+# SparkR namespace
+.libPaths(c(dirs, .libPaths()))
+suppressPackageStartupMessages(library(SparkR))
+
+port <- as.integer(Sys.getenv("SPARKR_WORKER_PORT"))
+inputCon <- socketConnection(
+    port = port, blocking = TRUE, open = "wb", timeout = connectionTimeout)
+SparkR:::doServerAuth(inputCon, Sys.getenv("SPARKR_WORKER_SECRET"))
+
+outputCon <- socketConnection(
+    port = port, blocking = TRUE, open = "wb", timeout = connectionTimeout)
+SparkR:::doServerAuth(outputCon, Sys.getenv("SPARKR_WORKER_SECRET"))
+
+# read the index of the current partition inside the RDD
+partition <- SparkR:::readInt(inputCon)
+
+deserializer <- SparkR:::readString(inputCon)
+serializer <- SparkR:::readString(inputCon)
+
+# Include packages as required
+packageNames <- unserialize(SparkR:::readRaw(inputCon))
+for (pkg in packageNames) {
+  suppressPackageStartupMessages(library(as.character(pkg), character.only = TRUE))
+}
+
+# read function dependencies
+funcLen <- SparkR:::readInt(inputCon)
+computeFunc <- unserialize(SparkR:::readRawLen(inputCon, funcLen))
+env <- environment(computeFunc)
+parent.env(env) <- .GlobalEnv  # Attach under global environment.
+
+# Timing init envs for computing
+initElap <- elapsedSecs()
+
+# Read and set broadcast variables
+numBroadcastVars <- SparkR:::readInt(inputCon)
+if (numBroadcastVars > 0) {
+  for (bcast in seq(1:numBroadcastVars)) {
+    bcastId <- SparkR:::readInt(inputCon)
+    value <- unserialize(SparkR:::readRaw(inputCon))
+    SparkR:::setBroadcastValue(bcastId, value)
+  }
+}
+
+# Timing broadcast
+broadcastElap <- elapsedSecs()
+# Initial input timing
+inputElap <- broadcastElap
+
+# If -1: read as normal RDD; if >= 0, treat as pairwise RDD and treat the int
+# as number of partitions to create.
+numPartitions <- SparkR:::readInt(inputCon)
+
+# 0 - RDD mode, 1 - dapply mode, 2 - gapply mode
+mode <- SparkR:::readInt(inputCon)
+
+if (mode > 0) {
+  colNames <- SparkR:::readObject(inputCon)
+}
+
+isEmpty <- SparkR:::readInt(inputCon)
+computeInputElapsDiff <- 0
+outputComputeElapsDiff <- 0
+
+if (isEmpty != 0) {
+  if (numPartitions == -1) {
+    if (deserializer == "byte") {
+      # Read and deserialize the input data for this partition
+      data <- SparkR:::readDeserialize(inputCon)
+    } else if (deserializer == "string") {
+      data <- as.list(readLines(inputCon))
+    } else if (deserializer == "row" && mode == 2) {
+      dataWithKeys <- SparkR:::readMultipleObjectsWithKeys(inputCon)
+      keys <- dataWithKeys$keys
+      data <- dataWithKeys$data
+    } else if (deserializer == "row") {
+      data <- SparkR:::readMultipleObjects(inputCon)
+    }
+
+    # Timing reading input data for execution
+    inputElap <- elapsedSecs()
+    if (mode > 0) {
+      if (mode == 1) {
+        output <- compute(mode, partition, serializer, deserializer, NULL,
+                    colNames, computeFunc, data)
+      } else {
+        # gapply mode
+        for (i in 1:length(data)) {
+          # Timing reading input data for execution
+          inputElap <- elapsedSecs()
+          output <- compute(mode, partition, serializer, deserializer, keys[[i]],
+                      colNames, computeFunc, data[[i]])
+          computeElap <- elapsedSecs()
+          outputResult(serializer, output, outputCon)
+          outputElap <- elapsedSecs()
+          computeInputElapsDiff <- computeInputElapsDiff + (computeElap - inputElap)
+          outputComputeElapsDiff <- outputComputeElapsDiff + (outputElap - computeElap)
+        }
+      }
+    } else {
+      output <- compute(mode, partition, serializer, deserializer, NULL,
+                  colNames, computeFunc, data)
+    }
+    if (mode != 2) {
+      # Not a gapply mode
+      computeElap <- elapsedSecs()
+      outputResult(serializer, output, outputCon)
+      outputElap <- elapsedSecs()
+      computeInputElapsDiff <- computeElap - inputElap
+      outputComputeElapsDiff <- outputElap - computeElap
+    }
+  } else {
+    if (deserializer == "byte") {
+      # Read and deserialize the input data for this partition
+      data <- SparkR:::readDeserialize(inputCon)
+    } else if (deserializer == "string") {
+      data <- readLines(inputCon)
+    } else if (deserializer == "row") {
+      data <- SparkR:::readMultipleObjects(inputCon)
+    }
+    # Timing reading input data for execution
+    inputElap <- elapsedSecs()
+
+    res <- new.env()
+
+    # Step 1: hash the data to an environment
+    hashTupleToEnvir <- function(tuple) {
+      # NOTE: execFunction is the hash function here
+      hashVal <- computeFunc(tuple[[1]])
+      bucket <- as.character(hashVal %% numPartitions)
+      acc <- res[[bucket]]
+      # Create a new accumulator
+      if (is.null(acc)) {
+        acc <- SparkR:::initAccumulator()
+      }
+      SparkR:::addItemToAccumulator(acc, tuple)
+      res[[bucket]] <- acc
+    }
+    invisible(lapply(data, hashTupleToEnvir))
+    # Timing computing
+    computeElap <- elapsedSecs()
+
+    # Step 2: write out all of the environment as key-value pairs.
+    for (name in ls(res)) {
+      SparkR:::writeInt(outputCon, 2L)
+      SparkR:::writeInt(outputCon, as.integer(name))
+      # Truncate the accumulator list to the number of elements we have
+      length(res[[name]]$data) <- res[[name]]$counter
+      SparkR:::writeRawSerialize(outputCon, res[[name]]$data)
+    }
+    # Timing output
+    outputElap <- elapsedSecs()
+    computeInputElapsDiff <- computeElap - inputElap
+    outputComputeElapsDiff <- outputElap - computeElap
+  }
+}
+
+# Report timing
+SparkR:::writeInt(outputCon, specialLengths$TIMING_DATA)
+SparkR:::writeDouble(outputCon, bootTime)
+SparkR:::writeDouble(outputCon, initElap - bootElap)        # init
+SparkR:::writeDouble(outputCon, broadcastElap - initElap)   # broadcast
+SparkR:::writeDouble(outputCon, inputElap - broadcastElap)  # input
+SparkR:::writeDouble(outputCon, computeInputElapsDiff)    # compute
+SparkR:::writeDouble(outputCon, outputComputeElapsDiff)   # output
+
+# End of output
+SparkR:::writeInt(outputCon, specialLengths$END_OF_STREAM)
+
+close(outputCon)
+close(inputCon)
diff --git a/spark-2.4.0-bin-hadoop2.7/R/lib/sparkr.zip b/spark-2.4.0-bin-hadoop2.7/R/lib/sparkr.zip
new file mode 100644
index 0000000000000000000000000000000000000000..fea98f97b015a4e06c8651f81d24e6ed29445d5d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/R/lib/sparkr.zip differ
diff --git a/spark-2.4.0-bin-hadoop2.7/README.md b/spark-2.4.0-bin-hadoop2.7/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fd8c7f656968ebdbbf1531bb24bf53f830bd10a2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/README.md
@@ -0,0 +1,105 @@
+# Apache Spark
+
+Spark is a fast and general cluster computing system for Big Data. It provides
+high-level APIs in Scala, Java, Python, and R, and an optimized engine that
+supports general computation graphs for data analysis. It also supports a
+rich set of higher-level tools including Spark SQL for SQL and DataFrames,
+MLlib for machine learning, GraphX for graph processing,
+and Spark Streaming for stream processing.
+
+<http://spark.apache.org/>
+
+
+## Online Documentation
+
+You can find the latest Spark documentation, including a programming
+guide, on the [project web page](http://spark.apache.org/documentation.html).
+This README file only contains basic setup instructions.
+
+## Building Spark
+
+Spark is built using [Apache Maven](http://maven.apache.org/).
+To build Spark and its example programs, run:
+
+    build/mvn -DskipTests clean package
+
+(You do not need to do this if you downloaded a pre-built package.)
+
+You can build Spark using more than one thread by using the -T option with Maven, see ["Parallel builds in Maven 3"](https://cwiki.apache.org/confluence/display/MAVEN/Parallel+builds+in+Maven+3).
+More detailed documentation is available from the project site, at
+["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html).
+
+For general development tips, including info on developing Spark using an IDE, see ["Useful Developer Tools"](http://spark.apache.org/developer-tools.html).
+
+## Interactive Scala Shell
+
+The easiest way to start using Spark is through the Scala shell:
+
+    ./bin/spark-shell
+
+Try the following command, which should return 1000:
+
+    scala> sc.parallelize(1 to 1000).count()
+
+## Interactive Python Shell
+
+Alternatively, if you prefer Python, you can use the Python shell:
+
+    ./bin/pyspark
+
+And run the following command, which should also return 1000:
+
+    >>> sc.parallelize(range(1000)).count()
+
+## Example Programs
+
+Spark also comes with several sample programs in the `examples` directory.
+To run one of them, use `./bin/run-example <class> [params]`. For example:
+
+    ./bin/run-example SparkPi
+
+will run the Pi example locally.
+
+You can set the MASTER environment variable when running examples to submit
+examples to a cluster. This can be a mesos:// or spark:// URL,
+"yarn" to run on YARN, and "local" to run
+locally with one thread, or "local[N]" to run locally with N threads. You
+can also use an abbreviated class name if the class is in the `examples`
+package. For instance:
+
+    MASTER=spark://host:7077 ./bin/run-example SparkPi
+
+Many of the example programs print usage help if no params are given.
+
+## Running Tests
+
+Testing first requires [building Spark](#building-spark). Once Spark is built, tests
+can be run using:
+
+    ./dev/run-tests
+
+Please see the guidance on how to
+[run tests for a module, or individual tests](http://spark.apache.org/developer-tools.html#individual-tests).
+
+There is also a Kubernetes integration test; see resource-managers/kubernetes/integration-tests/README.md.
+
+## A Note About Hadoop Versions
+
+Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
+storage systems. Because the protocols have changed in different versions of
+Hadoop, you must build Spark against the same version that your cluster runs.
+
+Please refer to the build documentation at
+["Specifying the Hadoop Version and Enabling YARN"](http://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version-and-enabling-yarn)
+for detailed guidance on building for a particular distribution of Hadoop, including
+building for particular Hive and Hive Thriftserver distributions.
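+
+For example, a build targeting Hadoop 2.7.x might look like the following
+(the available profiles and properties vary between Spark versions, so check
+the build documentation for your release):
+
+    ./build/mvn -Pyarn -Phadoop-2.7 -Dhadoop.version=2.7.3 -DskipTests clean package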
+
+## Configuration
+
+Please refer to the [Configuration Guide](http://spark.apache.org/docs/latest/configuration.html)
+in the online documentation for an overview of how to configure Spark.
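+
+As a quick illustration, individual settings can also be passed on the command
+line when launching a shell or submitting an application (the property and
+value below are examples only):
+
+    ./bin/spark-shell --master local[4] --conf spark.executor.memory=2g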
+
+## Contributing
+
+Please review the [Contribution to Spark guide](http://spark.apache.org/contributing.html)
+for information on how to get started contributing to the project.
diff --git a/spark-2.4.0-bin-hadoop2.7/RELEASE b/spark-2.4.0-bin-hadoop2.7/RELEASE
new file mode 100644
index 0000000000000000000000000000000000000000..aff33e213ff9ce11fa9467b6088aed6f678ee6ad
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/RELEASE
@@ -0,0 +1,2 @@
+Spark 2.4.0 built for Hadoop 2.7.3
+Build flags: -B -Pmesos -Pyarn -Pkubernetes -Psparkr -Pkafka-0-8 -Phadoop-2.7 -Phive -Phive-thriftserver -DzincPort=3036
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/beeline b/spark-2.4.0-bin-hadoop2.7/bin/beeline
new file mode 100755
index 0000000000000000000000000000000000000000..058534699e44b99d0da1baaa909629527e752007
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/beeline
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Shell script for starting BeeLine
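+#
+# Example usage (the JDBC URL below is illustrative; point it at your running
+# Thrift JDBC/ODBC server):
+#
+#   ./bin/beeline -u jdbc:hive2://localhost:10000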
+
+# Enter posix mode for bash
+set -o posix
+
+# Figure out if SPARK_HOME is set
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+CLASS="org.apache.hive.beeline.BeeLine"
+exec "${SPARK_HOME}/bin/spark-class" $CLASS "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/beeline.cmd b/spark-2.4.0-bin-hadoop2.7/bin/beeline.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..288059a28cd744c5af13c17cd5867b365c32ad34
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/beeline.cmd
@@ -0,0 +1,22 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0spark-class.cmd" org.apache.hive.beeline.BeeLine %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/docker-image-tool.sh b/spark-2.4.0-bin-hadoop2.7/bin/docker-image-tool.sh
new file mode 100755
index 0000000000000000000000000000000000000000..5e8eafff50f2ab6f26de4f72d65f29b041388268
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/docker-image-tool.sh
@@ -0,0 +1,183 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script builds and pushes docker images when run from a release of Spark
+# with Kubernetes support.
+
+function error {
+  echo "$@" 1>&2
+  exit 1
+}
+
+if [ -z "${SPARK_HOME}" ]; then
+  SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+function image_ref {
+  local image="$1"
+  local add_repo="${2:-1}"
+  if [ $add_repo = 1 ] && [ -n "$REPO" ]; then
+    image="$REPO/$image"
+  fi
+  if [ -n "$TAG" ]; then
+    image="$image:$TAG"
+  fi
+  echo "$image"
+}
+
+function build {
+  local BUILD_ARGS
+  local IMG_PATH
+
+  if [ ! -f "$SPARK_HOME/RELEASE" ]; then
+    # Set image build arguments accordingly if this is a source repo and not a distribution archive.
+    IMG_PATH=resource-managers/kubernetes/docker/src/main/dockerfiles
+    BUILD_ARGS=(
+      ${BUILD_PARAMS}
+      --build-arg
+      img_path=$IMG_PATH
+      --build-arg
+      spark_jars=assembly/target/scala-$SPARK_SCALA_VERSION/jars
+      --build-arg
+      k8s_tests=resource-managers/kubernetes/integration-tests/tests
+    )
+  else
+    # Not passed as an argument to docker, but used to validate the Spark directory.
+    IMG_PATH="kubernetes/dockerfiles"
+    BUILD_ARGS=(${BUILD_PARAMS})
+  fi
+
+  if [ ! -d "$IMG_PATH" ]; then
+    error "Cannot find docker image. This script must be run from a runnable distribution of Apache Spark."
+  fi
+  local BINDING_BUILD_ARGS=(
+    ${BUILD_PARAMS}
+    --build-arg
+    base_img=$(image_ref spark)
+  )
+  local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
+  local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
+  local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}
+
+  docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
+    -t $(image_ref spark) \
+    -f "$BASEDOCKERFILE" .
+
+  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+    -t $(image_ref spark-py) \
+    -f "$PYDOCKERFILE" .
+
+  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+    -t $(image_ref spark-r) \
+    -f "$RDOCKERFILE" .
+}
+
+function push {
+  docker push "$(image_ref spark)"
+  docker push "$(image_ref spark-py)"
+  docker push "$(image_ref spark-r)"
+}
+
+function usage {
+  cat <<EOF
+Usage: $0 [options] [command]
+Builds or pushes the built-in Spark Docker image.
+
+Commands:
+  build       Build image. Requires a repository address to be provided if the image will be
+              pushed to a different registry.
+  push        Push a pre-built image to a registry. Requires a repository address to be provided.
+
+Options:
+  -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
+  -p file               Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
+  -R file               Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+  -r repo               Repository address.
+  -t tag                Tag to apply to the built image, or to identify the image to be pushed.
+  -m                    Use minikube's Docker daemon.
+  -n                    Build docker image with --no-cache
+  -b arg      Build arg to build or push the image. For multiple build args, this option needs to
+              be used separately for each build arg.
+
+Using -m builds the images directly into minikube's Docker daemon. There is no need to
+push the images into minikube in that case; they will automatically be available when
+running applications inside the minikube cluster.
+
+Check the following documentation for more information on using the minikube Docker daemon:
+
+  https://kubernetes.io/docs/getting-started-guides/minikube/#reusing-the-docker-daemon
+
+Examples:
+  - Build image in minikube with tag "testing"
+    $0 -m -t testing build
+
+  - Build and push image with tag "v2.3.0" to docker.io/myrepo
+    $0 -r docker.io/myrepo -t v2.3.0 build
+    $0 -r docker.io/myrepo -t v2.3.0 push
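+
+  - Build with an additional Docker build argument (the argument name below is only a
+    placeholder for whatever ARG your Dockerfile declares)
+    $0 -t testing -b some_build_arg=some_value build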
+EOF
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
+  exit 0
+fi
+
+REPO=
+TAG=
+BASEDOCKERFILE=
+PYDOCKERFILE=
+RDOCKERFILE=
+NOCACHEARG=
+BUILD_PARAMS=
+while getopts f:p:R:mr:t:nb: option
+do
+ case "${option}"
+ in
+ f) BASEDOCKERFILE=${OPTARG};;
+ p) PYDOCKERFILE=${OPTARG};;
+ R) RDOCKERFILE=${OPTARG};;
+ r) REPO=${OPTARG};;
+ t) TAG=${OPTARG};;
+ n) NOCACHEARG="--no-cache";;
+ b) BUILD_PARAMS=${BUILD_PARAMS}" --build-arg "${OPTARG};;
+ m)
+   if ! which minikube 1>/dev/null; then
+     error "Cannot find minikube."
+   fi
+   eval $(minikube docker-env)
+   ;;
+ esac
+done
+
+case "${@: -1}" in
+  build)
+    build
+    ;;
+  push)
+    if [ -z "$REPO" ]; then
+      usage
+      exit 1
+    fi
+    push
+    ;;
+  *)
+    usage
+    exit 1
+    ;;
+esac
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/find-spark-home b/spark-2.4.0-bin-hadoop2.7/bin/find-spark-home
new file mode 100755
index 0000000000000000000000000000000000000000..617dbaa4fff86a7f99dbfc5b3e87ba7eba25af46
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/find-spark-home
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Attempts to find a proper value for SPARK_HOME. Should be included using "source" directive.
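+#
+# For example, the other launchers in this directory use it as follows (illustrative):
+#
+#   if [ -z "${SPARK_HOME}" ]; then
+#     source "$(dirname "$0")"/find-spark-home
+#   fi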
+
+FIND_SPARK_HOME_PYTHON_SCRIPT="$(cd "$(dirname "$0")"; pwd)/find_spark_home.py"
+
+# Short circuit if the user already has this set.
+if [ ! -z "${SPARK_HOME}" ]; then
+   exit 0
+elif [ ! -f "$FIND_SPARK_HOME_PYTHON_SCRIPT" ]; then
+  # If we are not in the same directory as find_spark_home.py, this is not a pip-installed
+  # PySpark, so we don't need to search the various Python directories for a Spark installation.
+  # Note that if the user has pip-installed PySpark but is directly calling pyspark-shell or
+  # spark-submit from another directory, we want to use that version of PySpark rather than the
+  # pip-installed one.
+  export SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)"
+else
+  # We are pip installed, use the Python script to resolve a reasonable SPARK_HOME
+  # Default to standard python interpreter unless told otherwise
+  if [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
+     PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"python"}"
+  fi
+  export SPARK_HOME=$($PYSPARK_DRIVER_PYTHON "$FIND_SPARK_HOME_PYTHON_SCRIPT")
+fi
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/find-spark-home.cmd b/spark-2.4.0-bin-hadoop2.7/bin/find-spark-home.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..6025f67c38de4edaa009651378f69dafe2f790a0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/find-spark-home.cmd
@@ -0,0 +1,60 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Path to Python script finding SPARK_HOME
+set FIND_SPARK_HOME_PYTHON_SCRIPT=%~dp0find_spark_home.py
+
+rem Default to standard python interpreter unless told otherwise
+set PYTHON_RUNNER=python
+rem If PYSPARK_DRIVER_PYTHON is set, it overrides the default Python interpreter
+if not "x%PYSPARK_DRIVER_PYTHON%"=="x" (
+  set PYTHON_RUNNER=%PYSPARK_DRIVER_PYTHON%
+)
+rem If PYSPARK_PYTHON is set, it overrides the Python interpreter chosen above
+if not "x%PYSPARK_PYTHON%"=="x" (
+  set PYTHON_RUNNER=%PYSPARK_PYTHON%
+)
+
+rem If no usable Python executable can be found, fall back to this script's parent directory for SPARK_HOME
+where %PYTHON_RUNNER% > nul 2>&1
+if %ERRORLEVEL% neq 0 (
+  if not exist %PYTHON_RUNNER% (
+    if "x%SPARK_HOME%"=="x" (
+      echo Missing Python executable '%PYTHON_RUNNER%', defaulting to '%~dp0..' for SPARK_HOME ^
+environment variable. Please install Python or specify the correct Python executable in ^
+PYSPARK_DRIVER_PYTHON or PYSPARK_PYTHON environment variable to detect SPARK_HOME safely.
+      set SPARK_HOME=%~dp0..
+    )
+  )
+)
+
+rem Only attempt to find SPARK_HOME if it is not set.
+if "x%SPARK_HOME%"=="x" (
+  if not exist "%FIND_SPARK_HOME_PYTHON_SCRIPT%" (
+    rem If we are not in the same directory as find_spark_home.py, this is not a pip-installed
+    rem PySpark, so we don't need to search the various Python directories for a Spark installation.
+    rem Note that if the user has pip-installed PySpark but is directly calling pyspark-shell or
+    rem spark-submit from another directory, we want to use that version of PySpark rather than the
+    rem pip-installed one.
+    set SPARK_HOME=%~dp0..
+  ) else (
+    rem We are pip installed, use the Python script to resolve a reasonable SPARK_HOME
+    for /f "delims=" %%i in ('%PYTHON_RUNNER% %FIND_SPARK_HOME_PYTHON_SCRIPT%') do set SPARK_HOME=%%i
+  )
+)
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/load-spark-env.cmd b/spark-2.4.0-bin-hadoop2.7/bin/load-spark-env.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..cefa513b6fb77591b1517dc30a0e740e1ad65e39
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/load-spark-env.cmd
@@ -0,0 +1,57 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This script loads spark-env.cmd if it exists, and ensures it is only loaded once.
+rem spark-env.cmd is loaded from SPARK_CONF_DIR if set, or within the current directory's
+rem conf\ subdirectory.
+
+if [%SPARK_ENV_LOADED%] == [] (
+  set SPARK_ENV_LOADED=1
+
+  if [%SPARK_CONF_DIR%] == [] (
+    set SPARK_CONF_DIR=%~dp0..\conf
+  )
+
+  call :LoadSparkEnv
+)
+
+rem Set SPARK_SCALA_VERSION if it is not already set.
+
+set ASSEMBLY_DIR2="%SPARK_HOME%\assembly\target\scala-2.11"
+set ASSEMBLY_DIR1="%SPARK_HOME%\assembly\target\scala-2.12"
+
+if [%SPARK_SCALA_VERSION%] == [] (
+
+  if exist %ASSEMBLY_DIR2% if exist %ASSEMBLY_DIR1% (
+    echo "Presence of build for multiple Scala versions detected."
+    echo "Either clean one of them or, set SPARK_SCALA_VERSION in spark-env.cmd."
+    exit 1
+  )
+  if exist %ASSEMBLY_DIR2% (
+    set SPARK_SCALA_VERSION=2.11
+  ) else (
+    set SPARK_SCALA_VERSION=2.12
+  )
+)
+exit /b 0
+
+:LoadSparkEnv
+if exist "%SPARK_CONF_DIR%\spark-env.cmd" (
+  call "%SPARK_CONF_DIR%\spark-env.cmd"
+)
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/load-spark-env.sh b/spark-2.4.0-bin-hadoop2.7/bin/load-spark-env.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0b5006dbd63acc88bc2a31ee5b913683af539f89
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/load-spark-env.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script loads spark-env.sh if it exists, and ensures it is only loaded once.
+# spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current directory's
+# conf/ subdirectory.
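+#
+# An illustrative conf/spark-env.sh (variable names are standard spark-env settings;
+# the values below are placeholders only):
+#
+#   JAVA_HOME=/usr/lib/jvm/java-1.8.0
+#   SPARK_LOCAL_IP=127.0.0.1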
+
+# Figure out where Spark is installed
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+if [ -z "$SPARK_ENV_LOADED" ]; then
+  export SPARK_ENV_LOADED=1
+
+  export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}"/conf}"
+
+  if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
+    # Promote all variable declarations to environment (exported) variables
+    set -a
+    . "${SPARK_CONF_DIR}/spark-env.sh"
+    set +a
+  fi
+fi
+
+# Set SPARK_SCALA_VERSION if it is not already set.
+
+if [ -z "$SPARK_SCALA_VERSION" ]; then
+
+  ASSEMBLY_DIR2="${SPARK_HOME}/assembly/target/scala-2.11"
+  ASSEMBLY_DIR1="${SPARK_HOME}/assembly/target/scala-2.12"
+
+  if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
+    echo -e "Presence of build for multiple Scala versions detected." 1>&2
+    echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION in spark-env.sh.' 1>&2
+    exit 1
+  fi
+
+  if [ -d "$ASSEMBLY_DIR2" ]; then
+    export SPARK_SCALA_VERSION="2.11"
+  else
+    export SPARK_SCALA_VERSION="2.12"
+  fi
+fi
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/pyspark b/spark-2.4.0-bin-hadoop2.7/bin/pyspark
new file mode 100755
index 0000000000000000000000000000000000000000..5d5affb1f97c3871e82e10ef09458377cfe72795
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/pyspark
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+source "${SPARK_HOME}"/bin/load-spark-env.sh
+export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"
+
+# In Spark 2.0, IPYTHON and IPYTHON_OPTS are removed and pyspark fails to launch if either option
+# is set in the user's environment. Instead, users should set PYSPARK_DRIVER_PYTHON=ipython
+# to use IPython and set PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
+# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook').  This supports full customization of the IPython
+# and executor Python executables.
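+#
+# For example (illustrative; assumes IPython/Jupyter are installed):
+#   PYSPARK_DRIVER_PYTHON=ipython ./bin/pyspark
+#   PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS='notebook' ./bin/pyspark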
+
+# Fail noisily if removed options are set
+if [[ -n "$IPYTHON" || -n "$IPYTHON_OPTS" ]]; then
+  echo "Error in pyspark startup:"
+  echo "IPYTHON and IPYTHON_OPTS are removed in Spark 2.0+. Remove these from the environment and set PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS instead."
+  exit 1
+fi
+
+# Default to standard python interpreter unless told otherwise
+if [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
+  PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"python"}"
+fi
+
+WORKS_WITH_IPYTHON=$(python -c 'import sys; print(sys.version_info >= (2, 7, 0))')
+
+# Determine the Python executable to use for the executors:
+if [[ -z "$PYSPARK_PYTHON" ]]; then
+  if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && ! $WORKS_WITH_IPYTHON ]]; then
+    echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2
+    exit 1
+  else
+    PYSPARK_PYTHON=python
+  fi
+fi
+export PYSPARK_PYTHON
+
+# Add the PySpark classes to the Python path:
+export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH"
+
+# Load the PySpark shell.py script when ./pyspark is used interactively:
+export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
+export PYTHONSTARTUP="${SPARK_HOME}/python/pyspark/shell.py"
+
+# For pyspark tests
+if [[ -n "$SPARK_TESTING" ]]; then
+  unset YARN_CONF_DIR
+  unset HADOOP_CONF_DIR
+  export PYTHONHASHSEED=0
+  exec "$PYSPARK_DRIVER_PYTHON" -m "$@"
+  exit
+fi
+
+export PYSPARK_DRIVER_PYTHON
+export PYSPARK_DRIVER_PYTHON_OPTS
+exec "${SPARK_HOME}"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/pyspark.cmd b/spark-2.4.0-bin-hadoop2.7/bin/pyspark.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..3dcf1d45a8189bf349f75b62ba20cbb782fde9b3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/pyspark.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running PySpark. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0pyspark2.cmd" %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/pyspark2.cmd b/spark-2.4.0-bin-hadoop2.7/bin/pyspark2.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..15fa910c277b387acc6b9c02f57d93b5f9bdd031
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/pyspark2.cmd
@@ -0,0 +1,38 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
+call "%SPARK_HOME%\bin\load-spark-env.cmd"
+set _SPARK_CMD_USAGE=Usage: bin\pyspark.cmd [options]
+
+rem Figure out which Python to use.
+if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
+  set PYSPARK_DRIVER_PYTHON=python
+  if not [%PYSPARK_PYTHON%] == [] set PYSPARK_DRIVER_PYTHON=%PYSPARK_PYTHON%
+)
+
+set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH%
+
+set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
+set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
+
+call "%SPARK_HOME%\bin\spark-submit2.cmd" pyspark-shell-main --name "PySparkShell" %*
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/run-example b/spark-2.4.0-bin-hadoop2.7/bin/run-example
new file mode 100755
index 0000000000000000000000000000000000000000..4ba5399311d339a31b5b6b2a415726ecfad5d0a4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/run-example
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+export _SPARK_CMD_USAGE="Usage: ./bin/run-example [options] example-class [example args]"
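+# Illustrative invocation (SparkPi is one of the bundled examples; 100 is the number of slices):
+#   ./bin/run-example SparkPi 100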
+exec "${SPARK_HOME}"/bin/spark-submit run-example "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/run-example.cmd b/spark-2.4.0-bin-hadoop2.7/bin/run-example.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..2dd396e785358db259664d76ba4f05b0613dab16
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/run-example.cmd
@@ -0,0 +1,27 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
+set _SPARK_CMD_USAGE=Usage: .\bin\run-example [options] example-class [example args]
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0spark-submit.cmd" run-example %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-class b/spark-2.4.0-bin-hadoop2.7/bin/spark-class
new file mode 100755
index 0000000000000000000000000000000000000000..65d3b9612909a4eeac0ad4db4cc399be3b32fb77
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-class
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+. "${SPARK_HOME}"/bin/load-spark-env.sh
+
+# Find the java binary
+if [ -n "${JAVA_HOME}" ]; then
+  RUNNER="${JAVA_HOME}/bin/java"
+else
+  if [ "$(command -v java)" ]; then
+    RUNNER="java"
+  else
+    echo "JAVA_HOME is not set" >&2
+    exit 1
+  fi
+fi
+
+# Find Spark jars.
+if [ -d "${SPARK_HOME}/jars" ]; then
+  SPARK_JARS_DIR="${SPARK_HOME}/jars"
+else
+  SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"
+fi
+
+if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then
+  echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2
+  echo "You need to build Spark with the target \"package\" before running this program." 1>&2
+  exit 1
+else
+  LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"
+fi
+
+# Add the launcher build dir to the classpath if requested.
+if [ -n "$SPARK_PREPEND_CLASSES" ]; then
+  LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
+fi
+
+# For tests
+if [[ -n "$SPARK_TESTING" ]]; then
+  unset YARN_CONF_DIR
+  unset HADOOP_CONF_DIR
+fi
+
+# The launcher library will print arguments separated by a NULL character, to allow arguments with
+# characters that would be otherwise interpreted by the shell. Read that in a while loop, populating
+# an array that will be used to exec the final command.
+#
+# The exit code of the launcher is appended to the output, so the parent shell removes it from the
+# command array and checks the value to see if the launcher succeeded.
+build_command() {
+  "$RUNNER" -Xmx128m -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"
+  printf "%d\0" $?
+}
+
+# Turn off posix mode since it does not allow process substitution
+set +o posix
+CMD=()
+while IFS= read -d '' -r ARG; do
+  CMD+=("$ARG")
+done < <(build_command "$@")
+
+COUNT=${#CMD[@]}
+LAST=$((COUNT - 1))
+LAUNCHER_EXIT_CODE=${CMD[$LAST]}
+
+# Certain JVM failures result in errors being printed to stdout (instead of stderr), which causes
+# the code that parses the output of the launcher to get confused. In those cases, check if the
+# exit code is an integer, and if it's not, handle it as a special error case.
+if ! [[ $LAUNCHER_EXIT_CODE =~ ^[0-9]+$ ]]; then
+  echo "${CMD[@]}" | head -n-1 1>&2
+  exit 1
+fi
+
+if [ $LAUNCHER_EXIT_CODE != 0 ]; then
+  exit $LAUNCHER_EXIT_CODE
+fi
+
+CMD=("${CMD[@]:0:$LAST}")
+exec "${CMD[@]}"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-class.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-class.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..b22536ab6f45861e89ad7acf2c89fa90c66def45
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-class.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running a Spark class. To avoid polluting
+rem the environment, it just launches a new cmd to do the real work.
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0spark-class2.cmd" %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-class2.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-class2.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..5da7d7a430d79e5d196bccc60ddc5029ad3e71c6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-class2.cmd
@@ -0,0 +1,72 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
+call "%SPARK_HOME%\bin\load-spark-env.cmd"
+
+rem Test that an argument was given
+if "x%1"=="x" (
+  echo Usage: spark-class ^<class^> [^<args^>]
+  exit /b 1
+)
+
+rem Find Spark jars.
+if exist "%SPARK_HOME%\jars" (
+  set SPARK_JARS_DIR="%SPARK_HOME%\jars"
+) else (
+  set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars"
+)
+
+if not exist "%SPARK_JARS_DIR%"\ (
+  echo Failed to find Spark jars directory.
+  echo You need to build Spark before running this program.
+  exit /b 1
+)
+
+set LAUNCH_CLASSPATH=%SPARK_JARS_DIR%\*
+
+rem Add the launcher build dir to the classpath if requested.
+if not "x%SPARK_PREPEND_CLASSES%"=="x" (
+  set LAUNCH_CLASSPATH="%SPARK_HOME%\launcher\target\scala-%SPARK_SCALA_VERSION%\classes;%LAUNCH_CLASSPATH%"
+)
+
+rem Figure out where java is.
+set RUNNER=java
+if not "x%JAVA_HOME%"=="x" (
+  set RUNNER=%JAVA_HOME%\bin\java
+) else (
+  where /q "%RUNNER%"
+  if ERRORLEVEL 1 (
+    echo Java not found and JAVA_HOME environment variable is not set.
+    echo Install Java and set JAVA_HOME to point to the Java installation directory.
+    exit /b 1
+  )
+)
+
+rem The launcher library prints the command to be executed in a single line suitable for being
+rem executed by the batch interpreter. So read all the output of the launcher into a variable.
+set LAUNCHER_OUTPUT=%temp%\spark-class-launcher-output-%RANDOM%.txt
+"%RUNNER%" -Xmx128m -cp "%LAUNCH_CLASSPATH%" org.apache.spark.launcher.Main %* > %LAUNCHER_OUTPUT%
+for /f "tokens=*" %%i in (%LAUNCHER_OUTPUT%) do (
+  set SPARK_CMD=%%i
+)
+del %LAUNCHER_OUTPUT%
+%SPARK_CMD%
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-shell b/spark-2.4.0-bin-hadoop2.7/bin/spark-shell
new file mode 100755
index 0000000000000000000000000000000000000000..421f36cac3d47993fcb745c8335b75eecd88ea70
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-shell
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Shell script for starting the Spark Shell REPL
+
+cygwin=false
+case "$(uname)" in
+  CYGWIN*) cygwin=true;;
+esac
+
+# Enter posix mode for bash
+set -o posix
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]"
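+# Illustrative invocation (the master URL is a placeholder):
+#   ./bin/spark-shell --master local[2]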
+
+# SPARK-4161: scala does not assume use of the java classpath,
+# so we need to add the "-Dscala.usejavacp=true" flag manually. We
+# do this specifically for the Spark shell because the scala REPL
+# has its own class loader, and any additional classpath specified
+# through spark.driver.extraClassPath is not automatically propagated.
+SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dscala.usejavacp=true"
+
+function main() {
+  if $cygwin; then
+    # Workaround for issue involving JLine and Cygwin
+    # (see http://sourceforge.net/p/jline/bugs/40/).
+    # If you're using the Mintty terminal emulator in Cygwin, you may need to set the
+    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
+    # (see https://github.com/sbt/sbt/issues/562).
+    stty -icanon min 1 -echo > /dev/null 2>&1
+    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
+    "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
+    stty icanon echo > /dev/null 2>&1
+  else
+    export SPARK_SUBMIT_OPTS
+    "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
+  fi
+}
+
+# Copy the restore-TTY-on-exit functions from the Scala script so spark-shell exits properly even
+# in a binary distribution of Spark where Scala is not installed.
+exit_status=127
+saved_stty=""
+
+# restore stty settings (echo in particular)
+function restoreSttySettings() {
+  stty $saved_stty
+  saved_stty=""
+}
+
+function onExit() {
+  if [[ "$saved_stty" != "" ]]; then
+    restoreSttySettings
+  fi
+  exit $exit_status
+}
+
+# Re-enable echo if we are interrupted before completing.
+trap onExit INT
+
+# save terminal settings
+saved_stty=$(stty -g 2>/dev/null)
+# clear on error so we don't later try to restore them
+if [[ ! $? ]]; then
+  saved_stty=""
+fi
+
+main "$@"
+
+# Record the exit status lest it be overwritten,
+# then re-enable echo and propagate the code.
+exit_status=$?
+onExit
+
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-shell.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-shell.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..e734f13097d6125eaa36b7e1a0def40a4be6ae3d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-shell.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running Spark shell. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0spark-shell2.cmd" %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-shell2.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-shell2.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..aaf71906c6526e33d2e43fff8013914f23a985fc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-shell2.cmd
@@ -0,0 +1,37 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
+set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]
+
+rem SPARK-4161: scala does not assume use of the java classpath,
+rem so we need to add the "-Dscala.usejavacp=true" flag manually. We
+rem do this specifically for the Spark shell because the scala REPL
+rem has its own class loader, and any additional classpath specified
+rem through spark.driver.extraClassPath is not automatically propagated.
+if "x%SPARK_SUBMIT_OPTS%"=="x" (
+  set SPARK_SUBMIT_OPTS=-Dscala.usejavacp=true
+  goto run_shell
+)
+set SPARK_SUBMIT_OPTS="%SPARK_SUBMIT_OPTS% -Dscala.usejavacp=true"
+
+:run_shell
+"%SPARK_HOME%\bin\spark-submit2.cmd" --class org.apache.spark.repl.Main --name "Spark shell" %*
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-sql b/spark-2.4.0-bin-hadoop2.7/bin/spark-sql
new file mode 100755
index 0000000000000000000000000000000000000000..b08b944ebd3195addc323e306cd4cbaea57150a5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-sql
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+export _SPARK_CMD_USAGE="Usage: ./bin/spark-sql [options] [cli option]"
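+# Illustrative invocation (the query is a placeholder; -e is a CLI option of the SQL driver):
+#   ./bin/spark-sql -e "SELECT 1"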
+exec "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-sql.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-sql.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..919e3214b5863d365241e4b50fbda67159e9414b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-sql.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running SparkSQL. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0spark-sql2.cmd" %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-sql2.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-sql2.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..c34a3c5aa07395d34fa020e864977d98d5c5d643
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-sql2.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
+set _SPARK_CMD_USAGE=Usage: .\bin\spark-sql [options] [cli option]
+
+call "%SPARK_HOME%\bin\spark-submit2.cmd" --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver %*
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-submit b/spark-2.4.0-bin-hadoop2.7/bin/spark-submit
new file mode 100755
index 0000000000000000000000000000000000000000..4e9d3614e63705a5e67985dee42118d7fd327a33
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-submit
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+# disable randomized hash for string in Python 3.3+
+export PYTHONHASHSEED=0
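+# (Python 3.3+ randomizes string hashing per process by default; pinning the seed keeps
+# hash-based operations consistent between the driver and the executors.)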
+
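+# Illustrative invocation (class, master and jar are placeholders):
+#   ./bin/spark-submit --class org.example.MyApp --master local[4] /path/to/app.jar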
+exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-submit.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-submit.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..da62a8777524d0bc9b41aa45f5f3d6a129839c3c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-submit.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running Spark submit. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0spark-submit2.cmd" %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/spark-submit2.cmd b/spark-2.4.0-bin-hadoop2.7/bin/spark-submit2.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..49e350fa5c416740ef4954ff3104e74977824600
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/spark-submit2.cmd
@@ -0,0 +1,27 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running Spark submit. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
+
+rem disable randomized hash for string in Python 3.3+
+set PYTHONHASHSEED=0
+
+set CLASS=org.apache.spark.deploy.SparkSubmit
+"%~dp0spark-class2.cmd" %CLASS% %*
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/sparkR b/spark-2.4.0-bin-hadoop2.7/bin/sparkR
new file mode 100755
index 0000000000000000000000000000000000000000..29ab10df8ab6d6adec27876a6ab044555d6be90a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/sparkR
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  source "$(dirname "$0")"/find-spark-home
+fi
+
+source "${SPARK_HOME}"/bin/load-spark-env.sh
+export _SPARK_CMD_USAGE="Usage: ./bin/sparkR [options]"
+exec "${SPARK_HOME}"/bin/spark-submit sparkr-shell-main "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/sparkR.cmd b/spark-2.4.0-bin-hadoop2.7/bin/sparkR.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..fcd172b083e1ef95d6c668eed91d71fa42a9b11f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/sparkR.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem This is the entry point for running SparkR. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
+
+rem The outermost quotes are used to prevent Windows command line parse error
+rem when there are some quotes in parameters, see SPARK-21877.
+cmd /V /E /C ""%~dp0sparkR2.cmd" %*"
diff --git a/spark-2.4.0-bin-hadoop2.7/bin/sparkR2.cmd b/spark-2.4.0-bin-hadoop2.7/bin/sparkR2.cmd
new file mode 100644
index 0000000000000000000000000000000000000000..446f0c30bfe82929137cbfeae38f49e43738ad0b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/bin/sparkR2.cmd
@@ -0,0 +1,25 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements.  See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License.  You may obtain a copy of the License at
+rem
+rem    http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
+call "%SPARK_HOME%\bin\load-spark-env.cmd"
+set _SPARK_CMD_USAGE=Usage: .\bin\sparkR [options]
+call "%SPARK_HOME%\bin\spark-submit2.cmd" sparkr-shell-main %*
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/docker.properties.template b/spark-2.4.0-bin-hadoop2.7/conf/docker.properties.template
new file mode 100644
index 0000000000000000000000000000000000000000..2ecb4f1464a4f96a261546ca9dea7626c22a693c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/docker.properties.template
@@ -0,0 +1,20 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+spark.mesos.executor.docker.image: <image built from `../external/docker/spark-mesos/Dockerfile`>
+spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro
+spark.mesos.executor.home: /opt/spark
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/fairscheduler.xml.template b/spark-2.4.0-bin-hadoop2.7/conf/fairscheduler.xml.template
new file mode 100644
index 0000000000000000000000000000000000000000..385b2e772d2c80945a0d062fbe9fd6f2135d160f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/fairscheduler.xml.template
@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<allocations>
+  <pool name="production">
+    <schedulingMode>FAIR</schedulingMode>
+    <weight>1</weight>
+    <minShare>2</minShare>
+  </pool>
+  <pool name="test">
+    <schedulingMode>FIFO</schedulingMode>
+    <weight>2</weight>
+    <minShare>3</minShare>
+  </pool>
+</allocations>
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/log4j.properties b/spark-2.4.0-bin-hadoop2.7/conf/log4j.properties
new file mode 100644
index 0000000000000000000000000000000000000000..71643d749333819248c4e8f21b2f0954b34e2231
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/log4j.properties
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+spark.log.dir=${user.home}/nxcals-spark
+# Log everything at WARN and above to both the console and a daily rolling file
+log4j.rootCategory=WARN, console, file
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+log4j.appender.file=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.file.File=${spark.log.dir}/spark.log
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.DatePattern='.'yyyy-MM-dd-HH
+log4j.appender.file.layout.ConversionPattern=[%d] %p %m (%c)%n
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+log4j.logger.org.apache.spark.repl.Main=WARN
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.spark_project.jetty=WARN
+log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.org.apache.parquet=ERROR
+log4j.logger.parquet=ERROR
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
+log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
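
With this configuration the root logger is at WARN and output goes both to stderr and to a daily-rolling file under ${user.home}/nxcals-spark. If a job needs more verbosity without editing this file, the effective level can be raised at runtime; a minimal PySpark sketch with an illustrative application name:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("log-level-demo").getOrCreate()

    # Raises the effective level for this application only; the console and
    # rolling-file appenders configured above stay in place.
    spark.sparkContext.setLogLevel("INFO")

Alternatively, a different log4j file is commonly passed through spark.driver.extraJavaOptions as -Dlog4j.configuration=file:/path/to/log4j.properties (path illustrative).
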
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/log4j.properties.template b/spark-2.4.0-bin-hadoop2.7/conf/log4j.properties.template
new file mode 100644
index 0000000000000000000000000000000000000000..ec1aa187dfb32a808cdf7106dd0a7801e43424fb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/log4j.properties.template
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Set the default spark-shell log level to WARN. When running the spark-shell, the
+# log level for this class is used to overwrite the root logger's log level, so that
+# the user can have different defaults for the shell and regular Spark apps.
+log4j.logger.org.apache.spark.repl.Main=WARN
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.spark_project.jetty=WARN
+log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.org.apache.parquet=ERROR
+log4j.logger.parquet=ERROR
+
+# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
+log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
+log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/metrics.properties.template b/spark-2.4.0-bin-hadoop2.7/conf/metrics.properties.template
new file mode 100644
index 0000000000000000000000000000000000000000..4c008a13607c25cad5e83cc1164eeef25ce4a1fb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/metrics.properties.template
@@ -0,0 +1,182 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#  syntax: [instance].sink|source.[name].[options]=[value]
+
+#  This file configures Spark's internal metrics system. The metrics system is
+#  divided into instances which correspond to internal components.
+#  Each instance can be configured to report its metrics to one or more sinks.
+#  Accepted values for [instance] are "master", "worker", "executor", "driver",
+#  and "applications". A wildcard "*" can be used as an instance name, in
+#  which case all instances will inherit the supplied property.
+#
+#  Within an instance, a "source" specifies a particular set of grouped metrics.
+#  There are two kinds of sources:
+#    1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
+#    collect a Spark component's internal state. Each instance is paired with a
+#    Spark source that is added automatically.
+#    2. Common sources, like JvmSource, which will collect low level state.
+#    These can be added through configuration options and are then loaded
+#    using reflection.
+#
+#  A "sink" specifies where metrics are delivered to. Each instance can be
+#  assigned one or more sinks.
+#
+#  The sink|source field specifies whether the property relates to a sink or
+#  source.
+#
+#  The [name] field specifies the name of the source or sink.
+#
+#  The [options] field is the specific property of this source or sink. The
+#  source or sink is responsible for parsing this property.
+#
+#  Notes:
+#    1. To add a new sink, set the "class" option to a fully qualified class
+#    name (see examples below).
+#    2. Some sinks involve a polling period. The minimum allowed polling period
+#    is 1 second.
+#    3. Wildcard properties can be overridden by more specific properties.
+#    For example, master.sink.console.period takes precedence over
+#    *.sink.console.period.
+#    4. A metrics specific configuration
+#    "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
+#    added to Java properties using -Dspark.metrics.conf=xxx if you want to
+#    customize metrics system. You can also put the file in ${SPARK_HOME}/conf
+#    and it will be loaded automatically.
+#    5. The MetricsServlet sink is added by default as a sink in the master,
+#    worker and driver, and you can send HTTP requests to the "/metrics/json"
+#    endpoint to get a snapshot of all the registered metrics in JSON format.
+#    For master, requests to the "/metrics/master/json" and
+#    "/metrics/applications/json" endpoints can be sent separately to get
+#    metrics snapshots of the master instance and applications. This
+#    MetricsServlet does not have to be configured.
+
+## List of available common sources and their properties.
+
+# org.apache.spark.metrics.source.JvmSource
+#   Note: Currently, JvmSource is the only available common source.
+#         It can be added to an instance by setting the "class" option to its
+#         fully qualified class name (see examples below).
+
+## List of available sinks and their properties.
+
+# org.apache.spark.metrics.sink.ConsoleSink
+#   Name:   Default:   Description:
+#   period  10         Poll period
+#   unit    seconds    Unit of the poll period
+
+# org.apache.spark.metrics.sink.CSVSink
+#   Name:     Default:   Description:
+#   period    10         Poll period
+#   unit      seconds    Unit of the poll period
+#   directory /tmp       Where to store CSV files
+
+# org.apache.spark.metrics.sink.GangliaSink
+#   Name:     Default:   Description:
+#   host      NONE       Hostname or multicast group of the Ganglia server,
+#                        must be set
+#   port      NONE       Port of the Ganglia server(s), must be set
+#   period    10         Poll period
+#   unit      seconds    Unit of the poll period
+#   ttl       1          TTL of messages sent by Ganglia
+#   dmax      0          Lifetime in seconds of metrics (0 never expired)
+#   mode      multicast  Ganglia network mode ('unicast' or 'multicast')
+
+# org.apache.spark.metrics.sink.JmxSink
+
+# org.apache.spark.metrics.sink.MetricsServlet
+#   Name:     Default:   Description:
+#   path      VARIES*    Path prefix from the web server root
+#   sample    false      Whether to show entire set of samples for histograms
+#                        ('false' or 'true')
+#
+# * Default path is /metrics/json for all instances except the master. The
+#   master has two paths:
+#     /metrics/applications/json # App information
+#     /metrics/master/json       # Master information
+
+# org.apache.spark.metrics.sink.GraphiteSink
+#   Name:     Default:      Description:
+#   host      NONE          Hostname of the Graphite server, must be set
+#   port      NONE          Port of the Graphite server, must be set
+#   period    10            Poll period
+#   unit      seconds       Unit of the poll period
+#   prefix    EMPTY STRING  Prefix to prepend to every metric's name
+#   protocol  tcp           Protocol ("tcp" or "udp") to use
+
+# org.apache.spark.metrics.sink.StatsdSink
+#   Name:     Default:      Description:
+#   host      127.0.0.1     Hostname or IP of StatsD server
+#   port      8125          Port of StatsD server
+#   period    10            Poll period
+#   unit      seconds       Units of poll period
+#   prefix    EMPTY STRING  Prefix to prepend to metric name
+
+## Examples
+# Enable JmxSink for all instances by class name
+#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink
+
+# Enable ConsoleSink for all instances by class name
+#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
+
+# Enable StatsdSink for all instances by class name
+#*.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink
+#*.sink.statsd.prefix=spark
+
+# Polling period for the ConsoleSink
+#*.sink.console.period=10
+# Unit of the polling period for the ConsoleSink
+#*.sink.console.unit=seconds
+
+# Polling period for the ConsoleSink specific for the master instance
+#master.sink.console.period=15
+# Unit of the polling period for the ConsoleSink specific for the master
+# instance
+#master.sink.console.unit=seconds
+
+# Enable CsvSink for all instances by class name
+#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink
+
+# Polling period for the CsvSink
+#*.sink.csv.period=1
+# Unit of the polling period for the CsvSink
+#*.sink.csv.unit=minutes
+
+# Polling directory for CsvSink
+#*.sink.csv.directory=/tmp/
+
+# Polling period for the CsvSink specific for the worker instance
+#worker.sink.csv.period=10
+# Unit of the polling period for the CsvSink specific for the worker instance
+#worker.sink.csv.unit=minutes
+
+# Enable Slf4jSink for all instances by class name
+#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink
+
+# Polling period for the Slf4JSink
+#*.sink.slf4j.period=1
+# Unit of the polling period for the Slf4jSink
+#*.sink.slf4j.unit=minutes
+
+# Enable JvmSource for instance master, worker, driver and executor
+#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
+
+#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
+
+#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
+
+#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/slaves.template b/spark-2.4.0-bin-hadoop2.7/conf/slaves.template
new file mode 100644
index 0000000000000000000000000000000000000000..be42a638230b7835bb0fd9f6a4bef7cc3e69d66e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/slaves.template
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# A Spark Worker will be started on each of the machines listed below.
+localhost
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/spark-defaults.conf b/spark-2.4.0-bin-hadoop2.7/conf/spark-defaults.conf
new file mode 100644
index 0000000000000000000000000000000000000000..09533df04ca5e212d764c1919d7357114eb8ef47
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/spark-defaults.conf
@@ -0,0 +1,94 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environment settings.
+
+# Example:
+# spark.master                     spark://master:7077
+# spark.eventLog.enabled           true
+# spark.eventLog.dir               hdfs://namenode:8021/directory
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory              5g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
+#
+
+
+#####################################################
+# Spark defaults                                    #
+#####################################################
+
+spark.driver.extraJavaOptions   -Dservice.url=https://cs-ccr-nxcals6.cern.ch:19093,https://cs-ccr-nxcals7.cern.ch:19093,https://cs-ccr-nxcals8.cern.ch:19093 
+
+spark.sql.caseSensitive true
+spark.sql.execution.arrow.enabled true
+spark.debug.maxToStringFields 50
+
+spark.eventLog.enabled  true
+spark.eventLog.dir  hdfs://nxcals//var/log/spark-history
+spark.history.fs.logDirectory   hdfs://nxcals//var/log/spark-history
+
+spark.history.fs.cleaner.enabled    true
+spark.history.fs.cleaner.maxAge     60d
+
+spark.hbase.connector.meta.cache.enabled true
+spark.hbase.connector.meta.cache.expiry 10m
+
+spark.driver.port                       5001
+spark.blockManager.port                 5101
+spark.ui.port                           5201
+spark.port.maxRetries                   99
+
+
+
+
+spark.jars nxcals-jars/activation-1.1.1.jar,nxcals-jars/aircompressor-0.10.jar,nxcals-jars/amqp-client-4.4.1.jar,nxcals-jars/animal-sniffer-annotation-1.0.jar,nxcals-jars/animal-sniffer-annotations-1.17.jar,nxcals-jars/annotations-2.0.0.jar,nxcals-jars/antlr-2.7.7.jar,nxcals-jars/antlr4-runtime-4.7.jar,nxcals-jars/antlr-runtime-3.4.jar,nxcals-jars/aopalliance-1.0.jar,nxcals-jars/aopalliance-repackaged-2.4.0-b31.jar,nxcals-jars/apacheds-i18n-2.0.0-M15.jar,nxcals-jars/apacheds-kerberos-codec-2.0.0-M15.jar,nxcals-jars/api-asn1-api-1.0.0-M20.jar,nxcals-jars/api-util-1.0.0-M20.jar,nxcals-jars/archaius-core-0.6.6.jar,nxcals-jars/arrow-format-0.10.0.jar,nxcals-jars/arrow-memory-0.10.0.jar,nxcals-jars/arrow-vector-0.10.0.jar,nxcals-jars/asm-3.1.jar,nxcals-jars/aspectjrt-1.8.10.jar,nxcals-jars/aspectjweaver-1.8.10.jar,nxcals-jars/avro-1.8.2.jar,nxcals-jars/avro-ipc-1.8.2.jar,nxcals-jars/avro-mapred-1.8.2-hadoop2.jar,nxcals-jars/bonecp-0.8.0.RELEASE.jar,nxcals-jars/caffeine-2.6.2.jar,nxcals-jars/calcite-avatica-1.2.0-incubating.jar,nxcals-jars/calcite-core-1.2.0-incubating.jar,nxcals-jars/calcite-linq4j-1.2.0-incubating.jar,nxcals-jars/cglib-2.2.1-v20090111.jar,nxcals-jars/checker-qual-2.5.2.jar,nxcals-jars/chill_2.11-0.9.3.jar,nxcals-jars/chill-java-0.9.3.jar,nxcals-jars/classmate-1.3.4.jar,nxcals-jars/cmw-data-2.0.3.jar,nxcals-jars/cmw-datax-1.0.0.jar,nxcals-jars/commons-cli-1.2.jar,nxcals-jars/commons-codec-1.11.jar,nxcals-jars/commons-collections-3.2.2.jar,nxcals-jars/commons-compiler-3.0.9.jar,nxcals-jars/commons-compress-1.8.1.jar,nxcals-jars/commons-configuration-1.10.jar,nxcals-jars/commons-crypto-1.0.0.jar,nxcals-jars/commons-dbcp-1.4.jar,nxcals-jars/commons-el-1.0.jar,nxcals-jars/commons-httpclient-3.1.jar,nxcals-jars/commons-io-2.6.jar,nxcals-jars/commons-lang-2.6.jar,nxcals-jars/commons-lang3-3.8.1.jar,nxcals-jars/commons-logging-1.2.jar,nxcals-jars/commons-math-2.2.jar,nxcals-jars/commons-math3-3.6.1.jar,nxcals-jars/commons-net-3.1.jar,nxcals-jars/commons-pool-1.5.4.jar,nxcals-jars/compress-lzf-1.0.3.jar,nxcals-jars/config-1.3.1.jar,nxcals-jars/curator-client-2.13.0.jar,nxcals-jars/curator-framework-2.13.0.jar,nxcals-jars/curator-recipes-2.13.0.jar,nxcals-jars/datanucleus-api-jdo-3.2.6.jar,nxcals-jars/datanucleus-core-3.2.10.jar,nxcals-jars/datanucleus-rdbms-3.2.9.jar,nxcals-jars/derby-10.12.1.1.jar,nxcals-jars/disruptor-3.3.0.jar,nxcals-jars/eigenbase-properties-1.1.5.jar,nxcals-jars/error_prone_annotations-2.2.0.jar,nxcals-jars/failureaccess-1.0.1.jar,nxcals-jars/feign-core-9.5.0.jar,nxcals-jars/feign-httpclient-9.5.0.jar,nxcals-jars/feign-jackson-9.5.0.jar,nxcals-jars/feign-ribbon-9.5.0.jar,nxcals-jars/feign-slf4j-9.5.0.jar,nxcals-jars/findbugs-annotations-1.3.9-1.jar,nxcals-jars/flatbuffers-1.2.0-3f79e055.jar,nxcals-jars/gson-2.8.5.jar,nxcals-jars/guava-27.0.1-jre.jar,nxcals-jars/guice-3.0.jar,nxcals-jars/guice-servlet-3.0.jar,nxcals-jars/hadoop-annotations-2.7.5.1.jar,nxcals-jars/hadoop-auth-2.7.5.1.jar,nxcals-jars/hadoop-client-2.7.5.1.jar,nxcals-jars/hadoop-common-2.7.5.1.jar,nxcals-jars/hadoop-hdfs-2.7.5.1.jar,nxcals-jars/hadoop-mapreduce-client-app-2.7.5.1.jar,nxcals-jars/hadoop-mapreduce-client-common-2.7.5.1.jar,nxcals-jars/hadoop-mapreduce-client-core-2.7.5.1.jar,nxcals-jars/hadoop-mapreduce-client-jobclient-2.7.5.1.jar,nxcals-jars/hadoop-mapreduce-client-shuffle-2.7.5.1.jar,nxcals-jars/hadoop-yarn-api-2.7.5.1.jar,nxcals-jars/hadoop-yarn-client-2.7.5.1.jar,nxcals-jars/hadoop-yarn-common-2.7.5.1.jar,nxcals-jars/hadoop-yarn-server-common-2.7.5.1.jar,nxcals-jars/hadoop-yarn-ser
ver-nodemanager-2.7.5.1.jar,nxcals-jars/hadoop-yarn-server-web-proxy-2.7.5.1.jar,nxcals-jars/hamcrest-core-1.3.jar,nxcals-jars/hbase-annotations-1.4.9.jar,nxcals-jars/hbase-client-1.4.9.jar,nxcals-jars/hbase-common-1.4.9.jar,nxcals-jars/hbase-common-1.4.9-tests.jar,nxcals-jars/hbase-hadoop2-compat-1.4.9.jar,nxcals-jars/hbase-hadoop-compat-1.4.9.jar,nxcals-jars/hbase-metrics-1.4.9.jar,nxcals-jars/hbase-metrics-api-1.4.9.jar,nxcals-jars/hbase-prefix-tree-1.4.9.jar,nxcals-jars/hbase-procedure-1.4.9.jar,nxcals-jars/hbase-protocol-1.4.9.jar,nxcals-jars/hbase-server-1.4.9.jar,nxcals-jars/hibernate-validator-6.0.15.Final.jar,nxcals-jars/hive-exec-1.2.1.spark2.jar,nxcals-jars/hive-metastore-1.2.1.spark2.jar,nxcals-jars/hk2-api-2.4.0-b31.jar,nxcals-jars/hk2-locator-2.4.0-b31.jar,nxcals-jars/hk2-utils-2.4.0-b31.jar,nxcals-jars/hppc-0.7.2.jar,nxcals-jars/htrace-core-3.1.0-incubating.jar,nxcals-jars/httpclient-4.5.3.jar,nxcals-jars/httpcore-4.4.6.jar,nxcals-jars/ivy-2.4.0.jar,nxcals-jars/j2objc-annotations-1.1.jar,nxcals-jars/jackson-annotations-2.9.8.jar,nxcals-jars/jackson-core-2.9.8.jar,nxcals-jars/jackson-core-asl-1.9.13.jar,nxcals-jars/jackson-databind-2.9.8.jar,nxcals-jars/jackson-dataformat-yaml-2.9.8.jar,nxcals-jars/jackson-datatype-jdk8-2.9.8.jar,nxcals-jars/jackson-datatype-jsr310-2.9.8.jar,nxcals-jars/jackson-jaxrs-1.9.13.jar,nxcals-jars/jackson-mapper-asl-1.9.13.jar,nxcals-jars/jackson-module-parameter-names-2.9.8.jar,nxcals-jars/jackson-module-paranamer-2.9.8.jar,nxcals-jars/jackson-module-scala_2.11-2.9.8.jar,nxcals-jars/jackson-xc-1.9.13.jar,nxcals-jars/jamon-runtime-2.4.1.jar,nxcals-jars/janino-3.0.9.jar,nxcals-jars/jasper-compiler-5.5.23.jar,nxcals-jars/jasper-runtime-5.5.23.jar,nxcals-jars/JavaEWAH-0.3.2.jar,nxcals-jars/javassist-3.18.1-GA.jar,nxcals-jars/javax.annotation-api-1.2.jar,nxcals-jars/javax.inject-1.jar,nxcals-jars/javax.inject-2.4.0-b31.jar,nxcals-jars/java-xmlbuilder-0.4.jar,nxcals-jars/javax.servlet-api-4.0.1.jar,nxcals-jars/javax.ws.rs-api-2.0.1.jar,nxcals-jars/javolution-5.5.1.jar,nxcals-jars/jaxb-api-2.2.2.jar,nxcals-jars/jaxb-impl-2.2.3-1.jar,nxcals-jars/jboss-logging-3.3.2.Final.jar,nxcals-jars/jcl-over-slf4j-1.7.16.jar,nxcals-jars/jcodings-1.0.8.jar,nxcals-jars/jdo-api-3.0.1.jar,nxcals-jars/jersey-client-1.9.jar,nxcals-jars/jersey-client-2.22.jar,nxcals-jars/jersey-common-2.22.jar,nxcals-jars/jersey-container-servlet-2.22.jar,nxcals-jars/jersey-container-servlet-core-2.22.jar,nxcals-jars/jersey-core-1.9.jar,nxcals-jars/jersey-guava-2.22.jar,nxcals-jars/jersey-guice-1.9.jar,nxcals-jars/jersey-json-1.9.jar,nxcals-jars/jersey-media-jaxb-2.22.jar,nxcals-jars/jersey-server-1.9.jar,nxcals-jars/jersey-server-2.22.jar,nxcals-jars/jets3t-0.9.0.jar,nxcals-jars/jettison-1.1.jar,nxcals-jars/jetty-6.1.26.jar,nxcals-jars/jetty-sslengine-6.1.26.jar,nxcals-jars/jetty-util-6.1.26.jar,nxcals-jars/jline-0.9.94.jar,nxcals-jars/joda-time-2.9.9.jar,nxcals-jars/jodd-core-3.5.2.jar,nxcals-jars/joni-2.1.2.jar,nxcals-jars/jsch-0.1.54.jar,nxcals-jars/json4s-ast_2.11-3.5.3.jar,nxcals-jars/json4s-core_2.11-3.5.3.jar,nxcals-jars/json4s-jackson_2.11-3.5.3.jar,nxcals-jars/json4s-native_2.11-3.5.3.jar,nxcals-jars/json4s-scalap_2.11-3.5.3.jar,nxcals-jars/jsp-api-2.1.jar,nxcals-jars/jsr305-3.0.2.jar,nxcals-jars/jta-1.1.jar,nxcals-jars/jul-to-slf4j-1.7.16.jar,nxcals-jars/junit-4.12.jar,nxcals-jars/kryo-shaded-4.0.2.jar,nxcals-jars/leveldbjni-all-1.8.jar,nxcals-jars/libfb303-0.9.3.jar,nxcals-jars/libthrift-0.9.3.jar,nxcals-jars/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,nxcals-jars
/lz4-java-1.4.0.jar,nxcals-jars/metrics-core-2.2.0.jar,nxcals-jars/metrics-core-4.0.5.jar,nxcals-jars/metrics-graphite-4.0.5.jar,nxcals-jars/metrics-json-4.0.5.jar,nxcals-jars/metrics-jvm-4.0.5.jar,nxcals-jars/minlog-1.3.0.jar,nxcals-jars/netflix-commons-util-0.1.1.jar,nxcals-jars/netflix-statistics-0.1.1.jar,nxcals-jars/netty-3.9.9.Final.jar,nxcals-jars/netty-all-4.1.30.Final.jar,nxcals-jars/nxcals-common-0.1.164.jar,nxcals-jars/nxcals-data-access-0.1.164.jar,nxcals-jars/nxcals-extraction-starter-0.1.164.jar,nxcals-jars/nxcals-service-client-0.1.164.jar,nxcals-jars/objenesis-2.5.1.jar,nxcals-jars/opencsv-2.3.jar,nxcals-jars/orc-core-1.5.2-nohive.jar,nxcals-jars/orc-mapreduce-1.5.2-nohive.jar,nxcals-jars/orc-shims-1.5.2.jar,nxcals-jars/oro-2.0.8.jar,nxcals-jars/osgi-resource-locator-1.0.1.jar,nxcals-jars/paranamer-2.8.jar,nxcals-jars/parquet-column-1.10.0.jar,nxcals-jars/parquet-common-1.10.0.jar,nxcals-jars/parquet-encoding-1.10.0.jar,nxcals-jars/parquet-format-2.4.0.jar,nxcals-jars/parquet-hadoop-1.10.0.jar,nxcals-jars/parquet-hadoop-bundle-1.6.0.jar,nxcals-jars/parquet-jackson-1.10.0.jar,nxcals-jars/protobuf-java-2.5.0.jar,nxcals-jars/py4j-0.10.7.jar,nxcals-jars/pyrolite-4.13.jar,nxcals-jars/ribbon-core-2.1.1.jar,nxcals-jars/ribbon-loadbalancer-2.1.1.jar,nxcals-jars/RoaringBitmap-0.5.11.jar,nxcals-jars/rxjava-1.0.9.jar,nxcals-jars/scaffeine_2.11-2.5.0.jar,nxcals-jars/scala-java8-compat_2.11-0.8.0.jar,nxcals-jars/scala-library-2.11.12.jar,nxcals-jars/scala-parser-combinators_2.11-1.1.0.jar,nxcals-jars/scala-reflect-2.11.12.jar,nxcals-jars/scala-xml_2.11-1.0.6.jar,nxcals-jars/servlet-api-2.5-20081211.jar,nxcals-jars/servlet-api-2.5.jar,nxcals-jars/servo-core-0.9.2.jar,nxcals-jars/servo-internal-0.9.2.jar,nxcals-jars/shc-core-1.1.1-2.1-s_2.11-NXCALS.jar,nxcals-jars/slf4j-api-1.7.22.jar,nxcals-jars/snakeyaml-1.23.jar,nxcals-jars/snappy-0.2.jar,nxcals-jars/snappy-java-1.1.7.2.jar,nxcals-jars/ST4-4.0.4.jar,nxcals-jars/stax-api-1.0.1.jar,nxcals-jars/stax-api-1.0-2.jar,nxcals-jars/stream-2.7.0.jar,nxcals-jars/stringtemplate-3.2.1.jar,nxcals-jars/univocity-parsers-2.7.3.jar,nxcals-jars/unused-1.0.0.jar,nxcals-jars/validation-api-2.0.1.Final.jar,nxcals-jars/xbean-asm6-shaded-4.8.jar,nxcals-jars/xercesImpl-2.9.1.jar,nxcals-jars/xmlenc-0.52.jar,nxcals-jars/xz-1.5.jar,nxcals-jars/zookeeper-3.4.10.jar,nxcals-jars/zstd-jni-1.3.2-2.jar
+
+
+#####################################################
+# Spark YARN mode properties                        #
+#####################################################
+
+spark.yarn.appMasterEnv.JAVA_HOME   /var/nxcals/jdk1.8.0_121
+spark.executorEnv.JAVA_HOME /var/nxcals/jdk1.8.0_121
+
+spark.executorEnv.PYTHONPATH /cvmfs/sft.cern.ch/lcg/releases/Python/3.6.5-56635/x86_64-centos7-gcc7-opt/lib:/cvmfs/sft.cern.ch/lcg/releases/Python/3.6.5-56635/x86_64-centos7-gcc7-opt/lib/python3.6/site-packages:/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt/lib/python3.6/site-packages
+
+spark.executorEnv.LD_LIBRARY_PATH /cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt/lib/python3.6/site-packages:/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt/lib/python3.6/site-packages/tensorflow:/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt/lib/python3.6/site-packages/tensorflow/contrib/tensor_forest:/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt/lib/python3.6/site-packages/tensorflow/python/framework:/cvmfs/sft.cern.ch/lcg/releases/java/8u91-ae32f/x86_64-centos7-gcc7-opt/jre/lib/amd64:/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt/lib64:/cvmfs/sft.cern.ch/lcg/views/LCG_95apython3_nxcals/x86_64-centos7-gcc7-opt/lib:/cvmfs/sft.cern.ch/lcg/releases/gcc/7.3.0-cb1ee/x86_64-centos7/lib:/cvmfs/sft.cern.ch/lcg/releases/gcc/7.3.0-cb1ee/x86_64-centos7/lib64:/cvmfs/sft.cern.ch/lcg/releases/binutils/2.28-a983d/x86_64-centos7/lib:/usr/local/lib/
+
+spark.yarn.jars hdfs:////project/nxcals/lib/spark-2.4.0/*.jar
+
+spark.yarn.am.extraLibraryPath    /usr/lib/hadoop/lib/native
+spark.executor.extraLibraryPath   /usr/lib/hadoop/lib/native
+
+spark.yarn.historyServer.address        ithdp1001.cern.ch:18080
+spark.yarn.access.hadoopFileSystems nxcals
+
+
+# You can uncomment the following spark properties to directly
+# configure spark deploy mode and master type.
+#
+# NOTE: from the configuration point of view, this is equivalent to the following Spark command:
+# $ /path/to/spark/bin/spark-shell --master yarn
+#
+# spark.master
+
+# You can uncomment the following Kerberos properties to configure Spark
+# directly against a Kerberos keytab/principal entry
+
+# spark.yarn.keytab   <file_location>
+# spark.yarn.principal <login@CERN.CH>
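
With this file under ${SPARK_HOME}/conf, a plain session builder already inherits the defaults above; a minimal PySpark sketch reading two of them back, with an illustrative application name:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("defaults-demo").getOrCreate()

    # Static properties from spark-defaults.conf are visible on the SparkConf.
    conf = spark.sparkContext.getConf()
    print(conf.get("spark.sql.caseSensitive"))   # -> true
    print(conf.get("spark.eventLog.enabled"))    # -> true

For long-running jobs the commented Kerberos properties above (or the equivalent --keytab/--principal options of spark-submit) would be filled in with a real keytab file and principal.
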
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/spark-defaults.conf.template b/spark-2.4.0-bin-hadoop2.7/conf/spark-defaults.conf.template
new file mode 100644
index 0000000000000000000000000000000000000000..19cba6e71ed1905e6ddf9f248016a7d1d98dbfca
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/spark-defaults.conf.template
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environment settings.
+
+# Example:
+# spark.master                     spark://master:7077
+# spark.eventLog.enabled           true
+# spark.eventLog.dir               hdfs://namenode:8021/directory
+# spark.serializer                 org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory              5g
+# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/spark-env.sh b/spark-2.4.0-bin-hadoop2.7/conf/spark-env.sh
new file mode 100644
index 0000000000000000000000000000000000000000..47789104407f721aa99d4d473381b5f88027337f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/spark-env.sh
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is sourced when running various Spark programs.
+# Copy it as spark-env.sh and edit that to configure Spark for your site.
+
+# Options read when launching programs locally with
+# ./bin/run-example or ./bin/spark-submit
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
+# - SPARK_CLASSPATH, default classpath entries to append
+
+# Options read by executors and drivers running inside the cluster
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
+# - SPARK_CLASSPATH, default classpath entries to append
+# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
+# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
+
+# Options read in YARN client mode
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2)
+# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
+# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
+
+# Options for the daemons used in the standalone deploy mode
+# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
+# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
+# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
+# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
+# - SPARK_WORKER_DIR, to set the working directory of worker processes
+# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
+# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
+# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
+# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
+# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
+# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
+
+# Generic options for the daemons used in the standalone deploy mode
+# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
+# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
+# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
+# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
+# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
+
+
+# We need to check whether we are running on YARN or not for Python to work correctly.
+# Pointing PYSPARK_PYTHON to the LCG build in local mode will not work if LCG is not mounted on the driver machine,
+# so in local mode we fall back to the python3 found in the environment.
+# To determine this we check the spark-defaults.conf file and the script input arguments.
+# If you use spark-submit and only set the master inside the Python script itself, we cannot detect that. (jwozniak)
+
+echo "Trying to determine YARN usage to make Python work correctly (conf/spark-env.sh)..."
+
+if grep -q -e "^\s*spark.master\s*yarn" "${SPARK_HOME}/conf/spark-defaults.conf" || [[ "$*" =~ master.*yarn ]]; then
+  echo "Using YARN"
+  export PYSPARK_PYTHON=/cvmfs/sft.cern.ch/lcg/releases/Python/3.6.5-56635/x86_64-centos7-gcc7-opt/bin/python
+else
+  echo "Not using YARN"
+  export PYSPARK_PYTHON=$(which python3)
+fi
+
+# This is required: pySpark uses these two variables (PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON) to select the
+# Python interpreter, and both must be set. The driver uses the current python3, while the executors must use the LCG build.
+export PYSPARK_DRIVER_PYTHON=$(which python3)
+
+
+# This has to be empty; we use a jar with the Hadoop configuration instead
+export HADOOP_CONF_DIR=
+
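
A quick way to verify which interpreters the logic above selected is to compare the driver's interpreter with the one reported by the executors; a minimal PySpark sketch with an illustrative application name:

    import sys
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.appName("python-check").getOrCreate()

    # Driver interpreter (whatever `which python3` resolved to above).
    print("driver  :", sys.executable)

    # Interpreter the executors actually run (the LCG build when YARN was detected).
    print("executor:", spark.sparkContext.range(1)
                            .map(lambda _: __import__("sys").executable)
                            .first())
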
diff --git a/spark-2.4.0-bin-hadoop2.7/conf/spark-env.sh.template b/spark-2.4.0-bin-hadoop2.7/conf/spark-env.sh.template
new file mode 100755
index 0000000000000000000000000000000000000000..bc92c78f0f8f3a5df0be0e588c8711b4cefcf7d5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/conf/spark-env.sh.template
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is sourced when running various Spark programs.
+# Copy it as spark-env.sh and edit that to configure Spark for your site.
+
+# Options read when launching programs locally with
+# ./bin/run-example or ./bin/spark-submit
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
+
+# Options read by executors and drivers running inside the cluster
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
+# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
+# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
+
+# Options read in YARN client/cluster mode
+# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
+# - YARN_CONF_DIR, to point Spark towards YARN configuration files when you use YARN
+# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
+# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
+# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
+
+# Options for the daemons used in the standalone deploy mode
+# - SPARK_MASTER_HOST, to bind the master to a different IP address or hostname
+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
+# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
+# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
+# - SPARK_WORKER_DIR, to set the working directory of worker processes
+# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
+# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
+# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
+# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
+# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
+# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons
+# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers
+
+# Generic options for the daemons used in the standalone deploy mode
+# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
+# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
+# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
+# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
+# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)
+# - SPARK_NO_DAEMONIZE  Run the proposed command in the foreground. It will not output a PID file.
+# Options for native BLAS, like Intel MKL, OpenBLAS, and so on.
+# You might get better performance to enable these options if using native BLAS (see SPARK-21305).
+# - MKL_NUM_THREADS=1        Disable multi-threading of Intel MKL
+# - OPENBLAS_NUM_THREADS=1   Disable multi-threading of OpenBLAS
diff --git a/spark-2.4.0-bin-hadoop2.7/data/graphx/followers.txt b/spark-2.4.0-bin-hadoop2.7/data/graphx/followers.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7bb8e900e24d4baea3193ba3ec472ec536916237
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/graphx/followers.txt
@@ -0,0 +1,8 @@
+2 1
+4 1
+1 2
+6 3
+7 3
+7 6
+6 7
+3 7
diff --git a/spark-2.4.0-bin-hadoop2.7/data/graphx/users.txt b/spark-2.4.0-bin-hadoop2.7/data/graphx/users.txt
new file mode 100644
index 0000000000000000000000000000000000000000..982d19d50bc3ea053916445a4deaff2e588d779f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/graphx/users.txt
@@ -0,0 +1,7 @@
+1,BarackObama,Barack Obama
+2,ladygaga,Goddess of Love
+3,jeresig,John Resig
+4,justinbieber,Justin Bieber
+6,matei_zaharia,Matei Zaharia
+7,odersky,Martin Odersky
+8,anonsys
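
These two files back the upstream GraphX examples, which are Scala-only; the same data can also be explored with plain DataFrames. A minimal PySpark sketch that counts incoming edges per user and joins the user names back in; the paths, column names and the assumed edge direction are illustrative:

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    spark = SparkSession.builder.appName("followers-demo").getOrCreate()

    # followers.txt: space-separated pairs of user ids, one edge per line.
    # users.txt: "id,username[,name]". Paths assume the distribution root as cwd.
    edges = (spark.read.text("data/graphx/followers.txt")
             .select(F.split("value", " ").alias("p"))
             .select(F.col("p")[0].alias("src"), F.col("p")[1].alias("dst")))
    users = spark.read.csv("data/graphx/users.txt").toDF("id", "username", "name")

    # Incoming edges per user id, joined back to the user names.
    counts = edges.groupBy("dst").count()
    counts.join(users, counts["dst"] == users["id"]).select("username", "count").show()
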
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/als/sample_movielens_ratings.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/als/sample_movielens_ratings.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0889142950797ecccdcc04b8a2c9f4cbc7c21fb6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/als/sample_movielens_ratings.txt
@@ -0,0 +1,1501 @@
+0::2::3::1424380312
+0::3::1::1424380312
+0::5::2::1424380312
+0::9::4::1424380312
+0::11::1::1424380312
+0::12::2::1424380312
+0::15::1::1424380312
+0::17::1::1424380312
+0::19::1::1424380312
+0::21::1::1424380312
+0::23::1::1424380312
+0::26::3::1424380312
+0::27::1::1424380312
+0::28::1::1424380312
+0::29::1::1424380312
+0::30::1::1424380312
+0::31::1::1424380312
+0::34::1::1424380312
+0::37::1::1424380312
+0::41::2::1424380312
+0::44::1::1424380312
+0::45::2::1424380312
+0::46::1::1424380312
+0::47::1::1424380312
+0::48::1::1424380312
+0::50::1::1424380312
+0::51::1::1424380312
+0::54::1::1424380312
+0::55::1::1424380312
+0::59::2::1424380312
+0::61::2::1424380312
+0::64::1::1424380312
+0::67::1::1424380312
+0::68::1::1424380312
+0::69::1::1424380312
+0::71::1::1424380312
+0::72::1::1424380312
+0::77::2::1424380312
+0::79::1::1424380312
+0::83::1::1424380312
+0::87::1::1424380312
+0::89::2::1424380312
+0::91::3::1424380312
+0::92::4::1424380312
+0::94::1::1424380312
+0::95::2::1424380312
+0::96::1::1424380312
+0::98::1::1424380312
+0::99::1::1424380312
+1::2::2::1424380312
+1::3::1::1424380312
+1::4::2::1424380312
+1::6::1::1424380312
+1::9::3::1424380312
+1::12::1::1424380312
+1::13::1::1424380312
+1::14::1::1424380312
+1::16::1::1424380312
+1::19::1::1424380312
+1::21::3::1424380312
+1::27::1::1424380312
+1::28::3::1424380312
+1::33::1::1424380312
+1::36::2::1424380312
+1::37::1::1424380312
+1::40::1::1424380312
+1::41::2::1424380312
+1::43::1::1424380312
+1::44::1::1424380312
+1::47::1::1424380312
+1::50::1::1424380312
+1::54::1::1424380312
+1::56::2::1424380312
+1::57::1::1424380312
+1::58::1::1424380312
+1::60::1::1424380312
+1::62::4::1424380312
+1::63::1::1424380312
+1::67::1::1424380312
+1::68::4::1424380312
+1::70::2::1424380312
+1::72::1::1424380312
+1::73::1::1424380312
+1::74::2::1424380312
+1::76::1::1424380312
+1::77::3::1424380312
+1::78::1::1424380312
+1::81::1::1424380312
+1::82::1::1424380312
+1::85::3::1424380312
+1::86::2::1424380312
+1::88::2::1424380312
+1::91::1::1424380312
+1::92::2::1424380312
+1::93::1::1424380312
+1::94::2::1424380312
+1::96::1::1424380312
+1::97::1::1424380312
+2::4::3::1424380312
+2::6::1::1424380312
+2::8::5::1424380312
+2::9::1::1424380312
+2::10::1::1424380312
+2::12::3::1424380312
+2::13::1::1424380312
+2::15::2::1424380312
+2::18::2::1424380312
+2::19::4::1424380312
+2::22::1::1424380312
+2::26::1::1424380312
+2::28::1::1424380312
+2::34::4::1424380312
+2::35::1::1424380312
+2::37::5::1424380312
+2::38::1::1424380312
+2::39::5::1424380312
+2::40::4::1424380312
+2::47::1::1424380312
+2::50::1::1424380312
+2::52::2::1424380312
+2::54::1::1424380312
+2::55::1::1424380312
+2::57::2::1424380312
+2::58::2::1424380312
+2::59::1::1424380312
+2::61::1::1424380312
+2::62::1::1424380312
+2::64::1::1424380312
+2::65::1::1424380312
+2::66::3::1424380312
+2::68::1::1424380312
+2::71::3::1424380312
+2::76::1::1424380312
+2::77::1::1424380312
+2::78::1::1424380312
+2::80::1::1424380312
+2::83::5::1424380312
+2::85::1::1424380312
+2::87::2::1424380312
+2::88::1::1424380312
+2::89::4::1424380312
+2::90::1::1424380312
+2::92::4::1424380312
+2::93::5::1424380312
+3::0::1::1424380312
+3::1::1::1424380312
+3::2::1::1424380312
+3::7::3::1424380312
+3::8::3::1424380312
+3::9::1::1424380312
+3::14::1::1424380312
+3::15::1::1424380312
+3::16::1::1424380312
+3::18::4::1424380312
+3::19::1::1424380312
+3::24::3::1424380312
+3::26::1::1424380312
+3::29::3::1424380312
+3::33::1::1424380312
+3::34::3::1424380312
+3::35::1::1424380312
+3::36::3::1424380312
+3::37::1::1424380312
+3::38::2::1424380312
+3::43::1::1424380312
+3::44::1::1424380312
+3::46::1::1424380312
+3::47::1::1424380312
+3::51::5::1424380312
+3::52::3::1424380312
+3::56::1::1424380312
+3::58::1::1424380312
+3::60::3::1424380312
+3::62::1::1424380312
+3::65::2::1424380312
+3::66::1::1424380312
+3::67::1::1424380312
+3::68::2::1424380312
+3::70::1::1424380312
+3::72::2::1424380312
+3::76::3::1424380312
+3::79::3::1424380312
+3::80::4::1424380312
+3::81::1::1424380312
+3::83::1::1424380312
+3::84::1::1424380312
+3::86::1::1424380312
+3::87::2::1424380312
+3::88::4::1424380312
+3::89::1::1424380312
+3::91::1::1424380312
+3::94::3::1424380312
+4::1::1::1424380312
+4::6::1::1424380312
+4::8::1::1424380312
+4::9::1::1424380312
+4::10::1::1424380312
+4::11::1::1424380312
+4::12::1::1424380312
+4::13::1::1424380312
+4::14::2::1424380312
+4::15::1::1424380312
+4::17::1::1424380312
+4::20::1::1424380312
+4::22::1::1424380312
+4::23::1::1424380312
+4::24::1::1424380312
+4::29::4::1424380312
+4::30::1::1424380312
+4::31::1::1424380312
+4::34::1::1424380312
+4::35::1::1424380312
+4::36::1::1424380312
+4::39::2::1424380312
+4::40::3::1424380312
+4::41::4::1424380312
+4::43::2::1424380312
+4::44::1::1424380312
+4::45::1::1424380312
+4::46::1::1424380312
+4::47::1::1424380312
+4::49::2::1424380312
+4::50::1::1424380312
+4::51::1::1424380312
+4::52::4::1424380312
+4::54::1::1424380312
+4::55::1::1424380312
+4::60::3::1424380312
+4::61::1::1424380312
+4::62::4::1424380312
+4::63::3::1424380312
+4::65::1::1424380312
+4::67::2::1424380312
+4::69::1::1424380312
+4::70::4::1424380312
+4::71::1::1424380312
+4::73::1::1424380312
+4::78::1::1424380312
+4::84::1::1424380312
+4::85::1::1424380312
+4::87::3::1424380312
+4::88::3::1424380312
+4::89::2::1424380312
+4::96::1::1424380312
+4::97::1::1424380312
+4::98::1::1424380312
+4::99::1::1424380312
+5::0::1::1424380312
+5::1::1::1424380312
+5::4::1::1424380312
+5::5::1::1424380312
+5::8::1::1424380312
+5::9::3::1424380312
+5::10::2::1424380312
+5::13::3::1424380312
+5::15::1::1424380312
+5::19::1::1424380312
+5::20::3::1424380312
+5::21::2::1424380312
+5::23::3::1424380312
+5::27::1::1424380312
+5::28::1::1424380312
+5::29::1::1424380312
+5::31::1::1424380312
+5::36::3::1424380312
+5::38::2::1424380312
+5::39::1::1424380312
+5::42::1::1424380312
+5::48::3::1424380312
+5::49::4::1424380312
+5::50::3::1424380312
+5::51::1::1424380312
+5::52::1::1424380312
+5::54::1::1424380312
+5::55::5::1424380312
+5::56::3::1424380312
+5::58::1::1424380312
+5::60::1::1424380312
+5::61::1::1424380312
+5::64::3::1424380312
+5::65::2::1424380312
+5::68::4::1424380312
+5::70::1::1424380312
+5::71::1::1424380312
+5::72::1::1424380312
+5::74::1::1424380312
+5::79::1::1424380312
+5::81::2::1424380312
+5::84::1::1424380312
+5::85::1::1424380312
+5::86::1::1424380312
+5::88::1::1424380312
+5::90::4::1424380312
+5::91::2::1424380312
+5::95::2::1424380312
+5::99::1::1424380312
+6::0::1::1424380312
+6::1::1::1424380312
+6::2::3::1424380312
+6::5::1::1424380312
+6::6::1::1424380312
+6::9::1::1424380312
+6::10::1::1424380312
+6::15::2::1424380312
+6::16::2::1424380312
+6::17::1::1424380312
+6::18::1::1424380312
+6::20::1::1424380312
+6::21::1::1424380312
+6::22::1::1424380312
+6::24::1::1424380312
+6::25::5::1424380312
+6::26::1::1424380312
+6::28::1::1424380312
+6::30::1::1424380312
+6::33::1::1424380312
+6::38::1::1424380312
+6::39::1::1424380312
+6::43::4::1424380312
+6::44::1::1424380312
+6::45::1::1424380312
+6::48::1::1424380312
+6::49::1::1424380312
+6::50::1::1424380312
+6::53::1::1424380312
+6::54::1::1424380312
+6::55::1::1424380312
+6::56::1::1424380312
+6::58::4::1424380312
+6::59::1::1424380312
+6::60::1::1424380312
+6::61::3::1424380312
+6::63::3::1424380312
+6::66::1::1424380312
+6::67::3::1424380312
+6::68::1::1424380312
+6::69::1::1424380312
+6::71::2::1424380312
+6::73::1::1424380312
+6::75::1::1424380312
+6::77::1::1424380312
+6::79::1::1424380312
+6::81::1::1424380312
+6::84::1::1424380312
+6::85::3::1424380312
+6::86::1::1424380312
+6::87::1::1424380312
+6::88::1::1424380312
+6::89::1::1424380312
+6::91::2::1424380312
+6::94::1::1424380312
+6::95::2::1424380312
+6::96::1::1424380312
+7::1::1::1424380312
+7::2::2::1424380312
+7::3::1::1424380312
+7::4::1::1424380312
+7::7::1::1424380312
+7::10::1::1424380312
+7::11::2::1424380312
+7::14::2::1424380312
+7::15::1::1424380312
+7::16::1::1424380312
+7::18::1::1424380312
+7::21::1::1424380312
+7::22::1::1424380312
+7::23::1::1424380312
+7::25::5::1424380312
+7::26::1::1424380312
+7::29::4::1424380312
+7::30::1::1424380312
+7::31::3::1424380312
+7::32::1::1424380312
+7::33::1::1424380312
+7::35::1::1424380312
+7::37::2::1424380312
+7::39::3::1424380312
+7::40::2::1424380312
+7::42::2::1424380312
+7::44::1::1424380312
+7::45::2::1424380312
+7::47::4::1424380312
+7::48::1::1424380312
+7::49::1::1424380312
+7::53::1::1424380312
+7::54::1::1424380312
+7::55::1::1424380312
+7::56::1::1424380312
+7::59::1::1424380312
+7::61::2::1424380312
+7::62::3::1424380312
+7::63::2::1424380312
+7::66::1::1424380312
+7::67::3::1424380312
+7::74::1::1424380312
+7::75::1::1424380312
+7::76::3::1424380312
+7::77::1::1424380312
+7::81::1::1424380312
+7::82::1::1424380312
+7::84::2::1424380312
+7::85::4::1424380312
+7::86::1::1424380312
+7::92::2::1424380312
+7::96::1::1424380312
+7::97::1::1424380312
+7::98::1::1424380312
+8::0::1::1424380312
+8::2::4::1424380312
+8::3::2::1424380312
+8::4::2::1424380312
+8::5::1::1424380312
+8::7::1::1424380312
+8::9::1::1424380312
+8::11::1::1424380312
+8::15::1::1424380312
+8::18::1::1424380312
+8::19::1::1424380312
+8::21::1::1424380312
+8::29::5::1424380312
+8::31::3::1424380312
+8::33::1::1424380312
+8::35::1::1424380312
+8::36::1::1424380312
+8::40::2::1424380312
+8::44::1::1424380312
+8::45::1::1424380312
+8::50::1::1424380312
+8::51::1::1424380312
+8::52::5::1424380312
+8::53::5::1424380312
+8::54::1::1424380312
+8::55::1::1424380312
+8::56::1::1424380312
+8::58::4::1424380312
+8::60::3::1424380312
+8::62::4::1424380312
+8::64::1::1424380312
+8::67::3::1424380312
+8::69::1::1424380312
+8::71::1::1424380312
+8::72::3::1424380312
+8::77::3::1424380312
+8::78::1::1424380312
+8::79::1::1424380312
+8::83::1::1424380312
+8::85::5::1424380312
+8::86::1::1424380312
+8::88::1::1424380312
+8::90::1::1424380312
+8::92::2::1424380312
+8::95::4::1424380312
+8::96::3::1424380312
+8::97::1::1424380312
+8::98::1::1424380312
+8::99::1::1424380312
+9::2::3::1424380312
+9::3::1::1424380312
+9::4::1::1424380312
+9::5::1::1424380312
+9::6::1::1424380312
+9::7::5::1424380312
+9::9::1::1424380312
+9::12::1::1424380312
+9::14::3::1424380312
+9::15::1::1424380312
+9::19::1::1424380312
+9::21::1::1424380312
+9::22::1::1424380312
+9::24::1::1424380312
+9::25::1::1424380312
+9::26::1::1424380312
+9::30::3::1424380312
+9::32::4::1424380312
+9::35::2::1424380312
+9::36::2::1424380312
+9::37::2::1424380312
+9::38::1::1424380312
+9::39::1::1424380312
+9::43::3::1424380312
+9::49::5::1424380312
+9::50::3::1424380312
+9::53::1::1424380312
+9::54::1::1424380312
+9::58::1::1424380312
+9::59::1::1424380312
+9::60::1::1424380312
+9::61::1::1424380312
+9::63::3::1424380312
+9::64::3::1424380312
+9::68::1::1424380312
+9::69::1::1424380312
+9::70::3::1424380312
+9::71::1::1424380312
+9::73::2::1424380312
+9::75::1::1424380312
+9::77::2::1424380312
+9::81::2::1424380312
+9::82::1::1424380312
+9::83::1::1424380312
+9::84::1::1424380312
+9::86::1::1424380312
+9::87::4::1424380312
+9::88::1::1424380312
+9::90::3::1424380312
+9::94::2::1424380312
+9::95::3::1424380312
+9::97::2::1424380312
+9::98::1::1424380312
+10::0::3::1424380312
+10::2::4::1424380312
+10::4::3::1424380312
+10::7::1::1424380312
+10::8::1::1424380312
+10::10::1::1424380312
+10::13::2::1424380312
+10::14::1::1424380312
+10::16::2::1424380312
+10::17::1::1424380312
+10::18::1::1424380312
+10::21::1::1424380312
+10::22::1::1424380312
+10::24::1::1424380312
+10::25::3::1424380312
+10::28::1::1424380312
+10::35::1::1424380312
+10::36::1::1424380312
+10::37::1::1424380312
+10::38::1::1424380312
+10::39::1::1424380312
+10::40::4::1424380312
+10::41::2::1424380312
+10::42::3::1424380312
+10::43::1::1424380312
+10::49::3::1424380312
+10::50::1::1424380312
+10::51::1::1424380312
+10::52::1::1424380312
+10::55::2::1424380312
+10::56::1::1424380312
+10::58::1::1424380312
+10::63::1::1424380312
+10::66::1::1424380312
+10::67::2::1424380312
+10::68::1::1424380312
+10::75::1::1424380312
+10::77::1::1424380312
+10::79::1::1424380312
+10::86::1::1424380312
+10::89::3::1424380312
+10::90::1::1424380312
+10::97::1::1424380312
+10::98::1::1424380312
+11::0::1::1424380312
+11::6::2::1424380312
+11::9::1::1424380312
+11::10::1::1424380312
+11::11::1::1424380312
+11::12::1::1424380312
+11::13::4::1424380312
+11::16::1::1424380312
+11::18::5::1424380312
+11::19::4::1424380312
+11::20::1::1424380312
+11::21::1::1424380312
+11::22::1::1424380312
+11::23::5::1424380312
+11::25::1::1424380312
+11::27::5::1424380312
+11::30::5::1424380312
+11::32::5::1424380312
+11::35::3::1424380312
+11::36::2::1424380312
+11::37::2::1424380312
+11::38::4::1424380312
+11::39::1::1424380312
+11::40::1::1424380312
+11::41::1::1424380312
+11::43::2::1424380312
+11::45::1::1424380312
+11::47::1::1424380312
+11::48::5::1424380312
+11::50::4::1424380312
+11::51::3::1424380312
+11::59::1::1424380312
+11::61::1::1424380312
+11::62::1::1424380312
+11::64::1::1424380312
+11::66::4::1424380312
+11::67::1::1424380312
+11::69::5::1424380312
+11::70::1::1424380312
+11::71::3::1424380312
+11::72::3::1424380312
+11::75::3::1424380312
+11::76::1::1424380312
+11::77::1::1424380312
+11::78::1::1424380312
+11::79::5::1424380312
+11::80::3::1424380312
+11::81::4::1424380312
+11::82::1::1424380312
+11::86::1::1424380312
+11::88::1::1424380312
+11::89::1::1424380312
+11::90::4::1424380312
+11::94::2::1424380312
+11::97::3::1424380312
+11::99::1::1424380312
+12::2::1::1424380312
+12::4::1::1424380312
+12::6::1::1424380312
+12::7::3::1424380312
+12::8::1::1424380312
+12::14::1::1424380312
+12::15::2::1424380312
+12::16::4::1424380312
+12::17::5::1424380312
+12::18::2::1424380312
+12::21::1::1424380312
+12::22::2::1424380312
+12::23::3::1424380312
+12::24::1::1424380312
+12::25::1::1424380312
+12::27::5::1424380312
+12::30::2::1424380312
+12::31::4::1424380312
+12::35::5::1424380312
+12::38::1::1424380312
+12::41::1::1424380312
+12::44::2::1424380312
+12::45::1::1424380312
+12::50::4::1424380312
+12::51::1::1424380312
+12::52::1::1424380312
+12::53::1::1424380312
+12::54::1::1424380312
+12::56::2::1424380312
+12::57::1::1424380312
+12::60::1::1424380312
+12::63::1::1424380312
+12::64::5::1424380312
+12::66::3::1424380312
+12::67::1::1424380312
+12::70::1::1424380312
+12::72::1::1424380312
+12::74::1::1424380312
+12::75::1::1424380312
+12::77::1::1424380312
+12::78::1::1424380312
+12::79::3::1424380312
+12::82::2::1424380312
+12::83::1::1424380312
+12::84::1::1424380312
+12::85::1::1424380312
+12::86::1::1424380312
+12::87::1::1424380312
+12::88::1::1424380312
+12::91::3::1424380312
+12::92::1::1424380312
+12::94::4::1424380312
+12::95::2::1424380312
+12::96::1::1424380312
+12::98::2::1424380312
+13::0::1::1424380312
+13::3::1::1424380312
+13::4::2::1424380312
+13::5::1::1424380312
+13::6::1::1424380312
+13::12::1::1424380312
+13::14::2::1424380312
+13::15::1::1424380312
+13::17::1::1424380312
+13::18::3::1424380312
+13::20::1::1424380312
+13::21::1::1424380312
+13::22::1::1424380312
+13::26::1::1424380312
+13::27::1::1424380312
+13::29::3::1424380312
+13::31::1::1424380312
+13::33::1::1424380312
+13::40::2::1424380312
+13::43::2::1424380312
+13::44::1::1424380312
+13::45::1::1424380312
+13::49::1::1424380312
+13::51::1::1424380312
+13::52::2::1424380312
+13::53::3::1424380312
+13::54::1::1424380312
+13::62::1::1424380312
+13::63::2::1424380312
+13::64::1::1424380312
+13::68::1::1424380312
+13::71::1::1424380312
+13::72::3::1424380312
+13::73::1::1424380312
+13::74::3::1424380312
+13::77::2::1424380312
+13::78::1::1424380312
+13::79::2::1424380312
+13::83::3::1424380312
+13::85::1::1424380312
+13::86::1::1424380312
+13::87::2::1424380312
+13::88::2::1424380312
+13::90::1::1424380312
+13::93::4::1424380312
+13::94::1::1424380312
+13::98::1::1424380312
+13::99::1::1424380312
+14::1::1::1424380312
+14::3::3::1424380312
+14::4::1::1424380312
+14::5::1::1424380312
+14::6::1::1424380312
+14::7::1::1424380312
+14::9::1::1424380312
+14::10::1::1424380312
+14::11::1::1424380312
+14::12::1::1424380312
+14::13::1::1424380312
+14::14::3::1424380312
+14::15::1::1424380312
+14::16::1::1424380312
+14::17::1::1424380312
+14::20::1::1424380312
+14::21::1::1424380312
+14::24::1::1424380312
+14::25::2::1424380312
+14::27::1::1424380312
+14::28::1::1424380312
+14::29::5::1424380312
+14::31::3::1424380312
+14::34::1::1424380312
+14::36::1::1424380312
+14::37::2::1424380312
+14::39::2::1424380312
+14::40::1::1424380312
+14::44::1::1424380312
+14::45::1::1424380312
+14::47::3::1424380312
+14::48::1::1424380312
+14::49::1::1424380312
+14::51::1::1424380312
+14::52::5::1424380312
+14::53::3::1424380312
+14::54::1::1424380312
+14::55::1::1424380312
+14::56::1::1424380312
+14::62::4::1424380312
+14::63::5::1424380312
+14::67::3::1424380312
+14::68::1::1424380312
+14::69::3::1424380312
+14::71::1::1424380312
+14::72::4::1424380312
+14::73::1::1424380312
+14::76::5::1424380312
+14::79::1::1424380312
+14::82::1::1424380312
+14::83::1::1424380312
+14::88::1::1424380312
+14::93::3::1424380312
+14::94::1::1424380312
+14::95::2::1424380312
+14::96::4::1424380312
+14::98::1::1424380312
+15::0::1::1424380312
+15::1::4::1424380312
+15::2::1::1424380312
+15::5::2::1424380312
+15::6::1::1424380312
+15::7::1::1424380312
+15::13::1::1424380312
+15::14::1::1424380312
+15::15::1::1424380312
+15::17::2::1424380312
+15::19::2::1424380312
+15::22::2::1424380312
+15::23::2::1424380312
+15::25::1::1424380312
+15::26::3::1424380312
+15::27::1::1424380312
+15::28::2::1424380312
+15::29::1::1424380312
+15::32::1::1424380312
+15::33::2::1424380312
+15::34::1::1424380312
+15::35::2::1424380312
+15::36::1::1424380312
+15::37::1::1424380312
+15::39::1::1424380312
+15::42::1::1424380312
+15::46::5::1424380312
+15::48::2::1424380312
+15::50::2::1424380312
+15::51::1::1424380312
+15::52::1::1424380312
+15::58::1::1424380312
+15::62::1::1424380312
+15::64::3::1424380312
+15::65::2::1424380312
+15::72::1::1424380312
+15::73::1::1424380312
+15::74::1::1424380312
+15::79::1::1424380312
+15::80::1::1424380312
+15::81::1::1424380312
+15::82::2::1424380312
+15::85::1::1424380312
+15::87::1::1424380312
+15::91::2::1424380312
+15::96::1::1424380312
+15::97::1::1424380312
+15::98::3::1424380312
+16::2::1::1424380312
+16::5::3::1424380312
+16::6::2::1424380312
+16::7::1::1424380312
+16::9::1::1424380312
+16::12::1::1424380312
+16::14::1::1424380312
+16::15::1::1424380312
+16::19::1::1424380312
+16::21::2::1424380312
+16::29::4::1424380312
+16::30::2::1424380312
+16::32::1::1424380312
+16::34::1::1424380312
+16::36::1::1424380312
+16::38::1::1424380312
+16::46::1::1424380312
+16::47::3::1424380312
+16::48::1::1424380312
+16::49::1::1424380312
+16::50::1::1424380312
+16::51::5::1424380312
+16::54::5::1424380312
+16::55::1::1424380312
+16::56::2::1424380312
+16::57::1::1424380312
+16::60::1::1424380312
+16::63::2::1424380312
+16::65::1::1424380312
+16::67::1::1424380312
+16::72::1::1424380312
+16::74::1::1424380312
+16::80::1::1424380312
+16::81::1::1424380312
+16::82::1::1424380312
+16::85::5::1424380312
+16::86::1::1424380312
+16::90::5::1424380312
+16::91::1::1424380312
+16::93::1::1424380312
+16::94::3::1424380312
+16::95::2::1424380312
+16::96::3::1424380312
+16::98::3::1424380312
+16::99::1::1424380312
+17::2::1::1424380312
+17::3::1::1424380312
+17::6::1::1424380312
+17::10::4::1424380312
+17::11::1::1424380312
+17::13::2::1424380312
+17::17::5::1424380312
+17::19::1::1424380312
+17::20::5::1424380312
+17::22::4::1424380312
+17::28::1::1424380312
+17::29::1::1424380312
+17::33::1::1424380312
+17::34::1::1424380312
+17::35::2::1424380312
+17::37::1::1424380312
+17::38::1::1424380312
+17::45::1::1424380312
+17::46::5::1424380312
+17::47::1::1424380312
+17::49::3::1424380312
+17::51::1::1424380312
+17::55::5::1424380312
+17::56::3::1424380312
+17::57::1::1424380312
+17::58::1::1424380312
+17::59::1::1424380312
+17::60::1::1424380312
+17::63::1::1424380312
+17::66::1::1424380312
+17::68::4::1424380312
+17::69::1::1424380312
+17::70::1::1424380312
+17::72::1::1424380312
+17::73::3::1424380312
+17::78::1::1424380312
+17::79::1::1424380312
+17::82::2::1424380312
+17::84::1::1424380312
+17::90::5::1424380312
+17::91::3::1424380312
+17::92::1::1424380312
+17::93::1::1424380312
+17::94::4::1424380312
+17::95::2::1424380312
+17::97::1::1424380312
+18::1::1::1424380312
+18::4::3::1424380312
+18::5::2::1424380312
+18::6::1::1424380312
+18::7::1::1424380312
+18::10::1::1424380312
+18::11::4::1424380312
+18::12::2::1424380312
+18::13::1::1424380312
+18::15::1::1424380312
+18::18::1::1424380312
+18::20::1::1424380312
+18::21::2::1424380312
+18::22::1::1424380312
+18::23::2::1424380312
+18::25::1::1424380312
+18::26::1::1424380312
+18::27::1::1424380312
+18::28::5::1424380312
+18::29::1::1424380312
+18::31::1::1424380312
+18::32::1::1424380312
+18::36::1::1424380312
+18::38::5::1424380312
+18::39::5::1424380312
+18::40::1::1424380312
+18::42::1::1424380312
+18::43::1::1424380312
+18::44::4::1424380312
+18::46::1::1424380312
+18::47::1::1424380312
+18::48::1::1424380312
+18::51::2::1424380312
+18::55::1::1424380312
+18::56::1::1424380312
+18::57::1::1424380312
+18::62::1::1424380312
+18::63::1::1424380312
+18::66::3::1424380312
+18::67::1::1424380312
+18::70::1::1424380312
+18::75::1::1424380312
+18::76::3::1424380312
+18::77::1::1424380312
+18::80::3::1424380312
+18::81::3::1424380312
+18::82::1::1424380312
+18::83::5::1424380312
+18::84::1::1424380312
+18::97::1::1424380312
+18::98::1::1424380312
+18::99::2::1424380312
+19::0::1::1424380312
+19::1::1::1424380312
+19::2::1::1424380312
+19::4::1::1424380312
+19::6::2::1424380312
+19::11::1::1424380312
+19::12::1::1424380312
+19::14::1::1424380312
+19::23::1::1424380312
+19::26::1::1424380312
+19::31::1::1424380312
+19::32::4::1424380312
+19::33::1::1424380312
+19::34::1::1424380312
+19::37::1::1424380312
+19::38::1::1424380312
+19::41::1::1424380312
+19::43::1::1424380312
+19::45::1::1424380312
+19::48::1::1424380312
+19::49::1::1424380312
+19::50::2::1424380312
+19::53::2::1424380312
+19::54::3::1424380312
+19::55::1::1424380312
+19::56::2::1424380312
+19::58::1::1424380312
+19::61::1::1424380312
+19::62::1::1424380312
+19::63::1::1424380312
+19::64::1::1424380312
+19::65::1::1424380312
+19::69::2::1424380312
+19::72::1::1424380312
+19::74::3::1424380312
+19::76::1::1424380312
+19::78::1::1424380312
+19::79::1::1424380312
+19::81::1::1424380312
+19::82::1::1424380312
+19::84::1::1424380312
+19::86::1::1424380312
+19::87::2::1424380312
+19::90::4::1424380312
+19::93::1::1424380312
+19::94::4::1424380312
+19::95::2::1424380312
+19::96::1::1424380312
+19::98::4::1424380312
+20::0::1::1424380312
+20::1::1::1424380312
+20::2::2::1424380312
+20::4::2::1424380312
+20::6::1::1424380312
+20::8::1::1424380312
+20::12::1::1424380312
+20::21::2::1424380312
+20::22::5::1424380312
+20::24::2::1424380312
+20::25::1::1424380312
+20::26::1::1424380312
+20::29::2::1424380312
+20::30::2::1424380312
+20::32::2::1424380312
+20::39::1::1424380312
+20::40::1::1424380312
+20::41::2::1424380312
+20::45::2::1424380312
+20::48::1::1424380312
+20::50::1::1424380312
+20::51::3::1424380312
+20::53::3::1424380312
+20::55::1::1424380312
+20::57::2::1424380312
+20::60::1::1424380312
+20::61::1::1424380312
+20::64::1::1424380312
+20::66::1::1424380312
+20::70::2::1424380312
+20::72::1::1424380312
+20::73::2::1424380312
+20::75::4::1424380312
+20::76::1::1424380312
+20::77::4::1424380312
+20::78::1::1424380312
+20::79::1::1424380312
+20::84::2::1424380312
+20::85::2::1424380312
+20::88::3::1424380312
+20::89::1::1424380312
+20::90::3::1424380312
+20::91::1::1424380312
+20::92::2::1424380312
+20::93::1::1424380312
+20::94::4::1424380312
+20::97::1::1424380312
+21::0::1::1424380312
+21::2::4::1424380312
+21::3::1::1424380312
+21::7::2::1424380312
+21::11::1::1424380312
+21::12::1::1424380312
+21::13::1::1424380312
+21::14::3::1424380312
+21::17::1::1424380312
+21::19::1::1424380312
+21::20::1::1424380312
+21::21::1::1424380312
+21::22::1::1424380312
+21::23::1::1424380312
+21::24::1::1424380312
+21::27::1::1424380312
+21::29::5::1424380312
+21::30::2::1424380312
+21::38::1::1424380312
+21::40::2::1424380312
+21::43::3::1424380312
+21::44::1::1424380312
+21::45::1::1424380312
+21::46::1::1424380312
+21::48::1::1424380312
+21::51::1::1424380312
+21::53::5::1424380312
+21::54::1::1424380312
+21::55::1::1424380312
+21::56::1::1424380312
+21::58::3::1424380312
+21::59::3::1424380312
+21::64::1::1424380312
+21::66::1::1424380312
+21::68::1::1424380312
+21::71::1::1424380312
+21::73::1::1424380312
+21::74::4::1424380312
+21::80::1::1424380312
+21::81::1::1424380312
+21::83::1::1424380312
+21::84::1::1424380312
+21::85::3::1424380312
+21::87::4::1424380312
+21::89::2::1424380312
+21::92::2::1424380312
+21::96::3::1424380312
+21::99::1::1424380312
+22::0::1::1424380312
+22::3::2::1424380312
+22::5::2::1424380312
+22::6::2::1424380312
+22::9::1::1424380312
+22::10::1::1424380312
+22::11::1::1424380312
+22::13::1::1424380312
+22::14::1::1424380312
+22::16::1::1424380312
+22::18::3::1424380312
+22::19::1::1424380312
+22::22::5::1424380312
+22::25::1::1424380312
+22::26::1::1424380312
+22::29::3::1424380312
+22::30::5::1424380312
+22::32::4::1424380312
+22::33::1::1424380312
+22::35::1::1424380312
+22::36::3::1424380312
+22::37::1::1424380312
+22::40::1::1424380312
+22::41::3::1424380312
+22::44::1::1424380312
+22::45::2::1424380312
+22::48::1::1424380312
+22::51::5::1424380312
+22::55::1::1424380312
+22::56::2::1424380312
+22::60::3::1424380312
+22::61::1::1424380312
+22::62::4::1424380312
+22::63::1::1424380312
+22::65::1::1424380312
+22::66::1::1424380312
+22::68::4::1424380312
+22::69::4::1424380312
+22::70::3::1424380312
+22::71::1::1424380312
+22::74::5::1424380312
+22::75::5::1424380312
+22::78::1::1424380312
+22::80::3::1424380312
+22::81::1::1424380312
+22::82::1::1424380312
+22::84::1::1424380312
+22::86::1::1424380312
+22::87::3::1424380312
+22::88::5::1424380312
+22::90::2::1424380312
+22::92::3::1424380312
+22::95::2::1424380312
+22::96::2::1424380312
+22::98::4::1424380312
+22::99::1::1424380312
+23::0::1::1424380312
+23::2::1::1424380312
+23::4::1::1424380312
+23::6::2::1424380312
+23::10::4::1424380312
+23::12::1::1424380312
+23::13::4::1424380312
+23::14::1::1424380312
+23::15::1::1424380312
+23::18::4::1424380312
+23::22::2::1424380312
+23::23::4::1424380312
+23::24::1::1424380312
+23::25::1::1424380312
+23::26::1::1424380312
+23::27::5::1424380312
+23::28::1::1424380312
+23::29::1::1424380312
+23::30::4::1424380312
+23::32::5::1424380312
+23::33::2::1424380312
+23::36::3::1424380312
+23::37::1::1424380312
+23::38::1::1424380312
+23::39::1::1424380312
+23::43::1::1424380312
+23::48::5::1424380312
+23::49::5::1424380312
+23::50::4::1424380312
+23::53::1::1424380312
+23::55::5::1424380312
+23::57::1::1424380312
+23::59::1::1424380312
+23::60::1::1424380312
+23::61::1::1424380312
+23::64::4::1424380312
+23::65::5::1424380312
+23::66::2::1424380312
+23::67::1::1424380312
+23::68::3::1424380312
+23::69::1::1424380312
+23::72::1::1424380312
+23::73::3::1424380312
+23::77::1::1424380312
+23::82::2::1424380312
+23::83::1::1424380312
+23::84::1::1424380312
+23::85::1::1424380312
+23::87::3::1424380312
+23::88::1::1424380312
+23::95::2::1424380312
+23::97::1::1424380312
+24::4::1::1424380312
+24::6::3::1424380312
+24::7::1::1424380312
+24::10::2::1424380312
+24::12::1::1424380312
+24::15::1::1424380312
+24::19::1::1424380312
+24::24::1::1424380312
+24::27::3::1424380312
+24::30::5::1424380312
+24::31::1::1424380312
+24::32::3::1424380312
+24::33::1::1424380312
+24::37::1::1424380312
+24::39::1::1424380312
+24::40::1::1424380312
+24::42::1::1424380312
+24::43::3::1424380312
+24::45::2::1424380312
+24::46::1::1424380312
+24::47::1::1424380312
+24::48::1::1424380312
+24::49::1::1424380312
+24::50::1::1424380312
+24::52::5::1424380312
+24::57::1::1424380312
+24::59::4::1424380312
+24::63::4::1424380312
+24::65::1::1424380312
+24::66::1::1424380312
+24::67::1::1424380312
+24::68::3::1424380312
+24::69::5::1424380312
+24::71::1::1424380312
+24::72::4::1424380312
+24::77::4::1424380312
+24::78::1::1424380312
+24::80::1::1424380312
+24::82::1::1424380312
+24::84::1::1424380312
+24::86::1::1424380312
+24::87::1::1424380312
+24::88::2::1424380312
+24::89::1::1424380312
+24::90::5::1424380312
+24::91::1::1424380312
+24::92::1::1424380312
+24::94::2::1424380312
+24::95::1::1424380312
+24::96::5::1424380312
+24::98::1::1424380312
+24::99::1::1424380312
+25::1::3::1424380312
+25::2::1::1424380312
+25::7::1::1424380312
+25::9::1::1424380312
+25::12::3::1424380312
+25::16::3::1424380312
+25::17::1::1424380312
+25::18::1::1424380312
+25::20::1::1424380312
+25::22::1::1424380312
+25::23::1::1424380312
+25::26::2::1424380312
+25::29::1::1424380312
+25::30::1::1424380312
+25::31::2::1424380312
+25::33::4::1424380312
+25::34::3::1424380312
+25::35::2::1424380312
+25::36::1::1424380312
+25::37::1::1424380312
+25::40::1::1424380312
+25::41::1::1424380312
+25::43::1::1424380312
+25::47::4::1424380312
+25::50::1::1424380312
+25::51::1::1424380312
+25::53::1::1424380312
+25::56::1::1424380312
+25::58::2::1424380312
+25::64::2::1424380312
+25::67::2::1424380312
+25::68::1::1424380312
+25::70::1::1424380312
+25::71::4::1424380312
+25::73::1::1424380312
+25::74::1::1424380312
+25::76::1::1424380312
+25::79::1::1424380312
+25::82::1::1424380312
+25::84::2::1424380312
+25::85::1::1424380312
+25::91::3::1424380312
+25::92::1::1424380312
+25::94::1::1424380312
+25::95::1::1424380312
+25::97::2::1424380312
+26::0::1::1424380312
+26::1::1::1424380312
+26::2::1::1424380312
+26::3::1::1424380312
+26::4::4::1424380312
+26::5::2::1424380312
+26::6::3::1424380312
+26::7::5::1424380312
+26::13::3::1424380312
+26::14::1::1424380312
+26::16::1::1424380312
+26::18::3::1424380312
+26::20::1::1424380312
+26::21::3::1424380312
+26::22::5::1424380312
+26::23::5::1424380312
+26::24::5::1424380312
+26::27::1::1424380312
+26::31::1::1424380312
+26::35::1::1424380312
+26::36::4::1424380312
+26::40::1::1424380312
+26::44::1::1424380312
+26::45::2::1424380312
+26::47::1::1424380312
+26::48::1::1424380312
+26::49::3::1424380312
+26::50::2::1424380312
+26::52::1::1424380312
+26::54::4::1424380312
+26::55::1::1424380312
+26::57::3::1424380312
+26::58::1::1424380312
+26::61::1::1424380312
+26::62::2::1424380312
+26::66::1::1424380312
+26::68::4::1424380312
+26::71::1::1424380312
+26::73::4::1424380312
+26::76::1::1424380312
+26::81::3::1424380312
+26::85::1::1424380312
+26::86::3::1424380312
+26::88::5::1424380312
+26::91::1::1424380312
+26::94::5::1424380312
+26::95::1::1424380312
+26::96::1::1424380312
+26::97::1::1424380312
+27::0::1::1424380312
+27::9::1::1424380312
+27::10::1::1424380312
+27::18::4::1424380312
+27::19::3::1424380312
+27::20::1::1424380312
+27::22::2::1424380312
+27::24::2::1424380312
+27::25::1::1424380312
+27::27::3::1424380312
+27::28::1::1424380312
+27::29::1::1424380312
+27::31::1::1424380312
+27::33::3::1424380312
+27::40::1::1424380312
+27::42::1::1424380312
+27::43::1::1424380312
+27::44::3::1424380312
+27::45::1::1424380312
+27::51::3::1424380312
+27::52::1::1424380312
+27::55::3::1424380312
+27::57::1::1424380312
+27::59::1::1424380312
+27::60::1::1424380312
+27::61::1::1424380312
+27::64::1::1424380312
+27::66::3::1424380312
+27::68::1::1424380312
+27::70::1::1424380312
+27::71::2::1424380312
+27::72::1::1424380312
+27::75::3::1424380312
+27::78::1::1424380312
+27::80::3::1424380312
+27::82::1::1424380312
+27::83::3::1424380312
+27::86::1::1424380312
+27::87::2::1424380312
+27::90::1::1424380312
+27::91::1::1424380312
+27::92::1::1424380312
+27::93::1::1424380312
+27::94::2::1424380312
+27::95::1::1424380312
+27::98::1::1424380312
+28::0::3::1424380312
+28::1::1::1424380312
+28::2::4::1424380312
+28::3::1::1424380312
+28::6::1::1424380312
+28::7::1::1424380312
+28::12::5::1424380312
+28::13::2::1424380312
+28::14::1::1424380312
+28::15::1::1424380312
+28::17::1::1424380312
+28::19::3::1424380312
+28::20::1::1424380312
+28::23::3::1424380312
+28::24::3::1424380312
+28::27::1::1424380312
+28::29::1::1424380312
+28::33::1::1424380312
+28::34::1::1424380312
+28::36::1::1424380312
+28::38::2::1424380312
+28::39::2::1424380312
+28::44::1::1424380312
+28::45::1::1424380312
+28::49::4::1424380312
+28::50::1::1424380312
+28::52::1::1424380312
+28::54::1::1424380312
+28::56::1::1424380312
+28::57::3::1424380312
+28::58::1::1424380312
+28::59::1::1424380312
+28::60::1::1424380312
+28::62::3::1424380312
+28::63::1::1424380312
+28::65::1::1424380312
+28::75::1::1424380312
+28::78::1::1424380312
+28::81::5::1424380312
+28::82::4::1424380312
+28::83::1::1424380312
+28::85::1::1424380312
+28::88::2::1424380312
+28::89::4::1424380312
+28::90::1::1424380312
+28::92::5::1424380312
+28::94::1::1424380312
+28::95::2::1424380312
+28::98::1::1424380312
+28::99::1::1424380312
+29::3::1::1424380312
+29::4::1::1424380312
+29::5::1::1424380312
+29::7::2::1424380312
+29::9::1::1424380312
+29::10::3::1424380312
+29::11::1::1424380312
+29::13::3::1424380312
+29::14::1::1424380312
+29::15::1::1424380312
+29::17::3::1424380312
+29::19::3::1424380312
+29::22::3::1424380312
+29::23::4::1424380312
+29::25::1::1424380312
+29::29::1::1424380312
+29::31::1::1424380312
+29::32::4::1424380312
+29::33::2::1424380312
+29::36::2::1424380312
+29::38::3::1424380312
+29::39::1::1424380312
+29::42::1::1424380312
+29::46::5::1424380312
+29::49::3::1424380312
+29::51::2::1424380312
+29::59::1::1424380312
+29::61::1::1424380312
+29::62::1::1424380312
+29::67::1::1424380312
+29::68::3::1424380312
+29::69::1::1424380312
+29::70::1::1424380312
+29::74::1::1424380312
+29::75::1::1424380312
+29::79::2::1424380312
+29::80::1::1424380312
+29::81::2::1424380312
+29::83::1::1424380312
+29::85::1::1424380312
+29::86::1::1424380312
+29::90::4::1424380312
+29::93::1::1424380312
+29::94::4::1424380312
+29::97::1::1424380312
+29::99::1::1424380312
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/als/test.data b/spark-2.4.0-bin-hadoop2.7/data/mllib/als/test.data
new file mode 100644
index 0000000000000000000000000000000000000000..e476cc23e047d78aa4e2bda8fa2f5cea631ef708
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/als/test.data
@@ -0,0 +1,16 @@
+1,1,5.0
+1,2,1.0
+1,3,5.0
+1,4,1.0
+2,1,5.0
+2,2,1.0
+2,3,5.0
+2,4,1.0
+3,1,1.0
+3,2,5.0
+3,3,1.0
+3,4,5.0
+4,1,1.0
+4,2,5.0
+4,3,1.0
+4,4,5.0
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/gmm_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/gmm_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..934ee4a83a2df4c411fa26bff3c326472e66a1ec
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/gmm_data.txt
@@ -0,0 +1,2000 @@
+ 2.59470454e+00 2.12298217e+00
+ 1.15807024e+00 -1.46498723e-01
+ 2.46206638e+00 6.19556894e-01
+ -5.54845070e-01 -7.24700066e-01
+ -3.23111426e+00 -1.42579084e+00
+ 3.02978115e+00 7.87121753e-01
+ 1.97365907e+00 1.15914704e+00
+ -6.44852101e+00 -3.18154314e+00
+ 1.30963349e+00 1.62866434e-01
+ 4.26482541e+00 2.15547996e+00
+ 3.79927257e+00 1.50572445e+00
+ 4.17452609e-01 -6.74032760e-01
+ 4.21117627e-01 4.45590255e-01
+ -2.80425571e+00 -7.77150554e-01
+ 2.55928797e+00 7.03954218e-01
+ 1.32554059e+00 -9.46663152e-01
+ -3.39691439e+00 -1.49005743e+00
+ -2.26542270e-01 3.60052515e-02
+ 1.04994198e+00 5.29825685e-01
+ -1.51566882e+00 -1.86264432e-01
+ -3.27928172e-01 -7.60859110e-01
+ -3.18054866e-01 3.97719805e-01
+ 1.65579418e-01 -3.47232033e-01
+ 6.47162333e-01 4.96059961e-02
+ -2.80776647e-01 4.79418757e-01
+ 7.45069752e-01 1.20790281e-01
+ 2.13604102e-01 1.59542555e-01
+ -3.08860224e+00 -1.43259870e+00
+ 8.97066497e-01 1.10206801e+00
+ -2.23918874e-01 -1.07267267e+00
+ 2.51525708e+00 2.84761973e-01
+ 9.98052532e-01 1.08333783e+00
+ 1.76705588e+00 8.18866778e-01
+ 5.31555163e-02 -1.90111151e-01
+ -2.17405059e+00 7.21854582e-02
+ -2.13772505e+00 -3.62010387e-01
+ 2.95974057e+00 1.31602381e+00
+ 2.74053561e+00 1.61781757e+00
+ 6.68135448e-01 2.86586009e-01
+ 2.82323739e+00 1.74437257e+00
+ 8.11540288e-01 5.50744478e-01
+ 4.10050897e-01 5.10668402e-03
+ 9.58626136e-01 -3.49633680e-01
+ 4.66599798e+00 1.49964894e+00
+ 4.94507794e-01 2.58928077e-01
+ -2.36029742e+00 -1.61042909e+00
+ -4.99306804e-01 -8.04984769e-01
+ 1.07448510e+00 9.39605828e-01
+ -1.80448949e+00 -1.05983264e+00
+ -3.22353821e-01 1.73612093e-01
+ 1.85418702e+00 1.15640643e+00
+ 6.93794163e-01 6.59993560e-01
+ 1.99399102e+00 1.44547123e+00
+ 3.38866124e+00 1.23379290e+00
+ -4.24067720e+00 -1.22264282e+00
+ 6.03230201e-02 2.95232729e-01
+ -3.59341813e+00 -7.17453726e-01
+ 4.87447372e-01 -2.00733911e-01
+ 1.20149195e+00 4.07880197e-01
+ -2.13331464e+00 -4.58518077e-01
+ -3.84091083e+00 -1.71553950e+00
+ -5.37279250e-01 2.64822629e-02
+ -2.10155227e+00 -1.32558103e+00
+ -1.71318897e+00 -7.12098563e-01
+ -1.46280695e+00 -1.84868337e-01
+ -3.59785325e+00 -1.54832434e+00
+ -5.77528081e-01 -5.78580857e-01
+ 3.14734283e-01 5.80184639e-01
+ -2.71164714e+00 -1.19379432e+00
+ 1.09634489e+00 7.20143887e-01
+ -3.05527722e+00 -1.47774064e+00
+ 6.71753586e-01 7.61350020e-01
+ 3.98294144e+00 1.54166484e+00
+ -3.37220384e+00 -2.21332064e+00
+ 1.81222914e+00 7.41212752e-01
+ 2.71458282e-01 1.36329078e-01
+ -3.97815359e-01 1.16766886e-01
+ -1.70192814e+00 -9.75851571e-01
+ -3.46803804e+00 -1.09965988e+00
+ -1.69649627e+00 -5.76045801e-01
+ -1.02485636e-01 -8.81841246e-01
+ -3.24194667e-02 2.55429276e-01
+ -2.75343168e+00 -1.51366320e+00
+ -2.78676702e+00 -5.22360489e-01
+ 1.70483164e+00 1.19769805e+00
+ 4.92022579e-01 3.24944706e-01
+ 2.48768464e+00 1.00055363e+00
+ 4.48786400e-01 7.63902870e-01
+ 2.93862696e+00 1.73809968e+00
+ -3.55019305e+00 -1.97875558e+00
+ 1.74270784e+00 6.90229224e-01
+ 5.13391994e-01 4.58374016e-01
+ 1.78379499e+00 9.08026381e-01
+ 1.75814147e+00 7.41449784e-01
+ -2.30687792e-01 3.91009729e-01
+ 3.92271353e+00 1.44006290e+00
+ 2.93361679e-01 -4.99886375e-03
+ 2.47902690e-01 -7.49542503e-01
+ -3.97675355e-01 1.36824887e-01
+ 3.56535953e+00 1.15181329e+00
+ 3.22425301e+00 1.28702383e+00
+ -2.94192478e-01 -2.42382557e-01
+ 8.02068864e-01 -1.51671475e-01
+ 8.54133530e-01 -4.89514885e-02
+ -1.64316316e-01 -5.34642346e-01
+ -6.08485405e-01 -2.10332352e-01
+ -2.18940059e+00 -1.07024952e+00
+ -1.71586960e+00 -2.83333492e-02
+ 1.70200448e-01 -3.28031178e-01
+ -1.97210346e+00 -5.39948532e-01
+ 2.19500160e+00 1.05697170e+00
+ -1.76239935e+00 -1.09377438e+00
+ 1.68314744e+00 6.86491164e-01
+ -2.99852288e+00 -1.46619067e+00
+ -2.23769560e+00 -9.15008355e-01
+ 9.46887516e-01 5.58410503e-01
+ 5.02153123e-01 1.63851235e-01
+ -9.70297062e-01 3.14625374e-01
+ -1.29405593e+00 -8.20994131e-01
+ 2.72516079e+00 7.85839947e-01
+ 1.45788024e+00 3.37487353e-01
+ -4.36292749e-01 -5.42150480e-01
+ 2.21304711e+00 1.25254042e+00
+ -1.20810271e-01 4.79632898e-01
+ -3.30884511e+00 -1.50607586e+00
+ -6.55882455e+00 -1.94231256e+00
+ -3.17033630e+00 -9.94678930e-01
+ 1.42043617e+00 7.28808957e-01
+ -1.57546099e+00 -1.10320497e+00
+ -3.22748754e+00 -1.64174579e+00
+ 2.96776017e-03 -3.16191512e-02
+ -2.25986054e+00 -6.13123197e-01
+ 2.49434243e+00 7.73069183e-01
+ 9.08494049e-01 -1.53926853e-01
+ -2.80559090e+00 -1.37474221e+00
+ 4.75224286e-01 2.53153674e-01
+ 4.37644006e+00 8.49116998e-01
+ 2.27282959e+00 6.16568202e-01
+ 1.16006880e+00 1.65832798e-01
+ -1.67163193e+00 -1.22555386e+00
+ -1.38231118e+00 -7.29575504e-01
+ -3.49922750e+00 -2.26446675e+00
+ -3.73780110e-01 -1.90657869e-01
+ 1.68627679e+00 1.05662987e+00
+ -3.28891792e+00 -1.11080334e+00
+ -2.59815798e+00 -1.51410198e+00
+ -2.61203309e+00 -6.00143552e-01
+ 6.58964943e-01 4.47216094e-01
+ -2.26711381e+00 -7.26512923e-01
+ -5.31429009e-02 -1.97925341e-02
+ 3.19749807e+00 9.20425476e-01
+ -1.37595787e+00 -6.58062732e-01
+ 8.09900278e-01 -3.84286160e-01
+ -5.07741280e+00 -1.97683808e+00
+ -2.99764250e+00 -1.50753777e+00
+ -9.87671815e-01 -4.63255889e-01
+ 1.65390765e+00 6.73806615e-02
+ 5.51252659e+00 2.69842267e+00
+ -2.23724309e+00 -4.77624004e-01
+ 4.99726228e+00 1.74690949e+00
+ 1.75859162e-01 -1.49350995e-01
+ 4.13382789e+00 1.31735161e+00
+ 2.69058117e+00 4.87656923e-01
+ 1.07180318e+00 1.01426954e+00
+ 3.37216869e+00 1.05955377e+00
+ -2.95006781e+00 -1.57048303e+00
+ -2.46401648e+00 -8.37056374e-01
+ 1.19012962e-01 7.54702770e-01
+ 3.34142539e+00 4.81938295e-01
+ 2.92643913e+00 1.04301050e+00
+ 2.89697751e+00 1.37551442e+00
+ -1.03094242e+00 2.20903962e-01
+ -5.13914589e+00 -2.23355387e+00
+ -8.81680780e-01 1.83590000e-01
+ 2.82334775e+00 1.26650464e+00
+ -2.81042540e-01 -3.26370240e-01
+ 2.97995487e+00 8.34569452e-01
+ -1.39857135e+00 -1.15798385e+00
+ 4.27186506e+00 9.04253702e-01
+ 6.98684517e-01 7.91167305e-01
+ 3.52233095e+00 1.29976473e+00
+ 2.21448029e+00 2.73213379e-01
+ -3.13505683e-01 -1.20593774e-01
+ 3.70571571e+00 1.06220876e+00
+ 9.83881041e-01 5.67713803e-01
+ -2.17897705e+00 2.52925205e-01
+ 1.38734039e+00 4.61287066e-01
+ -1.41181602e+00 -1.67248955e-02
+ -1.69974639e+00 -7.17812071e-01
+ -2.01005793e-01 -7.49662056e-01
+ 1.69016336e+00 3.24687979e-01
+ -2.03250179e+00 -2.76108460e-01
+ 3.68776848e-01 4.12536941e-01
+ 7.66238259e-01 -1.84750637e-01
+ -2.73989147e-01 -1.72817250e-01
+ -2.18623745e+00 -2.10906798e-01
+ -1.39795625e-01 3.26066094e-02
+ -2.73826912e-01 -6.67586097e-02
+ -1.57880654e+00 -4.99395900e-01
+ 4.55950908e+00 2.29410489e+00
+ -7.36479631e-01 -1.57861857e-01
+ 1.92082888e+00 1.05843391e+00
+ 4.29192810e+00 1.38127810e+00
+ 1.61852879e+00 1.95871986e-01
+ -1.95027403e+00 -5.22448168e-01
+ -1.67446281e+00 -9.41497162e-01
+ 6.07097859e-01 3.44178029e-01
+ -3.44004683e+00 -1.49258461e+00
+ 2.72114752e+00 6.00728991e-01
+ 8.80685522e-01 -2.53243336e-01
+ 1.39254928e+00 3.42988512e-01
+ 1.14194836e-01 -8.57945694e-02
+ -1.49387332e+00 -7.60860481e-01
+ -1.98053285e+00 -4.86039865e-01
+ 3.56008568e+00 1.08438692e+00
+ 2.27833961e-01 1.09441881e+00
+ -1.16716710e+00 -6.54778242e-01
+ 2.02156613e+00 5.42075758e-01
+ 1.08429178e+00 -7.67420693e-01
+ 6.63058455e-01 4.61680991e-01
+ -1.06201537e+00 1.38862846e-01
+ 3.08701875e+00 8.32580273e-01
+ -4.96558108e-01 -2.47031257e-01
+ 7.95109987e-01 7.59314147e-02
+ -3.39903524e-01 8.71565566e-03
+ 8.68351357e-01 4.78358641e-01
+ 1.48750819e+00 7.63257420e-01
+ -4.51224101e-01 -4.44056898e-01
+ -3.02734750e-01 -2.98487961e-01
+ 5.46846609e-01 7.02377629e-01
+ 1.65129778e+00 3.74008231e-01
+ -7.43336512e-01 3.95723531e-01
+ -5.88446605e-01 -6.47520211e-01
+ 3.58613167e+00 1.95024937e+00
+ 3.11718883e+00 8.37984715e-01
+ 1.80919244e+00 9.62644986e-01
+ 5.43856371e-02 -5.86297543e-01
+ -1.95186766e+00 -1.02624212e-01
+ 8.95628057e-01 5.91812281e-01
+ 4.97691627e-02 5.31137156e-01
+ -1.07633113e+00 -2.47392788e-01
+ -1.17257986e+00 -8.68528265e-01
+ -8.19227665e-02 5.80579434e-03
+ -2.86409787e-01 1.95812924e-01
+ 1.10582671e+00 7.42853240e-01
+ 4.06429774e+00 1.06557476e+00
+ -3.42521792e+00 -7.74327139e-01
+ 1.28468671e+00 6.20431661e-01
+ 6.01201008e-01 -1.16799728e-01
+ -1.85058727e-01 -3.76235293e-01
+ 5.44083324e+00 2.98490868e+00
+ 2.69273070e+00 7.83901153e-01
+ 1.88938036e-01 -4.83222152e-01
+ 1.05667256e+00 -2.57003165e-01
+ 2.99711662e-01 -4.33131912e-01
+ 7.73689216e-02 -1.78738364e-01
+ 9.58326279e-01 6.38325706e-01
+ -3.97727049e-01 2.27314759e-01
+ 3.36098175e+00 1.12165237e+00
+ 1.77804871e+00 6.46961933e-01
+ -2.86945546e+00 -1.00395518e+00
+ 3.03494815e+00 7.51814612e-01
+ -1.43658194e+00 -3.55432244e-01
+ -3.08455105e+00 -1.51535106e+00
+ -1.55841975e+00 3.93454820e-02
+ 7.96073412e-01 -3.11036969e-01
+ -9.84125401e-01 -1.02064649e+00
+ -7.75688143e+00 -3.65219926e+00
+ 1.53816429e+00 7.65926670e-01
+ -4.92712738e-01 2.32244240e-02
+ -1.93166919e+00 -1.07701304e+00
+ 2.03029875e-02 -7.54055699e-01
+ 2.52177489e+00 1.01544979e+00
+ 3.65109048e-01 -9.48328494e-01
+ -1.28849143e-01 2.51947174e-01
+ -1.02428075e+00 -9.37767116e-01
+ -3.04179748e+00 -9.97926994e-01
+ -2.51986980e+00 -1.69117413e+00
+ -1.24900838e+00 -4.16179917e-01
+ 2.77943992e+00 1.22842327e+00
+ -4.37434557e+00 -1.70182693e+00
+ -1.60019319e+00 -4.18345639e-01
+ -1.67613646e+00 -9.44087262e-01
+ -9.00843245e-01 8.26378089e-02
+ 3.29770621e-01 -9.07870444e-01
+ -2.84650535e+00 -9.00155396e-01
+ 1.57111705e+00 7.07432268e-01
+ 1.24948552e+00 1.04812849e-01
+ 1.81440558e+00 9.53545082e-01
+ -1.74915794e+00 -1.04606288e+00
+ 1.20593269e+00 -1.12607147e-02
+ 1.36004919e-01 -1.09828044e+00
+ 2.57480693e-01 3.34941541e-01
+ 7.78775385e-01 -5.32494732e-01
+ -1.79155126e+00 -6.29994129e-01
+ -1.75706839e+00 -8.35100126e-01
+ 4.29512012e-01 7.81426910e-02
+ 3.08349370e-01 -1.27359861e-01
+ 1.05560329e+00 4.55150640e-01
+ 1.95662574e+00 1.17593217e+00
+ 8.77376632e-01 6.57866662e-01
+ 7.71311255e-01 9.15134334e-02
+ -6.36978275e+00 -2.55874241e+00
+ -2.98335339e+00 -1.59567024e+00
+ -3.67104587e-01 1.85315291e-01
+ 1.95347407e+00 -7.15503113e-02
+ 8.45556363e-01 6.51256415e-02
+ 9.42868521e-01 3.56647624e-01
+ 2.99321875e+00 1.07505254e+00
+ -2.91030538e-01 -3.77637183e-01
+ 1.62870918e+00 3.37563671e-01
+ 2.05773173e-01 3.43337416e-01
+ -8.40879199e-01 -1.35600767e-01
+ 1.38101624e+00 5.99253495e-01
+ -6.93715607e+00 -2.63580662e+00
+ -1.04423404e+00 -8.32865050e-01
+ 1.33448476e+00 1.04863475e+00
+ 6.01675207e-01 1.98585194e-01
+ 2.31233993e+00 7.98628331e-01
+ 1.85201313e-01 -1.76070247e+00
+ 1.92006354e+00 8.45737582e-01
+ 1.06320415e+00 2.93426068e-01
+ -1.20360141e+00 -1.00301288e+00
+ 1.95926629e+00 6.26643532e-01
+ 6.04483978e-02 5.72643059e-01
+ -1.04568563e+00 -5.91021496e-01
+ 2.62300678e+00 9.50997831e-01
+ -4.04610275e-01 3.73150879e-01
+ 2.26371902e+00 8.73627529e-01
+ 2.12545313e+00 7.90640352e-01
+ 7.72181917e-03 1.65718952e-02
+ 1.00422340e-01 -2.05562936e-01
+ -1.22989802e+00 -1.01841681e-01
+ 3.09064082e+00 1.04288010e+00
+ 5.18274167e+00 1.34749259e+00
+ -8.32075153e-01 -1.97592029e-01
+ 3.84126764e-02 5.58171345e-01
+ 4.99560727e-01 -4.26154438e-02
+ 4.79071151e+00 2.19728942e+00
+ -2.78437968e+00 -1.17812590e+00
+ -2.22804226e+00 -4.31174255e-01
+ 8.50762292e-01 -1.06445261e-01
+ 1.10812830e+00 -2.59118812e-01
+ -2.91450155e-01 6.42802679e-01
+ -1.38631532e-01 -5.88585623e-01
+ -5.04120983e-01 -2.17094915e-01
+ 3.41410820e+00 1.67897767e+00
+ -2.23697326e+00 -6.62735244e-01
+ -3.55961064e-01 -1.27647226e-01
+ -3.55568274e+00 -2.49011369e+00
+ -8.77586408e-01 -9.38268065e-03
+ 1.52382384e-01 -5.62155760e-01
+ 1.55885574e-01 1.07617069e-01
+ -8.37129973e-01 -5.22259081e-01
+ -2.92741750e+00 -1.35049428e+00
+ -3.54670781e-01 5.69205952e-02
+ 2.21030255e+00 1.34689986e+00
+ 1.60787722e+00 5.75984706e-01
+ 1.32294221e+00 5.31577509e-01
+ 7.05672928e-01 3.34241244e-01
+ 1.41406179e+00 1.15783408e+00
+ -6.92172228e-01 -2.84817896e-01
+ 3.28358655e-01 -2.66910083e-01
+ 1.68013644e-01 -4.28016549e-02
+ 2.07365974e+00 7.76496211e-01
+ -3.92974907e-01 2.46796730e-01
+ -5.76078636e-01 3.25676963e-01
+ -1.82547204e-01 -5.06410543e-01
+ 3.04754906e+00 1.16174496e+00
+ -3.01090632e+00 -1.09195183e+00
+ -1.44659696e+00 -6.87838682e-01
+ 2.11395861e+00 9.10495785e-01
+ 1.40962871e+00 1.13568678e+00
+ -1.66653234e-01 -2.10012503e-01
+ 3.17456029e+00 9.74502922e-01
+ 2.15944820e+00 8.62807189e-01
+ -3.45418719e+00 -1.33647548e+00
+ -3.41357732e+00 -8.47048920e-01
+ -3.06702448e-01 -6.64280634e-01
+ -2.86930714e-01 -1.35268264e-01
+ -3.15835557e+00 -5.43439253e-01
+ 2.49541440e-01 -4.71733570e-01
+ 2.71933912e+00 4.13308399e-01
+ -2.43787038e+00 -1.08050547e+00
+ -4.90234490e-01 -6.64069865e-01
+ 8.99524451e-02 5.76180541e-01
+ 5.00500404e+00 2.12125521e+00
+ -1.73107940e-01 -2.28506575e-02
+ 5.44938858e-01 -1.29523352e-01
+ 5.13526842e+00 1.68785993e+00
+ 1.70228304e+00 1.02601138e+00
+ 3.58957507e+00 1.54396196e+00
+ 1.85615738e+00 4.92916197e-01
+ 2.55772147e+00 7.88438908e-01
+ -1.57008279e+00 -4.17377300e-01
+ -1.42548604e+00 -3.63684860e-01
+ -8.52026118e-01 2.72052686e-01
+ -5.10563077e+00 -2.35665994e+00
+ -2.95517031e+00 -1.84945297e+00
+ -2.91947959e+00 -1.66016784e+00
+ -4.21462387e+00 -1.41131535e+00
+ 6.59901121e-01 4.87156314e-01
+ -9.75352532e-01 -4.50231285e-01
+ -5.94084444e-01 -1.16922670e+00
+ 7.50554615e-01 -9.83692552e-01
+ 1.07054926e+00 2.77143030e-01
+ -3.88079578e-01 -4.17737309e-02
+ -9.59373733e-01 -8.85454886e-01
+ -7.53560665e-02 -5.16223870e-02
+ 9.84108158e-01 -5.89290700e-02
+ 1.87272961e-01 -4.34238391e-01
+ 6.86509981e-01 -3.15116460e-01
+ -1.07762538e+00 6.58984161e-02
+ 6.09266592e-01 6.91808473e-02
+ -8.30529954e-01 -7.00454791e-01
+ -9.13179464e-01 -6.31712891e-01
+ 7.68744851e-01 1.09840676e+00
+ -1.07606690e+00 -8.78390282e-01
+ -1.71038184e+00 -5.73606033e-01
+ 8.75982765e-01 3.66343143e-01
+ -7.04919009e-01 -8.49182590e-01
+ -1.00274668e+00 -7.99573611e-01
+ -1.05562848e+00 -5.84060076e-01
+ 4.03490015e+00 1.28679206e+00
+ -3.53484804e+00 -1.71381255e+00
+ 2.31527363e-01 1.04179397e-01
+ -3.58592392e-02 3.74895739e-01
+ 3.92253428e+00 1.81852726e+00
+ -7.27384249e-01 -6.45605128e-01
+ 4.65678097e+00 2.41379899e+00
+ 1.16750534e+00 7.60718205e-01
+ 1.15677059e+00 7.96225550e-01
+ -1.42920261e+00 -4.66946295e-01
+ 3.71148192e+00 1.88060191e+00
+ 2.44052407e+00 3.84472199e-01
+ -1.64535035e+00 -8.94530036e-01
+ -3.69608753e+00 -1.36402754e+00
+ 2.24419208e+00 9.69744889e-01
+ 2.54822427e+00 1.22613039e+00
+ 3.77484909e-01 -5.98521878e-01
+ -3.61521175e+00 -1.11123912e+00
+ 3.28113127e+00 1.52551775e+00
+ -3.51030902e+00 -1.53913980e+00
+ -2.44874505e+00 -6.30246005e-01
+ -3.42516153e-01 -5.07352665e-01
+ 1.09110502e+00 6.36821628e-01
+ -2.49434967e+00 -8.02827146e-01
+ 1.41763139e+00 -3.46591820e-01
+ 1.61108619e+00 5.93871102e-01
+ 3.97371717e+00 1.35552499e+00
+ -1.33437177e+00 -2.83908670e-01
+ -1.41606483e+00 -1.76402601e-01
+ 2.23945322e-01 -1.77157065e-01
+ 2.60271569e+00 2.40778251e-01
+ -2.82213895e-02 1.98255474e-01
+ 4.20727940e+00 1.31490863e+00
+ 3.36944889e+00 1.57566635e+00
+ 3.53049396e+00 1.73579350e+00
+ -1.29170202e+00 -1.64196290e+00
+ 9.27295604e-01 9.98808036e-01
+ 1.75321843e-01 -2.83267817e-01
+ -2.19069578e+00 -1.12814358e+00
+ 1.66606031e+00 7.68006933e-01
+ -7.13826035e-01 5.20881684e-02
+ -3.43821888e+00 -2.36137021e+00
+ -5.93210310e-01 1.21843813e-01
+ -4.09800822e+00 -1.39893953e+00
+ 2.74110954e+00 1.52728606e+00
+ 1.72652512e+00 -1.25435113e-01
+ 1.97722357e+00 6.40667481e-01
+ 4.18635780e-01 3.57018509e-01
+ -1.78303569e+00 -2.11864764e-01
+ -3.52809366e+00 -2.58794450e-01
+ -4.72407090e+00 -1.63870734e+00
+ 1.73917807e+00 8.73251829e-01
+ 4.37979356e-01 8.49210569e-01
+ 3.93791881e+00 1.76269490e+00
+ 2.79065411e+00 1.04019042e+00
+ -8.47426142e-01 -3.40136892e-01
+ -4.24389181e+00 -1.80253120e+00
+ -1.86675870e+00 -7.64558265e-01
+ 9.46212675e-01 -7.77681445e-02
+ -2.82448462e+00 -1.33592449e+00
+ -2.57938567e+00 -1.56554690e+00
+ -2.71615767e+00 -6.27667233e-01
+ -1.55999166e+00 -5.81013466e-01
+ -4.24696864e-01 -7.44673250e-01
+ 1.67592970e+00 7.68164292e-01
+ 8.48455216e-01 -6.05681126e-01
+ 6.12575454e+00 1.65607584e+00
+ 1.38207327e+00 2.39261863e-01
+ 3.13364450e+00 1.17154698e+00
+ 1.71694858e+00 1.26744905e+00
+ -1.61746367e+00 -8.80098073e-01
+ -8.52196756e-01 -9.27299728e-01
+ -1.51562462e-01 -8.36552490e-02
+ -7.04792753e-01 -1.24726713e-02
+ -3.35265757e+00 -1.82176312e+00
+ 3.32173170e-01 -1.33405580e-01
+ 4.95841013e-01 4.58292712e-01
+ 1.57713955e+00 7.79272991e-01
+ 2.09743109e+00 9.23542557e-01
+ 3.90450311e-03 -8.42873164e-01
+ 2.59519038e+00 7.56479591e-01
+ -5.77643976e-01 -2.36401904e-01
+ -5.22310654e-01 1.34187830e-01
+ -2.22096086e+00 -7.75507719e-01
+ 1.35907831e+00 7.80197510e-01
+ 3.80355868e+00 1.16983476e+00
+ 3.82746596e+00 1.31417718e+00
+ 3.30451183e+00 1.55398159e+00
+ -3.42917814e-01 -8.62281222e-02
+ -2.59093020e+00 -9.29883526e-01
+ 1.40928562e+00 1.08398346e+00
+ 1.54400137e-01 3.35881092e-01
+ 1.59171586e+00 1.18855802e+00
+ -5.25164002e-01 -1.03104220e-01
+ 2.20067959e+00 1.37074713e+00
+ 6.97860830e-01 6.27718548e-01
+ -4.59743507e-01 1.36061163e-01
+ -1.04691963e-01 -2.16271727e-01
+ -1.08905573e+00 -5.95510769e-01
+ -1.00826983e+00 -5.38509162e-02
+ -3.16402719e+00 -1.33414216e+00
+ 1.47870874e-01 1.75234619e-01
+ -2.57078234e-01 7.03316889e-02
+ 1.81073945e+00 4.26901462e-01
+ 2.65476530e+00 6.74217273e-01
+ 1.27539811e+00 6.22914081e-01
+ -3.76750499e-01 -1.20629449e+00
+ 1.00177595e+00 -1.40660091e-01
+ -2.98919265e+00 -1.65145013e+00
+ -2.21557682e+00 -8.11123452e-01
+ -3.22635378e+00 -1.65639056e+00
+ -2.72868553e+00 -1.02812087e+00
+ 1.26042797e+00 8.49005248e-01
+ -9.38318534e-01 -9.87588651e-01
+ 3.38013194e-01 -1.00237461e-01
+ 1.91175691e+00 8.48716369e-01
+ 4.30244344e-01 6.05539915e-02
+ 2.21783435e+00 3.03268204e-01
+ 1.78019576e+00 1.27377108e+00
+ 1.59733274e+00 4.40674687e-02
+ 3.97428484e+00 2.20881566e+00
+ -2.41108677e+00 -6.01410418e-01
+ -2.50796499e+00 -5.71169866e-01
+ -3.71957427e+00 -1.38195726e+00
+ -1.57992670e+00 1.32068593e-01
+ -1.35278851e+00 -6.39349270e-01
+ 1.23075932e+00 2.40445409e-01
+ 1.35606530e+00 4.33180078e-01
+ 9.60968518e-02 2.26734255e-01
+ 6.22975063e-01 5.03431915e-02
+ -1.47624851e+00 -3.60568238e-01
+ -2.49337808e+00 -1.15083052e+00
+ 2.15717792e+00 1.03071559e+00
+ -3.07814376e-02 1.38700314e-02
+ 4.52049499e-02 -4.86409775e-01
+ 2.58231061e+00 1.14327809e-01
+ 1.10999138e+00 -5.18568405e-01
+ -2.19426443e-01 -5.37505538e-01
+ -4.44740298e-01 6.78099955e-01
+ 4.03379080e+00 1.49825720e+00
+ -5.13182408e-01 -4.90201950e-01
+ -6.90139716e-01 1.63875126e-01
+ -8.17281461e-01 2.32155064e-01
+ -2.92357619e-01 -8.02573544e-01
+ -1.80769841e+00 -7.58907326e-01
+ 2.16981590e+00 1.06728873e+00
+ 1.98995203e-01 -6.84176682e-02
+ -2.39546753e+00 -2.92873789e-01
+ -4.24251021e+00 -1.46255564e+00
+ -5.01411291e-01 -5.95712813e-03
+ 2.68085809e+00 1.42883780e+00
+ -4.13289873e+00 -1.62729388e+00
+ 1.87957843e+00 3.63341638e-01
+ -1.15270744e+00 -3.03563774e-01
+ -4.43994248e+00 -2.97323905e+00
+ -7.17067733e-01 -7.08349542e-01
+ -3.28870393e+00 -1.19263863e+00
+ -7.55325944e-01 -5.12703329e-01
+ -2.07291938e+00 -2.65025085e-01
+ -7.50073814e-01 -1.70771041e-01
+ -8.77381404e-01 -5.47417325e-01
+ -5.33725862e-01 5.15837119e-01
+ 8.45056431e-01 2.82125560e-01
+ -1.59598637e+00 -1.38743235e+00
+ 1.41362902e+00 1.06407789e+00
+ 1.02584504e+00 -3.68219466e-01
+ -1.04644488e+00 -1.48769392e-01
+ 2.66990191e+00 8.57633492e-01
+ -1.84251857e+00 -9.82430175e-01
+ 9.71404204e-01 -2.81934209e-01
+ -2.50177989e+00 -9.21260335e-01
+ -1.31060074e+00 -5.84488113e-01
+ -2.12129400e-01 -3.06244708e-02
+ -5.28933882e+00 -2.50663129e+00
+ 1.90220541e+00 1.08662918e+00
+ -3.99366086e-02 -6.87178973e-01
+ -4.93417342e-01 4.37354182e-01
+ 2.13494486e+00 1.37679569e+00
+ 2.18396765e+00 5.81023868e-01
+ -3.07866587e+00 -1.45384974e+00
+ 6.10894119e-01 -4.17050124e-01
+ -1.88766952e+00 -8.86160058e-01
+ 3.34527253e+00 1.78571260e+00
+ 6.87769059e-01 -5.01157336e-01
+ 2.60470837e+00 1.45853560e+00
+ -6.49315691e-01 -9.16112805e-01
+ -1.29817687e+00 -2.15924339e-01
+ -1.20100409e-03 -4.03137422e-01
+ -1.36471594e+00 -6.93266356e-01
+ 1.38682062e+00 7.15131598e-01
+ 2.47830103e+00 1.24862305e+00
+ -2.78288147e+00 -1.03329235e+00
+ -7.33443403e-01 -6.11041652e-01
+ -4.12745671e-01 -5.96133390e-02
+ -2.58632336e+00 -4.51557058e-01
+ -1.16570367e+00 -1.27065510e+00
+ 2.76187104e+00 2.21895451e-01
+ -3.80443767e+00 -1.66319902e+00
+ 9.84658633e-01 6.81475569e-01
+ 9.33814584e-01 -4.89335563e-02
+ -4.63427997e-01 1.72989539e-01
+ 1.82401546e+00 3.60164021e-01
+ -5.36521077e-01 -8.08691351e-01
+ -1.37367030e+00 -1.02126160e+00
+ -3.70310682e+00 -1.19840844e+00
+ -1.51894242e+00 -3.89510223e-01
+ -3.67347940e-01 -3.25540516e-02
+ -1.00988595e+00 1.82802194e-01
+ 2.01622795e+00 7.86367901e-01
+ 1.02440231e+00 8.79780360e-01
+ -3.05971480e+00 -8.40901527e-01
+ 2.73909457e+00 1.20558628e+00
+ 2.39559056e+00 1.10786694e+00
+ 1.65471544e+00 7.33824651e-01
+ 2.18546787e+00 6.41168955e-01
+ 1.47152266e+00 3.91839132e-01
+ 1.45811155e+00 5.21820495e-01
+ -4.27531469e-02 -3.52343068e-03
+ -9.54948010e-01 -1.52313876e-01
+ 7.57151215e-01 -5.68728854e-03
+ -8.46205751e-01 -7.54580229e-01
+ 4.14493548e+00 1.45532780e+00
+ 4.58688968e-01 -4.54012803e-02
+ -1.49295381e+00 -4.57471758e-01
+ 1.80020351e+00 8.13724973e-01
+ -5.82727738e+00 -2.18269581e+00
+ -2.09017809e+00 -1.18305177e+00
+ -2.31628303e+00 -7.21600235e-01
+ -8.09679091e-01 -1.49101752e-01
+ 8.88005605e-01 8.57940857e-01
+ -1.44148219e+00 -3.10926299e-01
+ 3.68828186e-01 -3.08848059e-01
+ -6.63267389e-01 -8.58950139e-02
+ -1.14702569e+00 -6.32147854e-01
+ -1.51741715e+00 -8.53330564e-01
+ -1.33903718e+00 -1.45875547e-01
+ 4.12485387e+00 1.85620435e+00
+ -2.42353639e+00 -2.92669850e-01
+ 1.88708583e+00 9.35984730e-01
+ 2.15585179e+00 6.30469051e-01
+ -1.13627973e-01 -1.62554045e-01
+ 2.04540494e+00 1.36599834e+00
+ 2.81591381e+00 1.60897941e+00
+ 3.02736260e-02 3.83255815e-03
+ 7.97634013e-02 -2.82035099e-01
+ -3.24607473e-01 -5.30065956e-01
+ -3.91862894e+00 -1.94083334e+00
+ 1.56360901e+00 7.93882743e-01
+ -1.03905772e+00 6.25590229e-01
+ 2.54746492e+00 1.64233560e+00
+ -4.80774423e-01 -8.92298032e-02
+ 9.06979990e-02 1.05020427e+00
+ -2.47521290e+00 -1.78275982e-01
+ -3.91871729e-01 3.80285423e-01
+ 1.00658382e+00 4.58947483e-01
+ 4.68102941e-01 1.02992741e+00
+ 4.44242568e-01 2.89870239e-01
+ 3.29684452e+00 1.44677474e+00
+ -2.24983007e+00 -9.65574499e-01
+ -3.54453926e-01 -3.99020325e-01
+ -3.87429665e+00 -1.90079739e+00
+ 2.02656674e+00 1.12444894e+00
+ 3.77011621e+00 1.43200852e+00
+ 1.61259275e+00 4.65417399e-01
+ 2.28725434e+00 6.79181395e-01
+ 2.75421009e+00 2.27327345e+00
+ -2.40894409e+00 -1.03926359e+00
+ 1.52996651e-01 -2.73373046e-02
+ -2.63218977e+00 -7.22802821e-01
+ 2.77688169e+00 1.15310186e+00
+ 1.18832341e+00 4.73457165e-01
+ -2.35536326e+00 -1.08034554e+00
+ -5.84221627e-01 1.03505984e-02
+ 2.96730300e+00 1.33478306e+00
+ -8.61947692e-01 6.09137051e-02
+ 8.22343921e-01 -8.14155286e-02
+ 1.75809015e+00 1.07921470e+00
+ 1.19501279e+00 1.05309972e+00
+ -1.75901792e+00 9.75320161e-02
+ 1.64398635e+00 9.54384323e-01
+ -2.21878052e-01 -3.64847144e-01
+ -2.03128968e+00 -8.57866419e-01
+ 1.86750633e+00 7.08524487e-01
+ 8.03972976e-01 3.47404314e-01
+ 3.41203749e+00 1.39810900e+00
+ 4.22397681e-01 -6.41440488e-01
+ -4.88493360e+00 -1.58967816e+00
+ -1.67649284e-01 -1.08485915e-01
+ 2.11489023e+00 1.50506158e+00
+ -1.81639929e+00 -3.85542192e-01
+ 2.24044819e-01 -1.45100577e-01
+ -3.39262411e+00 -1.44394324e+00
+ 1.68706599e+00 2.29199618e-01
+ -1.94093257e+00 -1.65975814e-01
+ 8.28143367e-01 5.92109281e-01
+ -8.29587998e-01 -9.57130831e-01
+ -1.50011401e+00 -8.36802092e-01
+ 2.40770449e+00 9.32820177e-01
+ 7.41391309e-02 3.12878473e-01
+ 1.87745264e-01 6.19231425e-01
+ 9.57622692e-01 -2.20640033e-01
+ 3.18479243e+00 1.02986233e+00
+ 2.43133846e+00 8.41302677e-01
+ -7.09963834e-01 1.99718943e-01
+ -2.88253498e-01 -3.62772094e-01
+ 5.14052574e+00 1.79304595e+00
+ -3.27930993e+00 -1.29177973e+00
+ -1.16723536e+00 1.29519656e-01
+ 1.04801056e+00 3.41508300e-01
+ -3.99256195e+00 -2.51176471e+00
+ -7.62824318e-01 -6.84242153e-01
+ 2.71524986e-02 5.35157164e-02
+ 3.26430102e+00 1.34887262e+00
+ -1.72357766e+00 -4.94524388e-01
+ -3.81149536e+00 -1.28121944e+00
+ 3.36919354e+00 1.10672075e+00
+ -3.14841757e+00 -7.10713767e-01
+ -3.16463676e+00 -7.58558435e-01
+ -2.44745969e+00 -1.08816514e+00
+ 2.79173264e-01 -2.19652051e-02
+ 4.15309883e-01 6.07502790e-01
+ -9.51007417e-01 -5.83976336e-01
+ -1.47929839e+00 -8.39850409e-01
+ 2.38335703e+00 6.16055149e-01
+ -7.47749031e-01 -5.56164928e-01
+ -3.65643622e-01 -5.06684411e-01
+ -1.76634163e+00 -7.86382097e-01
+ 6.76372222e-01 -3.06592181e-01
+ -1.33505058e+00 -1.18301441e-01
+ 3.59660179e+00 2.00424178e+00
+ -7.88912762e-02 8.71956146e-02
+ 1.22656397e+00 1.18149583e+00
+ 4.24919729e+00 1.20082355e+00
+ 2.94607456e+00 1.00676505e+00
+ 7.46061275e-02 4.41761753e-02
+ -2.47738025e-02 1.92737701e-01
+ -2.20509316e-01 -3.79163193e-01
+ -3.50222190e-01 3.58727299e-01
+ -3.64788014e+00 -1.36107312e+00
+ 3.56062799e+00 9.27032742e-01
+ 1.04317289e+00 6.08035970e-01
+ 4.06718718e-01 3.00628051e-01
+ 4.33158086e+00 2.25860714e+00
+ 2.13917145e-01 -1.72757967e-01
+ -1.40637998e+00 -1.14119465e+00
+ 3.61554872e+00 1.87797348e+00
+ 1.01726871e+00 5.70255097e-01
+ -7.04902551e-01 2.16444147e-01
+ -2.51492186e+00 -8.52997369e-01
+ 1.85097530e+00 1.15124496e+00
+ -8.67569714e-01 -3.05682432e-01
+ 8.07550858e-01 5.88901608e-01
+ 1.85186755e-01 -1.94589367e-01
+ -1.23378238e+00 -7.84128347e-01
+ -1.22713161e+00 -4.21218235e-01
+ 2.97751165e-01 2.81055275e-01
+ 4.77703554e+00 1.66265524e+00
+ 2.51549669e+00 7.49980674e-01
+ 2.76510822e-01 1.40456909e-01
+ 1.98740905e+00 -1.79608212e-01
+ 9.35429145e-01 8.44344180e-01
+ -1.20854492e+00 -5.00598453e-01
+ 2.29936219e+00 8.10236668e-01
+ 6.92555544e-01 -2.65891331e-01
+ -1.58050994e+00 2.31237821e-01
+ -1.50864880e+00 -9.49661690e-01
+ -1.27689206e+00 -7.18260016e-01
+ -3.12517127e+00 -1.75587113e+00
+ 8.16062912e-02 -6.56551804e-01
+ -5.02479939e-01 -4.67162543e-01
+ -5.47435788e+00 -2.47799576e+00
+ 1.95872901e-02 5.80874076e-01
+ -1.59064958e+00 -6.34554756e-01
+ -3.77521478e+00 -1.74301790e+00
+ 5.89628224e-01 8.55736553e-01
+ -1.81903543e+00 -7.50011008e-01
+ 1.38557775e+00 3.71490991e-01
+ 9.70032652e-01 -7.11356016e-01
+ 2.63539625e-01 -4.20994771e-01
+ 2.12154222e+00 8.19081400e-01
+ -6.56977937e-01 -1.37810098e-01
+ 8.91309581e-01 2.77864361e-01
+ -7.43693195e-01 -1.46293770e-01
+ 2.24447769e+00 4.00911438e-01
+ -2.25169262e-01 2.04148801e-02
+ 1.68744684e+00 9.47573007e-01
+ 2.73086373e-01 3.30877195e-01
+ 5.54294414e+00 2.14198009e+00
+ -8.49238733e-01 3.65603298e-02
+ 2.39685712e+00 1.17951039e+00
+ -2.58230528e+00 -5.52116673e-01
+ 2.79785277e+00 2.88833717e-01
+ -1.96576188e-01 1.11652123e+00
+ -4.69383301e-01 1.96496282e-01
+ -1.95011845e+00 -6.15235169e-01
+ 1.03379890e-02 2.33701239e-01
+ 4.18933607e-01 2.77939814e-01
+ -1.18473337e+00 -4.10051126e-01
+ -7.61499744e-01 -1.43658094e+00
+ -1.65586092e+00 -3.41615303e-01
+ -5.58523700e-02 -5.21837080e-01
+ -2.40331088e+00 -2.64521583e-01
+ 2.24925206e+00 6.79843335e-02
+ 1.46360479e+00 1.04271443e+00
+ -3.09255443e+00 -1.82548953e+00
+ 2.11325841e+00 1.14996627e+00
+ -8.70657797e-01 1.02461839e-01
+ -5.71056521e-01 9.71232588e-02
+ -3.37870752e+00 -1.54091877e+00
+ 1.03907189e+00 -1.35661392e-01
+ 8.40057486e-01 6.12172413e-02
+ -1.30998234e+00 -1.34077226e+00
+ 7.53744974e-01 1.49447350e-01
+ 9.13995056e-01 -1.81227962e-01
+ 2.28386229e-01 3.74498520e-01
+ 2.54829151e-01 -2.88802704e-01
+ 1.61709009e+00 2.09319193e-01
+ -1.12579380e+00 -5.95955338e-01
+ -2.69610726e+00 -2.76222736e-01
+ -2.63773329e+00 -7.84491970e-01
+ -2.62167427e+00 -1.54792874e+00
+ -4.80639856e-01 -1.30582102e-01
+ -1.26130891e+00 -8.86841840e-01
+ -1.24951950e+00 -1.18182622e+00
+ -1.40107574e+00 -9.13695575e-01
+ 4.99872179e-01 4.69014702e-01
+ -2.03550193e-02 -1.48859738e-01
+ -1.50189069e+00 -2.97714278e-02
+ -2.07846113e+00 -7.29937809e-01
+ -5.50576792e-01 -7.03151525e-01
+ -3.88069238e+00 -1.63215295e+00
+ 2.97032988e+00 6.43571144e-01
+ -1.85999273e-01 1.18107620e+00
+ 1.79249709e+00 6.65356160e-01
+ 2.68842472e+00 1.35703255e+00
+ 1.07675417e+00 1.39845588e-01
+ 8.01226349e-01 2.11392275e-01
+ 9.64329379e-01 3.96146195e-01
+ -8.22529511e-01 1.96080831e-01
+ 1.92481841e+00 4.62985744e-01
+ 3.69756927e-01 3.77135799e-01
+ 1.19807835e+00 8.87715050e-01
+ -1.01363587e+00 -2.48151636e-01
+ 8.53071010e-01 4.96887868e-01
+ -3.41120553e+00 -1.35401843e+00
+ -2.64787381e+00 -1.08690563e+00
+ -1.11416759e+00 -4.43848915e-01
+ 1.46242648e+00 6.17106076e-02
+ -7.52968881e-01 -9.20972209e-01
+ -1.22492228e+00 -5.40327617e-01
+ 1.08001827e+00 5.29593785e-01
+ -2.58706464e-01 1.13022085e-01
+ -4.27394011e-01 1.17864354e-02
+ -3.20728413e+00 -1.71224737e-01
+ 1.71398530e+00 8.68885893e-01
+ 2.12067866e+00 1.45092772e+00
+ 4.32782616e-01 -3.34117769e-01
+ 7.80084374e-01 -1.35100217e-01
+ -2.05547729e+00 -4.70217750e-01
+ 2.38379736e+00 1.09186058e+00
+ -2.80825477e+00 -1.03320187e+00
+ 2.63434576e+00 1.15671733e+00
+ -1.60936214e+00 1.91843035e-01
+ -5.02298769e+00 -2.32820708e+00
+ 1.90349195e+00 1.45215416e+00
+ 3.00232888e-01 3.24412586e-01
+ -2.46503943e+00 -1.19550010e+00
+ 1.06304233e+00 2.20136246e-01
+ -2.99101388e+00 -1.58299318e+00
+ 2.30071719e+00 1.12881362e+00
+ -2.37587247e+00 -8.08298336e-01
+ 7.27006308e-01 3.80828984e-01
+ 2.61199061e+00 1.56473491e+00
+ 8.33936357e-01 -1.42189425e-01
+ 3.13291605e+00 1.77771210e+00
+ 2.21917371e+00 5.68427075e-01
+ 2.38867649e+00 9.06637262e-01
+ -6.92959466e+00 -3.57682881e+00
+ 2.57904824e+00 5.93959108e-01
+ 2.71452670e+00 1.34436199e+00
+ 4.39988761e+00 2.13124672e+00
+ 5.71783077e-01 5.08346173e-01
+ -3.65399429e+00 -1.18192861e+00
+ 4.46176453e-01 3.75685594e-02
+ -2.97501495e+00 -1.69459236e+00
+ 1.60855728e+00 9.20930014e-01
+ -1.44270290e+00 -1.93922306e-01
+ 1.67624229e+00 1.66233866e+00
+ -1.42579598e+00 -1.44990145e-01
+ 1.19923176e+00 4.58490278e-01
+ -9.00068460e-01 5.09701825e-02
+ -1.69391694e+00 -7.60070300e-01
+ -1.36576440e+00 -5.24244256e-01
+ -1.03016748e+00 -3.44625878e-01
+ 2.40519313e+00 1.09947587e+00
+ 1.50365433e+00 1.06464802e+00
+ -1.07609727e+00 -3.68897187e-01
+ 2.44969069e+00 1.28486192e+00
+ -1.25610307e+00 -1.14644789e+00
+ 2.05962899e+00 4.31162369e-01
+ -7.15886908e-01 -6.11587804e-02
+ -6.92354119e-01 -7.85019920e-01
+ -1.63016508e+00 -5.96944975e-01
+ 1.90352536e+00 1.28197457e+00
+ -4.01535243e+00 -1.81934488e+00
+ -1.07534435e+00 -2.10544784e-01
+ 3.25500866e-01 7.69603661e-01
+ 2.18443365e+00 6.59773335e-01
+ 8.80856790e-01 6.39505913e-01
+ -2.23956372e-01 -4.65940132e-01
+ -1.06766519e+00 -5.38388505e-03
+ 7.25556863e-01 -2.91123488e-01
+ -4.69451411e-01 7.89182650e-02
+ 2.58146587e+00 1.29653243e+00
+ 1.53747468e-01 7.69239075e-01
+ -4.61152262e-01 -4.04151413e-01
+ 1.48183517e+00 8.10079506e-01
+ -1.83402614e+00 -1.36939322e+00
+ 1.49315501e+00 7.95225425e-01
+ 1.41922346e+00 1.05582774e-01
+ 1.57473493e-01 9.70795657e-01
+ -2.67603254e+00 -7.48562280e-01
+ -8.49156216e-01 -6.05762529e-03
+ 1.12944274e+00 3.67741591e-01
+ 1.94228071e-01 5.28188141e-01
+ -3.65610158e-01 4.05851838e-01
+ -1.98839111e+00 -1.38452764e+00
+ 2.73765752e+00 8.24150530e-01
+ 7.63728641e-01 3.51617707e-01
+ 5.78307267e+00 1.68103612e+00
+ 2.27547227e+00 3.60876164e-01
+ -3.50681697e+00 -1.74429984e+00
+ 4.01241184e+00 1.26227829e+00
+ 2.44946343e+00 9.06119057e-01
+ -2.96638941e+00 -9.01532322e-01
+ 1.11267643e+00 -3.43333381e-01
+ -6.61868994e-01 -3.44666391e-01
+ -8.34917179e-01 5.69478372e-01
+ -1.91888454e+00 -3.03791075e-01
+ 1.50397636e+00 8.31961240e-01
+ 6.12260198e+00 2.16851807e+00
+ 1.34093127e+00 8.86649385e-01
+ 1.48748519e+00 8.26273697e-01
+ 7.62243068e-01 2.64841396e-01
+ -2.17604986e+00 -3.54219958e-01
+ 2.64708640e-01 -4.38136718e-02
+ 1.44725372e+00 1.18499914e-01
+ -6.71259446e-01 -1.19526851e-01
+ 2.40134595e-01 -8.90042323e-02
+ -3.57238199e+00 -1.23166201e+00
+ -3.77626645e+00 -1.19533443e+00
+ -3.81101035e-01 -4.94160532e-01
+ -3.02758757e+00 -1.18436066e+00
+ 2.59116298e-01 1.38023047e+00
+ 4.17900116e+00 1.12065959e+00
+ 1.54598848e+00 2.89806755e-01
+ 1.00656475e+00 1.76974511e-01
+ -4.15730234e-01 -6.22681694e-01
+ -6.00903565e-01 -1.43256959e-01
+ -6.03652508e-01 -5.09936379e-01
+ -1.94096658e+00 -9.48789544e-01
+ -1.74464105e+00 -8.50491590e-01
+ 1.17652544e+00 1.88118317e+00
+ 2.35507776e+00 1.44000205e+00
+ 2.63067924e+00 1.06692988e+00
+ 2.88805386e+00 1.23924715e+00
+ 8.27595008e-01 5.75364692e-01
+ 3.91384216e-01 9.72781920e-02
+ -1.03866816e+00 -1.37567768e+00
+ -1.34777969e+00 -8.40266025e-02
+ -4.12904508e+00 -1.67618340e+00
+ 1.27918111e+00 3.52085961e-01
+ 4.15361174e-01 6.28896189e-01
+ -7.00539496e-01 4.80447955e-02
+ -1.62332639e+00 -5.98236485e-01
+ 1.45957300e+00 1.00305154e+00
+ -3.06875603e+00 -1.25897545e+00
+ -1.94708176e+00 4.85143006e-01
+ 3.55744156e+00 -1.07468822e+00
+ 1.21602223e+00 1.28768827e-01
+ 1.89093098e+00 -4.70835659e-01
+ -6.55759125e+00 2.70114082e+00
+ 8.96843535e-01 -3.98115252e-01
+ 4.13450429e+00 -2.32069236e+00
+ 2.37764218e+00 -1.09098890e+00
+ -1.11388901e+00 6.27083097e-01
+ -6.34116929e-01 4.62816387e-01
+ 2.90203079e+00 -1.33589143e+00
+ 3.17457598e+00 -5.13575945e-01
+ -1.76362299e+00 5.71820693e-01
+ 1.66103362e+00 -8.99466249e-01
+ -2.53947433e+00 8.40084780e-01
+ 4.36631397e-01 7.24234261e-02
+ -1.87589394e+00 5.08529113e-01
+ 4.49563965e+00 -9.43365992e-01
+ 1.78876299e+00 -1.27076149e+00
+ -1.16269107e-01 -4.55078316e-01
+ 1.92966079e+00 -8.05371385e-01
+ 2.20632583e+00 -9.00919345e-01
+ 1.52387824e+00 -4.82391996e-01
+ 8.04004564e-01 -2.73650595e-01
+ -7.75326067e-01 1.07469566e+00
+ 1.83226282e+00 -4.52173344e-01
+ 1.25079758e-01 -3.52895417e-02
+ -9.90957437e-01 8.55993130e-01
+ 1.71623322e+00 -7.08691667e-01
+ -2.86175924e+00 6.75160955e-01
+ -8.40817853e-01 -1.00361809e-01
+ 1.33393000e+00 -4.65788123e-01
+ 5.29394114e-01 -5.44881619e-02
+ -8.07435599e-01 8.27353370e-01
+ -4.33165824e+00 1.97299638e+00
+ 1.26452422e+00 -8.34070486e-01
+ 1.45996394e-02 2.97736043e-01
+ -1.64489287e+00 6.72839598e-01
+ -5.74234578e+00 3.20975117e+00
+ 2.13841341e-02 3.64514015e-01
+ 6.68084924e+00 -2.27464254e+00
+ -3.22881590e+00 8.01879324e-01
+ 3.02534313e-01 -4.56222796e-01
+ -5.84520734e+00 1.95678162e+00
+ 2.81515232e+00 -1.72101318e+00
+ -2.39620908e-01 2.69145522e-01
+ -7.41669691e-01 -2.30283281e-01
+ -2.15682714e+00 3.45313021e-01
+ 1.23475788e+00 -7.32276553e-01
+ -1.71816113e-01 1.20419560e-02
+ 1.89174235e+00 2.27435901e-01
+ -3.64511114e-01 1.72260361e-02
+ -3.24143860e+00 6.50125817e-01
+ -2.25707409e+00 5.66970751e-01
+ 1.03901456e+00 -1.00588433e+00
+ -5.09159710e+00 1.58736109e+00
+ 1.45534075e+00 -5.83787452e-01
+ 4.28879587e+00 -1.58006866e+00
+ 8.52384427e-01 -1.11042299e+00
+ 4.51431615e+00 -2.63844265e+00
+ -4.33042648e+00 1.86497078e+00
+ -2.13568046e+00 5.82559743e-01
+ -4.42568887e+00 1.26131214e+00
+ 3.15821315e+00 -1.61515905e+00
+ -3.14125204e+00 8.49604386e-01
+ 6.54152300e-01 -2.04624711e-01
+ -3.73374317e-01 9.94187820e-02
+ -3.96177282e+00 1.27245623e+00
+ 9.59825199e-01 -1.15547861e+00
+ 3.56902055e+00 -1.46591091e+00
+ 1.55433633e-02 6.93544345e-01
+ 1.15684646e+00 -4.99836352e-01
+ 3.11824573e+00 -4.75900506e-01
+ -8.61706369e-01 -3.50774059e-01
+ 9.89057391e-01 -7.16878802e-01
+ -4.94787870e+00 2.09137481e+00
+ 1.37777347e+00 -1.34946349e+00
+ -1.13161577e+00 8.05114754e-01
+ 8.12020675e-01 -1.04849421e+00
+ 4.73783881e+00 -2.26718812e+00
+ 8.99579366e-01 -8.89764451e-02
+ 4.78524868e+00 -2.25795843e+00
+ 1.75164590e+00 -1.73822209e-01
+ 1.30204590e+00 -7.26724717e-01
+ -7.26526403e-01 -5.23925361e-02
+ 2.01255351e+00 -1.69965366e+00
+ 9.87852740e-01 -4.63577220e-01
+ 2.45957762e+00 -1.29278962e+00
+ -3.13817948e+00 1.64433038e+00
+ -1.76302159e+00 9.62784302e-01
+ -1.91106331e+00 5.81460008e-01
+ -3.30883001e+00 1.30378978e+00
+ 5.54376450e-01 3.78814272e-01
+ 1.09982111e+00 -1.47969612e+00
+ -2.61300705e-02 -1.42573464e-01
+ -2.22096157e+00 7.75684440e-01
+ 1.70319323e+00 -2.89738444e-01
+ -1.43223842e+00 6.39284281e-01
+ 2.34360959e-01 -1.64379268e-01
+ -2.67147991e+00 9.46548086e-01
+ 1.51131425e+00 -4.91594395e-01
+ -2.48446856e+00 1.01286123e+00
+ 1.50534658e-01 -2.94620246e-01
+ -1.66966792e+00 1.67755508e+00
+ -1.50094241e+00 3.30163095e-01
+ 2.27681194e+00 -1.08064317e+00
+ 2.05122965e+00 -1.15165939e+00
+ -4.23509309e-01 -6.56906167e-02
+ 1.80084023e+00 -1.07228556e+00
+ -2.65769521e+00 1.18023206e+00
+ 2.02852676e+00 -8.06793574e-02
+ -4.49544185e+00 2.68200163e+00
+ -7.50043216e-01 1.17079331e+00
+ 6.80060893e-02 3.99055351e-01
+ -3.83634635e+00 1.38406887e+00
+ 3.24858545e-01 -9.25273218e-02
+ -2.19895100e+00 1.47819500e+00
+ -3.61569522e-01 -1.03188739e-01
+ 1.12180375e-01 -9.52696354e-02
+ -1.31477803e+00 1.79900570e-01
+ 2.39573628e+00 -6.09739269e-01
+ -1.00135700e+00 6.02837296e-01
+ -4.11994589e+00 2.49599192e+00
+ -1.54196236e-01 -4.84921951e-01
+ 5.92569908e-01 -1.87310359e-01
+ 3.85407741e+00 -1.50979925e+00
+ 5.17802528e+00 -2.26032607e+00
+ -1.37018916e+00 1.87111822e-01
+ 8.46682996e-01 -3.56676331e-01
+ -1.17559949e+00 5.29057734e-02
+ -5.56475671e-02 6.79049243e-02
+ 1.07851745e+00 -5.14535101e-01
+ -2.71622446e+00 1.00151846e+00
+ -1.08477208e+00 8.81391054e-01
+ 5.50755824e-01 -5.20577727e-02
+ 4.70885495e+00 -2.04220397e+00
+ -1.87375336e-01 -6.16962830e-02
+ 3.52097100e-01 2.21163550e-01
+ 7.07929984e-01 -1.75827590e-01
+ -1.22149219e+00 1.83084346e-01
+ 2.58247412e+00 -6.15914898e-01
+ -6.01206182e-01 -2.29832987e-01
+ 9.83360449e-01 -3.75870060e-01
+ -3.20027685e+00 1.35467480e+00
+ 1.79178978e+00 -1.38531981e+00
+ -3.30376867e-01 -1.16250192e-01
+ -1.89053055e+00 5.68463567e-01
+ -4.20604849e+00 1.65429681e+00
+ -1.01185529e+00 1.92801240e-01
+ -6.18819882e-01 5.42206996e-01
+ -5.08091672e+00 2.61598591e+00
+ -2.62570344e+00 2.51590658e+00
+ 3.05577906e+00 -1.49090609e+00
+ 2.77609677e+00 -1.37681378e+00
+ -7.93515301e-02 4.28072744e-01
+ -2.08359471e+00 8.94334295e-01
+ 2.20163801e+00 4.01127167e-02
+ -1.18145785e-01 -2.06822464e-01
+ -2.74788298e-01 2.96250607e-01
+ 1.59613555e+00 -3.87246203e-01
+ -3.82971472e-01 -3.39716093e-02
+ -4.20311307e-02 3.88529510e-01
+ 1.52128574e+00 -9.33138876e-01
+ -9.06584458e-01 -2.75016094e-02
+ 3.56216834e+00 -9.99384622e-01
+ 2.11964220e+00 -9.98749118e-02
+ 4.01203480e+00 -2.03032745e+00
+ -1.24171557e+00 1.97596725e-01
+ -1.57230455e+00 4.14126609e-01
+ -1.85484741e+00 5.40041563e-01
+ 1.76329831e+00 -6.95967734e-01
+ -2.29439232e-01 5.08669245e-01
+ -5.45124276e+00 2.26907549e+00
+ -5.71364288e-02 5.04476476e-01
+ 3.12468018e+00 -1.46358879e+00
+ 8.20017359e-01 6.51949028e-01
+ -1.33977500e+00 2.83634232e-04
+ -1.83311685e+00 1.23947117e+00
+ 6.31205922e-01 1.19792164e-02
+ -2.21967834e+00 6.94056232e-01
+ -1.41693842e+00 9.93526233e-01
+ -7.58885703e-01 6.78547347e-01
+ 3.60239086e+00 -1.08644935e+00
+ 6.72217073e-02 3.00036011e-02
+ -3.42680958e-01 -3.48049352e-01
+ 1.87546079e+00 -4.78018246e-01
+ 7.00485821e-01 -3.52905383e-01
+ -8.54580948e-01 8.17330861e-01
+ 8.19123706e-01 -5.73927281e-01
+ 2.70855639e-01 -3.08940052e-01
+ -1.05059952e+00 3.27873168e-01
+ 1.08282999e+00 4.84559349e-02
+ -7.89899220e-01 1.22291138e+00
+ -2.87939816e+00 7.17403497e-01
+ -2.08429452e+00 8.87409226e-01
+ 1.58409232e+00 -4.74123532e-01
+ 1.26882735e+00 1.59162510e-01
+ -2.53782993e+00 6.18253491e-01
+ -8.92757445e-01 3.35979011e-01
+ 1.31867900e+00 -1.17355054e+00
+ 1.14918879e-01 -5.35184038e-01
+ -1.70288738e-01 5.35868087e-02
+ 4.21355121e-01 5.41848690e-02
+ 2.07926943e+00 -5.72538144e-01
+ 4.08788970e-01 3.77655777e-01
+ -3.39631381e+00 9.84216764e-01
+ 2.94170163e+00 -1.83120916e+00
+ -7.94798752e-01 7.39889052e-01
+ 1.46555463e+00 -4.62275563e-01
+ 2.57255955e+00 -1.04671434e+00
+ 8.45042540e-01 -1.96952892e-01
+ -3.23526646e+00 1.60049846e+00
+ 3.21948565e+00 -8.88376674e-01
+ 1.43005104e+00 -9.21561086e-01
+ 8.82360506e-01 2.98403872e-01
+ -8.91168097e-01 1.01319072e+00
+ -5.13215241e-01 -2.47182649e-01
+ -1.35759444e+00 7.07450608e-02
+ -4.04550983e+00 2.23534867e+00
+ 1.39348883e+00 3.81637747e-01
+ -2.85676418e+00 1.53240862e+00
+ -1.37183120e+00 6.37977425e-02
+ -3.88195859e+00 1.73887145e+00
+ 1.19509776e+00 -6.25013512e-01
+ -2.80062734e+00 1.79840585e+00
+ 1.96558429e+00 -4.70997234e-01
+ 1.93111352e+00 -9.70318441e-01
+ 3.57991190e+00 -1.65065116e+00
+ 2.12831714e+00 -1.11531708e+00
+ -3.95661018e-01 -8.54339904e-02
+ -2.41630441e+00 1.65166304e+00
+ 7.55412624e-01 -1.53453579e-01
+ -1.77043450e+00 1.39928715e+00
+ -9.32631260e-01 8.73649199e-01
+ 1.53342205e+00 -8.39569765e-01
+ -6.29846924e-02 1.25023084e-01
+ 3.31509049e+00 -1.10733235e+00
+ -2.18957109e+00 3.07376993e-01
+ -2.35740747e+00 6.47437564e-01
+ -2.22142438e+00 8.47318938e-01
+ -6.51401147e-01 3.48398562e-01
+ 2.75763095e+00 -1.21390708e+00
+ 1.12550484e+00 -5.61412847e-01
+ -5.65053161e-01 6.74365205e-02
+ 1.68952456e+00 -6.57566096e-01
+ 8.95598401e-01 3.96738993e-01
+ -1.86537066e+00 9.44129208e-01
+ -2.59933294e+00 2.57423247e-01
+ -6.59598267e-01 1.91828851e-02
+ -2.64506676e+00 8.41783205e-01
+ -1.25911802e+00 5.52425066e-01
+ -1.39754507e+00 3.73689222e-01
+ 5.49550729e-02 1.35071215e+00
+ 3.31874811e+00 -1.05682424e+00
+ 3.63159604e+00 -1.42864695e+00
+ -4.45944617e+00 1.42889446e+00
+ 5.87314342e-01 -4.88892988e-01
+ -7.26130820e-01 1.51936106e-01
+ -1.79246441e+00 6.05888105e-01
+ -5.50948207e-01 6.21443081e-01
+ -3.17246063e-01 1.77213880e-01
+ -2.00098937e+00 1.23799074e+00
+ 4.33790961e+00 -1.08490465e+00
+ -2.03114114e+00 1.31613237e+00
+ -6.29216542e+00 1.92406317e+00
+ -1.60265624e+00 8.87947500e-01
+ 8.64465062e-01 -8.37416270e-01
+ -2.14273937e+00 8.05485900e-01
+ -2.36844256e+00 6.17915124e-01
+ -1.40429636e+00 6.78296866e-01
+ 9.99019988e-01 -5.84297572e-01
+ 7.38824546e-01 1.68838678e-01
+ 1.45681238e+00 3.04641461e-01
+ 2.15914949e+00 -3.43089227e-01
+ -1.23895930e+00 1.05339864e-01
+ -1.23162264e+00 6.46629863e-01
+ 2.28183862e+00 -9.24157063e-01
+ -4.29615882e-01 5.69130863e-01
+ -1.37449121e+00 -9.12032183e-01
+ -7.33890904e-01 -3.91865471e-02
+ 8.41400661e-01 -4.76002200e-01
+ -1.73349274e-01 -6.84143467e-02
+ 3.16042891e+00 -1.32651856e+00
+ -3.78244609e+00 2.38619718e+00
+ -3.69634380e+00 2.22368561e+00
+ 1.83766344e+00 -1.65675953e+00
+ -1.63206002e+00 1.19484469e+00
+ 3.68480064e-01 -5.70764494e-01
+ 3.61982479e-01 1.04274409e-01
+ 2.48863048e+00 -1.13285542e+00
+ -2.81896488e+00 9.47958768e-01
+ 5.74952901e-01 -2.75959392e-01
+ 3.72783275e-01 -3.48937848e-01
+ 1.95935716e+00 -1.06750415e+00
+ 5.19357531e+00 -2.32070803e+00
+ 4.09246149e+00 -1.89976700e+00
+ -3.36666087e-01 8.17645057e-02
+ 1.85453493e-01 3.76913151e-01
+ -3.06458262e+00 1.34106402e+00
+ -3.13796566e+00 7.00485099e-01
+ 1.42964058e+00 -1.35536932e-01
+ -1.23440423e-01 4.60094177e-02
+ -2.86753037e+00 -5.21724160e-02
+ 2.67113726e+00 -1.83746924e+00
+ -1.35335062e+00 1.28238073e+00
+ -2.43569899e+00 1.25998539e+00
+ 1.26036740e-01 -2.35416844e-01
+ -1.35725745e+00 7.37788491e-01
+ -3.80897538e-01 3.30757889e-01
+ 6.58694434e-01 -1.07566603e+00
+ 2.11273640e+00 -9.02260632e-01
+ 4.00755057e-01 -2.49229150e-02
+ -1.80095812e+00 9.73099742e-01
+ -2.68408372e+00 1.63737364e+00
+ -2.66079826e+00 7.47289412e-01
+ -9.92321439e-02 -1.49331396e-01
+ 4.45678251e+00 -1.80352394e+00
+ 1.35962915e+00 -1.31554389e+00
+ -7.76601417e-01 -9.66173523e-02
+ 1.68096348e+00 -6.27235133e-01
+ 1.53081227e-01 -3.54216830e-01
+ -1.54913095e+00 3.43689269e-01
+ 5.29187357e-02 -6.73916964e-01
+ -2.06606084e+00 8.34784242e-01
+ 1.73701179e+00 -6.06467340e-01
+ 1.55856757e+00 -2.58642780e-01
+ 1.04349101e+00 -4.43027348e-01
+ -1.02397719e+00 1.01308824e+00
+ -2.13860204e-01 -4.73347361e-01
+ -2.59004955e+00 1.43367853e+00
+ 7.98457679e-01 2.18621627e-02
+ -1.32974762e+00 4.61802208e-01
+ 3.21419359e-01 2.30723316e-02
+ 2.87201888e-02 6.24566672e-02
+ -1.22261418e+00 6.02340363e-01
+ 1.28750335e+00 -3.34839548e-02
+ -9.67952623e-01 4.34470505e-01
+ 2.02850324e+00 -9.05160255e-01
+ -4.13946010e+00 2.33779091e+00
+ -4.47508806e-01 3.06440495e-01
+ -3.91543394e+00 1.68251022e+00
+ -6.45193001e-01 5.29781162e-01
+ -2.15518916e-02 5.07278355e-01
+ -2.83356868e+00 1.00670227e+00
+ 1.82989749e+00 -1.37329222e+00
+ -1.09330213e+00 1.08560688e+00
+ 1.90533722e+00 -1.28905879e+00
+ 2.33986084e+00 2.30642626e-02
+ 8.01940220e-01 -1.63986962e+00
+ -4.23415165e+00 2.07530423e+00
+ 9.33382522e-01 -7.62917211e-01
+ -1.84033954e+00 1.07469401e+00
+ -2.81938669e+00 1.07342024e+00
+ -7.05169988e-01 2.13124943e-01
+ 5.09598137e-01 1.32725493e-01
+ -2.34558226e+00 8.62383168e-01
+ -1.70322072e+00 2.70893796e-01
+ 1.23652660e+00 -7.53216034e-02
+ 2.84660646e+00 -3.48178304e-02
+ 2.50250128e+00 -1.27770855e+00
+ -1.00279469e+00 8.77194218e-01
+ -4.34674121e-02 -2.12091350e-01
+ -5.84151289e-01 1.50382340e-01
+ -1.79024013e+00 4.24972808e-01
+ -1.23434666e+00 -8.85546570e-02
+ 1.36575412e+00 -6.42639880e-01
+ -1.98429947e+00 2.27650336e-01
+ 2.36253589e+00 -1.51340773e+00
+ 8.79157643e-01 6.84142159e-01
+ -2.18577755e+00 2.76526200e-01
+ -3.55473434e-01 8.29976561e-01
+ 1.16442595e+00 -5.97699411e-01
+ -7.35528097e-01 2.40318183e-01
+ -1.73702631e-01 7.33788663e-02
+ -1.40451745e+00 3.24899628e-01
+ -2.05434385e+00 5.68123738e-01
+ 8.47876642e-01 -5.74224294e-01
+ -6.91955602e-01 1.26009087e+00
+ 2.56574498e+00 -1.15602581e+00
+ 3.93306545e+00 -1.38398209e+00
+ -2.73230251e+00 4.89062581e-01
+ -1.04315474e+00 6.06335547e-01
+ 1.23231431e+00 -4.46675065e-01
+ -3.93035285e+00 1.43287651e+00
+ -1.02132111e+00 9.58919791e-01
+ -1.49425352e+00 1.06456165e+00
+ -6.26485337e-01 1.03791402e+00
+ -6.61772998e-01 2.63275425e-01
+ -1.80940386e+00 5.70767403e-01
+ 9.83720450e-01 -1.39449756e-01
+ -2.24619662e+00 9.01044870e-01
+ 8.94343014e-01 5.31038678e-02
+ 1.95518199e-01 -2.81343295e-01
+ -2.30533019e-01 -1.74478106e-01
+ -2.01550361e+00 5.55958010e-01
+ -4.36281469e+00 1.94374226e+00
+ -5.18530457e+00 2.89278357e+00
+ 2.67289101e+00 -2.98511449e-01
+ -1.53566179e+00 -1.00588944e-01
+ -6.09943217e-02 -1.56986047e-01
+ -5.22146452e+00 1.66209208e+00
+ -3.69777478e+00 2.26154873e+00
+ 2.24607181e-01 -4.86934960e-01
+ 2.49909450e+00 -1.03033370e+00
+ -1.07841120e+00 8.22388054e-01
+ -3.20697089e+00 1.09536143e+00
+ 3.43524232e+00 -1.47289362e+00
+ -5.65784134e-01 4.60365175e-01
+ -1.76714734e+00 1.57752346e-01
+ -7.77620365e-01 5.60153443e-01
+ 6.34399352e-01 -5.22339836e-01
+ 2.91011875e+00 -9.72623380e-01
+ -1.19286824e+00 6.32370253e-01
+ -2.18327609e-01 8.23953181e-01
+ 3.42430842e-01 1.37098055e-01
+ 1.28658034e+00 -9.11357320e-01
+ 2.06914465e+00 -6.67556382e-01
+ -6.69451020e-01 -6.38605102e-01
+ -2.09312398e+00 1.16743634e+00
+ -3.63778357e+00 1.91919157e+00
+ 8.74685911e-01 -1.09931208e+00
+ -3.91496791e+00 1.00808357e+00
+ 1.29621330e+00 -8.32239802e-01
+ 9.00222045e-01 -1.31159793e+00
+ -1.12242062e+00 1.98517079e-01
+ -3.71932852e-01 1.31667093e-01
+ -2.23829610e+00 1.26328346e+00
+ -2.08365062e+00 9.93385336e-01
+ -1.91082720e+00 7.45866855e-01
+ 4.38024917e+00 -2.05901118e+00
+ -2.28872886e+00 6.85279335e-01
+ 1.01274497e-01 -3.26227153e-01
+ -5.04447572e-01 -3.18619513e-01
+ 1.28537006e+00 -1.04573551e+00
+ -7.83175212e-01 1.54791645e-01
+ -3.89239175e+00 1.60017929e+00
+ -8.87877111e-01 -1.04968005e-01
+ 9.32215179e-01 -5.58691113e-01
+ -6.44977127e-01 -2.23018375e-01
+ 1.10141900e+00 -1.00666432e+00
+ 2.92755687e-01 -1.45480350e-01
+ 7.73580681e-01 -2.21150567e-01
+ -1.40873709e+00 7.61548044e-01
+ -8.89031805e-01 -3.48542923e-01
+ 4.16844267e-01 -2.39914494e-01
+ -4.64265832e-01 7.29581138e-01
+ 1.99835179e+00 -7.70542813e-01
+ 4.20523191e-02 -2.18783563e-01
+ -6.32611758e-01 -3.09926115e-01
+ 6.82912198e-02 -8.48327050e-01
+ 1.92425229e+00 -1.37876951e+00
+ 3.49461782e+00 -1.88354255e+00
+ -3.25209026e+00 1.49809395e+00
+ 6.59273182e-01 -2.37435654e-01
+ -1.15517300e+00 8.46134387e-01
+ 1.26756151e+00 -4.58988026e-01
+ -3.99178418e+00 2.04153008e+00
+ 7.05687841e-01 -6.83433306e-01
+ -1.61997342e+00 8.16577004e-01
+ -3.89750399e-01 4.29753250e-01
+ -2.53026432e-01 4.92861432e-01
+ -3.16788324e+00 4.44285524e-01
+ -7.86248901e-01 1.12753716e+00
+ -3.02351433e+00 1.28419015e+00
+ -1.30131355e+00 1.71226678e+00
+ -4.08843475e+00 1.62063214e+00
+ -3.09209403e+00 1.19958520e+00
+ 1.49102271e+00 -1.11834864e+00
+ -3.18059348e+00 5.74587042e-01
+ 2.06054867e+00 3.25797860e-03
+ -3.50999200e+00 2.02412428e+00
+ -8.26610023e-01 3.46528211e-01
+ 2.00546034e+00 -4.07333110e-01
+ -9.69941653e-01 4.80953753e-01
+ 4.47925660e+00 -2.33127314e+00
+ 2.03845790e+00 -9.90439915e-01
+ -1.11349191e+00 4.31183918e-01
+ -4.03628396e+00 1.68509679e+00
+ -1.48177601e+00 7.74322088e-01
+ 3.07369385e+00 -9.57465886e-01
+ 2.39011286e+00 -6.44506921e-01
+ 2.91561991e+00 -8.78627328e-01
+ 1.10212733e+00 -4.21637388e-01
+ 5.31985231e-01 -6.17445696e-01
+ -6.82340929e-01 -2.93529716e-01
+ 1.94290679e+00 -4.64268634e-01
+ 1.92262116e+00 -7.93142835e-01
+ 4.73762800e+00 -1.63654174e+00
+ -3.17848641e+00 8.05791391e-01
+ 4.08739432e+00 -1.80816807e+00
+ -7.60648826e-01 1.24216138e-01
+ -2.24716400e+00 7.90020937e-01
+ 1.64284052e+00 -7.18784070e-01
+ 1.04410012e-01 -7.11195880e-02
+ 2.18268225e+00 -7.01767831e-01
+ 2.06218013e+00 -8.70251746e-01
+ -1.35266581e+00 7.08456358e-01
+ -1.38157779e+00 5.14401086e-01
+ -3.28326008e+00 1.20988399e+00
+ 8.85358917e-01 -8.12213495e-01
+ -2.34067500e+00 3.67657353e-01
+ 3.96878127e+00 -1.66841450e+00
+ 1.36518053e+00 -8.33436812e-01
+ 5.25771988e-01 -5.06121987e-01
+ -2.25948361e+00 1.30663765e+00
+ -2.57662070e+00 6.32114628e-01
+ -3.43134685e+00 2.38106008e+00
+ 2.31571924e+00 -1.56566818e+00
+ -2.95397202e+00 1.05661888e+00
+ -1.35331242e+00 6.76383411e-01
+ 1.40977132e+00 -1.17775938e+00
+ 1.52561996e+00 -9.83147176e-01
+ 2.26550832e+00 -2.10464123e-02
+ 6.23371684e-01 -5.30768122e-01
+ -4.42356624e-01 9.72226986e-01
+ 2.31517901e+00 -1.08468105e+00
+ 1.97236640e+00 -1.42016619e+00
+ 3.18618687e+00 -1.45056343e+00
+ -2.75880360e+00 5.40254980e-01
+ -1.92916581e+00 1.45029864e-01
+ 1.90022524e+00 -6.03805754e-01
+ -1.05446211e+00 5.74361752e-01
+ 1.45990390e+00 -9.28233993e-01
+ 5.14960557e+00 -2.07564096e+00
+ -7.53104842e-01 1.55876958e-01
+ 8.09490983e-02 -8.58886384e-02
+ -1.56894969e+00 4.53497227e-01
+ 1.36944658e-01 5.60670875e-01
+ -5.32635329e-01 4.40309945e-01
+ 1.32507853e+00 -5.83670099e-01
+ 1.20676031e+00 -8.02296831e-01
+ -3.65023422e+00 1.17211368e+00
+ 1.53393850e+00 -6.17771312e-01
+ -3.99977129e+00 1.71415137e+00
+ 5.70705058e-01 -4.60771539e-01
+ -2.20608002e+00 1.07866596e+00
+ -1.09040244e+00 6.77441076e-01
+ -5.09886482e-01 -1.97282128e-01
+ -1.58062785e+00 6.18333697e-01
+ -1.53295020e+00 4.02168701e-01
+ -5.18580598e-01 2.25767177e-01
+ 1.59514316e+00 -2.54983617e-01
+ -5.91938655e+00 2.68223782e+00
+ 2.84200509e+00 -1.04685313e+00
+ 1.31298664e+00 -1.16672614e+00
+ -2.36660033e+00 1.81359460e+00
+ 6.94163290e-02 3.76658816e-01
+ 2.33973934e+00 -8.33173023e-01
+ -8.24640389e-01 7.83717285e-01
+ -1.02888281e+00 1.04680766e+00
+ 1.34750745e+00 -5.89568160e-01
+ -2.48761231e+00 7.44199284e-01
+ -1.04501559e+00 4.72326911e-01
+ -3.14610089e+00 1.89843692e+00
+ 2.13003416e-01 5.76633620e-01
+ -1.69239608e+00 5.66070021e-01
+ 1.80491280e+00 -9.31701080e-01
+ -6.94362572e-02 6.96026587e-01
+ 1.36502578e+00 -6.85599000e-02
+ -7.76764337e-01 3.64328661e-01
+ -2.67322167e+00 6.80150021e-01
+ 1.84338485e+00 -1.18487494e+00
+ 2.88009231e+00 -1.25700411e+00
+ 1.17114433e+00 -7.69727080e-01
+ 2.11576167e+00 2.81502116e-01
+ -1.51470088e+00 2.61553540e-01
+ 1.18923669e-01 -1.17890202e-01
+ 4.48359786e+00 -1.81427466e+00
+ -1.27055948e+00 9.92388998e-01
+ -8.00276606e-01 9.11326621e-02
+ 7.51764024e-01 -1.03676498e-01
+ 1.35769348e-01 -2.11470084e-01
+ 2.50731332e+00 -1.12418270e+00
+ -2.49752781e-01 7.81224033e-02
+ -6.23037902e-01 3.16599691e-01
+ -3.93772902e+00 1.37195391e+00
+ 1.74256361e+00 -1.12363582e+00
+ -1.49737281e+00 5.98828310e-01
+ 7.75592115e-01 -4.64733802e-01
+ -2.26027693e+00 1.36991118e+00
+ -1.62849836e+00 7.36899107e-01
+ 2.36850751e+00 -9.32126872e-01
+ 5.86169745e+00 -2.49342512e+00
+ -5.37092226e-01 1.23821274e+00
+ 2.80535867e+00 -1.93363302e+00
+ -1.77638106e+00 9.10050276e-01
+ 3.02692018e+00 -1.60774676e+00
+ 1.97833084e+00 -1.50636531e+00
+ 9.09168906e-01 -8.83799359e-01
+ 2.39769655e+00 -7.56977869e-01
+ 1.47283981e+00 -1.06749890e+00
+ 2.92060943e-01 -6.07040605e-01
+ -2.09278201e+00 7.71858590e-01
+ 7.10015905e-01 -5.42768432e-01
+ -2.16826169e-01 1.56897896e-01
+ 4.56288247e+00 -2.08912680e+00
+ -6.63374020e-01 6.67325183e-01
+ 1.80564442e+00 -9.76366134e-01
+ 3.28720168e+00 -4.66575145e-01
+ -1.60463695e-01 -2.58428153e-01
+ 1.78590750e+00 -3.96427146e-01
+ 2.75950306e+00 -1.82102856e+00
+ -1.18234310e+00 6.28073320e-01
+ 4.11415835e+00 -2.33551216e+00
+ 1.38721004e+00 -2.77450622e-01
+ -2.94903545e+00 1.74813352e+00
+ 8.67290400e-01 -6.51667894e-01
+ 2.70022274e+00 -8.11832480e-01
+ -2.06766146e+00 8.24047249e-01
+ 3.90717142e+00 -1.20155758e+00
+ -2.95102809e+00 1.36667968e+00
+ 6.08815147e+00 -2.60737974e+00
+ 2.78576476e+00 -7.86628755e-01
+ -3.26258407e+00 1.09302450e+00
+ 1.59849422e+00 -1.09705202e+00
+ -2.50600710e-01 1.63243175e-01
+ -4.90477087e-01 -4.57729572e-01
+ -1.24837181e+00 3.22157840e-01
+ -2.46341049e+00 1.06517849e+00
+ 9.62880751e-01 4.56962496e-01
+ 3.99964487e-01 2.07472802e-01
+ 6.36657705e-01 -3.46400942e-02
+ 4.91231407e-02 -1.40289235e-02
+ -4.66683524e-02 -3.72326100e-01
+ -5.22049702e-01 -1.70440260e-01
+ 5.27062938e-01 -2.32628395e-01
+ -2.69440318e+00 1.18914874e+00
+ 3.65087539e+00 -1.53427267e+00
+ -1.16546364e-01 4.93245392e-02
+ 7.55931384e-01 -3.02980139e-01
+ 2.06338745e+00 -6.24841225e-01
+ 1.31177908e-01 7.29338183e-01
+ 1.48021784e+00 -6.39509896e-01
+ -5.98656707e-01 2.84525503e-01
+ -2.18611080e+00 1.79549812e+00
+ -2.91673624e+00 2.15772237e-01
+ -8.95591350e-01 7.68250538e-01
+ 1.36139762e+00 -1.93845144e-01
+ 5.45730414e+00 -2.28114404e+00
+ 3.22747247e-01 9.33582332e-01
+ -1.46384504e+00 1.12801186e-01
+ 4.26728166e-01 -2.33481242e-01
+ -1.41327270e+00 8.16103740e-01
+ -2.53998067e-01 1.44906646e-01
+ -1.32436467e+00 1.87556361e-01
+ -3.77313086e+00 1.32896038e+00
+ 3.77651731e+00 -1.76548043e+00
+ -2.45297093e+00 1.32571926e+00
+ -6.55900588e-01 3.56921462e-01
+ 9.25558722e-01 -4.51988954e-01
+ 1.20732231e+00 -3.02821614e-01
+ 3.72660154e-01 -1.89365208e-01
+ -1.77090939e+00 9.18087975e-01
+ 3.01127567e-01 2.67965829e-01
+ -1.76708900e+00 4.62069259e-01
+ -2.71812099e+00 1.57233508e+00
+ -5.35297633e-01 4.99231535e-01
+ 1.50507631e+00 -9.85763646e-01
+ 3.00424787e+00 -1.29837562e+00
+ -4.99311105e-01 3.91086482e-01
+ 1.30125207e+00 -1.26247924e-01
+ 4.01699483e-01 -4.46909391e-01
+ -1.33635257e+00 5.12068703e-01
+ 1.39229757e+00 -9.10974858e-01
+ -1.74229508e+00 1.49475978e+00
+ -1.21489414e+00 4.04193753e-01
+ -3.36537605e-01 -6.74335427e-01
+ -2.79186828e-01 8.48314720e-01
+ -2.03080140e+00 1.66599815e+00
+ -3.53064281e-01 -7.68582906e-04
+ -5.30305657e+00 2.91091546e+00
+ -1.20049972e+00 8.26578358e-01
+ 2.95906989e-01 2.40215920e-01
+ -1.42955534e+00 4.63480310e-01
+ -1.87856619e+00 8.21459385e-01
+ -2.71124720e+00 1.80246843e+00
+ -3.06933780e+00 1.22235760e+00
+ 5.21935582e-01 -1.27298218e+00
+ -1.34175797e+00 7.69018937e-01
+ -1.81962785e+00 1.15528991e+00
+ -3.99227550e-01 2.93821598e-01
+ 1.22533179e+00 -4.73846323e-01
+ -2.08068359e-01 -1.75039817e-01
+ -2.03068526e+00 1.50370503e+00
+ -3.27606113e+00 1.74906330e+00
+ -4.37802587e-01 -2.26956048e-01
+ -7.69774213e-02 -3.54922468e-01
+ 6.47160749e-02 -2.07334721e-01
+ -1.37791524e+00 4.43766709e-01
+ 3.29846803e+00 -1.04060799e+00
+ -3.63704046e+00 1.05800226e+00
+ -1.26716116e+00 1.13077353e+00
+ 1.98549075e+00 -1.31864807e+00
+ 1.85159500e+00 -5.78629560e-01
+ -1.55295206e+00 1.23655857e+00
+ 6.76026255e-01 9.18824125e-02
+ 1.23418960e+00 -4.68162027e-01
+ 2.43186642e+00 -9.22422440e-01
+ -3.18729701e+00 1.77582673e+00
+ -4.02945613e+00 1.14303496e+00
+ -1.92694576e-01 1.03301431e-01
+ 1.89554730e+00 -4.60128096e-01
+ -2.55626581e+00 1.16057084e+00
+ 6.89144365e-01 -9.94982900e-01
+ -4.44680606e+00 2.19751983e+00
+ -3.15196193e+00 1.18762993e+00
+ -1.17434977e+00 1.04534656e+00
+ 8.58386984e-02 -1.03947487e+00
+ 3.33354973e-01 5.54813610e-01
+ -9.37631808e-01 3.33450150e-01
+ -2.50232471e+00 5.39720635e-01
+ 1.03611949e+00 -7.16304095e-02
+ -2.05556816e-02 -3.28992265e-01
+ -2.24176201e+00 1.13077506e+00
+ 4.53583688e+00 -1.10710212e+00
+ 4.77389762e-01 -8.99445512e-01
+ -2.69075551e+00 6.83176866e-01
+ -2.21779724e+00 1.16916849e+00
+ -1.09669056e+00 2.10044765e-01
+ -8.45367920e-01 -8.45951423e-02
+ 4.37558941e-01 -6.95904256e-01
+ 1.84884195e+00 -1.71205136e-01
+ -8.36371957e-01 5.62862478e-01
+ 1.27786531e+00 -1.33362147e+00
+ 2.90684492e+00 -7.49892184e-01
+ -3.38652716e+00 1.51180670e+00
+ -1.30945978e+00 7.09261928e-01
+ -7.50471924e-01 -5.24637889e-01
+ 1.18580718e+00 -9.97943971e-04
+ -7.55395645e+00 3.19273590e+00
+ 1.72822535e+00 -1.20996962e+00
+ 5.67374320e-01 6.19573416e-01
+ -2.99163781e+00 1.79721534e+00
+ 1.49862187e+00 -6.05631846e-02
+ 1.79503506e+00 -4.90419706e-01
+ 3.85626054e+00 -1.95396324e+00
+ -9.39188410e-01 7.96498057e-01
+ 2.91986664e+00 -1.29392724e+00
+ -1.54265750e+00 6.40727933e-01
+ 1.14919794e+00 1.20834257e-01
+ 2.00936817e+00 -1.53728359e+00
+ 3.72468420e+00 -1.38704612e+00
+ -1.27794802e+00 3.48543179e-01
+ 3.63294077e-01 5.70623314e-01
+ 1.49381016e+00 -6.04500534e-01
+ 2.98912256e+00 -1.72295726e+00
+ -1.80833817e+00 2.94907625e-01
+ -3.19669622e+00 1.31888700e+00
+ 1.45889401e+00 -8.88448639e-01
+ -2.80045388e+00 1.01207060e+00
+ -4.78379567e+00 1.48646520e+00
+ 2.25510003e+00 -7.13372461e-01
+ -9.74441433e-02 -2.17766373e-01
+ 2.64468496e-01 -3.60842698e-01
+ -5.98821713e+00 3.20197892e+00
+ 2.67030213e-01 -5.36386416e-01
+ 2.24546960e+00 -8.13464649e-01
+ -4.89171414e-01 3.86255031e-01
+ -7.45713706e-01 6.29800380e-01
+ -3.30460503e-01 3.85127284e-01
+ -4.19588147e+00 1.52793198e+00
+ 5.42078582e-01 -2.61642741e-02
+ 4.24938513e-01 -5.72936751e-01
+ 2.82717288e+00 -6.75355024e-01
+ -1.44741788e+00 5.03578028e-01
+ -1.65547573e+00 7.76444277e-01
+ 2.20361170e+00 -1.40835680e+00
+ -3.69540235e+00 2.32953767e+00
+ -1.41909357e-01 2.28989778e-01
+ 1.92838879e+00 -8.72525737e-01
+ 1.40708100e+00 -6.81849638e-02
+ 1.24988112e+00 -1.39470590e-01
+ -2.39435855e+00 7.26587655e-01
+ 7.03985028e-01 4.85403277e-02
+ 4.05214529e+00 -9.16928318e-01
+ 3.74198837e-01 -5.04192358e-01
+ -8.43374127e-01 2.36064018e-01
+ -3.32253349e-01 7.47840055e-01
+ -6.03725210e+00 1.95173337e+00
+ 4.60829865e+00 -1.51191309e+00
+ -1.46247098e+00 1.11140916e+00
+ -9.60111157e-01 -1.23189114e-01
+ -7.49613187e-01 4.53614129e-01
+ -5.77838219e-01 2.07366469e-02
+ 8.07652950e-01 -5.16272662e-01
+ -6.02556049e-01 5.05318649e-01
+ -1.28712445e-01 2.57836512e-01
+ -5.27662820e+00 2.11790737e+00
+ 5.40819308e+00 -2.15366022e+00
+ 9.37742513e-02 -1.60221751e-01
+ 4.55902865e+00 -1.24646307e+00
+ -9.06582589e-01 1.92928110e-01
+ 2.99928996e+00 -8.04301218e-01
+ -3.24317381e+00 1.80076061e+00
+ 3.20421743e-01 8.76524679e-01
+ -5.29606705e-01 -3.16717696e-01
+ -1.77264560e+00 7.52686776e-01
+ -1.51706824e+00 8.43755103e-01
+ 1.52759111e+00 -7.86814243e-01
+ 4.74845617e-01 4.21319700e-01
+ 6.97829149e-01 -8.15664881e-01
+ 3.09564973e+00 -1.06202469e+00
+ 2.95320379e+00 -1.98963943e+00
+ -4.23033224e+00 1.41013338e+00
+ 1.48576206e+00 8.02908511e-02
+ 4.52041627e+00 -2.04620399e+00
+ 6.58403922e-01 -7.60781799e-01
+ 2.10667543e-01 1.15241731e-01
+ 1.77702583e+00 -8.10271859e-01
+ 2.41277385e+00 -1.46972042e+00
+ 1.50685525e+00 -1.99272545e-01
+ 7.61665522e-01 -4.11276152e-01
+ 1.18352312e+00 -9.59908608e-01
+ -3.32031305e-01 8.07500132e-02
+ 1.16813118e+00 -1.73095194e-01
+ 1.18363346e+00 -5.41565052e-01
+ 5.17702179e-01 -7.62442035e-01
+ 4.57401006e-01 -1.45951115e-02
+ 1.49377115e-01 2.99571605e-01
+ 1.40399453e+00 -1.30160353e+00
+ 5.26231567e-01 3.52783752e-01
+ -1.91136514e+00 4.24228635e-01
+ 1.74156701e+00 -9.92076776e-01
+ -4.89323391e+00 2.32483507e+00
+ 2.54011209e+00 -8.80366295e-01
+ -5.56925706e-01 1.48842026e-01
+ -2.35904668e+00 9.60474853e-01
+ 1.42216971e+00 -4.67062761e-01
+ -1.10809680e+00 7.68684300e-01
+ 4.09674726e+00 -1.90795680e+00
+ -2.23048923e+00 9.03812542e-01
+ 6.57025763e-01 1.36514871e-01
+ 2.10944145e+00 -9.78897838e-02
+ 1.22552525e+00 -2.50303867e-01
+ 2.84620103e-01 -5.30164020e-01
+ -2.13562585e+00 1.03503056e+00
+ 1.32414902e-01 -8.14190240e-03
+ -5.82433561e-01 3.21020292e-01
+ -5.06473247e-01 3.11530419e-01
+ 1.57162465e+00 -1.20763919e+00
+ -1.43155284e+00 -2.51203698e-02
+ -1.47093713e+00 -1.39620999e-01
+ -2.65765643e+00 1.06091403e+00
+ 2.45992927e+00 -5.88815836e-01
+ -1.28440162e+00 -1.99377398e-01
+ 6.11257504e-01 -3.73577401e-01
+ -3.46606103e-01 6.06081290e-01
+ 3.76687505e+00 -8.80181424e-01
+ -1.03725103e+00 1.45177517e+00
+ 2.76659936e+00 -1.09361320e+00
+ -3.61311296e+00 9.75032455e-01
+ 3.22878655e+00 -9.69497365e-01
+ 1.43560379e+00 -5.52524585e-01
+ 2.94042153e+00 -1.79747037e+00
+ 1.30739580e+00 2.47989248e-01
+ -4.05056982e-01 1.22831715e+00
+ -2.25827421e+00 2.30604626e-01
+ 3.69262926e-01 4.32714650e-02
+ -5.52064063e-01 6.07806340e-01
+ 7.03325987e+00 -2.17956730e+00
+ -2.37823835e-01 -8.28068639e-01
+ -4.84279888e-01 5.67765194e-01
+ -3.15863410e+00 1.02241617e+00
+ -3.39561593e+00 1.36876374e+00
+ -2.78482934e+00 6.81641104e-01
+ -4.37604334e+00 2.23826340e+00
+ -2.54049692e+00 8.22676745e-01
+ 3.73264822e+00 -9.93498732e-01
+ -3.49536064e+00 1.84771519e+00
+ 9.81801604e-01 -5.21278776e-01
+ 1.52996831e+00 -1.27386206e+00
+ -9.23490293e-01 5.29099482e-01
+ -2.76999461e+00 9.24831872e-01
+ -3.30029834e-01 -2.49645555e-01
+ -1.71156166e+00 5.44940854e-01
+ -2.37009487e+00 5.83826982e-01
+ -3.03216865e+00 1.04922722e+00
+ -2.19539936e+00 1.37558730e+00
+ 1.15350207e+00 -6.15318535e-01
+ 4.62011792e+00 -2.46714517e+00
+ 1.52627952e-02 -1.00618283e-01
+ -1.10399342e+00 4.87413533e-01
+ 3.55448194e+00 -9.10394190e-01
+ -5.21890321e+00 2.44710745e+00
+ 1.54289749e+00 -6.54269311e-01
+ 2.67935674e+00 -9.92758863e-01
+ 1.05801310e+00 2.60054285e-02
+ 1.52509097e+00 -4.08768600e-01
+ 3.27576917e+00 -1.28769406e+00
+ 1.71008412e-01 -2.68739994e-01
+ -9.83351344e-04 7.02495897e-02
+ -7.60795056e-03 1.61968285e-01
+ -1.80620472e+00 4.24934471e-01
+ 2.32023297e-02 -2.57284559e-01
+ 3.98219478e-01 -4.65361935e-01
+ 6.63476988e-01 -3.29823196e-02
+ 4.00154707e+00 -1.01792211e+00
+ -1.50286870e+00 9.46875359e-01
+ -2.22717585e+00 7.50636195e-01
+ -3.47381508e-01 -6.51596975e-01
+ 2.08076453e+00 -8.22800165e-01
+ 2.05099963e+00 -4.00868250e-01
+ 3.52576988e-02 -2.54418565e-01
+ 1.57342042e+00 -7.62166492e-02
+ -1.47019722e+00 3.40861172e-01
+ -1.21156090e+00 3.21891246e-01
+ 3.79729047e+00 -1.54350764e+00
+ 1.26459678e-02 6.99203693e-01
+ 1.53974177e-01 4.68643204e-01
+ -1.73923561e-01 -1.26229768e-01
+ 4.54644993e+00 -2.13951783e+00
+ 1.46022547e-01 -4.57084165e-01
+ 6.50048037e+00 -2.78872609e+00
+ -1.51934912e+00 1.03216768e+00
+ -3.06483575e+00 1.81101446e+00
+ -2.38212125e+00 9.19559042e-01
+ -1.81319611e+00 8.10545112e-01
+ 1.70951294e+00 -6.10712680e-01
+ 1.67974156e+00 -1.51241453e+00
+ -5.94795113e+00 2.56893813e+00
+ 3.62633110e-01 -7.46965304e-01
+ -2.44042594e+00 8.52761797e-01
+ 3.32412550e+00 -1.28439899e+00
+ 4.74860766e+00 -1.72821964e+00
+ 1.29072541e+00 -8.24872902e-01
+ -1.69450702e+00 4.09600876e-01
+ 1.29705411e+00 1.22300809e-01
+ -2.63597613e+00 8.55612913e-01
+ 9.28467301e-01 -2.63550114e-02
+ 2.44670264e+00 -4.10123002e-01
+ 1.06408206e+00 -5.03361942e-01
+ 5.12384049e-02 -1.27116595e-02
+ -1.06731272e+00 -1.76205029e-01
+ -9.45454582e-01 3.74404917e-01
+ 2.54343689e+00 -7.13810545e-01
+ -2.54460335e+00 1.31590265e+00
+ 1.89864233e+00 -3.98436339e-01
+ -1.93990133e+00 6.01474630e-01
+ -1.35938824e+00 4.00751788e-01
+ 2.38567018e+00 -6.13904880e-01
+ 2.18748050e-01 2.62631712e-01
+ -2.01388788e+00 1.41474031e+00
+ 2.74014581e+00 -1.27448105e+00
+ -2.13828583e+00 1.13616144e+00
+ 5.98730932e+00 -2.53430080e+00
+ -1.72872795e+00 1.53702057e+00
+ -2.53263962e+00 1.27342410e+00
+ 1.34326968e+00 -1.99395088e-01
+ 3.83352666e-01 -1.25683065e-01
+ -2.35630657e+00 5.54116983e-01
+ -1.94900838e+00 5.76270178e-01
+ -1.36699108e+00 -3.40904824e-01
+ -2.34727346e+00 -1.93054940e-02
+ -3.82779777e+00 1.83025664e+00
+ -4.31602080e+00 9.21605705e-01
+ 5.54098133e-01 2.33991419e-01
+ -4.53591188e+00 1.99833353e+00
+ -3.92715909e+00 1.83231482e+00
+ 3.91344440e-01 -1.11355111e-01
+ 3.48576363e+00 -1.41379449e+00
+ -1.42858690e+00 3.84532286e-01
+ 1.79519859e+00 -9.23486448e-01
+ 8.49691242e-01 -1.76551084e-01
+ 1.53618138e+00 8.23835015e-02
+ 5.91476520e-02 3.88296940e-02
+ 1.44837346e+00 -7.24097604e-01
+ -6.79008418e-01 4.04078097e-01
+ 2.87555510e+00 -9.51825076e-01
+ -1.12379101e+00 2.93457714e-01
+ 1.45263980e+00 -6.01960544e-01
+ -2.55741621e-01 9.26233518e-01
+ 3.54570714e+00 -1.41521877e+00
+ -1.61542388e+00 6.57844512e-01
+ -3.22844269e-01 3.02823546e-01
+ 1.03523913e+00 -6.92730711e-01
+ 1.11084909e+00 -3.50823642e-01
+ 3.41268693e+00 -1.90865862e+00
+ 7.67062858e-01 -9.48792160e-01
+ -5.49798016e+00 1.71139960e+00
+ 1.14865798e+00 -6.12669150e-01
+ -2.18256680e+00 7.78634462e-01
+ 4.78857389e+00 -2.55555085e+00
+ -1.85555569e+00 8.04311615e-01
+ -4.22278799e+00 2.01162524e+00
+ -1.56556149e+00 1.54353907e+00
+ -3.11527864e+00 1.65973526e+00
+ 2.66342611e+00 -1.20449402e+00
+ 1.57635314e+00 -1.48716308e-01
+ -6.35606865e-01 2.59701180e-01
+ 1.02431976e+00 -6.76929904e-01
+ 1.12973772e+00 1.49473892e-02
+ -9.12758116e-01 2.21533933e-01
+ -2.98014470e+00 1.71651189e+00
+ 2.74016965e+00 -9.47893923e-01
+ -3.47830591e+00 1.34941430e+00
+ 1.74757562e+00 -3.72503752e-01
+ 5.55820383e-01 -6.47992466e-01
+ -1.19871928e+00 9.82429151e-01
+ -2.53040133e+00 2.10671307e+00
+ -1.94085605e+00 1.38938137e+00
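The two-column block ending above is the tail of a numeric sample file whose diff header lies before this excerpt, so its exact name is not visible here. The rows look like 2-D points, the shape of input a Gaussian-mixture clustering job would consume. The sketch below is purely illustrative and assumes a spark-shell session (where sc is the SparkContext): the path two_column_points.txt is a hypothetical placeholder for that file, and the choice of GaussianMixture with k = 2 is an assumption, not something the diff states.

import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.mllib.linalg.Vectors

// Hypothetical path standing in for the two-column file shown above.
val points = sc.textFile("data/mllib/two_column_points.txt")
  .map(line => Vectors.dense(line.trim.split("\\s+").map(_.toDouble)))
  .cache()

// Fit a 2-component Gaussian mixture (k = 2 is an assumption) and print the fitted components.
val gmm = new GaussianMixture().setK(2).run(points)
for (i <- 0 until gmm.k) {
  println(s"weight=${gmm.weights(i)} mu=${gmm.gaussians(i).mu} sigma=\n${gmm.gaussians(i).sigma}")
}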
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/license.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/license.txt
new file mode 100644
index 0000000000000000000000000000000000000000..052f302c4670aa57aa831681183a990fcc103b98
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/license.txt
@@ -0,0 +1,13 @@
+The images in the folder "kittens" are under the Creative Commons CC0 license (no rights reserved):
+https://creativecommons.org/share-your-work/public-domain/cc0/
+The images are taken from:
+https://ccsearch.creativecommons.org/image/detail/WZnbJSJ2-dzIDiuUUdto3Q==
+https://ccsearch.creativecommons.org/image/detail/_TlKu_rm_QrWlR0zthQTXA==
+https://ccsearch.creativecommons.org/image/detail/OPNnHJb6q37rSZ5o_L5JHQ==
+https://ccsearch.creativecommons.org/image/detail/B2CVP_j5KjwZm7UAVJ3Hvw==
+
+The chr30.4.184.jpg and grayscale.jpg images are also under the CC0 license, taken from:
+https://ccsearch.creativecommons.org/image/detail/8eO_qqotBfEm2UYxirLntw==
+
+The image under "multi-channel" directory is under the CC BY-SA 4.0 license cropped from:
+https://en.wikipedia.org/wiki/Alpha_compositing#/media/File:Hue_alpha_falloff.png
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/29.5.a_b_EGDP022204.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/29.5.a_b_EGDP022204.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..435e7dfd6a459822d7f8feaeec2b1f3c5b2a3f76
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/29.5.a_b_EGDP022204.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/54893.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/54893.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..825630cc40288dd7a64b1dd76681be305c959c5a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/54893.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/DP153539.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/DP153539.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..571efe933ccfc636a3fae9d463573db88c496106
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/DP153539.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/DP802813.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/DP802813.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2d123594b7af7d2f8fdc593b91ad8f8d97925f48
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/DP802813.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/not-image.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/not-image.txt
new file mode 100644
index 0000000000000000000000000000000000000000..283e5e936f2316e9ac9fb13134b1dce1a9ecd73e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/kittens/not-image.txt
@@ -0,0 +1 @@
+not an image
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/license.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/license.txt
new file mode 100644
index 0000000000000000000000000000000000000000..052f302c4670aa57aa831681183a990fcc103b98
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/license.txt
@@ -0,0 +1,13 @@
+The images in the folder "kittens" are under the Creative Commons CC0 license (no rights reserved):
+https://creativecommons.org/share-your-work/public-domain/cc0/
+The images are taken from:
+https://ccsearch.creativecommons.org/image/detail/WZnbJSJ2-dzIDiuUUdto3Q==
+https://ccsearch.creativecommons.org/image/detail/_TlKu_rm_QrWlR0zthQTXA==
+https://ccsearch.creativecommons.org/image/detail/OPNnHJb6q37rSZ5o_L5JHQ==
+https://ccsearch.creativecommons.org/image/detail/B2CVP_j5KjwZm7UAVJ3Hvw==
+
+The chr30.4.184.jpg and grayscale.jpg images are also under the CC0 license, taken from:
+https://ccsearch.creativecommons.org/image/detail/8eO_qqotBfEm2UYxirLntw==
+
+The image under "multi-channel" directory is under the CC BY-SA 4.0 license cropped from:
+https://en.wikipedia.org/wiki/Alpha_compositing#/media/File:Hue_alpha_falloff.png
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/BGRA.png b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/BGRA.png
new file mode 100644
index 0000000000000000000000000000000000000000..a944c6cdb066dbc1857d2bd6bd8b7e676f35c602
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/BGRA.png differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/BGRA_alpha_60.png b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/BGRA_alpha_60.png
new file mode 100644
index 0000000000000000000000000000000000000000..913637cd2828ab4e2ff4b2bbd92c4cf362f871c4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/BGRA_alpha_60.png differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/chr30.4.184.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/chr30.4.184.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7068b97deb344269811fd6000dc254d5005c1444
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/chr30.4.184.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/grayscale.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/grayscale.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..621cdd11e2b927ca0e6527af222bbb1c2b2da5a3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/origin/multi-channel/grayscale.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..435e7dfd6a459822d7f8feaeec2b1f3c5b2a3f76
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-01/29.5.a_b_EGDP022204.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-01/not-image.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-01/not-image.txt
new file mode 100644
index 0000000000000000000000000000000000000000..283e5e936f2316e9ac9fb13134b1dce1a9ecd73e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-01/not-image.txt
@@ -0,0 +1 @@
+not an image
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/54893.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/54893.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..825630cc40288dd7a64b1dd76681be305c959c5a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/54893.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..571efe933ccfc636a3fae9d463573db88c496106
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP153539.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2d123594b7af7d2f8fdc593b91ad8f8d97925f48
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=kittens/date=2018-02/DP802813.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA.png b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA.png
new file mode 100644
index 0000000000000000000000000000000000000000..a944c6cdb066dbc1857d2bd6bd8b7e676f35c602
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA.png differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png
new file mode 100644
index 0000000000000000000000000000000000000000..913637cd2828ab4e2ff4b2bbd92c4cf362f871c4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-01/BGRA_alpha_60.png differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7068b97deb344269811fd6000dc254d5005c1444
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-02/chr30.4.184.jpg differ
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..621cdd11e2b927ca0e6527af222bbb1c2b2da5a3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/data/mllib/images/partitioned/cls=multichannel/date=2018-02/grayscale.jpg differ
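The images/origin and images/partitioned trees added above are laid out for the image data source that shipped with Spark 2.4: the partitioned tree uses cls=.../date=... directory names, which the reader surfaces as partition columns, and not-image.txt is there to exercise invalid-file handling. A minimal reading sketch, assuming a spark-shell session started from the unpacked distribution (so spark is the SparkSession and the relative paths resolve):

// Read the partitioned image tree; dropInvalid skips non-image files such as not-image.txt.
val images = spark.read.format("image")
  .option("dropInvalid", true)
  .load("data/mllib/images/partitioned")

// The source yields a single "image" struct column plus the partition columns cls and date.
images.select("image.origin", "image.width", "image.height", "image.nChannels", "cls", "date")
  .show(false)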
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/iris_libsvm.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/iris_libsvm.txt
new file mode 100644
index 0000000000000000000000000000000000000000..db959010255d0eeea2f51712a51c3c23ae04456c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/iris_libsvm.txt
@@ -0,0 +1,150 @@
+0.0 1:5.1 2:3.5 3:1.4 4:0.2
+0.0 1:4.9 2:3.0 3:1.4 4:0.2
+0.0 1:4.7 2:3.2 3:1.3 4:0.2
+0.0 1:4.6 2:3.1 3:1.5 4:0.2
+0.0 1:5.0 2:3.6 3:1.4 4:0.2
+0.0 1:5.4 2:3.9 3:1.7 4:0.4
+0.0 1:4.6 2:3.4 3:1.4 4:0.3
+0.0 1:5.0 2:3.4 3:1.5 4:0.2
+0.0 1:4.4 2:2.9 3:1.4 4:0.2
+0.0 1:4.9 2:3.1 3:1.5 4:0.1
+0.0 1:5.4 2:3.7 3:1.5 4:0.2
+0.0 1:4.8 2:3.4 3:1.6 4:0.2
+0.0 1:4.8 2:3.0 3:1.4 4:0.1
+0.0 1:4.3 2:3.0 3:1.1 4:0.1
+0.0 1:5.8 2:4.0 3:1.2 4:0.2
+0.0 1:5.7 2:4.4 3:1.5 4:0.4
+0.0 1:5.4 2:3.9 3:1.3 4:0.4
+0.0 1:5.1 2:3.5 3:1.4 4:0.3
+0.0 1:5.7 2:3.8 3:1.7 4:0.3
+0.0 1:5.1 2:3.8 3:1.5 4:0.3
+0.0 1:5.4 2:3.4 3:1.7 4:0.2
+0.0 1:5.1 2:3.7 3:1.5 4:0.4
+0.0 1:4.6 2:3.6 3:1.0 4:0.2
+0.0 1:5.1 2:3.3 3:1.7 4:0.5
+0.0 1:4.8 2:3.4 3:1.9 4:0.2
+0.0 1:5.0 2:3.0 3:1.6 4:0.2
+0.0 1:5.0 2:3.4 3:1.6 4:0.4
+0.0 1:5.2 2:3.5 3:1.5 4:0.2
+0.0 1:5.2 2:3.4 3:1.4 4:0.2
+0.0 1:4.7 2:3.2 3:1.6 4:0.2
+0.0 1:4.8 2:3.1 3:1.6 4:0.2
+0.0 1:5.4 2:3.4 3:1.5 4:0.4
+0.0 1:5.2 2:4.1 3:1.5 4:0.1
+0.0 1:5.5 2:4.2 3:1.4 4:0.2
+0.0 1:4.9 2:3.1 3:1.5 4:0.1
+0.0 1:5.0 2:3.2 3:1.2 4:0.2
+0.0 1:5.5 2:3.5 3:1.3 4:0.2
+0.0 1:4.9 2:3.1 3:1.5 4:0.1
+0.0 1:4.4 2:3.0 3:1.3 4:0.2
+0.0 1:5.1 2:3.4 3:1.5 4:0.2
+0.0 1:5.0 2:3.5 3:1.3 4:0.3
+0.0 1:4.5 2:2.3 3:1.3 4:0.3
+0.0 1:4.4 2:3.2 3:1.3 4:0.2
+0.0 1:5.0 2:3.5 3:1.6 4:0.6
+0.0 1:5.1 2:3.8 3:1.9 4:0.4
+0.0 1:4.8 2:3.0 3:1.4 4:0.3
+0.0 1:5.1 2:3.8 3:1.6 4:0.2
+0.0 1:4.6 2:3.2 3:1.4 4:0.2
+0.0 1:5.3 2:3.7 3:1.5 4:0.2
+0.0 1:5.0 2:3.3 3:1.4 4:0.2
+1.0 1:7.0 2:3.2 3:4.7 4:1.4
+1.0 1:6.4 2:3.2 3:4.5 4:1.5
+1.0 1:6.9 2:3.1 3:4.9 4:1.5
+1.0 1:5.5 2:2.3 3:4.0 4:1.3
+1.0 1:6.5 2:2.8 3:4.6 4:1.5
+1.0 1:5.7 2:2.8 3:4.5 4:1.3
+1.0 1:6.3 2:3.3 3:4.7 4:1.6
+1.0 1:4.9 2:2.4 3:3.3 4:1.0
+1.0 1:6.6 2:2.9 3:4.6 4:1.3
+1.0 1:5.2 2:2.7 3:3.9 4:1.4
+1.0 1:5.0 2:2.0 3:3.5 4:1.0
+1.0 1:5.9 2:3.0 3:4.2 4:1.5
+1.0 1:6.0 2:2.2 3:4.0 4:1.0
+1.0 1:6.1 2:2.9 3:4.7 4:1.4
+1.0 1:5.6 2:2.9 3:3.6 4:1.3
+1.0 1:6.7 2:3.1 3:4.4 4:1.4
+1.0 1:5.6 2:3.0 3:4.5 4:1.5
+1.0 1:5.8 2:2.7 3:4.1 4:1.0
+1.0 1:6.2 2:2.2 3:4.5 4:1.5
+1.0 1:5.6 2:2.5 3:3.9 4:1.1
+1.0 1:5.9 2:3.2 3:4.8 4:1.8
+1.0 1:6.1 2:2.8 3:4.0 4:1.3
+1.0 1:6.3 2:2.5 3:4.9 4:1.5
+1.0 1:6.1 2:2.8 3:4.7 4:1.2
+1.0 1:6.4 2:2.9 3:4.3 4:1.3
+1.0 1:6.6 2:3.0 3:4.4 4:1.4
+1.0 1:6.8 2:2.8 3:4.8 4:1.4
+1.0 1:6.7 2:3.0 3:5.0 4:1.7
+1.0 1:6.0 2:2.9 3:4.5 4:1.5
+1.0 1:5.7 2:2.6 3:3.5 4:1.0
+1.0 1:5.5 2:2.4 3:3.8 4:1.1
+1.0 1:5.5 2:2.4 3:3.7 4:1.0
+1.0 1:5.8 2:2.7 3:3.9 4:1.2
+1.0 1:6.0 2:2.7 3:5.1 4:1.6
+1.0 1:5.4 2:3.0 3:4.5 4:1.5
+1.0 1:6.0 2:3.4 3:4.5 4:1.6
+1.0 1:6.7 2:3.1 3:4.7 4:1.5
+1.0 1:6.3 2:2.3 3:4.4 4:1.3
+1.0 1:5.6 2:3.0 3:4.1 4:1.3
+1.0 1:5.5 2:2.5 3:4.0 4:1.3
+1.0 1:5.5 2:2.6 3:4.4 4:1.2
+1.0 1:6.1 2:3.0 3:4.6 4:1.4
+1.0 1:5.8 2:2.6 3:4.0 4:1.2
+1.0 1:5.0 2:2.3 3:3.3 4:1.0
+1.0 1:5.6 2:2.7 3:4.2 4:1.3
+1.0 1:5.7 2:3.0 3:4.2 4:1.2
+1.0 1:5.7 2:2.9 3:4.2 4:1.3
+1.0 1:6.2 2:2.9 3:4.3 4:1.3
+1.0 1:5.1 2:2.5 3:3.0 4:1.1
+1.0 1:5.7 2:2.8 3:4.1 4:1.3
+2.0 1:6.3 2:3.3 3:6.0 4:2.5
+2.0 1:5.8 2:2.7 3:5.1 4:1.9
+2.0 1:7.1 2:3.0 3:5.9 4:2.1
+2.0 1:6.3 2:2.9 3:5.6 4:1.8
+2.0 1:6.5 2:3.0 3:5.8 4:2.2
+2.0 1:7.6 2:3.0 3:6.6 4:2.1
+2.0 1:4.9 2:2.5 3:4.5 4:1.7
+2.0 1:7.3 2:2.9 3:6.3 4:1.8
+2.0 1:6.7 2:2.5 3:5.8 4:1.8
+2.0 1:7.2 2:3.6 3:6.1 4:2.5
+2.0 1:6.5 2:3.2 3:5.1 4:2.0
+2.0 1:6.4 2:2.7 3:5.3 4:1.9
+2.0 1:6.8 2:3.0 3:5.5 4:2.1
+2.0 1:5.7 2:2.5 3:5.0 4:2.0
+2.0 1:5.8 2:2.8 3:5.1 4:2.4
+2.0 1:6.4 2:3.2 3:5.3 4:2.3
+2.0 1:6.5 2:3.0 3:5.5 4:1.8
+2.0 1:7.7 2:3.8 3:6.7 4:2.2
+2.0 1:7.7 2:2.6 3:6.9 4:2.3
+2.0 1:6.0 2:2.2 3:5.0 4:1.5
+2.0 1:6.9 2:3.2 3:5.7 4:2.3
+2.0 1:5.6 2:2.8 3:4.9 4:2.0
+2.0 1:7.7 2:2.8 3:6.7 4:2.0
+2.0 1:6.3 2:2.7 3:4.9 4:1.8
+2.0 1:6.7 2:3.3 3:5.7 4:2.1
+2.0 1:7.2 2:3.2 3:6.0 4:1.8
+2.0 1:6.2 2:2.8 3:4.8 4:1.8
+2.0 1:6.1 2:3.0 3:4.9 4:1.8
+2.0 1:6.4 2:2.8 3:5.6 4:2.1
+2.0 1:7.2 2:3.0 3:5.8 4:1.6
+2.0 1:7.4 2:2.8 3:6.1 4:1.9
+2.0 1:7.9 2:3.8 3:6.4 4:2.0
+2.0 1:6.4 2:2.8 3:5.6 4:2.2
+2.0 1:6.3 2:2.8 3:5.1 4:1.5
+2.0 1:6.1 2:2.6 3:5.6 4:1.4
+2.0 1:7.7 2:3.0 3:6.1 4:2.3
+2.0 1:6.3 2:3.4 3:5.6 4:2.4
+2.0 1:6.4 2:3.1 3:5.5 4:1.8
+2.0 1:6.0 2:3.0 3:4.8 4:1.8
+2.0 1:6.9 2:3.1 3:5.4 4:2.1
+2.0 1:6.7 2:3.1 3:5.6 4:2.4
+2.0 1:6.9 2:3.1 3:5.1 4:2.3
+2.0 1:5.8 2:2.7 3:5.1 4:1.9
+2.0 1:6.8 2:3.2 3:5.9 4:2.3
+2.0 1:6.7 2:3.3 3:5.7 4:2.5
+2.0 1:6.7 2:3.0 3:5.2 4:2.3
+2.0 1:6.3 2:2.5 3:5.0 4:1.9
+2.0 1:6.5 2:3.0 3:5.2 4:2.0
+2.0 1:6.2 2:3.4 3:5.4 4:2.3
+2.0 1:5.9 2:3.0 3:5.1 4:1.8
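iris_libsvm.txt stores the classic 150-row Iris data in LIBSVM format (label index:value ..., with labels 0.0/1.0/2.0 for the three species, 50 rows each). A minimal sketch of loading it through the libsvm data source and fitting a classifier, again assuming a spark-shell session; the choice of DecisionTreeClassifier and the split ratio are illustrative assumptions, not anything the data file prescribes:

import org.apache.spark.ml.classification.DecisionTreeClassifier
import spark.implicits._

// The libsvm source parses each row into (label, features), with features as a sparse vector.
val iris = spark.read.format("libsvm").load("data/mllib/iris_libsvm.txt")

val Array(train, test) = iris.randomSplit(Array(0.8, 0.2), seed = 42)
val model = new DecisionTreeClassifier().fit(train)

// Rough accuracy check on the held-out rows.
val accuracy = model.transform(test)
  .filter($"prediction" === $"label")
  .count().toDouble / test.count()
println(s"test accuracy = $accuracy")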
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/kmeans_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/kmeans_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..338664f78de50564de5ee6eb2af2e368904628f4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/kmeans_data.txt
@@ -0,0 +1,6 @@
+0.0 0.0 0.0
+0.1 0.1 0.1
+0.2 0.2 0.2
+9.0 9.0 9.0
+9.1 9.1 9.1
+9.2 9.2 9.2
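kmeans_data.txt holds six 3-D points forming two tight groups, one near the origin and one near (9, 9, 9), which is why it serves as a toy k-means input. A minimal sketch using the RDD-based API (sc is the spark-shell SparkContext):

import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

val parsed = sc.textFile("data/mllib/kmeans_data.txt")
  .map(line => Vectors.dense(line.split(' ').map(_.toDouble)))
  .cache()

// Two clusters, up to 20 iterations; the centers should land near (0.1, 0.1, 0.1) and (9.1, 9.1, 9.1).
val model = KMeans.train(parsed, 2, 20)
model.clusterCenters.foreach(println)
println(s"within-set sum of squared errors = ${model.computeCost(parsed)}")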
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/pagerank_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/pagerank_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..95755ab8f5af8b27377a60cbe3edb8ab403e8465
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/pagerank_data.txt
@@ -0,0 +1,6 @@
+1 2
+1 3
+1 4
+2 1
+3 1
+4 1
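pagerank_data.txt is a four-node edge list (srcId dstId per line) in which node 1 links to 2, 3 and 4, and each of those links back to 1. One way to run PageRank over it is through GraphX's edge-list loader; this is an illustrative sketch in a spark-shell session, not necessarily how the bundled examples consume the file:

import org.apache.spark.graphx.GraphLoader

// Each "srcId dstId" line becomes a directed edge.
val graph = GraphLoader.edgeListFile(sc, "data/mllib/pagerank_data.txt")

// Iterate PageRank until the per-vertex change drops below the tolerance.
val ranks = graph.pageRank(0.0001).vertices
ranks.collect().sortBy(_._1).foreach { case (id, rank) => println(s"vertex $id: rank $rank") }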
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/pic_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/pic_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fcfef8cd19131f664b938c1ef18fcb98601a6bd7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/pic_data.txt
@@ -0,0 +1,19 @@
+0 1 1.0
+0 2 1.0
+0 3 1.0
+1 2 1.0
+1 3 1.0
+2 3 1.0
+3 4 0.1
+4 5 1.0
+4 15 1.0
+5 6 1.0
+6 7 1.0
+7 8 1.0
+8 9 1.0
+9 10 1.0
+10 11 1.0
+11 12 1.0
+12 13 1.0
+13 14 1.0
+14 15 1.0
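pic_data.txt encodes an affinity graph as srcId dstId similarity triples: nodes 0-3 form a fully connected clique that is joined to a 12-node ring (nodes 4 through 15) by a single weak 0.1 edge, so a power iteration clustering run should split the graph into those two groups. A minimal sketch with the RDD-based PowerIterationClustering in a spark-shell session (k = 2 follows from that structure; maxIterations is an arbitrary choice):

import org.apache.spark.mllib.clustering.PowerIterationClustering

// Parse each "srcId dstId similarity" line into the (Long, Long, Double) triples PIC expects.
val similarities = sc.textFile("data/mllib/pic_data.txt").map { line =>
  val Array(src, dst, sim) = line.split(' ')
  (src.toLong, dst.toLong, sim.toDouble)
}

val model = new PowerIterationClustering()
  .setK(2)
  .setMaxIterations(10)
  .run(similarities)

model.assignments.collect().sortBy(_.id).foreach(a => println(s"node ${a.id} -> cluster ${a.cluster}"))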
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/ridge-data/lpsa.data b/spark-2.4.0-bin-hadoop2.7/data/mllib/ridge-data/lpsa.data
new file mode 100644
index 0000000000000000000000000000000000000000..fdd16e36b49997c7b6959cfdc66ec1faa76927ba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/ridge-data/lpsa.data
@@ -0,0 +1,67 @@
+-0.4307829,-1.63735562648104 -2.00621178480549 -1.86242597251066 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+-0.1625189,-1.98898046126935 -0.722008756122123 -0.787896192088153 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+-0.1625189,-1.57881887548545 -2.1887840293994 1.36116336875686 -1.02470580167082 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.155348103855541
+-0.1625189,-2.16691708463163 -0.807993896938655 -0.787896192088153 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+0.3715636,-0.507874475300631 -0.458834049396776 -0.250631301876899 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+0.7654678,-2.03612849966376 -0.933954647105133 -1.86242597251066 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+0.8544153,-0.557312518810673 -0.208756571683607 -0.787896192088153 0.990146852537193 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.2669476,-0.929360463147704 -0.0578991819441687 0.152317365781542 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.2669476,-2.28833047634983 -0.0706369432557794 -0.116315079324086 0.80409888772376 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.2669476,0.223498042876113 -1.41471935455355 -0.116315079324086 -1.02470580167082 -0.522940888712441 -0.29928234305568 0.342627053981254 0.199211097885341
+1.3480731,0.107785900236813 -1.47221551299731 0.420949810887169 -1.02470580167082 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.687186906466865
+1.446919,0.162180092313795 -1.32557369901905 0.286633588334355 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.4701758,-1.49795329918548 -0.263601072284232 0.823898478545609 0.788388310173035 -0.522940888712441 -0.29928234305568 0.342627053981254 0.199211097885341
+1.4929041,0.796247055396743 0.0476559407005752 0.286633588334355 -1.02470580167082 -0.522940888712441 0.394013435896129 -1.04215728919298 -0.864466507337306
+1.5581446,-1.62233848461465 -0.843294091975396 -3.07127197548598 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.5993876,-0.990720665490831 0.458513517212311 0.823898478545609 1.07379746308195 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.6389967,-0.171901281967138 -0.489197399065355 -0.65357996953534 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.6956156,-1.60758252338831 -0.590700340358265 -0.65357996953534 -0.619561070667254 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+1.7137979,0.366273918511144 -0.414014962912583 -0.116315079324086 0.232904453212813 -0.522940888712441 0.971228997418125 0.342627053981254 1.26288870310799
+1.8000583,-0.710307384579833 0.211731938156277 0.152317365781542 -1.02470580167082 -0.522940888712441 -0.442797990776478 0.342627053981254 1.61744790484887
+1.8484548,-0.262791728113881 -1.16708345615721 0.420949810887169 0.0846342590816532 -0.522940888712441 0.163172393491611 0.342627053981254 1.97200710658975
+1.8946169,0.899043117369237 -0.590700340358265 0.152317365781542 -1.02470580167082 -0.522940888712441 1.28643254437683 -1.04215728919298 -0.864466507337306
+1.9242487,-0.903451690500615 1.07659722048274 0.152317365781542 1.28380453408541 -0.522940888712441 -0.442797990776478 -1.04215728919298 -0.864466507337306
+2.008214,-0.0633337899773081 -1.38088970920094 0.958214701098423 0.80409888772376 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+2.0476928,-1.15393789990757 -0.961853075398404 -0.116315079324086 -1.02470580167082 -0.522940888712441 -0.442797990776478 -1.04215728919298 -0.864466507337306
+2.1575593,0.0620203721138446 0.0657973885499142 1.22684714620405 -0.468824786336838 -0.522940888712441 1.31421001659859 1.72741139715549 -0.332627704725983
+2.1916535,-0.75731027755674 -2.92717970468456 0.018001143228728 -1.02470580167082 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.332627704725983
+2.2137539,1.11226993252773 1.06484916245061 0.555266033439982 0.877691038550889 1.89254797819741 1.43890404648442 0.342627053981254 0.376490698755783
+2.2772673,-0.468768642850639 -1.43754788774533 -1.05652863719378 0.576050411655607 -0.522940888712441 0.0120483832567209 0.342627053981254 -0.687186906466865
+2.2975726,-0.618884859896728 -1.1366360750781 -0.519263746982526 -1.02470580167082 -0.522940888712441 -0.863171185425945 3.11219574032972 1.97200710658975
+2.3272777,-0.651431999123483 0.55329161145762 -0.250631301876899 1.11210019001038 -0.522940888712441 -0.179808625688859 -1.04215728919298 -0.864466507337306
+2.5217206,0.115499102435224 -0.512233676577595 0.286633588334355 1.13650173283446 -0.522940888712441 -0.179808625688859 0.342627053981254 -0.155348103855541
+2.5533438,0.266341329949937 -0.551137885443386 -0.384947524429713 0.354857790686005 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.332627704725983
+2.5687881,1.16902610257751 0.855491905752846 2.03274448152093 1.22628985326088 1.89254797819741 2.02833774827712 3.11219574032972 2.68112551007152
+2.6567569,-0.218972367124187 0.851192298581141 0.555266033439982 -1.02470580167082 -0.522940888712441 -0.863171185425945 0.342627053981254 0.908329501367106
+2.677591,0.263121415733908 1.4142681068416 0.018001143228728 1.35980653053822 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+2.7180005,-0.0704736333296423 1.52000996595417 0.286633588334355 1.39364261119802 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.332627704725983
+2.7942279,-0.751957286017338 0.316843561689933 -1.99674219506348 0.911736065044475 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+2.8063861,-0.685277652430997 1.28214038482516 0.823898478545609 0.232904453212813 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.155348103855541
+2.8124102,-0.244991501432929 0.51882005949686 -0.384947524429713 0.823246560137838 -0.522940888712441 -0.863171185425945 0.342627053981254 0.553770299626224
+2.8419982,-0.75731027755674 2.09041984898851 1.22684714620405 1.53428167116843 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+2.8535925,1.20962937075363 -0.242882661178889 1.09253092365124 -1.02470580167082 -0.522940888712441 1.24263233939889 3.11219574032972 2.50384590920108
+2.9204698,0.570886990493502 0.58243883987948 0.555266033439982 1.16006887775962 -0.522940888712441 1.07357183940747 0.342627053981254 1.61744790484887
+2.9626924,0.719758684343624 0.984970304132004 1.09253092365124 1.52137230773457 -0.522940888712441 -0.179808625688859 0.342627053981254 -0.509907305596424
+2.9626924,-1.52406140158064 1.81975700990333 0.689582255992796 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+2.9729753,-0.132431544081234 2.68769877553723 1.09253092365124 1.53428167116843 -0.522940888712441 -0.442797990776478 0.342627053981254 -0.687186906466865
+3.0130809,0.436161292804989 -0.0834447307428255 -0.519263746982526 -1.02470580167082 1.89254797819741 1.07357183940747 0.342627053981254 1.26288870310799
+3.0373539,-0.161195191984091 -0.671900359186746 1.7641120364153 1.13650173283446 -0.522940888712441 -0.863171185425945 0.342627053981254 0.0219314970149
+3.2752562,1.39927182372944 0.513852869452676 0.689582255992796 -1.02470580167082 1.89254797819741 1.49394503405693 0.342627053981254 -0.155348103855541
+3.3375474,1.51967002306341 -0.852203755696565 0.555266033439982 -0.104527297798983 1.89254797819741 1.85927724828569 0.342627053981254 0.908329501367106
+3.3928291,0.560725834706224 1.87867703391426 1.09253092365124 1.39364261119802 -0.522940888712441 0.486423065822545 0.342627053981254 1.26288870310799
+3.4355988,1.00765532502814 1.69426310090641 1.89842825896812 1.53428167116843 -0.522940888712441 -0.863171185425945 0.342627053981254 -0.509907305596424
+3.4578927,1.10152996153577 -0.10927271844907 0.689582255992796 -1.02470580167082 1.89254797819741 1.97630171771485 0.342627053981254 1.61744790484887
+3.5160131,0.100001934217311 -1.30380956369388 0.286633588334355 0.316555063757567 -0.522940888712441 0.28786643052924 0.342627053981254 0.553770299626224
+3.5307626,0.987291634724086 -0.36279314978779 -0.922212414640967 0.232904453212813 -0.522940888712441 1.79270085261407 0.342627053981254 1.26288870310799
+3.5652984,1.07158528137575 0.606453149641961 1.7641120364153 -0.432854616994416 1.89254797819741 0.528504607720369 0.342627053981254 0.199211097885341
+3.5876769,0.180156323255198 0.188987436375017 -0.519263746982526 1.09956763075594 -0.522940888712441 0.708239632330506 0.342627053981254 0.199211097885341
+3.6309855,1.65687973755377 -0.256675483533719 0.018001143228728 -1.02470580167082 1.89254797819741 1.79270085261407 0.342627053981254 1.26288870310799
+3.6800909,0.5720085322365 0.239854450210939 -0.787896192088153 1.0605418233138 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+3.7123518,0.323806133438225 -0.606717660886078 -0.250631301876899 -1.02470580167082 1.89254797819741 0.342907418101747 0.342627053981254 0.199211097885341
+3.9843437,1.23668206715898 2.54220539083611 0.152317365781542 -1.02470580167082 1.89254797819741 1.89037692416194 0.342627053981254 1.26288870310799
+3.993603,0.180156323255198 0.154448192444669 1.62979581386249 0.576050411655607 1.89254797819741 0.708239632330506 0.342627053981254 1.79472750571931
+4.029806,1.60906277046565 1.10378605019827 0.555266033439982 -1.02470580167082 -0.522940888712441 -0.863171185425945 -1.04215728919298 -0.864466507337306
+4.1295508,1.0036214996026 0.113496885050331 -0.384947524429713 0.860016436332751 1.89254797819741 -0.863171185425945 0.342627053981254 -0.332627704725983
+4.3851468,1.25591974271076 0.577607033774471 0.555266033439982 -1.02470580167082 1.89254797819741 1.07357183940747 0.342627053981254 1.26288870310799
+4.6844434,2.09650591351268 0.625488598331018 -2.66832330782754 -1.02470580167082 1.89254797819741 1.67954222367555 0.342627053981254 0.553770299626224
+5.477509,1.30028987435881 0.338383613253713 0.555266033439982 1.00481276295349 1.89254797819741 1.24263233939889 0.342627053981254 1.97200710658975
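lpsa.data is the prostate-cancer LPSA regression set: each line is label,f1 f2 ... f8, i.e. a comma separates the target from eight space-separated, standardized features. Given the ridge-data directory name, a ridge-style fit is a natural illustration; the sketch below (again assuming spark-shell) uses the DataFrame-based LinearRegression with a pure L2 penalty, and the regParam value is an arbitrary assumption:

import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.LinearRegression
import spark.implicits._

// Split "label,f1 f2 ... f8" into a labeled point with a dense 8-element feature vector.
val data = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line =>
  val Array(label, features) = line.split(',')
  LabeledPoint(label.toDouble, Vectors.dense(features.split(' ').map(_.toDouble)))
}.toDF()

// elasticNetParam = 0 makes the penalty pure L2, i.e. ridge regression.
val lr = new LinearRegression().setRegParam(0.1).setElasticNetParam(0.0)
val model = lr.fit(data)
println(s"training RMSE = ${model.summary.rootMeanSquaredError}")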
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_binary_classification_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_binary_classification_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..861c70cde7fd27f447db2bd205558a38c7da66f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_binary_classification_data.txt
@@ -0,0 +1,100 @@
+0 128:51 129:159 130:253 131:159 132:50 155:48 156:238 157:252 158:252 159:252 160:237 182:54 183:227 184:253 185:252 186:239 187:233 188:252 189:57 190:6 208:10 209:60 210:224 211:252 212:253 213:252 214:202 215:84 216:252 217:253 218:122 236:163 237:252 238:252 239:252 240:253 241:252 242:252 243:96 244:189 245:253 246:167 263:51 264:238 265:253 266:253 267:190 268:114 269:253 270:228 271:47 272:79 273:255 274:168 290:48 291:238 292:252 293:252 294:179 295:12 296:75 297:121 298:21 301:253 302:243 303:50 317:38 318:165 319:253 320:233 321:208 322:84 329:253 330:252 331:165 344:7 345:178 346:252 347:240 348:71 349:19 350:28 357:253 358:252 359:195 372:57 373:252 374:252 375:63 385:253 386:252 387:195 400:198 401:253 402:190 413:255 414:253 415:196 427:76 428:246 429:252 430:112 441:253 442:252 443:148 455:85 456:252 457:230 458:25 467:7 468:135 469:253 470:186 471:12 483:85 484:252 485:223 494:7 495:131 496:252 497:225 498:71 511:85 512:252 513:145 521:48 522:165 523:252 524:173 539:86 540:253 541:225 548:114 549:238 550:253 551:162 567:85 568:252 569:249 570:146 571:48 572:29 573:85 574:178 575:225 576:253 577:223 578:167 579:56 595:85 596:252 597:252 598:252 599:229 600:215 601:252 602:252 603:252 604:196 605:130 623:28 624:199 625:252 626:252 627:253 628:252 629:252 630:233 631:145 652:25 653:128 654:252 655:253 656:252 657:141 658:37
+1 159:124 160:253 161:255 162:63 186:96 187:244 188:251 189:253 190:62 214:127 215:251 216:251 217:253 218:62 241:68 242:236 243:251 244:211 245:31 246:8 268:60 269:228 270:251 271:251 272:94 296:155 297:253 298:253 299:189 323:20 324:253 325:251 326:235 327:66 350:32 351:205 352:253 353:251 354:126 378:104 379:251 380:253 381:184 382:15 405:80 406:240 407:251 408:193 409:23 432:32 433:253 434:253 435:253 436:159 460:151 461:251 462:251 463:251 464:39 487:48 488:221 489:251 490:251 491:172 515:234 516:251 517:251 518:196 519:12 543:253 544:251 545:251 546:89 570:159 571:255 572:253 573:253 574:31 597:48 598:228 599:253 600:247 601:140 602:8 625:64 626:251 627:253 628:220 653:64 654:251 655:253 656:220 681:24 682:193 683:253 684:220
+1 125:145 126:255 127:211 128:31 152:32 153:237 154:253 155:252 156:71 180:11 181:175 182:253 183:252 184:71 209:144 210:253 211:252 212:71 236:16 237:191 238:253 239:252 240:71 264:26 265:221 266:253 267:252 268:124 269:31 293:125 294:253 295:252 296:252 297:108 322:253 323:252 324:252 325:108 350:255 351:253 352:253 353:108 378:253 379:252 380:252 381:108 406:253 407:252 408:252 409:108 434:253 435:252 436:252 437:108 462:255 463:253 464:253 465:170 490:253 491:252 492:252 493:252 494:42 518:149 519:252 520:252 521:252 522:144 546:109 547:252 548:252 549:252 550:144 575:218 576:253 577:253 578:255 579:35 603:175 604:252 605:252 606:253 607:35 631:73 632:252 633:252 634:253 635:35 659:31 660:211 661:252 662:253 663:35
+1 153:5 154:63 155:197 181:20 182:254 183:230 184:24 209:20 210:254 211:254 212:48 237:20 238:254 239:255 240:48 265:20 266:254 267:254 268:57 293:20 294:254 295:254 296:108 321:16 322:239 323:254 324:143 350:178 351:254 352:143 378:178 379:254 380:143 406:178 407:254 408:162 434:178 435:254 436:240 462:113 463:254 464:240 490:83 491:254 492:245 493:31 518:79 519:254 520:246 521:38 547:214 548:254 549:150 575:144 576:241 577:8 603:144 604:240 605:2 631:144 632:254 633:82 659:230 660:247 661:40 687:168 688:209 689:31
+1 152:1 153:168 154:242 155:28 180:10 181:228 182:254 183:100 209:190 210:254 211:122 237:83 238:254 239:162 265:29 266:254 267:248 268:25 293:29 294:255 295:254 296:103 321:29 322:254 323:254 324:109 349:29 350:254 351:254 352:109 377:29 378:254 379:254 380:109 405:29 406:255 407:254 408:109 433:29 434:254 435:254 436:109 461:29 462:254 463:254 464:63 489:29 490:254 491:254 492:28 517:29 518:254 519:254 520:28 545:29 546:254 547:254 548:35 573:29 574:254 575:254 576:109 601:6 602:212 603:254 604:109 630:203 631:254 632:178 658:155 659:254 660:190 686:32 687:199 688:104
+0 130:64 131:253 132:255 133:63 157:96 158:205 159:251 160:253 161:205 162:111 163:4 184:96 185:189 186:251 187:251 188:253 189:251 190:251 191:31 209:16 210:64 211:223 212:244 213:251 214:251 215:211 216:213 217:251 218:251 219:31 236:80 237:181 238:251 239:253 240:251 241:251 242:251 243:94 244:96 245:251 246:251 247:31 263:92 264:253 265:253 266:253 267:255 268:253 269:253 270:253 271:95 272:96 273:253 274:253 275:31 290:92 291:236 292:251 293:243 294:220 295:233 296:251 297:251 298:243 299:82 300:96 301:251 302:251 303:31 317:80 318:253 319:251 320:251 321:188 323:96 324:251 325:251 326:109 328:96 329:251 330:251 331:31 344:96 345:240 346:253 347:243 348:188 349:42 351:96 352:204 353:109 354:4 356:12 357:197 358:251 359:31 372:221 373:251 374:253 375:121 379:36 380:23 385:190 386:251 387:31 399:48 400:234 401:253 413:191 414:253 415:31 426:44 427:221 428:251 429:251 440:12 441:197 442:251 443:31 454:190 455:251 456:251 457:251 468:96 469:251 470:251 471:31 482:190 483:251 484:251 485:113 495:40 496:234 497:251 498:219 499:23 510:190 511:251 512:251 513:94 522:40 523:217 524:253 525:231 526:47 538:191 539:253 540:253 541:253 548:12 549:174 550:253 551:253 552:219 553:39 566:67 567:236 568:251 569:251 570:191 571:190 572:111 573:72 574:190 575:191 576:197 577:251 578:243 579:121 580:39 595:63 596:236 597:251 598:253 599:251 600:251 601:251 602:251 603:253 604:251 605:188 606:94 624:27 625:129 626:253 627:251 628:251 629:251 630:251 631:229 632:168 633:15 654:95 655:212 656:251 657:211 658:94 659:59
+1 159:121 160:254 161:136 186:13 187:230 188:253 189:248 190:99 213:4 214:118 215:253 216:253 217:225 218:42 241:61 242:253 243:253 244:253 245:74 268:32 269:206 270:253 271:253 272:186 273:9 296:211 297:253 298:253 299:239 300:69 324:254 325:253 326:253 327:133 351:142 352:255 353:253 354:186 355:8 378:149 379:229 380:254 381:207 382:21 405:54 406:229 407:253 408:254 409:105 433:152 434:254 435:254 436:213 437:26 460:112 461:251 462:253 463:253 464:26 487:29 488:212 489:253 490:250 491:149 514:36 515:214 516:253 517:253 518:137 542:75 543:253 544:253 545:253 546:59 570:93 571:253 572:253 573:189 574:17 598:224 599:253 600:253 601:84 625:43 626:235 627:253 628:126 629:1 653:99 654:248 655:253 656:119 682:225 683:235 684:49
+1 100:166 101:222 102:55 128:197 129:254 130:218 131:5 155:29 156:249 157:254 158:254 159:9 183:45 184:254 185:254 186:174 187:2 210:4 211:164 212:254 213:254 214:85 238:146 239:254 240:254 241:254 242:85 265:101 266:245 267:254 268:254 269:254 270:85 292:97 293:248 294:254 295:204 296:254 297:254 298:85 315:12 316:59 317:98 318:151 319:237 320:254 321:254 322:109 323:35 324:254 325:254 326:85 343:41 344:216 345:254 346:254 347:239 348:153 349:37 350:4 351:32 352:254 353:254 354:85 372:7 373:44 374:44 375:30 379:32 380:254 381:254 382:96 407:19 408:230 409:254 410:174 436:197 437:254 438:110 464:197 465:254 466:85 492:197 493:253 494:63 515:37 516:54 517:54 518:45 519:26 520:84 521:221 522:84 523:21 524:31 525:162 526:78 540:6 541:41 542:141 543:244 544:254 545:254 546:248 547:236 548:254 549:254 550:254 551:233 552:239 553:254 554:138 567:23 568:167 569:254 570:254 571:254 572:254 573:229 574:228 575:185 576:138 577:138 578:138 579:138 580:138 581:138 582:44 595:113 596:254 597:254 598:254 599:179 600:64 601:5 623:32 624:209 625:183 626:97
+0 155:53 156:255 157:253 158:253 159:253 160:124 183:180 184:253 185:251 186:251 187:251 188:251 189:145 190:62 209:32 210:217 211:241 212:253 213:251 214:251 215:251 216:251 217:253 218:107 237:37 238:251 239:251 240:253 241:251 242:251 243:251 244:251 245:253 246:107 265:166 266:251 267:251 268:253 269:251 270:96 271:148 272:251 273:253 274:107 291:73 292:253 293:253 294:253 295:253 296:130 299:110 300:253 301:255 302:108 319:73 320:251 321:251 322:251 323:251 327:109 328:251 329:253 330:107 347:202 348:251 349:251 350:251 351:225 354:6 355:129 356:251 357:253 358:107 375:150 376:251 377:251 378:251 379:71 382:115 383:251 384:251 385:253 386:107 403:253 404:251 405:251 406:173 407:20 410:217 411:251 412:251 413:253 414:107 430:182 431:255 432:253 433:216 438:218 439:253 440:253 441:182 457:63 458:221 459:253 460:251 461:215 465:84 466:236 467:251 468:251 469:77 485:109 486:251 487:253 488:251 489:215 492:11 493:160 494:251 495:251 496:96 513:109 514:251 515:253 516:251 517:137 520:150 521:251 522:251 523:251 524:71 541:109 542:251 543:253 544:251 545:35 547:130 548:253 549:251 550:251 551:173 552:20 569:110 570:253 571:255 572:253 573:98 574:150 575:253 576:255 577:253 578:164 597:109 598:251 599:253 600:251 601:251 602:251 603:251 604:253 605:251 606:35 625:93 626:241 627:253 628:251 629:251 630:251 631:251 632:216 633:112 634:5 654:103 655:253 656:251 657:251 658:251 659:251 683:124 684:251 685:225 686:71 687:71
+0 128:73 129:253 130:227 131:73 132:21 156:73 157:251 158:251 159:251 160:174 182:16 183:166 184:228 185:251 186:251 187:251 188:122 210:62 211:220 212:253 213:251 214:251 215:251 216:251 217:79 238:79 239:231 240:253 241:251 242:251 243:251 244:251 245:232 246:77 264:145 265:253 266:253 267:253 268:255 269:253 270:253 271:253 272:253 273:255 274:108 292:144 293:251 294:251 295:251 296:253 297:168 298:107 299:169 300:251 301:253 302:189 303:20 318:27 319:89 320:236 321:251 322:235 323:215 324:164 325:15 326:6 327:129 328:251 329:253 330:251 331:35 345:47 346:211 347:253 348:251 349:251 350:142 354:37 355:251 356:251 357:253 358:251 359:35 373:109 374:251 375:253 376:251 377:251 378:142 382:11 383:148 384:251 385:253 386:251 387:164 400:11 401:150 402:253 403:255 404:211 405:25 410:11 411:150 412:253 413:255 414:211 415:25 428:140 429:251 430:251 431:253 432:107 438:37 439:251 440:251 441:211 442:46 456:190 457:251 458:251 459:253 460:128 461:5 466:37 467:251 468:251 469:51 484:115 485:251 486:251 487:253 488:188 489:20 492:32 493:109 494:129 495:251 496:173 497:103 512:217 513:251 514:251 515:201 516:30 520:73 521:251 522:251 523:251 524:71 540:166 541:253 542:253 543:255 544:149 545:73 546:150 547:253 548:255 549:253 550:253 551:143 568:140 569:251 570:251 571:253 572:251 573:251 574:251 575:251 576:253 577:251 578:230 579:61 596:190 597:251 598:251 599:253 600:251 601:251 602:251 603:251 604:242 605:215 606:55 624:21 625:189 626:251 627:253 628:251 629:251 630:251 631:173 632:103 653:31 654:200 655:253 656:251 657:96 658:71 659:20
+1 155:178 156:255 157:105 182:6 183:188 184:253 185:216 186:14 210:14 211:202 212:253 213:253 214:23 238:12 239:199 240:253 241:128 242:6 266:42 267:253 268:253 269:158 294:42 295:253 296:253 297:158 322:155 323:253 324:253 325:158 350:160 351:253 352:253 353:147 378:160 379:253 380:253 381:41 405:17 406:225 407:253 408:235 409:31 433:24 434:253 435:253 436:176 461:24 462:253 463:253 464:176 489:24 490:253 491:253 492:176 517:24 518:253 519:253 520:176 545:24 546:253 547:253 548:162 573:46 574:253 575:253 576:59 601:142 602:253 603:253 604:59 629:142 630:253 631:253 632:59 657:142 658:253 659:202 660:8 685:87 686:253 687:139
+0 154:46 155:105 156:254 157:254 158:254 159:254 160:255 161:239 162:41 180:37 181:118 182:222 183:254 184:253 185:253 186:253 187:253 188:253 189:253 190:211 191:54 207:14 208:200 209:253 210:253 211:254 212:253 213:253 214:253 215:253 216:253 217:253 218:253 219:116 233:16 234:160 235:236 236:253 237:253 238:253 239:254 240:253 241:253 242:246 243:229 244:253 245:253 246:253 247:116 261:99 262:253 263:253 264:253 265:253 266:253 267:254 268:253 269:253 270:213 271:99 272:253 273:253 274:253 275:116 288:25 289:194 290:253 291:253 292:253 293:253 294:131 295:97 296:169 297:253 298:93 299:99 300:253 301:253 302:253 303:116 316:206 317:253 318:253 319:251 320:233 321:127 322:9 324:18 325:38 326:3 327:15 328:171 329:253 330:253 331:116 343:55 344:240 345:253 346:253 347:233 355:31 356:186 357:253 358:253 359:116 371:176 372:253 373:253 374:253 375:127 383:99 384:253 385:253 386:253 387:116 399:176 400:253 401:253 402:131 403:9 411:99 412:253 413:253 414:253 415:116 426:119 427:254 428:254 429:232 430:75 440:158 441:254 442:254 443:117 454:118 455:253 456:253 457:154 468:156 469:253 470:253 471:116 482:118 483:253 484:253 485:154 496:156 497:253 498:253 499:116 509:46 510:222 511:253 512:253 513:154 522:7 523:116 524:246 525:253 526:180 527:9 538:118 539:253 540:253 541:154 550:116 551:253 552:253 553:253 554:174 566:118 567:253 568:253 569:154 577:110 578:246 579:253 580:253 581:240 582:67 594:118 595:253 596:253 597:238 598:215 599:49 600:20 601:20 602:20 603:66 604:215 605:241 606:253 607:245 608:233 609:64 622:82 623:229 624:253 625:253 626:253 627:253 628:253 629:253 630:253 631:254 632:253 633:253 634:240 635:107 651:176 652:253 653:253 654:253 655:253 656:253 657:253 658:253 659:254 660:253 661:253 662:108 679:40 680:239 681:253 682:253 683:253 684:253 685:253 686:253 687:254 688:161 689:57 690:4
+0 152:56 153:105 154:220 155:254 156:63 178:18 179:166 180:233 181:253 182:253 183:253 184:236 185:209 186:209 187:209 188:77 189:18 206:84 207:253 208:253 209:253 210:253 211:253 212:254 213:253 214:253 215:253 216:253 217:172 218:8 233:57 234:238 235:253 236:253 237:253 238:253 239:253 240:254 241:253 242:253 243:253 244:253 245:253 246:119 260:14 261:238 262:253 263:253 264:253 265:253 266:253 267:253 268:179 269:196 270:253 271:253 272:253 273:253 274:238 275:12 288:33 289:253 290:253 291:253 292:253 293:253 294:248 295:134 297:18 298:83 299:237 300:253 301:253 302:253 303:14 316:164 317:253 318:253 319:253 320:253 321:253 322:128 327:57 328:119 329:214 330:253 331:94 343:57 344:248 345:253 346:253 347:253 348:126 349:14 350:4 357:179 358:253 359:248 360:56 371:175 372:253 373:253 374:240 375:190 376:28 385:179 386:253 387:253 388:173 399:209 400:253 401:253 402:178 413:92 414:253 415:253 416:208 427:211 428:254 429:254 430:179 442:135 443:255 444:209 455:209 456:253 457:253 458:90 470:134 471:253 472:208 483:209 484:253 485:253 486:178 497:2 498:142 499:253 500:208 511:209 512:253 513:253 514:214 515:35 525:30 526:253 527:253 528:208 539:165 540:253 541:253 542:253 543:215 544:36 553:163 554:253 555:253 556:164 567:18 568:172 569:253 570:253 571:253 572:214 573:127 574:7 580:72 581:232 582:253 583:171 584:17 596:8 597:182 598:253 599:253 600:253 601:253 602:162 603:56 607:64 608:240 609:253 610:253 611:14 625:7 626:173 627:253 628:253 629:253 630:253 631:245 632:241 633:239 634:239 635:246 636:253 637:225 638:14 639:1 654:18 655:59 656:138 657:224 658:253 659:253 660:254 661:253 662:253 663:253 664:240 665:96 685:37 686:104 687:192 688:255 689:253 690:253 691:182 692:73
+1 130:7 131:176 132:254 133:224 158:51 159:253 160:253 161:223 185:4 186:170 187:253 188:253 189:214 213:131 214:253 215:253 216:217 217:39 241:209 242:253 243:253 244:134 268:75 269:240 270:253 271:239 272:26 296:184 297:253 298:245 299:63 323:142 324:255 325:253 326:185 350:62 351:229 352:254 353:242 354:73 377:54 378:229 379:253 380:254 381:105 405:152 406:254 407:254 408:213 409:26 432:32 433:243 434:253 435:253 436:115 459:2 460:142 461:253 462:253 463:155 487:30 488:253 489:253 490:232 491:55 515:75 516:253 517:253 518:164 542:72 543:232 544:253 545:189 546:17 570:224 571:253 572:253 573:163 597:43 598:235 599:253 600:253 601:195 602:21 625:28 626:231 627:253 628:253 629:184 630:14 654:225 655:253 656:253 657:75
+0 155:21 156:176 157:253 158:253 159:124 182:105 183:176 184:251 185:251 186:251 187:251 188:105 208:58 209:217 210:241 211:253 212:251 213:251 214:251 215:251 216:243 217:113 218:5 235:63 236:231 237:251 238:251 239:253 240:251 241:251 242:251 243:251 244:253 245:251 246:113 263:144 264:251 265:251 266:251 267:253 268:251 269:251 270:251 271:251 272:253 273:251 274:215 290:125 291:253 292:253 293:253 294:253 295:255 296:253 297:253 298:253 299:253 300:255 301:253 302:227 303:42 318:253 319:251 320:251 321:251 322:251 323:253 324:251 325:251 326:251 327:251 328:253 329:251 330:251 331:142 345:27 346:253 347:251 348:251 349:235 350:241 351:253 352:251 353:246 354:137 355:35 356:98 357:251 358:251 359:236 360:61 372:47 373:211 374:253 375:251 376:235 377:82 378:103 379:253 380:251 381:137 384:73 385:251 386:251 387:251 388:71 399:27 400:211 401:251 402:253 403:251 404:86 407:72 408:71 409:10 412:73 413:251 414:251 415:173 416:20 427:89 428:253 429:253 430:255 431:253 432:35 440:73 441:253 442:253 443:253 444:72 454:84 455:236 456:251 457:251 458:253 459:251 460:138 468:73 469:251 470:251 471:251 472:71 481:63 482:236 483:251 484:251 485:251 486:227 487:251 488:246 489:138 490:11 494:16 495:37 496:228 497:251 498:246 499:137 500:10 509:73 510:251 511:251 512:251 513:173 514:42 515:142 516:142 517:142 518:41 522:109 523:251 524:253 525:251 526:137 537:73 538:251 539:251 540:173 541:20 549:27 550:211 551:251 552:253 553:147 554:10 565:73 566:253 567:253 568:143 575:21 576:176 577:253 578:253 579:253 593:73 594:251 595:251 596:205 597:144 603:176 604:251 605:251 606:188 607:107 621:62 622:236 623:251 624:251 625:251 626:218 627:217 628:217 629:217 630:217 631:253 632:230 633:189 634:20 650:83 651:158 652:251 653:251 654:253 655:251 656:251 657:251 658:251 659:253 660:107 679:37 680:251 681:251 682:253 683:251 684:251 685:251 686:122 687:72 688:30
+1 151:68 152:45 153:131 154:131 155:131 156:101 157:68 158:92 159:44 187:19 188:170 211:29 212:112 213:89 215:40 216:222 239:120 240:254 241:251 242:127 243:40 244:222 267:197 268:254 269:254 270:91 271:40 272:222 294:64 295:247 296:254 297:236 298:50 299:40 300:107 322:184 323:254 324:254 325:91 327:6 328:14 350:203 351:254 352:254 353:71 377:23 378:218 379:254 380:254 381:71 405:113 406:254 407:255 408:239 409:53 433:210 434:254 435:254 436:195 460:62 461:242 462:254 463:241 464:88 468:28 488:86 489:254 490:254 491:189 495:28 496:104 516:106 517:254 518:254 519:168 523:40 524:91 544:216 545:254 546:245 547:51 551:35 552:80 572:216 573:254 574:102 599:55 600:239 601:254 602:52 627:166 628:254 629:210 630:23 655:223 656:252 657:104 683:223 684:169
+0 125:29 126:170 127:255 128:255 129:141 151:29 152:198 153:255 154:255 155:255 156:226 157:255 158:86 178:141 179:255 180:255 181:170 182:29 184:86 185:255 186:255 187:141 204:29 205:226 206:255 207:198 208:57 213:226 214:255 215:255 216:226 217:114 231:29 232:255 233:255 234:114 241:141 242:170 243:114 244:255 245:255 246:141 259:226 260:255 261:170 269:29 270:57 273:141 274:255 275:226 286:57 287:255 288:170 302:114 303:255 304:198 314:226 315:255 331:170 332:255 333:57 342:255 343:226 360:255 361:170 370:255 371:170 388:114 389:198 398:255 399:226 416:86 417:255 426:198 427:255 444:86 445:255 454:114 455:255 456:57 472:86 473:255 482:29 483:255 484:226 500:141 501:255 511:170 512:255 513:170 528:226 529:198 539:29 540:226 541:255 542:170 555:29 556:255 557:114 568:29 569:226 570:255 571:141 582:57 583:226 584:226 598:141 599:255 600:255 601:170 602:86 607:29 608:86 609:226 610:255 611:226 612:29 627:86 628:198 629:255 630:255 631:255 632:255 633:255 634:255 635:255 636:255 637:255 638:141 639:29 657:29 658:114 659:170 660:170 661:170 662:170 663:170 664:86
+0 153:203 154:254 155:252 156:252 157:252 158:214 159:51 160:20 180:62 181:221 182:252 183:250 184:250 185:250 186:252 187:250 188:160 189:20 207:62 208:211 209:250 210:252 211:250 212:250 213:250 214:252 215:250 216:250 217:49 234:41 235:221 236:250 237:250 238:252 239:250 240:250 241:250 242:252 243:250 244:128 245:10 262:254 263:252 264:252 265:252 266:254 267:252 268:252 269:252 270:254 271:252 272:252 273:90 290:150 291:190 292:250 293:250 294:252 295:250 296:250 297:169 298:171 299:250 300:250 301:250 302:82 318:31 319:191 320:250 321:250 322:252 323:189 324:100 325:20 326:172 327:250 328:250 329:250 330:80 346:213 347:250 348:250 349:250 350:212 351:29 354:252 355:250 356:250 357:250 374:92 375:252 376:252 377:252 382:51 383:252 384:252 385:252 386:203 401:82 402:252 403:250 404:250 405:169 410:132 411:250 412:250 413:250 414:121 428:92 429:231 430:252 431:250 432:159 433:20 438:252 439:250 440:250 441:250 456:30 457:211 458:252 459:250 460:221 461:40 466:90 467:250 468:250 469:250 470:163 484:31 485:213 486:254 487:232 488:80 494:92 495:252 496:252 497:212 498:163 512:151 513:250 514:252 515:149 522:252 523:250 524:250 525:49 540:60 541:221 542:252 543:210 544:60 550:252 551:250 552:250 553:49 569:202 570:252 571:250 572:221 573:40 576:123 577:202 578:252 579:250 580:250 581:49 596:123 597:243 598:255 599:252 600:252 601:252 602:254 603:252 604:252 605:252 606:254 607:252 608:100 625:121 626:171 627:250 628:250 629:250 630:252 631:250 632:250 633:250 634:252 635:250 636:100 654:20 655:160 656:250 657:250 658:252 659:250 660:250 661:250 662:252 663:189 664:40 683:20 684:170 685:250 686:252 687:250 688:128 689:49 690:49 691:29
+1 98:64 99:191 100:70 125:68 126:243 127:253 128:249 129:63 152:30 153:223 154:253 155:253 156:247 157:41 179:73 180:238 181:253 182:253 183:253 184:242 206:73 207:236 208:253 209:253 210:253 211:253 212:242 234:182 235:253 236:253 237:191 238:247 239:253 240:149 262:141 263:253 264:143 265:86 266:249 267:253 268:122 290:9 291:36 292:7 293:14 294:233 295:253 296:122 322:230 323:253 324:122 350:230 351:253 352:122 378:231 379:255 380:123 406:230 407:253 408:52 433:61 434:245 435:253 461:98 462:253 463:253 468:35 469:12 489:98 490:253 491:253 494:9 495:142 496:233 497:146 517:190 518:253 519:253 520:128 521:7 522:99 523:253 524:253 525:180 544:29 545:230 546:253 547:253 548:252 549:210 550:253 551:253 552:253 553:140 571:28 572:207 573:253 574:253 575:253 576:254 577:253 578:253 579:235 580:70 581:9 599:126 600:253 601:253 602:253 603:253 604:254 605:253 606:168 607:19 627:79 628:253 629:253 630:201 631:190 632:132 633:63 634:5
+1 125:26 126:240 127:72 153:25 154:238 155:208 182:209 183:226 184:14 210:209 211:254 212:43 238:175 239:254 240:128 266:63 267:254 268:204 294:107 295:254 296:204 322:88 323:254 324:204 350:55 351:254 352:204 378:126 379:254 380:204 406:126 407:254 408:189 434:169 435:254 436:121 462:209 463:254 464:193 490:209 491:254 492:111 517:22 518:235 519:254 520:37 545:137 546:254 547:227 548:16 573:205 574:255 575:185 601:205 602:254 603:125 629:205 630:254 631:125 657:111 658:212 659:43
+0 155:62 156:91 157:213 158:255 159:228 160:91 161:12 182:70 183:230 184:253 185:253 186:253 187:253 188:253 189:152 190:7 210:246 211:253 212:253 213:253 214:253 215:253 216:253 217:253 218:106 237:21 238:247 239:253 240:253 241:253 242:253 243:253 244:253 245:208 246:24 265:156 266:253 267:253 268:253 269:253 270:253 271:253 272:253 273:195 292:88 293:238 294:253 295:253 296:253 297:221 298:253 299:253 300:253 301:195 320:230 321:253 322:253 323:253 324:198 325:40 326:177 327:253 328:253 329:195 346:56 347:156 348:251 349:253 350:189 351:182 352:15 354:86 355:240 356:253 357:210 358:28 374:213 375:253 376:253 377:156 378:3 383:205 384:253 385:253 386:106 401:121 402:252 403:253 404:135 405:3 411:46 412:253 413:253 414:106 428:28 429:212 430:253 431:248 432:23 439:42 440:253 441:253 442:106 456:197 457:253 458:234 459:70 467:42 468:253 469:253 470:106 483:11 484:202 485:253 486:187 495:58 496:253 497:210 498:27 511:107 512:253 513:253 514:40 522:53 523:227 524:253 525:195 539:107 540:253 541:253 542:40 549:47 550:227 551:253 552:231 553:58 567:107 568:253 569:253 570:40 575:5 576:131 577:222 578:253 579:231 580:59 595:14 596:204 597:253 598:226 599:222 600:73 601:58 602:58 603:170 604:253 605:253 606:227 607:58 624:197 625:253 626:253 627:253 628:253 629:253 630:253 631:253 632:253 633:238 634:58 652:33 653:179 654:241 655:253 656:253 657:253 658:253 659:250 660:116 661:14 682:75 683:179 684:253 685:151 686:89 687:86
+1 157:42 158:228 159:253 160:253 185:144 186:251 187:251 188:251 212:89 213:236 214:251 215:235 216:215 239:79 240:253 241:251 242:251 243:142 267:180 268:253 269:251 270:251 271:142 294:32 295:202 296:255 297:253 298:216 322:109 323:251 324:253 325:251 326:112 349:6 350:129 351:251 352:253 353:127 354:5 377:37 378:251 379:251 380:253 381:107 405:166 406:251 407:251 408:201 409:30 432:42 433:228 434:253 435:253 460:144 461:251 462:251 463:147 487:63 488:236 489:251 490:251 491:71 515:150 516:251 517:251 518:204 519:41 543:253 544:251 545:251 546:142 571:255 572:253 573:164 598:105 599:253 600:251 601:35 626:180 627:253 628:251 629:35 654:180 655:253 656:251 657:35 682:180 683:253 684:251 685:35
+1 128:62 129:254 130:213 156:102 157:253 158:252 159:102 160:20 184:102 185:254 186:253 187:254 188:50 212:102 213:253 214:252 215:253 216:50 240:102 241:254 242:253 243:254 244:50 268:142 269:253 270:252 271:253 272:50 295:51 296:253 297:254 298:253 299:224 300:20 323:132 324:252 325:253 326:252 327:162 351:173 352:253 353:254 354:253 355:102 378:82 379:253 380:252 381:253 382:252 383:61 406:203 407:254 408:253 409:254 410:233 433:41 434:243 435:253 436:252 437:253 438:111 461:132 462:253 463:254 464:253 465:203 488:41 489:253 490:252 491:253 492:252 493:40 515:11 516:213 517:254 518:253 519:254 520:151 543:92 544:252 545:253 546:252 547:192 548:50 570:21 571:214 572:253 573:255 574:253 575:41 598:142 599:253 600:252 601:253 602:171 625:113 626:253 627:255 628:253 629:203 630:40 653:30 654:131 655:233 656:111
+0 154:28 155:195 156:254 157:254 158:254 159:254 160:254 161:255 162:61 181:6 182:191 183:253 184:253 185:253 186:253 187:253 188:253 189:253 190:60 208:26 209:190 210:253 211:253 212:253 213:253 214:240 215:191 216:242 217:253 218:60 235:15 236:187 237:253 238:253 239:253 240:253 241:253 242:200 244:211 245:253 246:60 262:22 263:66 264:253 265:253 266:253 267:253 268:241 269:209 270:44 271:23 272:218 273:253 274:60 290:124 291:253 292:253 293:253 294:253 295:253 296:182 299:131 300:253 301:253 302:60 318:38 319:217 320:253 321:253 322:244 323:111 324:37 327:131 328:253 329:253 330:60 346:124 347:253 348:253 349:253 350:165 354:22 355:182 356:253 357:253 358:60 374:124 375:253 376:253 377:240 378:45 382:53 383:253 384:253 385:249 386:58 401:16 402:168 403:253 404:216 405:45 410:53 411:253 412:253 413:138 429:159 430:253 431:253 432:147 438:53 439:253 440:253 441:138 456:136 457:252 458:253 459:227 460:5 466:53 467:253 468:243 469:101 484:140 485:253 486:253 487:124 494:156 495:253 496:218 511:13 512:164 513:253 514:142 515:5 521:32 522:233 523:253 524:218 539:62 540:253 541:253 542:130 548:37 549:203 550:253 551:253 552:127 567:62 568:253 569:253 570:147 571:36 572:36 573:36 574:36 575:151 576:222 577:253 578:245 579:127 580:8 595:34 596:202 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:253 605:253 606:200 624:140 625:253 626:253 627:253 628:253 629:253 630:253 631:253 632:248 633:235 634:65 652:87 653:173 654:253 655:253 656:253 657:253 658:253 659:253 660:182 681:14 682:78 683:96 684:253 685:253 686:253 687:137 688:56
+0 123:8 124:76 125:202 126:254 127:255 128:163 129:37 130:2 150:13 151:182 152:253 153:253 154:253 155:253 156:253 157:253 158:23 177:15 178:179 179:253 180:253 181:212 182:91 183:218 184:253 185:253 186:179 187:109 205:105 206:253 207:253 208:160 209:35 210:156 211:253 212:253 213:253 214:253 215:250 216:113 232:19 233:212 234:253 235:253 236:88 237:121 238:253 239:233 240:128 241:91 242:245 243:253 244:248 245:114 260:104 261:253 262:253 263:110 264:2 265:142 266:253 267:90 270:26 271:199 272:253 273:248 274:63 287:1 288:173 289:253 290:253 291:29 293:84 294:228 295:39 299:72 300:251 301:253 302:215 303:29 315:36 316:253 317:253 318:203 319:13 328:82 329:253 330:253 331:170 343:36 344:253 345:253 346:164 356:11 357:198 358:253 359:184 360:6 371:36 372:253 373:253 374:82 385:138 386:253 387:253 388:35 399:128 400:253 401:253 402:47 413:48 414:253 415:253 416:35 427:154 428:253 429:253 430:47 441:48 442:253 443:253 444:35 455:102 456:253 457:253 458:99 469:48 470:253 471:253 472:35 483:36 484:253 485:253 486:164 496:16 497:208 498:253 499:211 500:17 511:32 512:244 513:253 514:175 515:4 524:44 525:253 526:253 527:156 540:171 541:253 542:253 543:29 551:30 552:217 553:253 554:188 555:19 568:171 569:253 570:253 571:59 578:60 579:217 580:253 581:253 582:70 596:78 597:253 598:253 599:231 600:48 604:26 605:128 606:249 607:253 608:244 609:94 610:15 624:8 625:151 626:253 627:253 628:234 629:101 630:121 631:219 632:229 633:253 634:253 635:201 636:80 653:38 654:232 655:253 656:253 657:253 658:253 659:253 660:253 661:253 662:201 663:66
+0 127:68 128:254 129:255 130:254 131:107 153:11 154:176 155:230 156:253 157:253 158:253 159:212 180:28 181:197 182:253 183:253 184:253 185:253 186:253 187:229 188:107 189:14 208:194 209:253 210:253 211:253 212:253 213:253 214:253 215:253 216:253 217:53 235:69 236:241 237:253 238:253 239:253 240:253 241:241 242:186 243:253 244:253 245:195 262:10 263:161 264:253 265:253 266:253 267:246 268:40 269:57 270:231 271:253 272:253 273:195 290:140 291:253 292:253 293:253 294:253 295:154 297:25 298:253 299:253 300:253 301:195 318:213 319:253 320:253 321:253 322:135 323:8 325:3 326:128 327:253 328:253 329:195 345:77 346:238 347:253 348:253 349:253 350:7 354:116 355:253 356:253 357:195 372:11 373:165 374:253 375:253 376:231 377:70 378:1 382:78 383:237 384:253 385:195 400:33 401:253 402:253 403:253 404:182 411:200 412:253 413:195 428:98 429:253 430:253 431:253 432:24 439:42 440:253 441:195 456:197 457:253 458:253 459:253 460:24 467:163 468:253 469:195 484:197 485:253 486:253 487:189 488:13 494:53 495:227 496:253 497:121 512:197 513:253 514:253 515:114 521:21 522:227 523:253 524:231 525:27 540:197 541:253 542:253 543:114 547:5 548:131 549:143 550:253 551:231 552:59 568:197 569:253 570:253 571:236 572:73 573:58 574:217 575:223 576:253 577:253 578:253 579:174 596:197 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:253 605:253 606:253 607:48 624:149 625:253 626:253 627:253 628:253 629:253 630:253 631:253 632:253 633:182 634:15 635:3 652:12 653:168 654:253 655:253 656:253 657:253 658:253 659:248 660:89 661:23
+1 157:85 158:255 159:103 160:1 185:205 186:253 187:253 188:30 213:205 214:253 215:253 216:30 240:44 241:233 242:253 243:244 244:27 268:135 269:253 270:253 271:100 296:153 297:253 298:240 299:76 323:12 324:208 325:253 326:166 351:69 352:253 353:253 354:142 378:14 379:110 380:253 381:235 382:33 406:63 407:223 408:235 409:130 434:186 435:253 436:235 437:37 461:17 462:145 463:253 464:231 465:35 489:69 490:220 491:231 492:123 516:18 517:205 518:253 519:176 520:27 543:17 544:125 545:253 546:185 547:39 571:71 572:214 573:231 574:41 599:167 600:253 601:225 602:33 626:72 627:205 628:207 629:14 653:30 654:249 655:233 656:49 681:32 682:253 683:89
+1 126:94 127:132 154:250 155:250 156:4 182:250 183:254 184:95 210:250 211:254 212:95 238:250 239:254 240:95 266:250 267:254 268:95 294:250 295:254 296:95 322:250 323:254 324:95 350:250 351:254 352:95 378:250 379:254 380:95 405:77 406:254 407:250 408:19 433:96 434:254 435:249 461:53 462:253 463:252 464:43 490:250 491:251 492:32 517:85 518:254 519:249 545:96 546:254 547:249 573:83 574:254 575:250 576:14 602:250 603:254 604:95 630:250 631:255 632:95 658:132 659:254 660:95
+1 124:32 125:253 126:31 152:32 153:251 154:149 180:32 181:251 182:188 208:32 209:251 210:188 236:32 237:251 238:228 239:59 264:32 265:253 266:253 267:95 292:28 293:236 294:251 295:114 321:127 322:251 323:251 349:127 350:251 351:251 377:48 378:232 379:251 406:223 407:253 408:159 434:221 435:251 436:158 462:142 463:251 464:158 490:64 491:251 492:242 493:55 518:64 519:251 520:253 521:161 546:64 547:253 548:255 549:221 574:16 575:181 576:253 577:220 603:79 604:253 605:236 606:63 632:213 633:251 634:126 660:96 661:251 662:126
+1 129:39 130:254 131:255 132:254 133:140 157:136 158:253 159:253 160:228 161:67 184:6 185:227 186:253 187:253 188:58 211:29 212:188 213:253 214:253 215:253 216:17 239:95 240:253 241:253 242:253 243:157 244:8 266:3 267:107 268:253 269:253 270:245 271:77 294:29 295:253 296:253 297:240 298:100 322:141 323:253 324:253 325:215 349:129 350:248 351:253 352:253 353:215 377:151 378:253 379:253 380:253 381:144 405:151 406:253 407:253 408:253 409:27 431:3 432:102 433:242 434:253 435:253 436:110 437:3 459:97 460:253 461:253 462:253 463:214 464:55 487:207 488:253 489:253 490:253 491:158 515:67 516:253 517:253 518:253 519:158 543:207 544:253 545:253 546:240 547:88 571:207 572:253 573:253 574:224 598:32 599:217 600:253 601:253 602:224 626:141 627:253 628:253 629:253 630:133 654:36 655:219 656:253 657:140 658:10
+0 123:59 124:55 149:71 150:192 151:254 152:250 153:147 154:17 176:123 177:247 178:253 179:254 180:253 181:253 182:196 183:79 184:176 185:175 186:175 187:124 188:48 203:87 204:247 205:247 206:176 207:95 208:102 209:117 210:243 211:237 212:192 213:232 214:253 215:253 216:245 217:152 218:6 230:23 231:229 232:253 233:138 238:219 239:58 241:95 242:118 243:80 244:230 245:254 246:196 247:30 258:120 259:254 260:205 261:8 266:114 272:38 273:255 274:254 275:155 276:5 286:156 287:253 288:92 301:61 302:235 303:253 304:102 314:224 315:253 316:78 330:117 331:253 332:196 333:18 342:254 343:253 344:78 358:9 359:211 360:253 361:73 370:254 371:253 372:78 387:175 388:253 389:155 398:194 399:254 400:101 415:79 416:254 417:155 426:112 427:253 428:211 429:9 443:73 444:251 445:200 454:41 455:241 456:253 457:87 471:25 472:240 473:253 483:147 484:253 485:227 486:47 499:94 500:253 501:200 511:5 512:193 513:253 514:230 515:76 527:175 528:253 529:155 540:31 541:219 542:254 543:255 544:126 545:18 553:14 554:149 555:254 556:244 557:45 569:21 570:158 571:254 572:253 573:226 574:162 575:118 576:96 577:20 578:20 579:73 580:118 581:224 582:253 583:247 584:85 598:30 599:155 600:253 601:253 602:253 603:253 604:254 605:253 606:253 607:253 608:253 609:254 610:247 611:84 627:5 628:27 629:117 630:206 631:244 632:229 633:213 634:213 635:213 636:176 637:117 638:32 659:45 660:23
+1 128:58 129:139 156:247 157:247 158:25 183:121 184:253 185:156 186:3 211:133 212:253 213:145 238:11 239:227 240:253 241:145 266:7 267:189 268:253 269:145 294:35 295:252 296:253 297:145 322:146 323:252 324:253 325:131 350:146 351:252 352:253 353:13 378:146 379:252 380:253 381:13 406:147 407:253 408:255 409:13 434:146 435:252 436:253 437:13 462:146 463:252 464:253 465:13 490:146 491:252 492:253 493:13 517:22 518:230 519:252 520:221 521:9 545:22 546:230 547:252 548:133 574:146 575:252 576:133 602:146 603:252 604:120 630:146 631:252 658:146 659:252
+1 129:28 130:247 131:255 132:165 156:47 157:221 158:252 159:252 160:164 184:177 185:252 186:252 187:252 188:164 212:177 213:252 214:252 215:223 216:78 240:177 241:252 242:252 243:197 267:114 268:236 269:252 270:235 271:42 294:5 295:148 296:252 297:252 298:230 321:14 322:135 323:252 324:252 325:252 326:230 349:78 350:252 351:252 352:252 353:252 354:162 377:78 378:252 379:252 380:252 381:252 382:9 405:78 406:252 407:252 408:252 409:252 410:9 432:32 433:200 434:252 435:252 436:252 437:105 438:3 459:10 460:218 461:252 462:252 463:252 464:105 465:8 487:225 488:252 489:252 490:252 491:240 492:69 514:44 515:237 516:252 517:252 518:228 519:85 541:59 542:218 543:252 544:252 545:225 546:93 568:65 569:208 570:252 571:252 572:252 573:175 596:133 597:252 598:252 599:252 600:225 601:68 624:133 625:252 626:252 627:244 628:54 652:133 653:252 654:252 655:48
+0 156:13 157:6 181:10 182:77 183:145 184:253 185:190 186:67 207:11 208:77 209:193 210:252 211:252 212:253 213:252 214:238 215:157 216:71 217:26 233:10 234:78 235:193 236:252 237:252 238:252 239:252 240:253 241:252 242:252 243:252 244:252 245:228 246:128 247:49 248:5 259:6 260:78 261:194 262:252 263:252 264:252 265:252 266:252 267:252 268:253 269:217 270:192 271:232 272:252 273:252 274:252 275:252 276:135 277:3 286:4 287:147 288:252 289:252 290:252 291:252 292:252 293:252 294:252 295:252 296:175 297:26 299:40 300:145 301:235 302:252 303:252 304:252 305:104 314:208 315:252 316:252 317:252 318:252 319:252 320:252 321:133 322:48 323:48 329:71 330:236 331:252 332:252 333:230 342:253 343:185 344:170 345:252 346:252 347:252 348:173 349:22 358:102 359:252 360:252 361:252 370:24 371:141 372:243 373:252 374:252 375:186 376:5 386:8 387:220 388:252 389:252 398:70 399:247 400:252 401:252 402:165 403:37 414:81 415:251 416:252 417:194 426:255 427:253 428:253 429:251 430:69 441:39 442:231 443:253 444:253 445:127 454:253 455:252 456:249 457:127 468:6 469:147 470:252 471:252 472:190 473:5 482:253 483:252 484:216 495:7 496:145 497:252 498:252 499:252 500:69 510:253 511:252 512:223 513:16 522:25 523:185 524:252 525:252 526:252 527:107 528:8 538:167 539:252 540:252 541:181 542:18 549:105 550:191 551:252 552:252 553:235 554:151 555:10 566:37 567:221 568:252 569:252 570:210 571:193 572:96 573:73 574:130 575:188 576:194 577:227 578:252 579:252 580:235 581:128 595:97 596:220 597:252 598:252 599:252 600:252 601:252 602:252 603:252 604:253 605:252 606:252 607:236 608:70 624:40 625:174 626:252 627:252 628:252 629:252 630:252 631:252 632:253 633:197 634:138 635:29 653:5 654:23 655:116 656:143 657:143 658:143 659:143 660:24 661:10
+0 127:28 128:164 129:254 130:233 131:148 132:11 154:3 155:164 156:254 157:234 158:225 159:254 160:204 182:91 183:254 184:235 185:48 186:32 187:166 188:251 189:92 208:33 209:111 210:214 211:205 212:49 215:24 216:216 217:210 235:34 236:217 237:254 238:254 239:211 244:87 245:237 246:43 262:34 263:216 264:254 265:254 266:252 267:243 268:61 272:38 273:248 274:182 290:171 291:254 292:184 293:205 294:175 295:36 301:171 302:227 317:28 318:234 319:190 320:13 321:193 322:157 329:124 330:238 331:26 345:140 346:254 347:131 349:129 350:157 357:124 358:254 359:95 373:201 374:238 375:56 377:70 378:103 385:124 386:254 387:148 400:62 401:255 402:210 413:150 414:254 415:122 428:86 429:254 430:201 431:15 440:28 441:237 442:246 443:44 456:128 457:254 458:143 468:34 469:243 470:227 484:62 485:254 486:210 496:58 497:249 498:179 512:30 513:240 514:210 524:207 525:254 526:64 541:216 542:231 543:34 551:129 552:248 553:170 554:9 569:131 570:254 571:170 577:17 578:129 579:248 580:225 581:24 597:50 598:245 599:245 600:184 601:106 602:106 603:106 604:133 605:231 606:254 607:244 608:53 626:67 627:249 628:254 629:254 630:254 631:254 632:254 633:251 634:193 635:40 655:38 656:157 657:248 658:166 659:166 660:139 661:57
+0 129:105 130:255 131:219 132:67 133:67 134:52 156:20 157:181 158:253 159:253 160:253 161:253 162:226 163:69 182:4 183:129 184:206 185:253 186:253 187:253 188:253 189:253 190:253 191:130 209:9 210:141 211:253 212:253 213:253 214:253 215:253 216:253 217:253 218:253 219:166 220:20 237:134 238:253 239:253 240:253 241:253 242:253 243:253 244:253 245:253 246:253 247:253 248:65 262:2 263:83 264:207 265:246 266:253 267:253 268:253 269:253 270:253 271:249 272:234 273:247 274:253 275:253 276:65 290:83 291:253 292:253 293:253 294:253 295:253 296:189 297:253 298:253 299:205 301:179 302:253 303:253 304:65 317:85 318:234 319:253 320:253 321:253 322:253 323:157 324:26 325:164 326:151 327:83 329:179 330:253 331:253 332:65 344:65 345:237 346:253 347:253 348:253 349:67 350:36 351:14 353:15 354:12 357:179 358:253 359:253 360:65 371:4 372:141 373:253 374:253 375:221 376:158 377:23 385:179 386:253 387:253 388:65 399:129 400:253 401:253 402:241 403:62 412:72 413:226 414:253 415:175 416:24 426:119 427:247 428:253 429:253 430:206 439:8 440:134 441:253 442:253 443:130 454:132 455:253 456:253 457:194 458:27 467:125 468:253 469:253 470:253 471:130 481:45 482:213 483:253 484:253 485:112 493:70 494:170 495:247 496:253 497:253 498:89 499:43 509:67 510:253 511:253 512:196 513:55 514:9 520:8 521:131 522:253 523:253 524:253 525:86 526:1 537:67 538:253 539:253 540:253 541:253 542:129 546:43 547:114 548:134 549:253 550:253 551:231 552:139 553:41 565:20 566:167 567:253 568:253 569:253 570:247 571:179 572:179 573:179 574:206 575:253 576:253 577:253 578:253 579:72 594:103 595:240 596:253 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:244 605:119 606:8 607:1 623:107 624:253 625:253 626:253 627:253 628:253 629:253 630:253 631:175 632:111 651:3 652:121 653:253 654:253 655:253 656:253 657:253 658:182 659:24
+0 125:22 126:183 127:252 128:254 129:252 130:252 131:252 132:76 151:85 152:85 153:168 154:250 155:250 156:252 157:250 158:250 159:250 160:250 161:71 163:43 164:85 165:14 178:107 179:252 180:250 181:250 182:250 183:250 184:252 185:250 186:250 187:250 188:250 189:210 191:127 192:250 193:146 205:114 206:237 207:252 208:250 209:250 210:250 211:250 212:252 213:250 214:250 215:250 216:250 217:210 219:127 220:250 221:250 232:107 233:237 234:250 235:252 236:250 237:250 238:250 239:74 240:41 241:41 242:41 243:41 244:217 245:34 247:127 248:250 249:250 259:15 260:148 261:252 262:252 263:254 264:238 265:105 275:128 276:252 277:252 286:15 287:140 288:250 289:250 290:250 291:167 292:111 303:127 304:250 305:250 314:43 315:250 316:250 317:250 318:250 331:127 332:250 333:250 342:183 343:250 344:250 345:250 346:110 358:57 359:210 360:250 361:250 370:252 371:250 372:250 373:110 374:7 386:85 387:250 388:250 389:250 398:254 399:252 400:252 401:83 414:86 415:252 416:252 417:217 426:252 427:250 428:250 429:138 430:14 441:15 442:140 443:250 444:250 445:41 454:252 455:250 456:250 457:250 458:41 469:43 470:250 471:250 472:250 473:41 482:252 483:250 484:250 485:250 486:181 497:183 498:250 499:250 500:250 501:41 510:76 511:250 512:250 513:250 514:250 524:177 525:252 526:250 527:250 528:110 529:7 538:36 539:224 540:252 541:252 542:252 543:219 544:43 545:43 546:43 547:7 549:15 550:43 551:183 552:252 553:255 554:252 555:126 567:85 568:250 569:250 570:250 571:252 572:250 573:250 574:250 575:111 576:86 577:140 578:250 579:250 580:250 581:252 582:222 583:83 595:42 596:188 597:250 598:250 599:252 600:250 601:250 602:250 603:250 604:252 605:250 606:250 607:250 608:250 609:126 610:83 624:127 625:250 626:250 627:252 628:250 629:250 630:250 631:250 632:252 633:250 634:250 635:137 636:83 652:21 653:41 654:217 655:252 656:250 657:250 658:250 659:250 660:217 661:41 662:41 663:14
+1 155:114 156:206 157:25 183:238 184:252 185:55 211:222 212:252 213:55 239:113 240:252 241:55 267:113 268:252 269:55 295:255 296:253 297:56 323:253 324:176 325:6 350:32 351:253 352:233 353:43 378:140 379:253 380:195 381:19 406:140 407:253 408:167 433:29 434:253 435:141 461:29 462:252 463:140 489:29 490:252 491:140 517:29 518:252 519:140 545:29 546:252 547:140 573:169 574:253 575:79 601:169 602:252 628:76 629:234 630:141 656:197 657:233 658:37 684:197 685:223
+1 127:73 128:253 129:253 130:63 155:115 156:252 157:252 158:144 183:217 184:252 185:252 186:144 210:63 211:237 212:252 213:252 214:144 238:109 239:252 240:252 241:252 266:109 267:252 268:252 269:252 294:109 295:252 296:252 297:252 322:191 323:252 324:252 325:252 349:145 350:255 351:253 352:253 353:253 376:32 377:237 378:253 379:252 380:252 381:210 404:37 405:252 406:253 407:252 408:252 409:108 432:37 433:252 434:253 435:252 436:252 437:108 460:21 461:207 462:255 463:253 464:253 465:108 489:144 490:253 491:252 492:252 493:108 516:27 517:221 518:253 519:252 520:252 521:108 544:16 545:190 546:253 547:252 548:252 549:108 573:145 574:255 575:253 576:253 577:253 601:144 602:253 603:252 604:252 605:210 629:144 630:253 631:252 632:252 633:108 657:62 658:253 659:252 660:252 661:108
+1 120:85 121:253 122:132 123:9 147:82 148:241 149:251 150:251 151:128 175:175 176:251 177:251 178:251 179:245 180:121 203:13 204:204 205:251 206:251 207:251 208:245 209:107 232:39 233:251 234:251 235:251 236:251 237:167 238:22 260:15 261:155 262:251 263:251 264:251 265:251 266:177 289:15 290:157 291:248 292:251 293:251 294:251 295:165 319:214 320:251 321:251 322:251 323:212 324:78 325:24 347:109 348:251 349:251 350:251 351:253 352:251 353:170 354:10 375:5 376:57 377:162 378:251 379:253 380:251 381:251 382:18 405:106 406:239 407:255 408:253 409:253 410:213 434:105 435:253 436:251 437:251 438:230 439:72 463:253 464:251 465:251 466:251 467:221 468:67 491:72 492:251 493:251 494:251 495:251 496:96 519:36 520:199 521:251 522:251 523:251 524:155 525:15 548:45 549:204 550:251 551:251 552:251 553:157 577:161 578:249 579:251 580:251 581:248 582:147 606:233 607:251 608:251 609:251 610:173 634:233 635:251 636:251 637:251 638:173 662:53 663:131 664:251 665:251 666:173
+1 126:15 127:200 128:255 129:90 154:42 155:254 156:254 157:173 182:42 183:254 184:254 185:199 210:26 211:237 212:254 213:221 214:12 239:213 240:254 241:231 242:17 267:213 268:254 269:199 295:213 296:254 297:199 323:213 324:254 325:96 350:20 351:232 352:254 353:33 378:84 379:254 380:229 381:17 406:168 407:254 408:203 433:8 434:217 435:254 436:187 461:84 462:254 463:254 464:48 489:195 490:254 491:254 492:37 516:20 517:233 518:254 519:212 520:4 544:132 545:254 546:254 547:82 571:9 572:215 573:254 574:254 575:116 576:46 599:55 600:254 601:254 602:254 603:254 604:121 627:113 628:254 629:254 630:254 631:254 632:40 655:12 656:163 657:254 658:185 659:58 660:1
+0 182:32 183:57 184:57 185:57 186:57 187:57 188:57 189:57 208:67 209:185 210:229 211:252 212:252 213:252 214:253 215:252 216:252 217:252 218:185 219:66 234:13 235:188 236:246 237:252 238:253 239:252 240:252 241:252 242:241 243:139 244:177 245:252 246:253 247:246 248:187 249:13 261:26 262:255 263:253 264:244 265:175 266:101 274:126 275:244 276:253 277:153 288:82 289:243 290:253 291:214 292:81 303:169 304:252 305:252 315:19 316:215 317:252 318:206 319:56 331:169 332:252 333:252 343:157 344:252 345:252 346:13 359:169 360:252 361:151 370:41 371:253 372:253 373:128 386:92 387:253 388:206 389:13 398:166 399:252 400:196 401:9 414:216 415:252 416:142 426:253 427:252 428:168 441:89 442:253 443:208 444:13 454:253 455:252 456:68 468:38 469:225 470:253 471:96 482:254 483:253 484:56 495:45 496:229 497:253 498:151 510:253 511:252 512:81 522:70 523:225 524:252 525:227 538:216 539:252 540:168 548:29 549:134 550:253 551:252 552:186 553:31 566:91 567:252 568:243 569:125 573:51 574:114 575:113 576:210 577:252 578:253 579:151 580:19 595:157 596:253 597:253 598:254 599:253 600:253 601:253 602:254 603:253 604:244 605:175 606:51 623:19 624:122 625:196 626:197 627:221 628:196 629:196 630:197 631:121 632:56 655:25
+0 127:42 128:235 129:255 130:84 153:15 154:132 155:208 156:253 157:253 158:171 159:108 180:6 181:177 182:253 183:253 184:253 185:253 186:253 187:242 188:110 208:151 209:253 210:253 211:253 212:253 213:253 214:253 215:253 216:139 235:48 236:208 237:253 238:253 239:253 240:253 241:253 242:253 243:253 244:139 263:85 264:253 265:253 266:253 267:253 268:236 269:156 270:184 271:253 272:148 273:6 290:7 291:141 292:253 293:253 294:253 295:253 296:27 298:170 299:253 300:253 301:74 318:19 319:253 320:253 321:253 322:253 323:253 324:27 326:170 327:253 328:253 329:74 345:16 346:186 347:253 348:253 349:253 350:242 351:105 352:4 354:170 355:253 356:253 357:94 358:1 373:141 374:253 375:253 376:253 377:242 378:100 382:170 383:253 384:253 385:253 386:8 401:141 402:253 403:253 404:253 405:224 410:170 411:253 412:253 413:253 414:8 428:12 429:158 430:253 431:253 432:230 433:51 438:18 439:237 440:253 441:253 442:8 456:76 457:253 458:253 459:218 460:61 467:236 468:253 469:253 470:8 484:76 485:253 486:253 487:168 495:110 496:253 497:132 498:3 512:76 513:253 514:253 515:168 521:20 522:174 523:239 524:147 525:5 539:5 540:155 541:253 542:253 543:168 548:102 549:170 550:253 551:253 552:139 567:3 568:128 569:253 570:253 571:228 572:179 573:179 574:179 575:179 576:245 577:253 578:253 579:219 580:41 596:76 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:253 605:253 606:253 607:163 624:39 625:199 626:253 627:253 628:253 629:253 630:253 631:253 632:253 633:253 634:170 635:9 653:36 654:219 655:253 656:253 657:253 658:253 659:253 660:224 661:65 662:22
+1 156:202 157:253 158:69 184:253 185:252 186:121 212:253 213:252 214:69 240:253 241:252 242:69 267:106 268:253 269:231 270:37 295:179 296:255 297:196 322:17 323:234 324:253 325:92 350:93 351:252 352:253 353:92 378:93 379:252 380:253 381:92 406:93 407:252 408:232 409:8 434:208 435:253 436:116 462:207 463:252 464:116 490:207 491:252 492:32 517:57 518:244 519:252 545:122 546:252 547:252 573:185 574:253 575:253 601:184 602:252 603:252 629:101 630:252 631:252 657:13 658:173 659:252 660:43 686:9 687:232 688:116
+1 156:73 157:253 158:253 159:253 160:124 184:73 185:251 186:251 187:251 188:251 212:99 213:251 214:251 215:251 216:225 240:253 241:251 242:251 243:251 244:71 266:79 267:180 268:253 269:251 270:251 271:173 272:20 294:110 295:253 296:255 297:253 298:216 322:109 323:251 324:253 325:251 326:215 350:109 351:251 352:253 353:251 354:215 378:109 379:251 380:253 381:251 382:137 406:109 407:251 408:253 409:251 410:35 433:37 434:253 435:253 436:255 437:253 438:35 461:140 462:251 463:251 464:253 465:168 466:15 488:125 489:246 490:251 491:251 492:190 493:15 516:144 517:251 518:251 519:251 520:180 543:53 544:221 545:251 546:251 547:251 548:51 571:125 572:253 573:253 574:253 575:201 598:105 599:253 600:251 601:251 602:188 603:30 626:180 627:253 628:251 629:251 630:142 654:180 655:253 656:251 657:235 658:82 682:180 683:253 684:251 685:215
+1 124:111 125:255 126:48 152:162 153:253 154:237 155:63 180:206 181:253 182:253 183:183 208:87 209:217 210:253 211:205 237:90 238:253 239:238 240:60 265:37 266:225 267:253 268:89 294:206 295:253 296:159 322:206 323:253 324:226 350:206 351:253 352:226 378:206 379:253 380:226 406:206 407:253 408:226 434:206 435:253 436:226 462:206 463:253 464:226 490:206 491:253 492:226 518:206 519:253 520:237 521:45 546:206 547:253 548:253 549:109 574:173 575:253 576:253 577:109 602:69 603:253 604:253 605:109 630:64 631:248 632:253 633:109 659:112 660:253 661:109
+0 99:70 100:255 101:165 102:114 127:122 128:253 129:253 130:253 131:120 155:165 156:253 157:253 158:253 159:234 160:52 183:99 184:253 185:253 186:253 187:253 188:228 189:26 209:60 210:168 211:238 212:202 213:174 214:253 215:253 216:253 217:127 235:91 236:81 237:1 238:215 239:128 240:28 241:12 242:181 243:253 244:253 245:175 246:3 262:18 263:204 264:253 265:77 270:7 271:253 272:253 273:253 274:54 289:54 290:248 291:253 292:253 293:143 298:1 299:127 300:253 301:253 302:188 317:104 318:253 319:253 320:253 321:20 327:81 328:249 329:253 330:191 345:192 346:253 347:253 348:218 349:5 356:203 357:253 358:208 359:21 372:56 373:237 374:253 375:250 376:100 384:104 385:253 386:253 387:75 400:76 401:253 402:253 403:224 412:119 413:253 414:253 415:75 428:80 429:253 430:253 431:103 439:4 440:241 441:253 442:218 443:32 456:213 457:253 458:253 459:103 467:125 468:253 469:253 470:191 484:213 485:253 486:253 487:103 494:3 495:176 496:253 497:253 498:135 512:213 513:253 514:253 515:103 521:9 522:162 523:253 524:253 525:226 526:37 540:179 541:253 542:253 543:135 548:46 549:157 550:253 551:253 552:253 553:63 568:23 569:188 570:253 571:249 572:179 573:179 574:179 575:179 576:233 577:253 578:253 579:233 580:156 581:10 597:51 598:235 599:253 600:253 601:253 602:253 603:253 604:253 605:251 606:232 607:120 626:16 627:124 628:253 629:253 630:253 631:253 632:152 633:104
+1 124:29 125:197 126:255 127:84 152:85 153:251 154:253 155:83 180:86 181:253 182:254 183:253 208:85 209:251 210:253 211:251 236:86 237:253 238:254 239:253 240:169 264:85 265:251 266:253 267:251 268:168 292:86 293:253 294:254 295:253 296:169 320:28 321:196 322:253 323:251 324:168 349:169 350:254 351:253 352:169 377:168 378:253 379:251 380:168 405:169 406:254 407:253 408:169 433:168 434:253 435:251 436:168 462:254 463:253 464:254 465:139 490:253 491:251 492:253 493:251 518:254 519:253 520:254 521:253 522:57 546:253 547:251 548:253 549:251 550:168 574:198 575:253 576:254 577:253 578:114 602:85 603:251 604:253 605:251 630:85 631:253 632:254 633:253 658:28 659:83 660:196 661:83
+1 159:31 160:210 161:253 162:163 187:198 188:252 189:252 190:162 213:10 214:86 215:242 216:252 217:252 218:66 241:164 242:252 243:252 244:252 245:188 246:8 268:53 269:242 270:252 271:252 272:225 273:14 296:78 297:252 298:252 299:252 300:204 323:56 324:231 325:252 326:252 327:212 328:35 351:157 352:252 353:252 354:252 355:37 377:8 378:132 379:253 380:252 381:252 382:230 383:24 405:45 406:252 407:253 408:252 409:154 410:55 427:7 428:55 433:107 434:253 435:255 436:228 437:53 454:15 455:24 456:23 460:110 461:242 462:252 463:228 464:59 482:57 483:83 487:88 488:247 489:252 490:252 491:140 514:15 515:189 516:252 517:252 518:252 542:74 543:252 544:252 545:238 546:90 570:178 571:252 572:252 573:189 597:40 598:217 599:252 600:252 601:59 625:75 626:252 627:252 628:252 629:85 630:61 653:62 654:239 655:252 656:156 657:14 682:178 683:252 684:14
+1 131:159 132:255 133:122 158:167 159:228 160:253 161:121 185:64 186:236 187:251 188:205 189:110 212:48 213:158 214:251 215:251 216:178 217:39 240:190 241:251 242:251 243:251 267:96 268:253 269:253 270:253 271:153 295:194 296:251 297:251 298:211 299:74 322:80 323:174 324:251 325:251 326:140 327:47 349:16 350:181 351:253 352:251 353:219 354:23 377:64 378:251 379:253 380:251 381:204 382:19 405:223 406:253 407:255 408:233 409:48 431:20 432:174 433:244 434:251 435:253 436:109 437:31 459:96 460:189 461:251 462:251 463:126 464:31 486:24 487:106 488:251 489:235 490:188 491:100 514:96 515:251 516:251 517:228 518:59 542:255 543:253 544:253 545:213 546:36 569:100 570:253 571:251 572:251 573:85 574:23 596:32 597:127 598:253 599:235 600:126 601:15 624:104 625:251 626:253 627:240 628:79 652:83 653:193 654:253 655:220
+0 153:92 154:191 155:178 156:253 157:242 158:141 159:104 160:29 180:26 181:253 182:252 183:252 184:252 185:253 186:252 187:252 188:252 189:108 190:19 206:57 207:123 208:222 209:253 210:252 211:252 212:252 213:168 214:224 215:252 216:252 217:253 218:84 233:176 234:243 235:252 236:252 237:253 238:252 239:252 240:252 242:19 243:153 244:252 245:253 246:209 247:25 259:10 260:128 261:255 262:253 263:244 264:225 265:114 266:194 267:253 268:178 272:163 273:254 274:253 275:168 287:85 288:252 289:253 290:189 291:56 294:19 295:133 296:9 300:38 301:253 302:252 303:168 314:19 315:191 316:252 317:194 318:19 329:253 330:252 331:234 332:22 342:107 343:252 344:252 345:13 357:253 358:252 359:252 360:128 370:169 371:253 372:241 385:141 386:253 387:253 388:140 397:19 398:225 399:252 400:139 413:66 414:252 415:252 416:139 425:29 426:252 427:252 428:52 441:29 442:252 443:252 444:139 453:29 454:252 455:252 456:28 469:29 470:252 471:252 472:40 481:141 482:253 483:253 484:91 497:154 498:253 499:168 509:66 510:252 511:252 512:165 525:253 526:252 527:168 537:19 538:224 539:252 540:252 552:126 553:253 554:252 555:80 566:169 567:252 568:252 569:214 570:38 579:126 580:249 581:253 582:151 583:6 594:26 595:223 596:253 597:254 598:253 599:128 600:29 604:13 605:41 606:216 607:253 608:253 609:226 610:38 623:122 624:252 625:253 626:252 627:252 628:252 629:169 630:169 631:169 632:206 633:253 634:252 635:252 636:202 637:38 651:19 652:56 653:168 654:224 655:252 656:252 657:253 658:252 659:252 660:252 661:253 662:233 663:130 664:6 682:94 683:139 684:190 685:153 686:252 687:164 688:139 689:28 690:22
+1 128:53 129:250 130:255 131:25 156:167 157:253 158:253 159:25 182:3 183:123 184:247 185:253 186:253 187:25 210:9 211:253 212:253 213:253 214:253 215:25 238:9 239:253 240:253 241:253 242:253 243:25 266:9 267:253 268:253 269:253 270:180 271:13 294:9 295:253 296:253 297:253 298:104 322:9 323:253 324:253 325:253 326:104 350:15 351:253 352:253 353:253 354:104 378:184 379:253 380:253 381:228 382:68 406:184 407:253 408:253 409:182 433:103 434:251 435:253 436:253 437:12 461:106 462:253 463:253 464:253 465:8 488:24 489:238 490:253 491:253 492:253 493:8 516:27 517:253 518:253 519:253 520:253 521:8 544:27 545:253 546:253 547:253 548:253 549:8 572:27 573:253 574:253 575:253 576:177 577:4 600:160 601:253 602:253 603:253 604:87 628:202 629:253 630:253 631:219 632:54 656:81 657:253 658:247 659:51
+0 122:63 123:176 124:253 125:253 126:159 127:113 128:63 150:140 151:253 152:252 153:252 154:252 155:252 156:241 157:100 158:66 177:54 178:227 179:253 180:252 181:252 182:252 183:252 184:253 185:252 186:239 187:181 188:57 204:38 205:224 206:252 207:253 208:226 209:246 210:252 211:252 212:253 213:252 214:252 215:252 216:252 217:108 218:3 232:57 233:252 234:252 235:253 236:27 237:88 238:112 239:112 240:112 241:112 242:142 243:252 244:252 245:253 246:152 247:31 260:198 261:253 262:253 263:79 270:32 271:153 272:253 273:255 274:253 275:196 287:76 288:246 289:252 290:127 299:3 300:106 301:253 302:252 303:214 304:28 315:194 316:252 317:252 318:112 329:143 330:252 331:252 332:193 343:225 344:252 345:217 346:37 357:38 358:234 359:252 360:223 370:63 371:240 372:252 373:84 386:146 387:252 388:223 398:114 399:253 400:228 401:47 414:147 415:253 416:253 417:112 426:159 427:252 428:195 442:225 443:252 444:252 445:112 454:253 455:252 456:195 470:225 471:252 472:230 473:25 482:159 483:252 484:202 485:10 497:92 498:243 499:252 500:208 510:113 511:252 512:252 513:161 524:79 525:253 526:252 527:220 528:37 538:114 539:253 540:253 541:253 542:174 543:63 550:26 551:128 552:253 553:255 554:253 555:133 566:12 567:228 568:252 569:252 570:252 571:241 572:100 573:85 574:76 576:85 577:131 578:231 579:252 580:252 581:253 582:129 583:6 595:97 596:208 597:252 598:252 599:253 600:252 601:252 602:246 603:197 604:253 605:252 606:252 607:252 608:220 609:133 610:6 624:19 625:99 626:239 627:253 628:252 629:252 630:252 631:252 632:253 633:252 634:245 635:223 636:99 654:63 655:112 656:112 657:221 658:252 659:252 660:253 661:127 662:87
+0 153:12 154:136 155:254 156:255 157:195 158:115 159:3 180:6 181:175 182:253 183:196 184:160 185:252 186:253 187:15 208:130 209:253 210:234 211:4 213:27 214:205 215:232 216:40 235:54 236:246 237:253 238:68 242:24 243:243 244:106 262:3 263:134 264:235 265:99 266:4 271:132 272:247 273:77 290:56 291:253 292:62 299:23 300:233 301:129 318:179 319:183 320:4 328:182 329:220 345:21 346:232 347:59 356:95 357:232 358:21 373:128 374:183 385:228 386:85 401:187 402:124 413:228 414:186 429:187 430:124 441:228 442:104 457:187 458:124 469:169 470:184 485:187 486:124 497:203 498:150 513:187 514:124 524:10 525:220 526:39 541:187 542:155 552:111 553:201 569:129 570:228 571:7 579:12 580:181 581:76 598:234 599:166 600:9 606:24 607:209 608:106 626:139 627:250 628:167 629:11 630:2 631:11 632:11 633:129 634:227 635:90 636:11 655:95 656:247 657:253 658:178 659:253 660:253 661:244 662:86 684:47 685:175 686:253 687:232 688:149 689:40
+1 128:255 129:253 130:57 156:253 157:251 158:225 159:56 183:169 184:254 185:253 186:254 187:84 211:168 212:253 213:251 214:253 215:83 238:85 239:253 240:254 241:253 242:169 266:85 267:251 268:253 269:251 270:56 294:141 295:253 296:254 297:253 322:253 323:251 324:253 325:251 350:254 351:253 352:254 353:253 378:253 379:251 380:253 381:251 406:254 407:253 408:254 409:196 433:114 434:253 435:251 436:253 437:83 461:169 462:254 463:253 464:226 465:56 489:168 490:253 491:251 492:168 516:85 517:253 518:254 519:253 544:85 545:251 546:253 547:251 572:254 573:253 574:254 575:253 600:253 601:251 602:253 603:251 628:254 629:253 630:254 631:253 656:139 657:251 658:253 659:138
+0 151:23 152:167 153:208 154:254 155:255 156:129 157:19 179:151 180:253 181:253 182:253 183:253 184:253 185:209 186:26 207:181 208:253 209:253 210:253 211:227 212:181 213:253 214:207 215:22 235:227 236:253 237:253 238:253 239:92 240:38 241:226 242:253 243:129 244:2 263:193 264:253 265:253 266:248 267:62 269:50 270:253 271:253 272:45 291:170 292:253 293:253 294:135 297:12 298:208 299:253 300:119 318:16 319:232 320:253 321:253 322:21 326:60 327:253 328:185 346:164 347:253 348:253 349:224 350:14 354:14 355:217 356:247 357:62 373:3 374:193 375:253 376:250 377:64 383:199 384:253 385:179 401:67 402:253 403:253 404:205 411:98 412:253 413:188 429:151 430:253 431:245 432:43 439:63 440:250 441:188 457:151 458:253 459:243 468:244 469:222 470:22 485:151 486:253 487:217 496:244 497:253 498:115 512:3 513:195 514:253 515:134 524:156 525:253 526:150 541:140 542:253 543:134 552:239 553:253 554:139 569:44 570:253 571:134 579:53 580:246 581:237 582:32 597:8 598:200 599:229 600:40 606:25 607:225 608:253 609:188 626:120 627:250 628:230 629:58 630:17 632:12 633:42 634:213 635:253 636:238 637:84 655:151 656:253 657:253 658:217 659:179 660:206 661:253 662:253 663:196 664:118 683:18 684:58 685:145 686:152 687:253 688:214 689:145 690:74 691:7
+1 130:24 131:150 132:233 133:38 156:14 157:89 158:253 159:254 160:254 161:71 183:78 184:203 185:254 186:254 187:254 188:232 189:77 190:54 191:8 209:12 210:155 211:240 212:254 213:223 214:76 215:254 216:254 217:254 218:254 219:68 235:3 236:101 237:216 238:254 239:227 240:122 241:26 242:110 243:254 244:254 245:254 246:184 247:100 262:46 263:222 264:254 265:254 266:179 267:48 270:181 271:254 272:254 273:146 274:6 288:2 289:145 290:248 291:254 292:182 293:111 294:4 297:3 298:250 299:254 300:206 301:3 315:6 316:144 317:254 318:254 319:171 325:125 326:254 327:252 328:80 342:6 343:142 344:254 345:179 346:95 347:4 352:61 353:246 354:254 355:150 370:64 371:254 372:177 373:14 380:124 381:254 382:246 383:32 398:108 399:97 400:15 407:24 408:226 409:254 410:116 435:177 436:255 437:254 438:5 463:196 464:254 465:99 466:1 490:3 491:199 492:254 493:79 518:129 519:254 520:254 521:23 546:178 547:254 548:192 549:8 550:3 551:43 573:11 574:198 575:254 576:128 577:66 578:130 579:225 595:137 596:202 597:106 598:84 599:84 600:84 601:112 602:254 603:254 604:254 605:254 606:212 607:151 623:172 624:254 625:254 626:254 627:254 628:254 629:254 630:254 631:254 632:254 633:162 634:75 651:12 652:106 653:177 654:254 655:254 656:254 657:235 658:135 659:100 660:17 661:2
+0 125:120 126:253 127:253 128:63 151:38 152:131 153:246 154:252 155:252 156:203 157:15 179:222 180:252 181:252 182:252 183:252 184:166 185:38 205:4 206:107 207:253 208:252 209:252 210:252 211:252 212:253 213:224 214:137 215:26 233:107 234:252 235:253 236:252 237:220 238:128 239:252 240:253 241:252 242:252 243:239 244:140 261:170 262:253 263:255 264:168 267:79 268:192 269:253 270:253 271:253 272:253 273:255 274:90 288:51 289:243 290:252 291:215 292:33 296:12 297:74 298:233 299:252 300:252 301:253 302:195 303:19 316:166 317:252 318:252 319:31 326:43 327:149 328:195 329:253 330:252 331:177 332:19 343:57 344:234 345:252 346:252 357:237 358:252 359:252 360:180 361:13 371:85 372:252 373:252 374:173 385:50 386:237 387:252 388:252 389:112 399:226 400:253 401:240 402:63 414:163 415:253 416:253 417:112 426:38 427:234 428:252 429:176 442:85 443:252 444:252 445:158 454:113 455:252 456:252 457:84 470:19 471:209 472:252 473:252 482:207 483:252 484:252 485:84 498:10 499:203 500:252 501:236 510:253 511:252 512:252 513:84 526:85 527:252 528:252 529:112 538:114 539:253 540:253 541:146 553:51 554:159 555:253 556:240 557:63 566:75 567:243 568:252 569:249 570:146 579:57 580:85 581:238 582:252 583:252 584:99 595:116 596:252 597:252 598:252 599:198 600:197 601:165 602:57 603:57 604:57 605:182 606:197 607:234 608:252 609:253 610:233 611:164 612:19 623:28 624:84 625:180 626:252 627:253 628:252 629:252 630:252 631:252 632:253 633:252 634:252 635:252 636:252 637:225 638:71 653:13 654:112 655:253 656:252 657:252 658:252 659:252 660:253 661:252 662:252 663:157 664:112
+1 127:155 128:253 129:126 155:253 156:251 157:141 158:4 183:253 184:251 185:251 186:31 211:253 212:251 213:251 214:31 239:253 240:251 241:251 242:31 267:255 268:253 269:253 270:31 293:8 294:131 295:253 296:251 297:235 298:27 321:64 322:251 323:253 324:251 325:126 349:64 350:251 351:253 352:251 353:126 377:64 378:251 379:253 380:251 381:126 405:64 406:253 407:255 408:221 433:182 434:251 435:253 436:200 460:64 461:236 462:251 463:253 464:62 487:8 488:158 489:251 490:251 491:169 492:8 515:32 516:251 517:251 518:251 519:158 543:32 544:253 545:253 546:253 547:159 571:32 572:251 573:251 574:251 575:39 599:32 600:251 601:251 602:251 627:32 628:251 629:251 630:251 631:100 655:32 656:251 657:251 658:251
+0 101:88 102:127 103:5 126:19 127:58 128:20 129:14 130:217 131:19 152:7 153:146 154:247 155:253 156:235 157:27 158:84 159:81 180:126 181:253 182:164 183:19 184:15 187:156 188:9 208:214 209:222 210:34 215:234 216:58 235:59 236:254 237:116 243:235 244:58 263:141 264:251 265:72 271:151 272:140 291:224 292:233 299:136 300:223 319:254 320:218 327:136 328:253 347:254 348:135 355:136 356:253 374:23 375:255 376:114 383:137 384:231 402:98 403:254 404:122 411:136 412:155 430:98 431:254 432:106 439:166 440:155 458:98 459:254 460:128 467:234 468:193 486:98 487:254 488:135 494:61 495:248 496:118 515:255 516:238 517:18 521:13 522:224 523:254 524:58 543:201 544:253 545:128 546:2 548:5 549:150 550:253 551:167 552:9 571:18 572:226 573:253 574:49 575:31 576:156 577:253 578:228 579:13 600:147 601:253 602:243 603:241 604:254 605:227 606:43 628:5 629:126 630:245 631:253 632:231 633:46
+0 127:37 128:141 129:156 130:156 131:194 132:194 133:47 153:11 154:132 155:239 156:253 157:253 158:253 159:253 160:254 161:181 180:25 181:172 182:253 183:235 184:167 185:78 186:93 187:174 188:254 189:247 190:54 207:26 208:210 209:253 210:237 211:90 216:201 217:253 218:78 235:192 236:253 237:237 238:58 244:156 245:253 246:78 262:141 263:254 264:235 265:53 269:19 270:5 272:156 273:254 274:78 289:46 290:254 291:253 292:92 296:17 297:226 298:217 299:49 300:148 301:253 302:78 317:165 318:254 319:239 320:24 324:20 325:253 326:253 327:58 328:18 329:115 330:24 344:37 345:248 346:254 347:91 352:2 353:117 354:250 355:163 356:91 372:77 373:253 374:254 375:39 382:196 383:253 384:173 400:159 401:254 402:218 403:15 410:77 411:254 412:255 413:61 428:234 429:253 430:113 438:21 439:226 440:254 441:135 455:25 456:240 457:253 458:68 467:195 468:254 469:135 483:79 484:253 485:253 495:195 496:254 497:135 511:79 512:253 513:253 514:76 523:195 524:254 525:99 540:212 541:254 542:209 543:9 550:10 551:209 552:196 553:15 568:54 569:253 570:254 571:137 572:36 576:2 577:20 578:168 579:253 580:60 596:28 597:235 598:254 599:253 600:199 601:124 602:79 603:79 604:167 605:253 606:253 607:185 608:30 625:15 626:117 627:217 628:253 629:253 630:253 631:254 632:253 633:240 634:109 635:12 655:27 656:126 657:208 658:253 659:193 660:147 661:40
+0 154:32 155:134 156:218 157:254 158:254 159:254 160:217 161:84 176:44 177:208 178:215 179:156 180:35 181:119 182:236 183:246 184:136 185:91 186:69 187:151 188:249 189:246 190:78 203:44 204:230 205:254 206:254 207:254 208:254 209:254 210:196 211:48 216:60 217:224 218:210 219:24 231:118 232:254 233:202 234:19 235:201 236:254 237:181 238:9 245:35 246:233 247:168 259:193 260:223 261:34 263:59 264:163 265:236 266:15 274:140 275:205 276:8 286:60 287:254 288:176 293:38 302:54 303:237 304:80 314:59 315:254 316:93 331:131 332:200 342:59 343:240 344:24 359:79 360:214 370:59 371:234 387:67 388:248 389:54 398:59 399:234 416:235 417:58 426:60 427:235 443:79 444:255 445:59 454:59 455:251 456:66 471:79 472:250 473:54 482:59 483:254 484:108 499:146 500:214 510:5 511:203 512:187 513:3 526:4 527:188 528:199 539:118 540:254 541:57 554:96 555:254 556:117 567:16 568:237 569:224 570:14 581:14 582:187 583:206 584:8 596:88 597:252 598:186 599:16 608:16 609:187 610:252 611:125 625:100 626:254 627:237 628:94 629:24 635:13 636:214 637:254 638:166 653:3 654:57 655:215 656:248 657:241 658:235 659:197 660:137 661:137 662:137 663:231 664:238 665:155 666:25 684:57 685:155 686:246 687:254 688:254 689:254 690:254 691:147 692:36
+1 124:102 125:252 126:252 127:41 152:102 153:250 154:250 155:202 180:102 181:250 182:250 183:232 184:91 208:102 209:250 210:250 211:212 212:29 236:102 237:252 238:252 239:254 240:150 264:102 265:250 266:250 267:252 268:149 292:102 293:250 294:250 295:252 296:149 320:102 321:250 322:250 323:252 324:231 325:80 349:152 350:252 351:254 352:252 353:100 377:151 378:250 379:252 380:250 381:100 405:151 406:250 407:252 408:250 409:100 433:151 434:250 435:252 436:250 437:100 461:123 462:243 463:254 464:252 465:100 490:202 491:252 492:250 493:100 518:80 519:252 520:250 521:190 522:30 547:252 548:250 549:250 550:49 575:255 576:252 577:252 578:252 579:214 580:31 603:171 604:250 605:250 606:250 607:252 608:190 609:40 631:20 632:160 633:250 634:250 635:252 636:250 637:100 660:20 661:170 662:250 663:212 664:49 665:20
+0 124:20 125:121 126:197 127:253 128:64 151:23 152:200 153:252 154:252 155:252 156:184 157:6 178:25 179:197 180:252 181:252 182:252 183:252 184:253 185:228 186:107 187:15 205:26 206:196 207:252 208:252 209:252 210:252 211:252 212:253 213:252 214:252 215:219 216:178 217:21 233:186 234:252 235:238 236:94 237:67 238:224 239:217 240:53 241:109 242:245 243:252 244:252 245:213 246:63 260:98 261:242 262:252 263:101 266:39 267:31 270:109 271:128 272:241 273:252 274:207 275:97 287:17 288:230 289:252 290:241 291:56 300:109 301:252 302:252 303:229 304:17 314:13 315:192 316:252 317:243 318:96 328:25 329:127 330:252 331:252 332:120 342:121 343:252 344:252 345:165 357:125 358:252 359:252 360:246 361:70 370:190 371:252 372:252 373:39 385:26 386:210 387:252 388:252 389:119 398:255 399:253 400:159 414:22 415:209 416:253 417:183 426:253 427:252 428:103 443:34 444:252 445:252 454:253 455:252 456:26 471:27 472:252 473:252 482:253 483:252 484:168 485:13 499:70 500:252 501:209 510:147 511:252 512:252 513:75 526:68 527:233 528:252 529:119 538:121 539:252 540:252 541:189 542:40 552:15 553:82 554:231 555:252 556:214 557:31 566:38 567:135 568:248 569:252 570:231 571:145 572:41 573:41 574:41 575:41 576:20 577:24 578:37 579:83 580:194 581:252 582:252 583:212 584:33 596:83 597:213 598:252 599:252 600:252 601:252 602:252 603:252 604:204 605:213 606:243 607:252 608:252 609:252 610:212 611:34 625:34 626:140 627:238 628:248 629:252 630:252 631:252 632:253 633:252 634:252 635:241 636:238 637:238 638:75 656:82 657:119 658:119 659:119 660:120 661:119 662:119 663:19
+1 127:20 128:254 129:255 130:37 155:19 156:253 157:253 158:134 183:19 184:253 185:253 186:246 187:125 211:76 212:253 213:253 214:253 215:158 239:207 240:253 241:253 242:253 243:158 267:207 268:253 269:253 270:253 271:158 294:48 295:223 296:253 297:253 298:243 299:106 322:141 323:253 324:253 325:253 326:113 349:65 350:237 351:253 352:253 353:253 354:36 377:76 378:253 379:253 380:253 381:253 382:36 405:76 406:253 407:253 408:253 409:253 410:36 433:76 434:253 435:253 436:253 437:118 438:4 460:4 461:148 462:253 463:253 464:253 465:103 488:10 489:253 490:253 491:253 492:253 493:103 516:10 517:253 518:253 519:253 520:173 521:7 544:10 545:253 546:253 547:253 548:168 572:143 573:253 574:253 575:239 576:49 600:198 601:253 602:253 603:234 615:140 628:198 629:253 630:253 631:234 656:198 657:253 658:253 659:234
+0 235:40 236:37 238:7 239:77 240:137 241:136 242:136 243:136 244:136 245:40 246:6 261:16 262:135 263:254 264:233 266:152 267:215 268:96 269:140 270:155 271:118 272:230 273:254 274:158 275:68 288:19 289:164 290:254 291:114 294:235 295:140 301:99 302:230 303:254 304:186 305:14 315:70 316:226 317:242 318:121 322:104 323:195 324:38 330:33 331:179 332:253 333:140 342:41 343:241 344:198 345:43 359:24 360:209 361:223 370:164 371:250 372:66 388:136 389:253 398:254 399:158 416:136 417:215 426:255 427:76 442:5 443:127 444:246 445:133 454:254 455:122 469:5 470:150 471:247 472:91 473:9 482:254 483:165 495:13 496:79 497:194 498:216 499:84 510:111 511:251 512:87 519:16 520:25 521:40 522:107 523:186 524:213 525:117 526:25 538:14 539:185 540:235 541:142 542:23 546:91 547:157 548:231 549:207 550:126 551:49 569:143 570:195 571:255 572:254 573:254 574:244 575:157 576:76 599:39 600:39 601:39 602:33
+1 128:166 129:255 130:187 131:6 156:165 157:253 158:253 159:13 183:15 184:191 185:253 186:253 187:13 211:49 212:253 213:253 214:253 215:13 239:141 240:253 241:253 242:169 243:4 266:4 267:189 268:253 269:249 270:53 294:69 295:253 296:253 297:246 322:69 323:253 324:253 325:246 350:118 351:253 352:253 353:124 378:206 379:253 380:231 381:21 405:66 406:241 407:253 408:199 433:105 434:253 435:253 436:89 460:3 461:228 462:253 463:252 464:86 488:111 489:253 490:253 491:205 516:166 517:253 518:253 519:75 543:43 544:249 545:253 546:193 547:9 570:4 571:160 572:253 573:253 574:184 598:37 599:253 600:253 601:253 602:88 626:140 627:253 628:253 629:186 630:18 654:14 655:253 656:253 657:27
+1 128:117 129:128 155:2 156:199 157:127 183:81 184:254 185:87 211:116 212:254 213:48 239:175 240:234 241:18 266:5 267:230 268:168 294:80 295:255 296:142 322:80 323:255 324:142 350:80 351:251 352:57 378:129 379:239 406:164 407:209 433:28 434:245 435:159 461:64 462:254 463:144 489:84 490:254 491:80 517:143 518:254 519:30 544:3 545:225 546:200 572:48 573:254 574:174 600:48 601:254 602:174 628:93 629:254 630:129 656:53 657:234 658:41
+1 129:159 130:142 156:11 157:220 158:141 184:78 185:254 186:141 212:111 213:254 214:109 240:196 241:221 242:15 267:26 268:221 269:159 295:63 296:254 297:159 323:178 324:254 325:93 350:7 351:191 352:254 353:97 378:42 379:255 380:254 381:41 406:42 407:254 408:195 409:10 434:141 435:255 436:78 461:11 462:202 463:254 464:59 489:86 490:254 491:254 492:59 517:142 518:254 519:248 520:52 545:142 546:254 547:195 573:142 574:254 575:164 601:142 602:254 603:77 629:142 630:254 631:131 657:77 658:172 659:5
+0 124:66 125:254 126:254 127:58 128:60 129:59 130:59 131:50 151:73 152:233 153:253 154:253 155:148 156:254 157:253 158:253 159:232 160:73 179:156 180:253 181:253 182:253 183:117 184:255 185:253 186:253 187:253 188:223 189:176 190:162 205:37 206:116 207:246 208:253 209:180 210:18 211:4 212:18 213:109 214:241 215:253 216:253 217:253 218:236 219:28 233:235 234:253 235:253 236:245 237:107 242:109 243:170 244:253 245:253 246:253 247:174 261:235 262:253 263:253 264:233 271:15 272:156 273:253 274:253 275:223 276:72 287:10 288:156 289:250 290:253 291:253 292:67 300:99 301:253 302:253 303:253 304:127 305:5 315:118 316:253 317:253 318:253 319:204 320:26 328:68 329:223 330:253 331:253 332:253 333:57 342:32 343:191 344:253 345:253 346:253 347:97 357:156 358:253 359:253 360:253 361:57 370:59 371:253 372:253 373:253 374:253 375:97 385:36 386:224 387:253 388:253 389:57 398:60 399:254 400:255 401:254 402:156 413:37 414:226 415:254 416:254 417:58 426:59 427:253 428:253 429:253 430:154 441:156 442:253 443:253 444:253 445:57 454:59 455:253 456:253 457:253 458:154 469:156 470:253 471:253 472:253 473:57 482:59 483:253 484:253 485:253 486:246 487:90 496:16 497:171 498:253 499:253 500:231 501:49 510:59 511:253 512:253 513:253 514:253 515:156 516:91 524:99 525:253 526:253 527:222 528:71 538:59 539:253 540:253 541:253 542:253 543:253 544:245 545:109 551:145 552:194 553:253 554:253 555:174 566:9 567:38 568:174 569:251 570:253 571:253 572:253 573:241 574:215 575:215 576:217 577:215 578:215 579:250 580:253 581:253 582:221 583:26 597:235 598:253 599:253 600:253 601:253 602:253 603:253 604:254 605:253 606:253 607:253 608:253 609:204 610:26 625:108 626:116 627:200 628:253 629:253 630:253 631:253 632:254 633:253 634:253 635:253 636:199 637:44 655:36 656:57 657:118 658:253 659:253 660:58 661:57 662:57 663:57 664:35
+1 129:101 130:222 131:84 157:225 158:252 159:84 184:89 185:246 186:208 187:19 212:128 213:252 214:195 239:79 240:253 241:252 242:195 267:141 268:255 269:253 270:133 294:26 295:240 296:253 297:252 298:55 322:60 323:252 324:253 325:154 326:12 349:7 350:178 351:252 352:253 353:27 377:57 378:252 379:252 380:253 381:27 405:57 406:253 407:253 408:204 409:15 433:104 434:252 435:252 436:94 460:19 461:209 462:252 463:252 488:101 489:252 490:252 491:157 516:225 517:252 518:252 519:112 544:226 545:253 546:240 547:63 572:225 573:252 574:223 600:225 601:252 602:223 628:225 629:252 630:242 631:75 656:146 657:252 658:236 659:50
+0 124:41 125:254 126:254 127:157 128:34 129:34 130:218 131:255 132:206 133:34 134:18 151:53 152:238 153:252 154:252 155:252 156:252 157:252 158:252 159:252 160:252 161:252 162:162 163:26 178:66 179:220 180:252 181:252 182:252 183:209 184:153 185:223 186:252 187:252 188:252 189:252 190:252 191:98 206:166 207:252 208:252 209:252 210:252 211:141 213:85 214:230 215:252 216:252 217:252 218:252 219:98 234:166 235:252 236:252 237:252 238:252 239:141 242:73 243:102 244:252 245:252 246:252 247:98 262:166 263:252 264:252 265:252 266:191 267:30 271:5 272:97 273:252 274:252 275:220 276:51 289:123 290:245 291:252 292:252 293:202 294:14 300:56 301:252 302:252 303:252 304:65 316:18 317:154 318:252 319:252 320:241 328:56 329:252 330:252 331:252 332:65 343:21 344:146 345:252 346:252 347:252 348:241 356:56 357:252 358:252 359:252 360:65 371:67 372:252 373:252 374:252 375:252 376:241 384:56 385:252 386:252 387:252 388:65 399:67 400:252 401:252 402:252 403:252 404:116 412:56 413:252 414:252 415:252 416:65 427:67 428:252 429:252 430:252 431:252 432:20 440:56 441:252 442:252 443:252 444:65 455:67 456:252 457:252 458:252 459:87 460:4 468:56 469:252 470:252 471:124 472:11 483:67 484:252 485:252 486:252 487:54 494:19 495:236 496:245 497:252 498:252 499:98 511:67 512:252 513:252 514:252 515:97 516:5 521:39 522:219 523:252 524:252 525:252 526:252 527:98 539:67 540:252 541:252 542:252 543:252 544:102 545:89 546:89 547:89 548:89 549:203 550:252 551:252 552:252 553:252 554:209 555:64 567:67 568:252 569:252 570:252 571:252 572:252 573:252 574:252 575:252 576:252 577:252 578:252 579:252 580:226 581:130 582:68 595:67 596:252 597:252 598:252 599:252 600:252 601:252 602:252 603:252 604:252 605:252 606:252 607:239 608:77 623:17 624:65 625:163 626:252 627:252 628:252 629:252 630:252 631:252 632:252 633:252 634:96 635:59 653:17 654:176 655:252 656:252 657:252 658:252 659:155 660:32 661:32 662:6
+0 96:56 97:247 98:121 124:24 125:242 126:245 127:122 153:231 154:253 155:253 156:104 157:12 181:90 182:253 183:253 184:254 185:221 186:120 187:120 188:85 206:67 207:75 208:36 209:11 210:56 211:222 212:254 213:253 214:253 215:253 216:245 217:207 218:36 233:86 234:245 235:249 236:105 239:44 240:224 241:230 242:253 243:253 244:253 245:253 246:214 247:10 260:8 261:191 262:253 263:143 269:29 270:119 271:119 272:158 273:253 274:253 275:94 288:15 289:253 290:226 291:48 300:4 301:183 302:253 303:248 304:56 316:42 317:253 318:178 329:179 330:253 331:184 332:14 344:164 345:253 346:178 357:179 358:253 359:163 371:61 372:254 373:254 374:179 384:76 385:254 386:254 387:164 399:60 400:253 401:253 402:178 411:29 412:206 413:253 414:253 415:40 427:60 428:253 429:253 430:178 439:120 440:253 441:253 442:245 443:13 455:60 456:253 457:253 458:178 467:120 468:253 469:239 470:63 483:60 484:253 485:253 486:178 494:14 495:238 496:253 497:179 511:18 512:190 513:253 514:231 515:70 521:43 522:184 523:253 524:253 525:74 540:86 541:253 542:253 543:239 544:134 545:8 548:56 549:163 550:253 551:253 552:213 553:35 568:16 569:253 570:253 571:253 572:253 573:240 574:239 575:239 576:247 577:253 578:253 579:210 580:27 596:4 597:59 598:204 599:253 600:253 601:253 602:253 603:253 604:254 605:253 606:250 607:110 626:31 627:122 628:253 629:253 630:253 631:253 632:255 633:217 634:98
+0 125:19 126:164 127:253 128:255 129:253 130:118 131:59 132:36 153:78 154:251 155:251 156:253 157:251 158:251 159:251 160:199 161:45 180:14 181:198 182:251 183:251 184:253 185:251 186:251 187:251 188:251 189:204 190:26 208:5 209:117 210:251 211:251 212:243 213:212 214:239 215:251 216:251 217:251 218:218 236:95 237:251 238:251 239:251 240:120 242:175 243:251 244:251 245:251 246:231 263:97 264:237 265:251 266:251 267:251 270:67 271:240 272:251 273:251 274:243 275:108 290:8 291:163 292:251 293:251 294:240 295:81 299:68 300:251 301:251 302:251 303:179 304:9 317:13 318:145 319:251 320:251 321:226 322:80 327:39 328:251 329:251 330:251 331:251 332:115 345:144 346:251 347:251 348:251 349:173 355:18 356:167 357:251 358:251 359:251 360:115 373:233 374:251 375:251 376:251 377:173 384:98 385:251 386:251 387:251 388:115 400:176 401:253 402:253 403:216 404:179 412:99 413:253 414:253 415:253 416:116 427:55 428:210 429:251 430:251 431:96 440:98 441:251 442:251 443:214 444:62 455:117 456:251 457:251 458:251 459:96 467:28 468:204 469:251 470:237 471:53 482:55 483:241 484:251 485:251 486:160 487:7 494:28 495:222 496:251 497:251 498:231 510:59 511:251 512:251 513:251 514:153 520:23 521:98 522:204 523:251 524:251 525:251 526:156 538:59 539:251 540:251 541:251 542:153 546:85 547:155 548:179 549:251 550:251 551:251 552:251 553:154 554:15 566:59 567:251 568:251 569:251 570:236 571:214 572:214 573:214 574:234 575:251 576:253 577:251 578:251 579:248 580:156 581:15 594:41 595:209 596:251 597:251 598:251 599:251 600:251 601:251 602:251 603:251 604:253 605:251 606:196 607:146 623:54 624:115 625:241 626:251 627:251 628:251 629:251 630:251 631:251 632:253 633:187 634:35 653:83 654:251 655:251 656:251 657:251 658:251 659:101 660:57 661:31
+1 129:232 130:255 131:107 156:58 157:244 158:253 159:106 184:95 185:253 186:253 187:106 212:95 213:253 214:253 215:106 240:95 241:253 242:249 243:69 268:144 269:253 270:192 295:97 296:233 297:253 298:66 323:195 324:253 325:253 326:5 350:38 351:232 352:253 353:182 354:2 377:10 378:160 379:253 380:231 381:53 405:42 406:253 407:253 408:158 433:141 434:253 435:253 436:115 460:75 461:245 462:253 463:183 464:4 487:1 488:147 489:253 490:251 491:58 515:20 516:253 517:253 518:180 543:202 544:253 545:226 546:27 571:243 572:253 573:212 598:85 599:251 600:253 601:173 626:209 627:253 628:244 629:57 654:169 655:253 656:174
+1 127:63 128:128 129:2 155:63 156:254 157:123 183:63 184:254 185:179 211:63 212:254 213:179 239:63 240:254 241:179 267:142 268:254 269:179 295:187 296:254 297:158 323:187 324:254 325:55 350:68 351:235 352:254 353:55 378:181 379:254 380:254 381:55 406:181 407:254 408:202 409:14 434:181 435:254 436:186 462:181 463:254 464:146 490:181 491:254 492:62 518:181 519:254 520:62 546:181 547:254 548:62 574:181 575:255 576:62 602:181 603:254 604:241 605:52 630:181 631:254 632:222 633:30 658:181 659:224 660:34
+1 130:131 131:255 132:184 133:15 157:99 158:247 159:253 160:182 161:15 185:124 186:253 187:253 188:253 189:38 212:9 213:171 214:253 215:253 216:140 217:1 240:47 241:253 242:253 243:251 244:117 267:43 268:219 269:253 270:253 271:153 295:78 296:253 297:253 298:253 299:84 323:97 324:253 325:253 326:244 327:74 350:69 351:243 352:253 353:253 354:183 377:10 378:168 379:253 380:253 381:215 382:34 405:31 406:253 407:253 408:253 409:129 433:107 434:253 435:253 436:242 437:67 460:24 461:204 462:253 463:253 464:187 488:95 489:253 490:253 491:201 492:25 516:239 517:253 518:253 519:176 543:119 544:251 545:253 546:253 547:138 570:30 571:212 572:253 573:252 574:165 575:8 598:193 599:253 600:253 601:222 626:193 627:253 628:253 629:189 654:193 655:253 656:201 657:27
+0 125:57 126:255 127:253 128:198 129:85 153:168 154:253 155:251 156:253 157:251 158:169 159:56 180:86 181:253 182:254 183:253 184:254 185:253 186:254 187:253 188:57 208:197 209:251 210:253 211:251 212:253 213:251 214:253 215:251 216:225 217:56 235:169 236:255 237:253 238:226 239:56 241:114 242:254 243:253 244:254 245:84 262:57 263:224 264:253 265:251 266:56 270:139 271:251 272:253 273:83 290:141 291:253 292:255 293:84 298:57 299:225 300:254 301:196 318:253 319:251 320:253 321:83 327:168 328:253 329:83 345:169 346:254 347:253 348:169 355:169 356:254 357:253 358:169 373:168 374:253 375:251 376:56 383:168 384:253 385:251 386:56 401:169 402:254 403:84 412:254 413:253 429:168 430:253 431:83 440:253 441:251 456:29 457:197 458:254 459:84 467:169 468:254 469:196 484:85 485:251 486:253 487:83 494:57 495:224 496:253 497:83 512:57 513:225 514:254 515:139 521:57 522:141 523:253 524:254 525:84 541:168 542:253 543:251 544:169 545:56 547:114 548:169 549:224 550:253 551:251 552:253 553:83 569:169 570:254 571:253 572:254 573:253 574:254 575:253 576:254 577:253 578:254 579:253 580:226 581:56 597:56 598:253 599:251 600:253 601:251 602:253 603:251 604:253 605:251 606:253 607:251 608:56 626:169 627:225 628:254 629:253 630:254 631:253 632:254 633:253 634:226 635:56 655:56 656:253 657:251 658:253 659:251 660:84 661:83 662:56
+0 127:12 128:105 129:224 130:255 131:247 132:22 155:131 156:254 157:254 158:243 159:252 160:76 182:131 183:225 184:254 185:224 186:48 187:136 208:13 209:109 210:252 211:254 212:254 213:254 214:197 215:76 235:9 236:181 237:254 238:254 239:240 240:229 241:237 242:254 243:252 244:152 245:21 262:9 263:143 264:254 265:254 266:226 267:36 269:22 270:138 271:254 272:254 273:188 289:13 290:181 291:254 292:254 293:250 294:64 298:2 299:53 300:236 301:252 302:131 317:102 318:254 319:254 320:254 321:111 328:56 329:243 330:251 331:42 344:30 345:186 346:254 347:254 348:206 349:29 357:199 358:254 359:91 372:92 373:254 374:254 375:237 376:13 385:134 386:254 387:91 400:133 401:254 402:254 403:126 413:134 414:250 415:17 428:187 429:254 430:237 431:23 441:200 442:183 456:187 457:254 458:213 467:2 468:134 469:252 470:101 484:183 485:254 486:133 495:14 496:254 497:234 498:34 512:92 513:254 514:161 522:84 523:204 524:254 525:56 540:92 541:254 542:229 549:85 550:252 551:252 552:188 553:11 568:56 569:252 570:229 575:3 576:53 577:235 578:253 579:166 597:224 598:245 599:130 600:68 601:68 602:134 603:214 604:254 605:254 606:159 625:141 626:254 627:254 628:254 629:254 630:254 631:254 632:233 633:95 634:3 653:14 654:152 655:254 656:254 657:254 658:186 659:157 660:53
+1 130:226 131:247 132:55 157:99 158:248 159:254 160:230 161:30 185:125 186:254 187:254 188:254 189:38 213:125 214:254 215:254 216:212 217:24 240:18 241:223 242:254 243:252 244:118 268:24 269:254 270:254 271:239 295:27 296:195 297:254 298:254 299:93 323:78 324:254 325:254 326:246 327:74 351:158 352:254 353:254 354:185 378:41 379:239 380:254 381:254 382:43 405:22 406:218 407:254 408:254 409:167 410:9 433:32 434:254 435:254 436:254 437:130 460:24 461:187 462:254 463:254 464:234 465:16 488:189 489:254 490:254 491:254 492:128 515:64 516:247 517:254 518:255 519:219 520:42 543:139 544:254 545:254 546:222 547:40 570:30 571:213 572:254 573:235 574:45 598:194 599:254 600:254 601:223 626:194 627:254 628:254 629:190 654:194 655:254 656:202 657:27
+1 130:166 131:253 132:124 133:53 158:140 159:251 160:251 161:180 185:125 186:246 187:251 188:251 189:51 212:32 213:190 214:251 215:251 216:251 217:103 240:21 241:174 242:251 243:251 244:251 268:73 269:176 270:253 271:253 272:201 296:149 297:251 298:251 299:251 300:71 323:27 324:228 325:251 326:251 327:157 328:10 351:180 352:253 353:251 354:251 355:142 377:27 378:180 379:231 380:253 381:251 382:96 383:41 405:89 406:253 407:253 408:255 409:211 410:25 433:217 434:251 435:251 436:253 437:107 460:21 461:221 462:251 463:251 464:242 465:92 487:32 488:190 489:251 490:251 491:251 492:103 515:202 516:251 517:251 518:251 519:122 542:53 543:255 544:253 545:253 546:221 547:51 570:180 571:253 572:251 573:251 574:142 598:180 599:253 600:251 601:251 602:142 626:180 627:253 628:251 629:157 630:82 654:180 655:253 656:147 657:10
+1 129:17 130:206 131:229 132:44 157:2 158:125 159:254 160:123 185:95 186:254 187:254 188:123 212:78 213:240 214:254 215:254 216:123 240:100 241:254 242:254 243:254 244:123 267:2 268:129 269:254 270:254 271:220 272:20 295:9 296:254 297:254 298:254 299:123 322:22 323:179 324:254 325:254 326:254 327:49 350:83 351:254 352:254 353:254 354:183 355:19 378:136 379:254 380:254 381:254 382:139 404:3 405:111 406:252 407:254 408:254 409:232 410:45 432:67 433:254 434:254 435:254 436:216 437:40 459:14 460:192 461:254 462:254 463:254 464:140 486:23 487:192 488:254 489:254 490:254 491:246 514:77 515:254 516:254 517:255 518:241 519:100 541:65 542:235 543:254 544:254 545:254 546:172 568:30 569:238 570:254 571:254 572:254 573:219 574:26 596:34 597:254 598:254 599:254 600:216 601:41 624:34 625:254 626:254 627:254 628:188 652:12 653:170 654:254 655:254 656:82
+1 130:218 131:253 132:124 157:84 158:236 159:251 160:251 184:63 185:236 186:251 187:251 188:122 212:73 213:251 214:251 215:251 216:173 240:202 241:251 242:251 243:251 244:71 267:53 268:255 269:253 270:253 271:253 272:72 295:180 296:253 297:251 298:251 299:188 300:30 323:180 324:253 325:251 326:251 327:142 350:47 351:211 352:253 353:251 354:235 355:82 377:27 378:211 379:251 380:253 381:251 382:215 405:89 406:253 407:253 408:255 409:253 410:164 433:217 434:251 435:251 436:253 437:168 438:15 460:21 461:221 462:251 463:251 464:253 465:107 487:32 488:190 489:251 490:251 491:251 492:221 493:61 515:73 516:251 517:251 518:251 519:251 520:180 543:255 544:253 545:253 546:253 547:201 570:105 571:253 572:251 573:251 574:251 575:71 598:180 599:253 600:251 601:246 602:137 603:10 626:180 627:253 628:251 629:215 654:180 655:253 656:251 657:86
+1 124:102 125:180 126:1 152:140 153:254 154:130 180:140 181:254 182:204 208:140 209:254 210:204 236:72 237:254 238:204 264:25 265:231 266:250 267:135 292:11 293:211 294:254 295:222 321:101 322:254 323:250 324:15 349:96 350:254 351:254 352:95 377:2 378:251 379:254 380:95 405:2 406:251 407:254 408:95 433:96 434:254 435:254 436:95 461:53 462:253 463:254 464:139 490:250 491:254 492:235 493:27 518:201 519:254 520:254 521:128 546:80 547:254 548:254 549:139 574:65 575:254 576:254 577:139 602:150 603:254 604:254 605:139 630:229 631:254 632:254 633:43 658:52 659:196 660:168 661:9
+0 128:87 129:208 130:249 155:27 156:212 157:254 158:195 182:118 183:225 184:254 185:254 186:232 187:147 188:46 209:115 210:248 211:254 212:254 213:254 214:254 215:254 216:230 217:148 218:12 236:18 237:250 238:254 239:245 240:226 241:254 242:254 243:254 244:254 245:254 246:148 263:92 264:205 265:254 266:250 267:101 268:20 269:194 270:254 271:254 272:254 273:254 274:229 275:53 291:152 292:254 293:254 294:94 297:14 298:124 299:187 300:254 301:254 302:254 303:213 318:95 319:252 320:254 321:206 322:15 327:3 328:6 329:51 330:231 331:254 332:94 345:50 346:246 347:254 348:254 349:20 358:200 359:254 360:96 372:21 373:184 374:254 375:254 376:147 377:2 386:200 387:254 388:96 400:177 401:254 402:254 403:218 404:33 413:16 414:211 415:254 416:96 427:11 428:219 429:254 430:251 431:92 441:84 442:254 443:232 444:44 455:101 456:254 457:254 458:141 469:162 470:254 471:231 472:42 483:235 484:254 485:227 486:42 496:51 497:238 498:254 499:213 511:235 512:254 513:199 524:160 525:254 526:229 527:52 539:235 540:254 541:199 549:10 550:84 551:150 552:253 553:254 554:147 567:235 568:254 569:213 570:20 575:17 576:63 577:158 578:254 579:254 580:254 581:155 582:12 595:122 596:248 597:254 598:204 599:98 600:42 601:177 602:180 603:200 604:254 605:254 606:253 607:213 608:82 609:10 624:203 625:254 626:254 627:254 628:254 629:254 630:254 631:254 632:251 633:219 634:94 652:35 653:221 654:254 655:254 656:254 657:254 658:254 659:217 660:95
+1 126:134 127:230 154:133 155:231 156:10 182:133 183:253 184:96 210:133 211:253 212:96 238:133 239:253 240:183 266:133 267:253 268:217 294:133 295:253 296:217 322:133 323:253 324:217 350:133 351:253 352:217 378:133 379:253 380:217 406:134 407:254 408:218 434:133 435:253 436:159 462:133 463:253 464:199 490:156 491:253 492:96 518:254 519:247 520:73 546:254 547:248 548:74 573:99 574:254 575:245 576:64 600:89 601:230 602:254 603:125 627:140 628:251 629:253 630:243 631:10 655:114 656:242 657:195 658:69
+1 125:29 126:85 127:255 128:139 153:197 154:251 155:253 156:251 181:254 182:253 183:254 184:253 209:253 210:251 211:253 212:251 237:254 238:253 239:254 240:253 265:253 266:251 267:253 268:138 293:254 294:253 295:254 296:196 321:253 322:251 323:253 324:196 349:254 350:253 351:254 352:84 377:253 378:251 379:253 380:196 405:254 406:253 407:254 408:253 433:253 434:251 435:253 436:251 461:254 462:253 463:254 464:253 489:253 490:251 491:253 492:251 517:254 518:253 519:254 520:253 545:253 546:251 547:253 548:251 573:254 574:253 575:254 576:253 601:253 602:251 603:253 604:251 629:57 630:225 631:254 632:253 658:56 659:253 660:251
+1 125:149 126:255 127:254 128:58 153:215 154:253 155:183 156:2 180:41 181:232 182:253 183:181 208:92 209:253 210:253 211:181 236:92 237:253 238:253 239:181 264:92 265:253 266:253 267:181 292:92 293:253 294:253 295:181 320:92 321:253 322:253 323:181 348:92 349:253 350:253 351:181 376:92 377:253 378:253 379:181 404:92 405:253 406:253 407:181 432:92 433:253 434:253 435:181 460:92 461:253 462:253 463:181 488:31 489:228 490:253 491:181 517:198 518:253 519:228 520:54 545:33 546:226 547:253 548:195 549:7 574:199 575:253 576:253 577:75 602:34 603:218 604:253 605:228 606:117 607:14 608:12 631:33 632:219 633:253 634:253 635:253 636:211 660:32 661:123 662:149 663:230 664:41
+1 130:79 131:203 132:141 157:51 158:240 159:240 160:140 185:88 186:252 187:252 188:140 213:197 214:252 215:252 216:140 241:197 242:252 243:252 244:140 268:147 269:253 270:253 271:253 295:38 296:234 297:252 298:242 299:89 323:113 324:252 325:252 326:223 350:16 351:207 352:252 353:252 354:129 377:16 378:203 379:253 380:252 381:220 382:37 405:29 406:253 407:255 408:253 409:56 432:19 433:181 434:252 435:253 436:176 437:6 460:166 461:252 462:252 463:228 464:52 487:10 488:203 489:252 490:252 491:126 514:63 515:178 516:252 517:252 518:173 542:114 543:253 544:253 545:225 570:238 571:252 572:252 573:99 596:7 597:135 598:253 599:252 600:176 601:19 624:29 625:252 626:253 627:252 628:55 652:13 653:189 654:253 655:204 656:25
+1 126:94 127:254 128:75 154:166 155:253 156:231 182:208 183:253 184:147 210:208 211:253 212:116 238:208 239:253 240:168 266:146 267:254 268:222 294:166 295:253 296:116 322:208 323:253 324:116 350:166 351:253 352:158 378:145 379:253 380:231 406:209 407:254 408:169 434:187 435:253 436:168 462:93 463:253 464:116 490:93 491:253 492:116 518:93 519:253 520:116 546:94 547:254 548:179 549:11 574:93 575:253 576:246 577:101 602:145 603:253 604:255 605:92 630:93 631:253 632:246 633:59 658:93 659:253 660:74
+0 127:46 128:105 129:254 130:254 131:224 132:59 133:59 134:9 155:196 156:254 157:253 158:253 159:253 160:253 161:253 162:128 182:96 183:235 184:254 185:253 186:253 187:253 188:253 189:253 190:247 191:122 208:4 209:101 210:244 211:253 212:254 213:234 214:241 215:253 216:253 217:253 218:253 219:186 220:18 236:96 237:253 238:253 239:253 240:232 241:83 242:109 243:170 244:253 245:253 246:253 247:253 248:116 264:215 265:253 266:253 267:253 268:196 271:40 272:253 273:253 274:253 275:253 276:116 290:8 291:141 292:247 293:253 294:253 295:237 296:29 299:6 300:38 301:171 302:253 303:253 304:116 317:13 318:146 319:253 320:253 321:253 322:253 323:57 329:156 330:253 331:253 332:116 345:40 346:253 347:253 348:253 349:253 350:178 351:27 357:156 358:253 359:253 360:116 372:136 373:204 374:253 375:253 376:253 377:192 378:27 385:156 386:253 387:253 388:116 399:28 400:195 401:254 402:254 403:254 404:250 405:135 412:99 413:255 414:254 415:254 416:117 427:118 428:253 429:253 430:253 431:253 432:142 439:19 440:170 441:253 442:253 443:216 444:62 454:42 455:212 456:253 457:253 458:253 459:253 460:38 466:124 467:188 468:253 469:253 470:253 471:174 482:59 483:253 484:253 485:253 486:237 487:93 488:3 491:31 492:40 493:130 494:247 495:253 496:253 497:253 498:204 499:13 510:59 511:253 512:253 513:253 514:154 518:54 519:218 520:254 521:253 522:253 523:253 524:253 525:253 526:38 538:59 539:253 540:253 541:253 542:215 543:156 544:156 545:156 546:209 547:253 548:255 549:253 550:253 551:253 552:192 553:97 554:15 566:55 567:242 568:253 569:253 570:253 571:253 572:253 573:253 574:253 575:253 576:254 577:253 578:253 579:204 580:23 595:118 596:253 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:254 605:216 606:174 607:13 623:54 624:116 625:243 626:253 627:253 628:253 629:253 630:253 631:146 632:117 633:62 653:53 654:132 655:253 656:253 657:192 658:57 659:13
+1 125:42 126:232 127:254 128:58 153:86 154:253 155:253 156:58 181:86 182:253 183:253 184:58 209:206 210:253 211:253 212:58 237:215 238:253 239:253 240:58 265:215 266:253 267:253 268:58 293:215 294:253 295:253 296:58 321:215 322:253 323:253 324:58 349:215 350:253 351:253 352:58 377:215 378:253 379:253 380:58 405:215 406:253 407:253 408:58 433:188 434:253 435:253 436:85 461:86 462:253 463:253 464:200 465:12 489:29 490:223 491:253 492:253 493:151 518:209 519:253 520:253 521:194 546:128 547:253 548:253 549:200 550:8 574:32 575:213 576:253 577:253 578:152 579:6 603:32 604:221 605:253 606:253 607:153 608:5 632:90 633:215 634:253 635:253 636:151 661:59 662:253 663:253 664:84
+1 156:60 157:229 158:38 184:187 185:254 186:78 211:121 212:252 213:254 214:78 239:197 240:254 241:206 242:6 267:197 268:254 269:202 294:27 295:218 296:233 297:62 322:117 323:254 324:195 350:203 351:254 352:195 377:64 378:244 379:254 380:195 405:79 406:254 407:255 408:161 433:79 434:254 435:254 436:65 461:79 462:254 463:241 464:52 489:79 490:254 491:189 517:79 518:254 519:189 545:79 546:254 547:189 573:79 574:254 575:189 601:79 602:254 603:194 604:5 629:35 630:219 631:254 632:72 658:34 659:223 660:195 687:129 688:195
+1 101:11 102:150 103:72 129:37 130:251 131:71 157:63 158:251 159:71 185:217 186:251 187:71 213:217 214:251 215:71 240:145 241:253 242:253 243:72 267:42 268:206 269:251 270:251 271:71 295:99 296:251 297:251 298:251 299:71 323:253 324:251 325:251 326:251 327:71 350:130 351:253 352:251 353:251 354:251 355:71 377:110 378:253 379:255 380:253 381:253 382:253 383:72 405:109 406:251 407:253 408:251 409:251 410:188 411:30 433:109 434:251 435:253 436:251 437:246 438:123 460:16 461:170 462:251 463:253 464:251 465:215 488:37 489:251 490:251 491:253 492:251 493:86 516:218 517:253 518:253 519:255 520:253 521:35 543:84 544:236 545:251 546:251 547:253 548:168 549:15 571:144 572:251 573:251 574:251 575:190 576:15 599:144 600:251 601:251 602:251 603:180 626:53 627:221 628:251 629:251 630:251 631:180
+0 127:45 128:254 129:254 130:254 131:148 132:24 133:9 154:43 155:254 156:252 157:252 158:252 159:252 160:252 161:121 162:13 181:58 182:237 183:254 184:252 185:252 186:252 187:252 188:252 189:252 190:68 208:69 209:224 210:252 211:254 212:252 213:252 214:252 215:252 216:252 217:252 218:135 219:17 235:75 236:216 237:252 238:252 239:254 240:231 241:168 242:252 243:252 244:252 245:252 246:252 247:45 262:77 263:212 264:252 265:252 266:252 267:242 268:93 269:32 270:114 271:177 272:252 273:252 274:252 275:158 276:12 289:75 290:212 291:252 292:252 293:252 294:252 295:231 299:116 300:252 301:252 302:252 303:252 304:21 316:69 317:216 318:252 319:252 320:252 321:252 322:252 323:62 327:116 328:252 329:252 330:252 331:252 332:21 344:93 345:252 346:252 347:252 348:252 349:252 350:62 355:21 356:158 357:252 358:252 359:252 360:21 371:64 372:239 373:252 374:252 375:252 376:252 377:252 378:21 384:139 385:252 386:252 387:252 388:21 398:5 399:87 400:254 401:254 402:254 403:254 404:237 405:41 411:11 412:150 413:254 414:254 415:254 416:22 425:5 426:85 427:252 428:252 429:252 430:252 431:222 432:55 439:116 440:252 441:252 442:252 443:214 444:18 453:24 454:252 455:252 456:252 457:252 458:252 459:91 466:26 467:153 468:252 469:252 470:252 471:45 481:24 482:252 483:252 484:252 485:252 486:252 487:91 492:18 493:93 494:151 495:252 496:252 497:252 498:184 499:28 509:24 510:252 511:252 512:252 513:252 514:252 515:164 516:116 517:116 518:116 519:117 520:141 521:252 522:252 523:252 524:252 525:252 526:68 537:24 538:252 539:252 540:252 541:252 542:252 543:252 544:252 545:252 546:252 547:254 548:252 549:252 550:252 551:252 552:252 553:163 554:31 565:9 566:121 567:252 568:252 569:252 570:252 571:252 572:252 573:252 574:252 575:254 576:252 577:252 578:252 579:178 580:91 581:33 594:13 595:119 596:252 597:252 598:252 599:252 600:252 601:252 602:252 603:254 604:252 605:252 606:184 607:37 623:13 624:121 625:252 626:252 627:252 628:252 629:252 630:252 631:254 632:214 633:45 634:28 652:8 653:21 654:21 655:169 656:252 657:252 658:41 659:22 660:18
+0 125:218 126:253 127:253 128:255 129:149 130:62 151:42 152:144 153:236 154:251 155:251 156:253 157:251 158:236 159:144 160:144 179:99 180:251 181:251 182:251 183:225 184:253 185:251 186:251 187:251 188:251 189:166 190:16 206:79 207:253 208:251 209:251 210:204 211:41 212:143 213:205 214:251 215:251 216:251 217:253 218:169 219:15 233:79 234:231 235:253 236:251 237:225 238:41 241:41 242:226 243:251 244:251 245:253 246:251 247:164 260:37 261:253 262:253 263:255 264:253 265:35 271:79 272:232 273:255 274:253 275:227 276:42 288:140 289:251 290:251 291:253 292:168 293:15 300:77 301:253 302:251 303:251 304:142 315:21 316:221 317:251 318:251 319:164 320:15 329:227 330:251 331:251 332:236 333:61 342:32 343:190 344:251 345:251 346:251 357:73 358:251 359:251 360:251 361:71 370:73 371:251 372:251 373:251 374:251 385:73 386:251 387:251 388:251 389:71 398:73 399:253 400:253 401:253 402:201 413:73 414:253 415:253 416:253 417:72 426:176 427:251 428:251 429:251 430:71 441:73 442:251 443:251 444:251 445:71 454:253 455:251 456:251 457:157 458:10 469:73 470:251 471:251 472:251 473:71 482:253 483:251 484:251 485:142 497:150 498:251 499:251 500:204 501:41 510:124 511:251 512:251 513:220 514:180 524:130 525:253 526:251 527:225 528:41 538:73 539:253 540:253 541:253 542:253 543:73 544:73 545:10 549:42 550:73 551:150 552:253 553:255 554:253 555:216 566:31 567:189 568:251 569:251 570:251 571:253 572:251 573:159 574:144 575:144 576:145 577:206 578:251 579:251 580:251 581:253 582:168 583:92 595:20 596:195 597:251 598:251 599:253 600:251 601:251 602:251 603:251 604:253 605:251 606:251 607:251 608:225 609:164 610:15 624:21 625:142 626:220 627:253 628:251 629:251 630:251 631:251 632:253 633:251 634:251 635:204 636:41 654:51 655:72 656:174 657:251 658:251 659:251 660:253 661:147 662:71 663:41
+0 127:60 128:96 129:96 130:48 153:16 154:171 155:228 156:253 157:251 158:220 159:51 160:32 181:127 182:251 183:251 184:253 185:251 186:251 187:251 188:251 189:80 207:24 208:182 209:236 210:251 211:211 212:189 213:236 214:251 215:251 216:251 217:242 218:193 234:100 235:194 236:251 237:251 238:211 239:35 241:71 242:173 243:251 244:251 245:253 246:240 247:158 248:19 261:64 262:253 263:255 264:253 265:205 266:19 271:40 272:218 273:255 274:253 275:253 276:91 288:16 289:186 290:251 291:253 292:247 293:110 300:39 301:233 302:251 303:251 304:188 315:16 316:189 317:251 318:251 319:205 320:110 329:48 330:220 331:251 332:220 333:48 343:72 344:251 345:251 346:251 347:158 358:51 359:251 360:251 361:232 371:190 372:251 373:251 374:251 375:59 386:32 387:251 388:251 389:251 398:96 399:253 400:253 401:253 402:95 414:32 415:253 416:253 417:193 426:214 427:251 428:251 429:204 430:23 442:52 443:251 444:251 445:94 454:253 455:251 456:251 457:109 469:48 470:221 471:251 472:219 473:47 482:253 483:251 484:251 485:70 497:234 498:251 499:251 500:188 510:253 511:251 512:251 513:188 523:40 524:158 525:253 526:251 527:172 528:70 539:191 540:253 541:253 542:253 543:96 544:24 549:12 550:174 551:253 552:253 553:255 554:221 567:71 568:251 569:251 570:251 571:253 572:205 573:190 574:190 575:190 576:191 577:197 578:251 579:251 580:231 581:221 582:93 595:16 596:126 597:236 598:251 599:253 600:251 601:251 602:251 603:251 604:253 605:251 606:251 607:140 608:47 625:67 626:188 627:189 628:188 629:188 630:188 631:188 632:189 633:188 634:109 635:4
+0 126:32 127:202 128:255 129:253 130:253 131:175 132:21 152:84 153:144 154:190 155:251 156:253 157:251 158:251 159:251 160:174 176:6 177:37 178:166 179:218 180:236 181:251 182:251 183:251 184:253 185:251 186:251 187:251 188:251 189:156 204:115 205:251 206:251 207:253 208:251 209:251 210:251 211:251 212:253 213:251 214:251 215:251 216:251 217:180 231:105 232:241 233:251 234:251 235:253 236:251 237:251 238:251 239:122 240:72 241:71 242:71 243:148 244:251 245:180 258:73 259:253 260:253 261:253 262:253 263:202 264:253 265:253 266:143 286:31 287:189 288:251 289:251 290:251 291:31 292:189 293:251 294:142 314:63 315:236 316:251 317:251 318:96 320:124 321:246 322:142 330:21 331:166 332:21 342:73 343:251 344:251 345:251 346:71 349:217 350:142 357:32 358:190 359:251 360:142 370:73 371:251 372:251 373:251 374:71 377:217 378:142 385:73 386:251 387:251 388:142 398:73 399:253 400:253 401:253 402:72 405:156 406:103 413:73 414:253 415:253 416:253 417:72 426:73 427:251 428:251 429:251 430:174 441:73 442:251 443:251 444:251 445:71 454:73 455:251 456:251 457:251 458:251 469:73 470:251 471:251 472:251 473:71 482:42 483:205 484:251 485:251 486:251 487:79 497:73 498:251 499:251 500:251 501:71 511:41 512:226 513:251 514:251 515:232 516:77 525:73 526:251 527:251 528:251 529:71 540:166 541:253 542:253 543:255 544:253 545:227 546:73 547:21 553:125 554:253 555:253 556:143 568:16 569:169 570:251 571:253 572:251 573:251 574:251 575:174 576:105 579:63 580:144 581:253 582:251 583:251 584:142 597:15 598:35 599:253 600:251 601:251 602:251 603:251 604:243 605:217 606:217 607:231 608:251 609:253 610:251 611:220 612:20 627:143 628:142 629:236 630:251 631:251 632:253 633:251 634:251 635:251 636:251 637:253 638:251 639:137 657:61 658:71 659:200 660:253 661:251 662:251 663:251 664:251 665:201 666:71 667:10
+1 130:218 131:170 132:108 157:32 158:227 159:252 160:232 185:129 186:252 187:252 188:252 212:1 213:253 214:252 215:252 216:168 240:144 241:253 242:252 243:236 244:62 268:144 269:253 270:252 271:215 296:144 297:253 298:252 299:112 323:21 324:206 325:253 326:252 327:71 351:99 352:253 353:255 354:119 378:63 379:242 380:252 381:253 382:35 406:94 407:252 408:252 409:154 410:10 433:145 434:237 435:252 436:252 461:255 462:253 463:253 464:108 487:11 488:155 489:253 490:252 491:179 492:15 514:11 515:150 516:252 517:253 518:200 519:20 542:73 543:252 544:252 545:253 546:97 569:47 570:233 571:253 572:253 596:1 597:149 598:252 599:252 600:252 624:1 625:252 626:252 627:246 628:132 652:1 653:169 654:252 655:132
+1 130:116 131:255 132:123 157:29 158:213 159:253 160:122 185:189 186:253 187:253 188:122 213:189 214:253 215:253 216:122 241:189 242:253 243:253 244:122 267:2 268:114 269:243 270:253 271:186 272:19 295:100 296:253 297:253 298:253 299:48 323:172 324:253 325:253 326:253 327:48 351:172 352:253 353:253 354:182 355:19 378:133 379:251 380:253 381:175 382:4 405:107 406:251 407:253 408:253 409:65 432:26 433:194 434:253 435:253 436:214 437:40 459:105 460:205 461:253 462:253 463:125 464:40 487:139 488:253 489:253 490:253 491:81 514:41 515:231 516:253 517:253 518:159 519:16 541:65 542:155 543:253 544:253 545:172 546:4 569:124 570:253 571:253 572:253 573:98 597:124 598:253 599:253 600:214 601:41 624:22 625:207 626:253 627:253 628:139 653:124 654:253 655:162 656:9
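The rows above follow the sparse libsvm layout used throughout data/mllib: each line is a label (0 or 1 here) followed by index:value pairs for the non-zero features only. A minimal sketch of how such a file is typically read, assuming an active SparkSession named spark (e.g. from spark-shell started in the distribution root); the path points at the sample_libsvm_data.txt file introduced later in this diff:

    // Sketch only: `spark` and the working directory are assumptions, not part of this patch.
    val df = spark.read.format("libsvm")
      .load("data/mllib/sample_libsvm_data.txt")
    df.printSchema()   // label: double, features: vector (stored sparsely)
    df.show(3)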
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_fpgrowth.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_fpgrowth.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c451583e5131796daa0bc3dde2a71f1d076d18a8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_fpgrowth.txt
@@ -0,0 +1,6 @@
+r z h k p
+z y x w v u t s
+s x o n r
+x z y m t s q e
+z
+x z y r q t p
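Each line of sample_fpgrowth.txt is one transaction of space-separated items. A minimal sketch of mining it with ml's FPGrowth, assuming an active SparkSession `spark`; the support and confidence thresholds are illustrative, not prescribed by this file:

    // Sketch only: assumes `spark` from spark-shell and its implicits.
    import spark.implicits._
    import org.apache.spark.ml.fpm.FPGrowth

    val transactions = spark.read.textFile("data/mllib/sample_fpgrowth.txt")
      .map(_.trim.split("\\s+"))               // one transaction per line
      .toDF("items")

    val model = new FPGrowth()
      .setItemsCol("items")
      .setMinSupport(0.5)                      // thresholds chosen for illustration
      .setMinConfidence(0.6)
      .fit(transactions)

    model.freqItemsets.show()
    model.associationRules.show()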
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_isotonic_regression_libsvm_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_isotonic_regression_libsvm_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f39fe0269c2fe9a5765b5cd1a65f367cf42854b1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_isotonic_regression_libsvm_data.txt
@@ -0,0 +1,100 @@
+0.24579296 1:0.01
+0.28505864 1:0.02
+0.31208567 1:0.03
+0.35900051 1:0.04
+0.35747068 1:0.05
+0.16675166 1:0.06
+0.17491076 1:0.07
+0.04181540 1:0.08
+0.04793473 1:0.09
+0.03926568 1:0.10
+0.12952575 1:0.11
+0.00000000 1:0.12
+0.01376849 1:0.13
+0.13105558 1:0.14
+0.08873024 1:0.15
+0.12595614 1:0.16
+0.15247323 1:0.17
+0.25956145 1:0.18
+0.20040796 1:0.19
+0.19581846 1:0.20
+0.15757267 1:0.21
+0.13717491 1:0.22
+0.19020908 1:0.23
+0.19581846 1:0.24
+0.20091790 1:0.25
+0.16879143 1:0.26
+0.18510964 1:0.27
+0.20040796 1:0.28
+0.29576747 1:0.29
+0.43396226 1:0.30
+0.53391127 1:0.31
+0.52116267 1:0.32
+0.48546660 1:0.33
+0.49209587 1:0.34
+0.54156043 1:0.35
+0.59765426 1:0.36
+0.56144824 1:0.37
+0.58592555 1:0.38
+0.52983172 1:0.39
+0.50178480 1:0.40
+0.52626211 1:0.41
+0.58286588 1:0.42
+0.64660887 1:0.43
+0.68077511 1:0.44
+0.74298827 1:0.45
+0.64864865 1:0.46
+0.67261601 1:0.47
+0.65782764 1:0.48
+0.69811321 1:0.49
+0.63029067 1:0.50
+0.61601224 1:0.51
+0.63233044 1:0.52
+0.65323814 1:0.53
+0.65323814 1:0.54
+0.67363590 1:0.55
+0.67006629 1:0.56
+0.51555329 1:0.57
+0.50892402 1:0.58
+0.33299337 1:0.59
+0.36206017 1:0.60
+0.43090260 1:0.61
+0.45996940 1:0.62
+0.56348802 1:0.63
+0.54920959 1:0.64
+0.48393677 1:0.65
+0.48495665 1:0.66
+0.46965834 1:0.67
+0.45181030 1:0.68
+0.45843957 1:0.69
+0.47118817 1:0.70
+0.51555329 1:0.71
+0.58031617 1:0.72
+0.55481897 1:0.73
+0.56297807 1:0.74
+0.56603774 1:0.75
+0.57929628 1:0.76
+0.64762876 1:0.77
+0.66241713 1:0.78
+0.69301377 1:0.79
+0.65119837 1:0.80
+0.68332483 1:0.81
+0.66598674 1:0.82
+0.73890872 1:0.83
+0.73992861 1:0.84
+0.84242733 1:0.85
+0.91330954 1:0.86
+0.88016318 1:0.87
+0.90719021 1:0.88
+0.93115757 1:0.89
+0.93115757 1:0.90
+0.91942886 1:0.91
+0.92911780 1:0.92
+0.95665477 1:0.93
+0.95002550 1:0.94
+0.96940337 1:0.95
+1.00000000 1:0.96
+0.89801122 1:0.97
+0.90311066 1:0.98
+0.90362060 1:0.99
+0.83477817 1:1.0
\ No newline at end of file
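sample_isotonic_regression_libsvm_data.txt holds one label and a single feature per row, with the label rising noisily as the feature grows, which is the setting isotonic regression targets. A minimal sketch of fitting ml's IsotonicRegression to it, assuming an active SparkSession `spark`:

    // Sketch only: assumes `spark` from spark-shell.
    import org.apache.spark.ml.regression.IsotonicRegression

    val data = spark.read.format("libsvm")
      .load("data/mllib/sample_isotonic_regression_libsvm_data.txt")

    val model = new IsotonicRegression().fit(data)   // non-decreasing fit by default
    println(s"boundaries: ${model.boundaries}")
    println(s"predictions: ${model.predictions}")
    model.transform(data).show(5)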
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_kmeans_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_kmeans_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..50013776b182ae7dbba4a877c6cb3e176bae88bd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_kmeans_data.txt
@@ -0,0 +1,6 @@
+0 1:0.0 2:0.0 3:0.0
+1 1:0.1 2:0.1 3:0.1
+2 1:0.2 2:0.2 3:0.2
+3 1:9.0 2:9.0 3:9.0
+4 1:9.1 2:9.1 3:9.1
+5 1:9.2 2:9.2 3:9.2
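sample_kmeans_data.txt contains two obvious groups of three points each, one near the origin and one near (9, 9, 9). A minimal sketch of clustering it with ml's KMeans, assuming an active SparkSession `spark`; k = 2 and the seed are illustrative choices:

    // Sketch only: assumes `spark` from spark-shell.
    import org.apache.spark.ml.clustering.KMeans

    val points = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
    val model = new KMeans().setK(2).setSeed(1L).fit(points)

    model.clusterCenters.foreach(println)   // expect centers near (0.1,0.1,0.1) and (9.1,9.1,9.1)
    model.transform(points).show()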
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_lda_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_lda_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2e76702ca9d67de507ddbbe482169b17b72d93d9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_lda_data.txt
@@ -0,0 +1,12 @@
+1 2 6 0 2 3 1 1 0 0 3
+1 3 0 1 3 0 0 2 0 0 1
+1 4 1 0 0 4 9 0 1 2 0
+2 1 0 3 0 0 5 0 2 3 9
+3 1 1 9 3 0 2 0 0 1 3
+4 2 0 3 4 5 1 1 1 4 0
+2 1 0 3 0 0 5 0 2 2 9
+1 1 1 9 2 1 2 0 0 1 3
+4 4 0 3 4 2 1 3 0 0 0
+2 8 2 0 3 0 2 0 2 7 2
+1 1 1 9 0 2 2 0 0 3 3
+4 1 0 0 4 5 1 3 0 1 0
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_lda_libsvm_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_lda_libsvm_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bf118d7d5b20fceeff58f8c25e20604930be213b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_lda_libsvm_data.txt
@@ -0,0 +1,12 @@
+0 1:1 2:2 3:6 4:0 5:2 6:3 7:1 8:1 9:0 10:0 11:3
+1 1:1 2:3 3:0 4:1 5:3 6:0 7:0 8:2 9:0 10:0 11:1
+2 1:1 2:4 3:1 4:0 5:0 6:4 7:9 8:0 9:1 10:2 11:0
+3 1:2 2:1 3:0 4:3 5:0 6:0 7:5 8:0 9:2 10:3 11:9
+4 1:3 2:1 3:1 4:9 5:3 6:0 7:2 8:0 9:0 10:1 11:3
+5 1:4 2:2 3:0 4:3 5:4 6:5 7:1 8:1 9:1 10:4 11:0
+6 1:2 2:1 3:0 4:3 5:0 6:0 7:5 8:0 9:2 10:2 11:9
+7 1:1 2:1 3:1 4:9 5:2 6:1 7:2 8:0 9:0 10:1 11:3
+8 1:4 2:4 3:0 4:3 5:4 6:2 7:1 8:3 9:0 10:0 11:0
+9 1:2 2:8 3:2 4:0 5:3 6:0 7:2 8:0 9:2 10:7 11:2
+10 1:1 2:1 3:1 4:9 5:0 6:2 7:2 8:0 9:0 10:3 11:3
+11 1:4 2:1 3:0 4:0 5:4 6:5 7:1 8:3 9:0 10:1 11:0
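sample_lda_libsvm_data.txt is the same bag-of-words corpus as sample_lda_data.txt above, rewritten in libsvm form: the label is the document id and the features are per-term counts. A minimal sketch of topic modelling it with ml's LDA, assuming an active SparkSession `spark`; the topic count and iteration budget are illustrative:

    // Sketch only: assumes `spark` from spark-shell.
    import org.apache.spark.ml.clustering.LDA

    val corpus = spark.read.format("libsvm").load("data/mllib/sample_lda_libsvm_data.txt")
    val lda = new LDA().setK(3).setMaxIter(10)
    val model = lda.fit(corpus)

    println(s"log likelihood: ${model.logLikelihood(corpus)}")
    model.describeTopics(3).show(false)      // top 3 terms per topic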
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_libsvm_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_libsvm_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..861c70cde7fd27f447db2bd205558a38c7da66f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_libsvm_data.txt
@@ -0,0 +1,100 @@
+0 128:51 129:159 130:253 131:159 132:50 155:48 156:238 157:252 158:252 159:252 160:237 182:54 183:227 184:253 185:252 186:239 187:233 188:252 189:57 190:6 208:10 209:60 210:224 211:252 212:253 213:252 214:202 215:84 216:252 217:253 218:122 236:163 237:252 238:252 239:252 240:253 241:252 242:252 243:96 244:189 245:253 246:167 263:51 264:238 265:253 266:253 267:190 268:114 269:253 270:228 271:47 272:79 273:255 274:168 290:48 291:238 292:252 293:252 294:179 295:12 296:75 297:121 298:21 301:253 302:243 303:50 317:38 318:165 319:253 320:233 321:208 322:84 329:253 330:252 331:165 344:7 345:178 346:252 347:240 348:71 349:19 350:28 357:253 358:252 359:195 372:57 373:252 374:252 375:63 385:253 386:252 387:195 400:198 401:253 402:190 413:255 414:253 415:196 427:76 428:246 429:252 430:112 441:253 442:252 443:148 455:85 456:252 457:230 458:25 467:7 468:135 469:253 470:186 471:12 483:85 484:252 485:223 494:7 495:131 496:252 497:225 498:71 511:85 512:252 513:145 521:48 522:165 523:252 524:173 539:86 540:253 541:225 548:114 549:238 550:253 551:162 567:85 568:252 569:249 570:146 571:48 572:29 573:85 574:178 575:225 576:253 577:223 578:167 579:56 595:85 596:252 597:252 598:252 599:229 600:215 601:252 602:252 603:252 604:196 605:130 623:28 624:199 625:252 626:252 627:253 628:252 629:252 630:233 631:145 652:25 653:128 654:252 655:253 656:252 657:141 658:37
+1 159:124 160:253 161:255 162:63 186:96 187:244 188:251 189:253 190:62 214:127 215:251 216:251 217:253 218:62 241:68 242:236 243:251 244:211 245:31 246:8 268:60 269:228 270:251 271:251 272:94 296:155 297:253 298:253 299:189 323:20 324:253 325:251 326:235 327:66 350:32 351:205 352:253 353:251 354:126 378:104 379:251 380:253 381:184 382:15 405:80 406:240 407:251 408:193 409:23 432:32 433:253 434:253 435:253 436:159 460:151 461:251 462:251 463:251 464:39 487:48 488:221 489:251 490:251 491:172 515:234 516:251 517:251 518:196 519:12 543:253 544:251 545:251 546:89 570:159 571:255 572:253 573:253 574:31 597:48 598:228 599:253 600:247 601:140 602:8 625:64 626:251 627:253 628:220 653:64 654:251 655:253 656:220 681:24 682:193 683:253 684:220
+1 125:145 126:255 127:211 128:31 152:32 153:237 154:253 155:252 156:71 180:11 181:175 182:253 183:252 184:71 209:144 210:253 211:252 212:71 236:16 237:191 238:253 239:252 240:71 264:26 265:221 266:253 267:252 268:124 269:31 293:125 294:253 295:252 296:252 297:108 322:253 323:252 324:252 325:108 350:255 351:253 352:253 353:108 378:253 379:252 380:252 381:108 406:253 407:252 408:252 409:108 434:253 435:252 436:252 437:108 462:255 463:253 464:253 465:170 490:253 491:252 492:252 493:252 494:42 518:149 519:252 520:252 521:252 522:144 546:109 547:252 548:252 549:252 550:144 575:218 576:253 577:253 578:255 579:35 603:175 604:252 605:252 606:253 607:35 631:73 632:252 633:252 634:253 635:35 659:31 660:211 661:252 662:253 663:35
+1 153:5 154:63 155:197 181:20 182:254 183:230 184:24 209:20 210:254 211:254 212:48 237:20 238:254 239:255 240:48 265:20 266:254 267:254 268:57 293:20 294:254 295:254 296:108 321:16 322:239 323:254 324:143 350:178 351:254 352:143 378:178 379:254 380:143 406:178 407:254 408:162 434:178 435:254 436:240 462:113 463:254 464:240 490:83 491:254 492:245 493:31 518:79 519:254 520:246 521:38 547:214 548:254 549:150 575:144 576:241 577:8 603:144 604:240 605:2 631:144 632:254 633:82 659:230 660:247 661:40 687:168 688:209 689:31
+1 152:1 153:168 154:242 155:28 180:10 181:228 182:254 183:100 209:190 210:254 211:122 237:83 238:254 239:162 265:29 266:254 267:248 268:25 293:29 294:255 295:254 296:103 321:29 322:254 323:254 324:109 349:29 350:254 351:254 352:109 377:29 378:254 379:254 380:109 405:29 406:255 407:254 408:109 433:29 434:254 435:254 436:109 461:29 462:254 463:254 464:63 489:29 490:254 491:254 492:28 517:29 518:254 519:254 520:28 545:29 546:254 547:254 548:35 573:29 574:254 575:254 576:109 601:6 602:212 603:254 604:109 630:203 631:254 632:178 658:155 659:254 660:190 686:32 687:199 688:104
+0 130:64 131:253 132:255 133:63 157:96 158:205 159:251 160:253 161:205 162:111 163:4 184:96 185:189 186:251 187:251 188:253 189:251 190:251 191:31 209:16 210:64 211:223 212:244 213:251 214:251 215:211 216:213 217:251 218:251 219:31 236:80 237:181 238:251 239:253 240:251 241:251 242:251 243:94 244:96 245:251 246:251 247:31 263:92 264:253 265:253 266:253 267:255 268:253 269:253 270:253 271:95 272:96 273:253 274:253 275:31 290:92 291:236 292:251 293:243 294:220 295:233 296:251 297:251 298:243 299:82 300:96 301:251 302:251 303:31 317:80 318:253 319:251 320:251 321:188 323:96 324:251 325:251 326:109 328:96 329:251 330:251 331:31 344:96 345:240 346:253 347:243 348:188 349:42 351:96 352:204 353:109 354:4 356:12 357:197 358:251 359:31 372:221 373:251 374:253 375:121 379:36 380:23 385:190 386:251 387:31 399:48 400:234 401:253 413:191 414:253 415:31 426:44 427:221 428:251 429:251 440:12 441:197 442:251 443:31 454:190 455:251 456:251 457:251 468:96 469:251 470:251 471:31 482:190 483:251 484:251 485:113 495:40 496:234 497:251 498:219 499:23 510:190 511:251 512:251 513:94 522:40 523:217 524:253 525:231 526:47 538:191 539:253 540:253 541:253 548:12 549:174 550:253 551:253 552:219 553:39 566:67 567:236 568:251 569:251 570:191 571:190 572:111 573:72 574:190 575:191 576:197 577:251 578:243 579:121 580:39 595:63 596:236 597:251 598:253 599:251 600:251 601:251 602:251 603:253 604:251 605:188 606:94 624:27 625:129 626:253 627:251 628:251 629:251 630:251 631:229 632:168 633:15 654:95 655:212 656:251 657:211 658:94 659:59
+1 159:121 160:254 161:136 186:13 187:230 188:253 189:248 190:99 213:4 214:118 215:253 216:253 217:225 218:42 241:61 242:253 243:253 244:253 245:74 268:32 269:206 270:253 271:253 272:186 273:9 296:211 297:253 298:253 299:239 300:69 324:254 325:253 326:253 327:133 351:142 352:255 353:253 354:186 355:8 378:149 379:229 380:254 381:207 382:21 405:54 406:229 407:253 408:254 409:105 433:152 434:254 435:254 436:213 437:26 460:112 461:251 462:253 463:253 464:26 487:29 488:212 489:253 490:250 491:149 514:36 515:214 516:253 517:253 518:137 542:75 543:253 544:253 545:253 546:59 570:93 571:253 572:253 573:189 574:17 598:224 599:253 600:253 601:84 625:43 626:235 627:253 628:126 629:1 653:99 654:248 655:253 656:119 682:225 683:235 684:49
+1 100:166 101:222 102:55 128:197 129:254 130:218 131:5 155:29 156:249 157:254 158:254 159:9 183:45 184:254 185:254 186:174 187:2 210:4 211:164 212:254 213:254 214:85 238:146 239:254 240:254 241:254 242:85 265:101 266:245 267:254 268:254 269:254 270:85 292:97 293:248 294:254 295:204 296:254 297:254 298:85 315:12 316:59 317:98 318:151 319:237 320:254 321:254 322:109 323:35 324:254 325:254 326:85 343:41 344:216 345:254 346:254 347:239 348:153 349:37 350:4 351:32 352:254 353:254 354:85 372:7 373:44 374:44 375:30 379:32 380:254 381:254 382:96 407:19 408:230 409:254 410:174 436:197 437:254 438:110 464:197 465:254 466:85 492:197 493:253 494:63 515:37 516:54 517:54 518:45 519:26 520:84 521:221 522:84 523:21 524:31 525:162 526:78 540:6 541:41 542:141 543:244 544:254 545:254 546:248 547:236 548:254 549:254 550:254 551:233 552:239 553:254 554:138 567:23 568:167 569:254 570:254 571:254 572:254 573:229 574:228 575:185 576:138 577:138 578:138 579:138 580:138 581:138 582:44 595:113 596:254 597:254 598:254 599:179 600:64 601:5 623:32 624:209 625:183 626:97
+0 155:53 156:255 157:253 158:253 159:253 160:124 183:180 184:253 185:251 186:251 187:251 188:251 189:145 190:62 209:32 210:217 211:241 212:253 213:251 214:251 215:251 216:251 217:253 218:107 237:37 238:251 239:251 240:253 241:251 242:251 243:251 244:251 245:253 246:107 265:166 266:251 267:251 268:253 269:251 270:96 271:148 272:251 273:253 274:107 291:73 292:253 293:253 294:253 295:253 296:130 299:110 300:253 301:255 302:108 319:73 320:251 321:251 322:251 323:251 327:109 328:251 329:253 330:107 347:202 348:251 349:251 350:251 351:225 354:6 355:129 356:251 357:253 358:107 375:150 376:251 377:251 378:251 379:71 382:115 383:251 384:251 385:253 386:107 403:253 404:251 405:251 406:173 407:20 410:217 411:251 412:251 413:253 414:107 430:182 431:255 432:253 433:216 438:218 439:253 440:253 441:182 457:63 458:221 459:253 460:251 461:215 465:84 466:236 467:251 468:251 469:77 485:109 486:251 487:253 488:251 489:215 492:11 493:160 494:251 495:251 496:96 513:109 514:251 515:253 516:251 517:137 520:150 521:251 522:251 523:251 524:71 541:109 542:251 543:253 544:251 545:35 547:130 548:253 549:251 550:251 551:173 552:20 569:110 570:253 571:255 572:253 573:98 574:150 575:253 576:255 577:253 578:164 597:109 598:251 599:253 600:251 601:251 602:251 603:251 604:253 605:251 606:35 625:93 626:241 627:253 628:251 629:251 630:251 631:251 632:216 633:112 634:5 654:103 655:253 656:251 657:251 658:251 659:251 683:124 684:251 685:225 686:71 687:71
+0 128:73 129:253 130:227 131:73 132:21 156:73 157:251 158:251 159:251 160:174 182:16 183:166 184:228 185:251 186:251 187:251 188:122 210:62 211:220 212:253 213:251 214:251 215:251 216:251 217:79 238:79 239:231 240:253 241:251 242:251 243:251 244:251 245:232 246:77 264:145 265:253 266:253 267:253 268:255 269:253 270:253 271:253 272:253 273:255 274:108 292:144 293:251 294:251 295:251 296:253 297:168 298:107 299:169 300:251 301:253 302:189 303:20 318:27 319:89 320:236 321:251 322:235 323:215 324:164 325:15 326:6 327:129 328:251 329:253 330:251 331:35 345:47 346:211 347:253 348:251 349:251 350:142 354:37 355:251 356:251 357:253 358:251 359:35 373:109 374:251 375:253 376:251 377:251 378:142 382:11 383:148 384:251 385:253 386:251 387:164 400:11 401:150 402:253 403:255 404:211 405:25 410:11 411:150 412:253 413:255 414:211 415:25 428:140 429:251 430:251 431:253 432:107 438:37 439:251 440:251 441:211 442:46 456:190 457:251 458:251 459:253 460:128 461:5 466:37 467:251 468:251 469:51 484:115 485:251 486:251 487:253 488:188 489:20 492:32 493:109 494:129 495:251 496:173 497:103 512:217 513:251 514:251 515:201 516:30 520:73 521:251 522:251 523:251 524:71 540:166 541:253 542:253 543:255 544:149 545:73 546:150 547:253 548:255 549:253 550:253 551:143 568:140 569:251 570:251 571:253 572:251 573:251 574:251 575:251 576:253 577:251 578:230 579:61 596:190 597:251 598:251 599:253 600:251 601:251 602:251 603:251 604:242 605:215 606:55 624:21 625:189 626:251 627:253 628:251 629:251 630:251 631:173 632:103 653:31 654:200 655:253 656:251 657:96 658:71 659:20
+1 155:178 156:255 157:105 182:6 183:188 184:253 185:216 186:14 210:14 211:202 212:253 213:253 214:23 238:12 239:199 240:253 241:128 242:6 266:42 267:253 268:253 269:158 294:42 295:253 296:253 297:158 322:155 323:253 324:253 325:158 350:160 351:253 352:253 353:147 378:160 379:253 380:253 381:41 405:17 406:225 407:253 408:235 409:31 433:24 434:253 435:253 436:176 461:24 462:253 463:253 464:176 489:24 490:253 491:253 492:176 517:24 518:253 519:253 520:176 545:24 546:253 547:253 548:162 573:46 574:253 575:253 576:59 601:142 602:253 603:253 604:59 629:142 630:253 631:253 632:59 657:142 658:253 659:202 660:8 685:87 686:253 687:139
+0 154:46 155:105 156:254 157:254 158:254 159:254 160:255 161:239 162:41 180:37 181:118 182:222 183:254 184:253 185:253 186:253 187:253 188:253 189:253 190:211 191:54 207:14 208:200 209:253 210:253 211:254 212:253 213:253 214:253 215:253 216:253 217:253 218:253 219:116 233:16 234:160 235:236 236:253 237:253 238:253 239:254 240:253 241:253 242:246 243:229 244:253 245:253 246:253 247:116 261:99 262:253 263:253 264:253 265:253 266:253 267:254 268:253 269:253 270:213 271:99 272:253 273:253 274:253 275:116 288:25 289:194 290:253 291:253 292:253 293:253 294:131 295:97 296:169 297:253 298:93 299:99 300:253 301:253 302:253 303:116 316:206 317:253 318:253 319:251 320:233 321:127 322:9 324:18 325:38 326:3 327:15 328:171 329:253 330:253 331:116 343:55 344:240 345:253 346:253 347:233 355:31 356:186 357:253 358:253 359:116 371:176 372:253 373:253 374:253 375:127 383:99 384:253 385:253 386:253 387:116 399:176 400:253 401:253 402:131 403:9 411:99 412:253 413:253 414:253 415:116 426:119 427:254 428:254 429:232 430:75 440:158 441:254 442:254 443:117 454:118 455:253 456:253 457:154 468:156 469:253 470:253 471:116 482:118 483:253 484:253 485:154 496:156 497:253 498:253 499:116 509:46 510:222 511:253 512:253 513:154 522:7 523:116 524:246 525:253 526:180 527:9 538:118 539:253 540:253 541:154 550:116 551:253 552:253 553:253 554:174 566:118 567:253 568:253 569:154 577:110 578:246 579:253 580:253 581:240 582:67 594:118 595:253 596:253 597:238 598:215 599:49 600:20 601:20 602:20 603:66 604:215 605:241 606:253 607:245 608:233 609:64 622:82 623:229 624:253 625:253 626:253 627:253 628:253 629:253 630:253 631:254 632:253 633:253 634:240 635:107 651:176 652:253 653:253 654:253 655:253 656:253 657:253 658:253 659:254 660:253 661:253 662:108 679:40 680:239 681:253 682:253 683:253 684:253 685:253 686:253 687:254 688:161 689:57 690:4
+0 152:56 153:105 154:220 155:254 156:63 178:18 179:166 180:233 181:253 182:253 183:253 184:236 185:209 186:209 187:209 188:77 189:18 206:84 207:253 208:253 209:253 210:253 211:253 212:254 213:253 214:253 215:253 216:253 217:172 218:8 233:57 234:238 235:253 236:253 237:253 238:253 239:253 240:254 241:253 242:253 243:253 244:253 245:253 246:119 260:14 261:238 262:253 263:253 264:253 265:253 266:253 267:253 268:179 269:196 270:253 271:253 272:253 273:253 274:238 275:12 288:33 289:253 290:253 291:253 292:253 293:253 294:248 295:134 297:18 298:83 299:237 300:253 301:253 302:253 303:14 316:164 317:253 318:253 319:253 320:253 321:253 322:128 327:57 328:119 329:214 330:253 331:94 343:57 344:248 345:253 346:253 347:253 348:126 349:14 350:4 357:179 358:253 359:248 360:56 371:175 372:253 373:253 374:240 375:190 376:28 385:179 386:253 387:253 388:173 399:209 400:253 401:253 402:178 413:92 414:253 415:253 416:208 427:211 428:254 429:254 430:179 442:135 443:255 444:209 455:209 456:253 457:253 458:90 470:134 471:253 472:208 483:209 484:253 485:253 486:178 497:2 498:142 499:253 500:208 511:209 512:253 513:253 514:214 515:35 525:30 526:253 527:253 528:208 539:165 540:253 541:253 542:253 543:215 544:36 553:163 554:253 555:253 556:164 567:18 568:172 569:253 570:253 571:253 572:214 573:127 574:7 580:72 581:232 582:253 583:171 584:17 596:8 597:182 598:253 599:253 600:253 601:253 602:162 603:56 607:64 608:240 609:253 610:253 611:14 625:7 626:173 627:253 628:253 629:253 630:253 631:245 632:241 633:239 634:239 635:246 636:253 637:225 638:14 639:1 654:18 655:59 656:138 657:224 658:253 659:253 660:254 661:253 662:253 663:253 664:240 665:96 685:37 686:104 687:192 688:255 689:253 690:253 691:182 692:73
+1 130:7 131:176 132:254 133:224 158:51 159:253 160:253 161:223 185:4 186:170 187:253 188:253 189:214 213:131 214:253 215:253 216:217 217:39 241:209 242:253 243:253 244:134 268:75 269:240 270:253 271:239 272:26 296:184 297:253 298:245 299:63 323:142 324:255 325:253 326:185 350:62 351:229 352:254 353:242 354:73 377:54 378:229 379:253 380:254 381:105 405:152 406:254 407:254 408:213 409:26 432:32 433:243 434:253 435:253 436:115 459:2 460:142 461:253 462:253 463:155 487:30 488:253 489:253 490:232 491:55 515:75 516:253 517:253 518:164 542:72 543:232 544:253 545:189 546:17 570:224 571:253 572:253 573:163 597:43 598:235 599:253 600:253 601:195 602:21 625:28 626:231 627:253 628:253 629:184 630:14 654:225 655:253 656:253 657:75
+0 155:21 156:176 157:253 158:253 159:124 182:105 183:176 184:251 185:251 186:251 187:251 188:105 208:58 209:217 210:241 211:253 212:251 213:251 214:251 215:251 216:243 217:113 218:5 235:63 236:231 237:251 238:251 239:253 240:251 241:251 242:251 243:251 244:253 245:251 246:113 263:144 264:251 265:251 266:251 267:253 268:251 269:251 270:251 271:251 272:253 273:251 274:215 290:125 291:253 292:253 293:253 294:253 295:255 296:253 297:253 298:253 299:253 300:255 301:253 302:227 303:42 318:253 319:251 320:251 321:251 322:251 323:253 324:251 325:251 326:251 327:251 328:253 329:251 330:251 331:142 345:27 346:253 347:251 348:251 349:235 350:241 351:253 352:251 353:246 354:137 355:35 356:98 357:251 358:251 359:236 360:61 372:47 373:211 374:253 375:251 376:235 377:82 378:103 379:253 380:251 381:137 384:73 385:251 386:251 387:251 388:71 399:27 400:211 401:251 402:253 403:251 404:86 407:72 408:71 409:10 412:73 413:251 414:251 415:173 416:20 427:89 428:253 429:253 430:255 431:253 432:35 440:73 441:253 442:253 443:253 444:72 454:84 455:236 456:251 457:251 458:253 459:251 460:138 468:73 469:251 470:251 471:251 472:71 481:63 482:236 483:251 484:251 485:251 486:227 487:251 488:246 489:138 490:11 494:16 495:37 496:228 497:251 498:246 499:137 500:10 509:73 510:251 511:251 512:251 513:173 514:42 515:142 516:142 517:142 518:41 522:109 523:251 524:253 525:251 526:137 537:73 538:251 539:251 540:173 541:20 549:27 550:211 551:251 552:253 553:147 554:10 565:73 566:253 567:253 568:143 575:21 576:176 577:253 578:253 579:253 593:73 594:251 595:251 596:205 597:144 603:176 604:251 605:251 606:188 607:107 621:62 622:236 623:251 624:251 625:251 626:218 627:217 628:217 629:217 630:217 631:253 632:230 633:189 634:20 650:83 651:158 652:251 653:251 654:253 655:251 656:251 657:251 658:251 659:253 660:107 679:37 680:251 681:251 682:253 683:251 684:251 685:251 686:122 687:72 688:30
+1 151:68 152:45 153:131 154:131 155:131 156:101 157:68 158:92 159:44 187:19 188:170 211:29 212:112 213:89 215:40 216:222 239:120 240:254 241:251 242:127 243:40 244:222 267:197 268:254 269:254 270:91 271:40 272:222 294:64 295:247 296:254 297:236 298:50 299:40 300:107 322:184 323:254 324:254 325:91 327:6 328:14 350:203 351:254 352:254 353:71 377:23 378:218 379:254 380:254 381:71 405:113 406:254 407:255 408:239 409:53 433:210 434:254 435:254 436:195 460:62 461:242 462:254 463:241 464:88 468:28 488:86 489:254 490:254 491:189 495:28 496:104 516:106 517:254 518:254 519:168 523:40 524:91 544:216 545:254 546:245 547:51 551:35 552:80 572:216 573:254 574:102 599:55 600:239 601:254 602:52 627:166 628:254 629:210 630:23 655:223 656:252 657:104 683:223 684:169
+0 125:29 126:170 127:255 128:255 129:141 151:29 152:198 153:255 154:255 155:255 156:226 157:255 158:86 178:141 179:255 180:255 181:170 182:29 184:86 185:255 186:255 187:141 204:29 205:226 206:255 207:198 208:57 213:226 214:255 215:255 216:226 217:114 231:29 232:255 233:255 234:114 241:141 242:170 243:114 244:255 245:255 246:141 259:226 260:255 261:170 269:29 270:57 273:141 274:255 275:226 286:57 287:255 288:170 302:114 303:255 304:198 314:226 315:255 331:170 332:255 333:57 342:255 343:226 360:255 361:170 370:255 371:170 388:114 389:198 398:255 399:226 416:86 417:255 426:198 427:255 444:86 445:255 454:114 455:255 456:57 472:86 473:255 482:29 483:255 484:226 500:141 501:255 511:170 512:255 513:170 528:226 529:198 539:29 540:226 541:255 542:170 555:29 556:255 557:114 568:29 569:226 570:255 571:141 582:57 583:226 584:226 598:141 599:255 600:255 601:170 602:86 607:29 608:86 609:226 610:255 611:226 612:29 627:86 628:198 629:255 630:255 631:255 632:255 633:255 634:255 635:255 636:255 637:255 638:141 639:29 657:29 658:114 659:170 660:170 661:170 662:170 663:170 664:86
+0 153:203 154:254 155:252 156:252 157:252 158:214 159:51 160:20 180:62 181:221 182:252 183:250 184:250 185:250 186:252 187:250 188:160 189:20 207:62 208:211 209:250 210:252 211:250 212:250 213:250 214:252 215:250 216:250 217:49 234:41 235:221 236:250 237:250 238:252 239:250 240:250 241:250 242:252 243:250 244:128 245:10 262:254 263:252 264:252 265:252 266:254 267:252 268:252 269:252 270:254 271:252 272:252 273:90 290:150 291:190 292:250 293:250 294:252 295:250 296:250 297:169 298:171 299:250 300:250 301:250 302:82 318:31 319:191 320:250 321:250 322:252 323:189 324:100 325:20 326:172 327:250 328:250 329:250 330:80 346:213 347:250 348:250 349:250 350:212 351:29 354:252 355:250 356:250 357:250 374:92 375:252 376:252 377:252 382:51 383:252 384:252 385:252 386:203 401:82 402:252 403:250 404:250 405:169 410:132 411:250 412:250 413:250 414:121 428:92 429:231 430:252 431:250 432:159 433:20 438:252 439:250 440:250 441:250 456:30 457:211 458:252 459:250 460:221 461:40 466:90 467:250 468:250 469:250 470:163 484:31 485:213 486:254 487:232 488:80 494:92 495:252 496:252 497:212 498:163 512:151 513:250 514:252 515:149 522:252 523:250 524:250 525:49 540:60 541:221 542:252 543:210 544:60 550:252 551:250 552:250 553:49 569:202 570:252 571:250 572:221 573:40 576:123 577:202 578:252 579:250 580:250 581:49 596:123 597:243 598:255 599:252 600:252 601:252 602:254 603:252 604:252 605:252 606:254 607:252 608:100 625:121 626:171 627:250 628:250 629:250 630:252 631:250 632:250 633:250 634:252 635:250 636:100 654:20 655:160 656:250 657:250 658:252 659:250 660:250 661:250 662:252 663:189 664:40 683:20 684:170 685:250 686:252 687:250 688:128 689:49 690:49 691:29
+1 98:64 99:191 100:70 125:68 126:243 127:253 128:249 129:63 152:30 153:223 154:253 155:253 156:247 157:41 179:73 180:238 181:253 182:253 183:253 184:242 206:73 207:236 208:253 209:253 210:253 211:253 212:242 234:182 235:253 236:253 237:191 238:247 239:253 240:149 262:141 263:253 264:143 265:86 266:249 267:253 268:122 290:9 291:36 292:7 293:14 294:233 295:253 296:122 322:230 323:253 324:122 350:230 351:253 352:122 378:231 379:255 380:123 406:230 407:253 408:52 433:61 434:245 435:253 461:98 462:253 463:253 468:35 469:12 489:98 490:253 491:253 494:9 495:142 496:233 497:146 517:190 518:253 519:253 520:128 521:7 522:99 523:253 524:253 525:180 544:29 545:230 546:253 547:253 548:252 549:210 550:253 551:253 552:253 553:140 571:28 572:207 573:253 574:253 575:253 576:254 577:253 578:253 579:235 580:70 581:9 599:126 600:253 601:253 602:253 603:253 604:254 605:253 606:168 607:19 627:79 628:253 629:253 630:201 631:190 632:132 633:63 634:5
+1 125:26 126:240 127:72 153:25 154:238 155:208 182:209 183:226 184:14 210:209 211:254 212:43 238:175 239:254 240:128 266:63 267:254 268:204 294:107 295:254 296:204 322:88 323:254 324:204 350:55 351:254 352:204 378:126 379:254 380:204 406:126 407:254 408:189 434:169 435:254 436:121 462:209 463:254 464:193 490:209 491:254 492:111 517:22 518:235 519:254 520:37 545:137 546:254 547:227 548:16 573:205 574:255 575:185 601:205 602:254 603:125 629:205 630:254 631:125 657:111 658:212 659:43
+0 155:62 156:91 157:213 158:255 159:228 160:91 161:12 182:70 183:230 184:253 185:253 186:253 187:253 188:253 189:152 190:7 210:246 211:253 212:253 213:253 214:253 215:253 216:253 217:253 218:106 237:21 238:247 239:253 240:253 241:253 242:253 243:253 244:253 245:208 246:24 265:156 266:253 267:253 268:253 269:253 270:253 271:253 272:253 273:195 292:88 293:238 294:253 295:253 296:253 297:221 298:253 299:253 300:253 301:195 320:230 321:253 322:253 323:253 324:198 325:40 326:177 327:253 328:253 329:195 346:56 347:156 348:251 349:253 350:189 351:182 352:15 354:86 355:240 356:253 357:210 358:28 374:213 375:253 376:253 377:156 378:3 383:205 384:253 385:253 386:106 401:121 402:252 403:253 404:135 405:3 411:46 412:253 413:253 414:106 428:28 429:212 430:253 431:248 432:23 439:42 440:253 441:253 442:106 456:197 457:253 458:234 459:70 467:42 468:253 469:253 470:106 483:11 484:202 485:253 486:187 495:58 496:253 497:210 498:27 511:107 512:253 513:253 514:40 522:53 523:227 524:253 525:195 539:107 540:253 541:253 542:40 549:47 550:227 551:253 552:231 553:58 567:107 568:253 569:253 570:40 575:5 576:131 577:222 578:253 579:231 580:59 595:14 596:204 597:253 598:226 599:222 600:73 601:58 602:58 603:170 604:253 605:253 606:227 607:58 624:197 625:253 626:253 627:253 628:253 629:253 630:253 631:253 632:253 633:238 634:58 652:33 653:179 654:241 655:253 656:253 657:253 658:253 659:250 660:116 661:14 682:75 683:179 684:253 685:151 686:89 687:86
+1 157:42 158:228 159:253 160:253 185:144 186:251 187:251 188:251 212:89 213:236 214:251 215:235 216:215 239:79 240:253 241:251 242:251 243:142 267:180 268:253 269:251 270:251 271:142 294:32 295:202 296:255 297:253 298:216 322:109 323:251 324:253 325:251 326:112 349:6 350:129 351:251 352:253 353:127 354:5 377:37 378:251 379:251 380:253 381:107 405:166 406:251 407:251 408:201 409:30 432:42 433:228 434:253 435:253 460:144 461:251 462:251 463:147 487:63 488:236 489:251 490:251 491:71 515:150 516:251 517:251 518:204 519:41 543:253 544:251 545:251 546:142 571:255 572:253 573:164 598:105 599:253 600:251 601:35 626:180 627:253 628:251 629:35 654:180 655:253 656:251 657:35 682:180 683:253 684:251 685:35
+1 128:62 129:254 130:213 156:102 157:253 158:252 159:102 160:20 184:102 185:254 186:253 187:254 188:50 212:102 213:253 214:252 215:253 216:50 240:102 241:254 242:253 243:254 244:50 268:142 269:253 270:252 271:253 272:50 295:51 296:253 297:254 298:253 299:224 300:20 323:132 324:252 325:253 326:252 327:162 351:173 352:253 353:254 354:253 355:102 378:82 379:253 380:252 381:253 382:252 383:61 406:203 407:254 408:253 409:254 410:233 433:41 434:243 435:253 436:252 437:253 438:111 461:132 462:253 463:254 464:253 465:203 488:41 489:253 490:252 491:253 492:252 493:40 515:11 516:213 517:254 518:253 519:254 520:151 543:92 544:252 545:253 546:252 547:192 548:50 570:21 571:214 572:253 573:255 574:253 575:41 598:142 599:253 600:252 601:253 602:171 625:113 626:253 627:255 628:253 629:203 630:40 653:30 654:131 655:233 656:111
+0 154:28 155:195 156:254 157:254 158:254 159:254 160:254 161:255 162:61 181:6 182:191 183:253 184:253 185:253 186:253 187:253 188:253 189:253 190:60 208:26 209:190 210:253 211:253 212:253 213:253 214:240 215:191 216:242 217:253 218:60 235:15 236:187 237:253 238:253 239:253 240:253 241:253 242:200 244:211 245:253 246:60 262:22 263:66 264:253 265:253 266:253 267:253 268:241 269:209 270:44 271:23 272:218 273:253 274:60 290:124 291:253 292:253 293:253 294:253 295:253 296:182 299:131 300:253 301:253 302:60 318:38 319:217 320:253 321:253 322:244 323:111 324:37 327:131 328:253 329:253 330:60 346:124 347:253 348:253 349:253 350:165 354:22 355:182 356:253 357:253 358:60 374:124 375:253 376:253 377:240 378:45 382:53 383:253 384:253 385:249 386:58 401:16 402:168 403:253 404:216 405:45 410:53 411:253 412:253 413:138 429:159 430:253 431:253 432:147 438:53 439:253 440:253 441:138 456:136 457:252 458:253 459:227 460:5 466:53 467:253 468:243 469:101 484:140 485:253 486:253 487:124 494:156 495:253 496:218 511:13 512:164 513:253 514:142 515:5 521:32 522:233 523:253 524:218 539:62 540:253 541:253 542:130 548:37 549:203 550:253 551:253 552:127 567:62 568:253 569:253 570:147 571:36 572:36 573:36 574:36 575:151 576:222 577:253 578:245 579:127 580:8 595:34 596:202 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:253 605:253 606:200 624:140 625:253 626:253 627:253 628:253 629:253 630:253 631:253 632:248 633:235 634:65 652:87 653:173 654:253 655:253 656:253 657:253 658:253 659:253 660:182 681:14 682:78 683:96 684:253 685:253 686:253 687:137 688:56
+0 123:8 124:76 125:202 126:254 127:255 128:163 129:37 130:2 150:13 151:182 152:253 153:253 154:253 155:253 156:253 157:253 158:23 177:15 178:179 179:253 180:253 181:212 182:91 183:218 184:253 185:253 186:179 187:109 205:105 206:253 207:253 208:160 209:35 210:156 211:253 212:253 213:253 214:253 215:250 216:113 232:19 233:212 234:253 235:253 236:88 237:121 238:253 239:233 240:128 241:91 242:245 243:253 244:248 245:114 260:104 261:253 262:253 263:110 264:2 265:142 266:253 267:90 270:26 271:199 272:253 273:248 274:63 287:1 288:173 289:253 290:253 291:29 293:84 294:228 295:39 299:72 300:251 301:253 302:215 303:29 315:36 316:253 317:253 318:203 319:13 328:82 329:253 330:253 331:170 343:36 344:253 345:253 346:164 356:11 357:198 358:253 359:184 360:6 371:36 372:253 373:253 374:82 385:138 386:253 387:253 388:35 399:128 400:253 401:253 402:47 413:48 414:253 415:253 416:35 427:154 428:253 429:253 430:47 441:48 442:253 443:253 444:35 455:102 456:253 457:253 458:99 469:48 470:253 471:253 472:35 483:36 484:253 485:253 486:164 496:16 497:208 498:253 499:211 500:17 511:32 512:244 513:253 514:175 515:4 524:44 525:253 526:253 527:156 540:171 541:253 542:253 543:29 551:30 552:217 553:253 554:188 555:19 568:171 569:253 570:253 571:59 578:60 579:217 580:253 581:253 582:70 596:78 597:253 598:253 599:231 600:48 604:26 605:128 606:249 607:253 608:244 609:94 610:15 624:8 625:151 626:253 627:253 628:234 629:101 630:121 631:219 632:229 633:253 634:253 635:201 636:80 653:38 654:232 655:253 656:253 657:253 658:253 659:253 660:253 661:253 662:201 663:66
+0 127:68 128:254 129:255 130:254 131:107 153:11 154:176 155:230 156:253 157:253 158:253 159:212 180:28 181:197 182:253 183:253 184:253 185:253 186:253 187:229 188:107 189:14 208:194 209:253 210:253 211:253 212:253 213:253 214:253 215:253 216:253 217:53 235:69 236:241 237:253 238:253 239:253 240:253 241:241 242:186 243:253 244:253 245:195 262:10 263:161 264:253 265:253 266:253 267:246 268:40 269:57 270:231 271:253 272:253 273:195 290:140 291:253 292:253 293:253 294:253 295:154 297:25 298:253 299:253 300:253 301:195 318:213 319:253 320:253 321:253 322:135 323:8 325:3 326:128 327:253 328:253 329:195 345:77 346:238 347:253 348:253 349:253 350:7 354:116 355:253 356:253 357:195 372:11 373:165 374:253 375:253 376:231 377:70 378:1 382:78 383:237 384:253 385:195 400:33 401:253 402:253 403:253 404:182 411:200 412:253 413:195 428:98 429:253 430:253 431:253 432:24 439:42 440:253 441:195 456:197 457:253 458:253 459:253 460:24 467:163 468:253 469:195 484:197 485:253 486:253 487:189 488:13 494:53 495:227 496:253 497:121 512:197 513:253 514:253 515:114 521:21 522:227 523:253 524:231 525:27 540:197 541:253 542:253 543:114 547:5 548:131 549:143 550:253 551:231 552:59 568:197 569:253 570:253 571:236 572:73 573:58 574:217 575:223 576:253 577:253 578:253 579:174 596:197 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:253 605:253 606:253 607:48 624:149 625:253 626:253 627:253 628:253 629:253 630:253 631:253 632:253 633:182 634:15 635:3 652:12 653:168 654:253 655:253 656:253 657:253 658:253 659:248 660:89 661:23
+1 157:85 158:255 159:103 160:1 185:205 186:253 187:253 188:30 213:205 214:253 215:253 216:30 240:44 241:233 242:253 243:244 244:27 268:135 269:253 270:253 271:100 296:153 297:253 298:240 299:76 323:12 324:208 325:253 326:166 351:69 352:253 353:253 354:142 378:14 379:110 380:253 381:235 382:33 406:63 407:223 408:235 409:130 434:186 435:253 436:235 437:37 461:17 462:145 463:253 464:231 465:35 489:69 490:220 491:231 492:123 516:18 517:205 518:253 519:176 520:27 543:17 544:125 545:253 546:185 547:39 571:71 572:214 573:231 574:41 599:167 600:253 601:225 602:33 626:72 627:205 628:207 629:14 653:30 654:249 655:233 656:49 681:32 682:253 683:89
+1 126:94 127:132 154:250 155:250 156:4 182:250 183:254 184:95 210:250 211:254 212:95 238:250 239:254 240:95 266:250 267:254 268:95 294:250 295:254 296:95 322:250 323:254 324:95 350:250 351:254 352:95 378:250 379:254 380:95 405:77 406:254 407:250 408:19 433:96 434:254 435:249 461:53 462:253 463:252 464:43 490:250 491:251 492:32 517:85 518:254 519:249 545:96 546:254 547:249 573:83 574:254 575:250 576:14 602:250 603:254 604:95 630:250 631:255 632:95 658:132 659:254 660:95
+1 124:32 125:253 126:31 152:32 153:251 154:149 180:32 181:251 182:188 208:32 209:251 210:188 236:32 237:251 238:228 239:59 264:32 265:253 266:253 267:95 292:28 293:236 294:251 295:114 321:127 322:251 323:251 349:127 350:251 351:251 377:48 378:232 379:251 406:223 407:253 408:159 434:221 435:251 436:158 462:142 463:251 464:158 490:64 491:251 492:242 493:55 518:64 519:251 520:253 521:161 546:64 547:253 548:255 549:221 574:16 575:181 576:253 577:220 603:79 604:253 605:236 606:63 632:213 633:251 634:126 660:96 661:251 662:126
+1 129:39 130:254 131:255 132:254 133:140 157:136 158:253 159:253 160:228 161:67 184:6 185:227 186:253 187:253 188:58 211:29 212:188 213:253 214:253 215:253 216:17 239:95 240:253 241:253 242:253 243:157 244:8 266:3 267:107 268:253 269:253 270:245 271:77 294:29 295:253 296:253 297:240 298:100 322:141 323:253 324:253 325:215 349:129 350:248 351:253 352:253 353:215 377:151 378:253 379:253 380:253 381:144 405:151 406:253 407:253 408:253 409:27 431:3 432:102 433:242 434:253 435:253 436:110 437:3 459:97 460:253 461:253 462:253 463:214 464:55 487:207 488:253 489:253 490:253 491:158 515:67 516:253 517:253 518:253 519:158 543:207 544:253 545:253 546:240 547:88 571:207 572:253 573:253 574:224 598:32 599:217 600:253 601:253 602:224 626:141 627:253 628:253 629:253 630:133 654:36 655:219 656:253 657:140 658:10
+0 123:59 124:55 149:71 150:192 151:254 152:250 153:147 154:17 176:123 177:247 178:253 179:254 180:253 181:253 182:196 183:79 184:176 185:175 186:175 187:124 188:48 203:87 204:247 205:247 206:176 207:95 208:102 209:117 210:243 211:237 212:192 213:232 214:253 215:253 216:245 217:152 218:6 230:23 231:229 232:253 233:138 238:219 239:58 241:95 242:118 243:80 244:230 245:254 246:196 247:30 258:120 259:254 260:205 261:8 266:114 272:38 273:255 274:254 275:155 276:5 286:156 287:253 288:92 301:61 302:235 303:253 304:102 314:224 315:253 316:78 330:117 331:253 332:196 333:18 342:254 343:253 344:78 358:9 359:211 360:253 361:73 370:254 371:253 372:78 387:175 388:253 389:155 398:194 399:254 400:101 415:79 416:254 417:155 426:112 427:253 428:211 429:9 443:73 444:251 445:200 454:41 455:241 456:253 457:87 471:25 472:240 473:253 483:147 484:253 485:227 486:47 499:94 500:253 501:200 511:5 512:193 513:253 514:230 515:76 527:175 528:253 529:155 540:31 541:219 542:254 543:255 544:126 545:18 553:14 554:149 555:254 556:244 557:45 569:21 570:158 571:254 572:253 573:226 574:162 575:118 576:96 577:20 578:20 579:73 580:118 581:224 582:253 583:247 584:85 598:30 599:155 600:253 601:253 602:253 603:253 604:254 605:253 606:253 607:253 608:253 609:254 610:247 611:84 627:5 628:27 629:117 630:206 631:244 632:229 633:213 634:213 635:213 636:176 637:117 638:32 659:45 660:23
+1 128:58 129:139 156:247 157:247 158:25 183:121 184:253 185:156 186:3 211:133 212:253 213:145 238:11 239:227 240:253 241:145 266:7 267:189 268:253 269:145 294:35 295:252 296:253 297:145 322:146 323:252 324:253 325:131 350:146 351:252 352:253 353:13 378:146 379:252 380:253 381:13 406:147 407:253 408:255 409:13 434:146 435:252 436:253 437:13 462:146 463:252 464:253 465:13 490:146 491:252 492:253 493:13 517:22 518:230 519:252 520:221 521:9 545:22 546:230 547:252 548:133 574:146 575:252 576:133 602:146 603:252 604:120 630:146 631:252 658:146 659:252
+1 129:28 130:247 131:255 132:165 156:47 157:221 158:252 159:252 160:164 184:177 185:252 186:252 187:252 188:164 212:177 213:252 214:252 215:223 216:78 240:177 241:252 242:252 243:197 267:114 268:236 269:252 270:235 271:42 294:5 295:148 296:252 297:252 298:230 321:14 322:135 323:252 324:252 325:252 326:230 349:78 350:252 351:252 352:252 353:252 354:162 377:78 378:252 379:252 380:252 381:252 382:9 405:78 406:252 407:252 408:252 409:252 410:9 432:32 433:200 434:252 435:252 436:252 437:105 438:3 459:10 460:218 461:252 462:252 463:252 464:105 465:8 487:225 488:252 489:252 490:252 491:240 492:69 514:44 515:237 516:252 517:252 518:228 519:85 541:59 542:218 543:252 544:252 545:225 546:93 568:65 569:208 570:252 571:252 572:252 573:175 596:133 597:252 598:252 599:252 600:225 601:68 624:133 625:252 626:252 627:244 628:54 652:133 653:252 654:252 655:48
+0 156:13 157:6 181:10 182:77 183:145 184:253 185:190 186:67 207:11 208:77 209:193 210:252 211:252 212:253 213:252 214:238 215:157 216:71 217:26 233:10 234:78 235:193 236:252 237:252 238:252 239:252 240:253 241:252 242:252 243:252 244:252 245:228 246:128 247:49 248:5 259:6 260:78 261:194 262:252 263:252 264:252 265:252 266:252 267:252 268:253 269:217 270:192 271:232 272:252 273:252 274:252 275:252 276:135 277:3 286:4 287:147 288:252 289:252 290:252 291:252 292:252 293:252 294:252 295:252 296:175 297:26 299:40 300:145 301:235 302:252 303:252 304:252 305:104 314:208 315:252 316:252 317:252 318:252 319:252 320:252 321:133 322:48 323:48 329:71 330:236 331:252 332:252 333:230 342:253 343:185 344:170 345:252 346:252 347:252 348:173 349:22 358:102 359:252 360:252 361:252 370:24 371:141 372:243 373:252 374:252 375:186 376:5 386:8 387:220 388:252 389:252 398:70 399:247 400:252 401:252 402:165 403:37 414:81 415:251 416:252 417:194 426:255 427:253 428:253 429:251 430:69 441:39 442:231 443:253 444:253 445:127 454:253 455:252 456:249 457:127 468:6 469:147 470:252 471:252 472:190 473:5 482:253 483:252 484:216 495:7 496:145 497:252 498:252 499:252 500:69 510:253 511:252 512:223 513:16 522:25 523:185 524:252 525:252 526:252 527:107 528:8 538:167 539:252 540:252 541:181 542:18 549:105 550:191 551:252 552:252 553:235 554:151 555:10 566:37 567:221 568:252 569:252 570:210 571:193 572:96 573:73 574:130 575:188 576:194 577:227 578:252 579:252 580:235 581:128 595:97 596:220 597:252 598:252 599:252 600:252 601:252 602:252 603:252 604:253 605:252 606:252 607:236 608:70 624:40 625:174 626:252 627:252 628:252 629:252 630:252 631:252 632:253 633:197 634:138 635:29 653:5 654:23 655:116 656:143 657:143 658:143 659:143 660:24 661:10
+0 127:28 128:164 129:254 130:233 131:148 132:11 154:3 155:164 156:254 157:234 158:225 159:254 160:204 182:91 183:254 184:235 185:48 186:32 187:166 188:251 189:92 208:33 209:111 210:214 211:205 212:49 215:24 216:216 217:210 235:34 236:217 237:254 238:254 239:211 244:87 245:237 246:43 262:34 263:216 264:254 265:254 266:252 267:243 268:61 272:38 273:248 274:182 290:171 291:254 292:184 293:205 294:175 295:36 301:171 302:227 317:28 318:234 319:190 320:13 321:193 322:157 329:124 330:238 331:26 345:140 346:254 347:131 349:129 350:157 357:124 358:254 359:95 373:201 374:238 375:56 377:70 378:103 385:124 386:254 387:148 400:62 401:255 402:210 413:150 414:254 415:122 428:86 429:254 430:201 431:15 440:28 441:237 442:246 443:44 456:128 457:254 458:143 468:34 469:243 470:227 484:62 485:254 486:210 496:58 497:249 498:179 512:30 513:240 514:210 524:207 525:254 526:64 541:216 542:231 543:34 551:129 552:248 553:170 554:9 569:131 570:254 571:170 577:17 578:129 579:248 580:225 581:24 597:50 598:245 599:245 600:184 601:106 602:106 603:106 604:133 605:231 606:254 607:244 608:53 626:67 627:249 628:254 629:254 630:254 631:254 632:254 633:251 634:193 635:40 655:38 656:157 657:248 658:166 659:166 660:139 661:57
+0 129:105 130:255 131:219 132:67 133:67 134:52 156:20 157:181 158:253 159:253 160:253 161:253 162:226 163:69 182:4 183:129 184:206 185:253 186:253 187:253 188:253 189:253 190:253 191:130 209:9 210:141 211:253 212:253 213:253 214:253 215:253 216:253 217:253 218:253 219:166 220:20 237:134 238:253 239:253 240:253 241:253 242:253 243:253 244:253 245:253 246:253 247:253 248:65 262:2 263:83 264:207 265:246 266:253 267:253 268:253 269:253 270:253 271:249 272:234 273:247 274:253 275:253 276:65 290:83 291:253 292:253 293:253 294:253 295:253 296:189 297:253 298:253 299:205 301:179 302:253 303:253 304:65 317:85 318:234 319:253 320:253 321:253 322:253 323:157 324:26 325:164 326:151 327:83 329:179 330:253 331:253 332:65 344:65 345:237 346:253 347:253 348:253 349:67 350:36 351:14 353:15 354:12 357:179 358:253 359:253 360:65 371:4 372:141 373:253 374:253 375:221 376:158 377:23 385:179 386:253 387:253 388:65 399:129 400:253 401:253 402:241 403:62 412:72 413:226 414:253 415:175 416:24 426:119 427:247 428:253 429:253 430:206 439:8 440:134 441:253 442:253 443:130 454:132 455:253 456:253 457:194 458:27 467:125 468:253 469:253 470:253 471:130 481:45 482:213 483:253 484:253 485:112 493:70 494:170 495:247 496:253 497:253 498:89 499:43 509:67 510:253 511:253 512:196 513:55 514:9 520:8 521:131 522:253 523:253 524:253 525:86 526:1 537:67 538:253 539:253 540:253 541:253 542:129 546:43 547:114 548:134 549:253 550:253 551:231 552:139 553:41 565:20 566:167 567:253 568:253 569:253 570:247 571:179 572:179 573:179 574:206 575:253 576:253 577:253 578:253 579:72 594:103 595:240 596:253 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:244 605:119 606:8 607:1 623:107 624:253 625:253 626:253 627:253 628:253 629:253 630:253 631:175 632:111 651:3 652:121 653:253 654:253 655:253 656:253 657:253 658:182 659:24
+0 125:22 126:183 127:252 128:254 129:252 130:252 131:252 132:76 151:85 152:85 153:168 154:250 155:250 156:252 157:250 158:250 159:250 160:250 161:71 163:43 164:85 165:14 178:107 179:252 180:250 181:250 182:250 183:250 184:252 185:250 186:250 187:250 188:250 189:210 191:127 192:250 193:146 205:114 206:237 207:252 208:250 209:250 210:250 211:250 212:252 213:250 214:250 215:250 216:250 217:210 219:127 220:250 221:250 232:107 233:237 234:250 235:252 236:250 237:250 238:250 239:74 240:41 241:41 242:41 243:41 244:217 245:34 247:127 248:250 249:250 259:15 260:148 261:252 262:252 263:254 264:238 265:105 275:128 276:252 277:252 286:15 287:140 288:250 289:250 290:250 291:167 292:111 303:127 304:250 305:250 314:43 315:250 316:250 317:250 318:250 331:127 332:250 333:250 342:183 343:250 344:250 345:250 346:110 358:57 359:210 360:250 361:250 370:252 371:250 372:250 373:110 374:7 386:85 387:250 388:250 389:250 398:254 399:252 400:252 401:83 414:86 415:252 416:252 417:217 426:252 427:250 428:250 429:138 430:14 441:15 442:140 443:250 444:250 445:41 454:252 455:250 456:250 457:250 458:41 469:43 470:250 471:250 472:250 473:41 482:252 483:250 484:250 485:250 486:181 497:183 498:250 499:250 500:250 501:41 510:76 511:250 512:250 513:250 514:250 524:177 525:252 526:250 527:250 528:110 529:7 538:36 539:224 540:252 541:252 542:252 543:219 544:43 545:43 546:43 547:7 549:15 550:43 551:183 552:252 553:255 554:252 555:126 567:85 568:250 569:250 570:250 571:252 572:250 573:250 574:250 575:111 576:86 577:140 578:250 579:250 580:250 581:252 582:222 583:83 595:42 596:188 597:250 598:250 599:252 600:250 601:250 602:250 603:250 604:252 605:250 606:250 607:250 608:250 609:126 610:83 624:127 625:250 626:250 627:252 628:250 629:250 630:250 631:250 632:252 633:250 634:250 635:137 636:83 652:21 653:41 654:217 655:252 656:250 657:250 658:250 659:250 660:217 661:41 662:41 663:14
+1 155:114 156:206 157:25 183:238 184:252 185:55 211:222 212:252 213:55 239:113 240:252 241:55 267:113 268:252 269:55 295:255 296:253 297:56 323:253 324:176 325:6 350:32 351:253 352:233 353:43 378:140 379:253 380:195 381:19 406:140 407:253 408:167 433:29 434:253 435:141 461:29 462:252 463:140 489:29 490:252 491:140 517:29 518:252 519:140 545:29 546:252 547:140 573:169 574:253 575:79 601:169 602:252 628:76 629:234 630:141 656:197 657:233 658:37 684:197 685:223
+1 127:73 128:253 129:253 130:63 155:115 156:252 157:252 158:144 183:217 184:252 185:252 186:144 210:63 211:237 212:252 213:252 214:144 238:109 239:252 240:252 241:252 266:109 267:252 268:252 269:252 294:109 295:252 296:252 297:252 322:191 323:252 324:252 325:252 349:145 350:255 351:253 352:253 353:253 376:32 377:237 378:253 379:252 380:252 381:210 404:37 405:252 406:253 407:252 408:252 409:108 432:37 433:252 434:253 435:252 436:252 437:108 460:21 461:207 462:255 463:253 464:253 465:108 489:144 490:253 491:252 492:252 493:108 516:27 517:221 518:253 519:252 520:252 521:108 544:16 545:190 546:253 547:252 548:252 549:108 573:145 574:255 575:253 576:253 577:253 601:144 602:253 603:252 604:252 605:210 629:144 630:253 631:252 632:252 633:108 657:62 658:253 659:252 660:252 661:108
+1 120:85 121:253 122:132 123:9 147:82 148:241 149:251 150:251 151:128 175:175 176:251 177:251 178:251 179:245 180:121 203:13 204:204 205:251 206:251 207:251 208:245 209:107 232:39 233:251 234:251 235:251 236:251 237:167 238:22 260:15 261:155 262:251 263:251 264:251 265:251 266:177 289:15 290:157 291:248 292:251 293:251 294:251 295:165 319:214 320:251 321:251 322:251 323:212 324:78 325:24 347:109 348:251 349:251 350:251 351:253 352:251 353:170 354:10 375:5 376:57 377:162 378:251 379:253 380:251 381:251 382:18 405:106 406:239 407:255 408:253 409:253 410:213 434:105 435:253 436:251 437:251 438:230 439:72 463:253 464:251 465:251 466:251 467:221 468:67 491:72 492:251 493:251 494:251 495:251 496:96 519:36 520:199 521:251 522:251 523:251 524:155 525:15 548:45 549:204 550:251 551:251 552:251 553:157 577:161 578:249 579:251 580:251 581:248 582:147 606:233 607:251 608:251 609:251 610:173 634:233 635:251 636:251 637:251 638:173 662:53 663:131 664:251 665:251 666:173
+1 126:15 127:200 128:255 129:90 154:42 155:254 156:254 157:173 182:42 183:254 184:254 185:199 210:26 211:237 212:254 213:221 214:12 239:213 240:254 241:231 242:17 267:213 268:254 269:199 295:213 296:254 297:199 323:213 324:254 325:96 350:20 351:232 352:254 353:33 378:84 379:254 380:229 381:17 406:168 407:254 408:203 433:8 434:217 435:254 436:187 461:84 462:254 463:254 464:48 489:195 490:254 491:254 492:37 516:20 517:233 518:254 519:212 520:4 544:132 545:254 546:254 547:82 571:9 572:215 573:254 574:254 575:116 576:46 599:55 600:254 601:254 602:254 603:254 604:121 627:113 628:254 629:254 630:254 631:254 632:40 655:12 656:163 657:254 658:185 659:58 660:1
+0 182:32 183:57 184:57 185:57 186:57 187:57 188:57 189:57 208:67 209:185 210:229 211:252 212:252 213:252 214:253 215:252 216:252 217:252 218:185 219:66 234:13 235:188 236:246 237:252 238:253 239:252 240:252 241:252 242:241 243:139 244:177 245:252 246:253 247:246 248:187 249:13 261:26 262:255 263:253 264:244 265:175 266:101 274:126 275:244 276:253 277:153 288:82 289:243 290:253 291:214 292:81 303:169 304:252 305:252 315:19 316:215 317:252 318:206 319:56 331:169 332:252 333:252 343:157 344:252 345:252 346:13 359:169 360:252 361:151 370:41 371:253 372:253 373:128 386:92 387:253 388:206 389:13 398:166 399:252 400:196 401:9 414:216 415:252 416:142 426:253 427:252 428:168 441:89 442:253 443:208 444:13 454:253 455:252 456:68 468:38 469:225 470:253 471:96 482:254 483:253 484:56 495:45 496:229 497:253 498:151 510:253 511:252 512:81 522:70 523:225 524:252 525:227 538:216 539:252 540:168 548:29 549:134 550:253 551:252 552:186 553:31 566:91 567:252 568:243 569:125 573:51 574:114 575:113 576:210 577:252 578:253 579:151 580:19 595:157 596:253 597:253 598:254 599:253 600:253 601:253 602:254 603:253 604:244 605:175 606:51 623:19 624:122 625:196 626:197 627:221 628:196 629:196 630:197 631:121 632:56 655:25
+0 127:42 128:235 129:255 130:84 153:15 154:132 155:208 156:253 157:253 158:171 159:108 180:6 181:177 182:253 183:253 184:253 185:253 186:253 187:242 188:110 208:151 209:253 210:253 211:253 212:253 213:253 214:253 215:253 216:139 235:48 236:208 237:253 238:253 239:253 240:253 241:253 242:253 243:253 244:139 263:85 264:253 265:253 266:253 267:253 268:236 269:156 270:184 271:253 272:148 273:6 290:7 291:141 292:253 293:253 294:253 295:253 296:27 298:170 299:253 300:253 301:74 318:19 319:253 320:253 321:253 322:253 323:253 324:27 326:170 327:253 328:253 329:74 345:16 346:186 347:253 348:253 349:253 350:242 351:105 352:4 354:170 355:253 356:253 357:94 358:1 373:141 374:253 375:253 376:253 377:242 378:100 382:170 383:253 384:253 385:253 386:8 401:141 402:253 403:253 404:253 405:224 410:170 411:253 412:253 413:253 414:8 428:12 429:158 430:253 431:253 432:230 433:51 438:18 439:237 440:253 441:253 442:8 456:76 457:253 458:253 459:218 460:61 467:236 468:253 469:253 470:8 484:76 485:253 486:253 487:168 495:110 496:253 497:132 498:3 512:76 513:253 514:253 515:168 521:20 522:174 523:239 524:147 525:5 539:5 540:155 541:253 542:253 543:168 548:102 549:170 550:253 551:253 552:139 567:3 568:128 569:253 570:253 571:228 572:179 573:179 574:179 575:179 576:245 577:253 578:253 579:219 580:41 596:76 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:253 605:253 606:253 607:163 624:39 625:199 626:253 627:253 628:253 629:253 630:253 631:253 632:253 633:253 634:170 635:9 653:36 654:219 655:253 656:253 657:253 658:253 659:253 660:224 661:65 662:22
+1 156:202 157:253 158:69 184:253 185:252 186:121 212:253 213:252 214:69 240:253 241:252 242:69 267:106 268:253 269:231 270:37 295:179 296:255 297:196 322:17 323:234 324:253 325:92 350:93 351:252 352:253 353:92 378:93 379:252 380:253 381:92 406:93 407:252 408:232 409:8 434:208 435:253 436:116 462:207 463:252 464:116 490:207 491:252 492:32 517:57 518:244 519:252 545:122 546:252 547:252 573:185 574:253 575:253 601:184 602:252 603:252 629:101 630:252 631:252 657:13 658:173 659:252 660:43 686:9 687:232 688:116
+1 156:73 157:253 158:253 159:253 160:124 184:73 185:251 186:251 187:251 188:251 212:99 213:251 214:251 215:251 216:225 240:253 241:251 242:251 243:251 244:71 266:79 267:180 268:253 269:251 270:251 271:173 272:20 294:110 295:253 296:255 297:253 298:216 322:109 323:251 324:253 325:251 326:215 350:109 351:251 352:253 353:251 354:215 378:109 379:251 380:253 381:251 382:137 406:109 407:251 408:253 409:251 410:35 433:37 434:253 435:253 436:255 437:253 438:35 461:140 462:251 463:251 464:253 465:168 466:15 488:125 489:246 490:251 491:251 492:190 493:15 516:144 517:251 518:251 519:251 520:180 543:53 544:221 545:251 546:251 547:251 548:51 571:125 572:253 573:253 574:253 575:201 598:105 599:253 600:251 601:251 602:188 603:30 626:180 627:253 628:251 629:251 630:142 654:180 655:253 656:251 657:235 658:82 682:180 683:253 684:251 685:215
+1 124:111 125:255 126:48 152:162 153:253 154:237 155:63 180:206 181:253 182:253 183:183 208:87 209:217 210:253 211:205 237:90 238:253 239:238 240:60 265:37 266:225 267:253 268:89 294:206 295:253 296:159 322:206 323:253 324:226 350:206 351:253 352:226 378:206 379:253 380:226 406:206 407:253 408:226 434:206 435:253 436:226 462:206 463:253 464:226 490:206 491:253 492:226 518:206 519:253 520:237 521:45 546:206 547:253 548:253 549:109 574:173 575:253 576:253 577:109 602:69 603:253 604:253 605:109 630:64 631:248 632:253 633:109 659:112 660:253 661:109
+0 99:70 100:255 101:165 102:114 127:122 128:253 129:253 130:253 131:120 155:165 156:253 157:253 158:253 159:234 160:52 183:99 184:253 185:253 186:253 187:253 188:228 189:26 209:60 210:168 211:238 212:202 213:174 214:253 215:253 216:253 217:127 235:91 236:81 237:1 238:215 239:128 240:28 241:12 242:181 243:253 244:253 245:175 246:3 262:18 263:204 264:253 265:77 270:7 271:253 272:253 273:253 274:54 289:54 290:248 291:253 292:253 293:143 298:1 299:127 300:253 301:253 302:188 317:104 318:253 319:253 320:253 321:20 327:81 328:249 329:253 330:191 345:192 346:253 347:253 348:218 349:5 356:203 357:253 358:208 359:21 372:56 373:237 374:253 375:250 376:100 384:104 385:253 386:253 387:75 400:76 401:253 402:253 403:224 412:119 413:253 414:253 415:75 428:80 429:253 430:253 431:103 439:4 440:241 441:253 442:218 443:32 456:213 457:253 458:253 459:103 467:125 468:253 469:253 470:191 484:213 485:253 486:253 487:103 494:3 495:176 496:253 497:253 498:135 512:213 513:253 514:253 515:103 521:9 522:162 523:253 524:253 525:226 526:37 540:179 541:253 542:253 543:135 548:46 549:157 550:253 551:253 552:253 553:63 568:23 569:188 570:253 571:249 572:179 573:179 574:179 575:179 576:233 577:253 578:253 579:233 580:156 581:10 597:51 598:235 599:253 600:253 601:253 602:253 603:253 604:253 605:251 606:232 607:120 626:16 627:124 628:253 629:253 630:253 631:253 632:152 633:104
+1 124:29 125:197 126:255 127:84 152:85 153:251 154:253 155:83 180:86 181:253 182:254 183:253 208:85 209:251 210:253 211:251 236:86 237:253 238:254 239:253 240:169 264:85 265:251 266:253 267:251 268:168 292:86 293:253 294:254 295:253 296:169 320:28 321:196 322:253 323:251 324:168 349:169 350:254 351:253 352:169 377:168 378:253 379:251 380:168 405:169 406:254 407:253 408:169 433:168 434:253 435:251 436:168 462:254 463:253 464:254 465:139 490:253 491:251 492:253 493:251 518:254 519:253 520:254 521:253 522:57 546:253 547:251 548:253 549:251 550:168 574:198 575:253 576:254 577:253 578:114 602:85 603:251 604:253 605:251 630:85 631:253 632:254 633:253 658:28 659:83 660:196 661:83
+1 159:31 160:210 161:253 162:163 187:198 188:252 189:252 190:162 213:10 214:86 215:242 216:252 217:252 218:66 241:164 242:252 243:252 244:252 245:188 246:8 268:53 269:242 270:252 271:252 272:225 273:14 296:78 297:252 298:252 299:252 300:204 323:56 324:231 325:252 326:252 327:212 328:35 351:157 352:252 353:252 354:252 355:37 377:8 378:132 379:253 380:252 381:252 382:230 383:24 405:45 406:252 407:253 408:252 409:154 410:55 427:7 428:55 433:107 434:253 435:255 436:228 437:53 454:15 455:24 456:23 460:110 461:242 462:252 463:228 464:59 482:57 483:83 487:88 488:247 489:252 490:252 491:140 514:15 515:189 516:252 517:252 518:252 542:74 543:252 544:252 545:238 546:90 570:178 571:252 572:252 573:189 597:40 598:217 599:252 600:252 601:59 625:75 626:252 627:252 628:252 629:85 630:61 653:62 654:239 655:252 656:156 657:14 682:178 683:252 684:14
+1 131:159 132:255 133:122 158:167 159:228 160:253 161:121 185:64 186:236 187:251 188:205 189:110 212:48 213:158 214:251 215:251 216:178 217:39 240:190 241:251 242:251 243:251 267:96 268:253 269:253 270:253 271:153 295:194 296:251 297:251 298:211 299:74 322:80 323:174 324:251 325:251 326:140 327:47 349:16 350:181 351:253 352:251 353:219 354:23 377:64 378:251 379:253 380:251 381:204 382:19 405:223 406:253 407:255 408:233 409:48 431:20 432:174 433:244 434:251 435:253 436:109 437:31 459:96 460:189 461:251 462:251 463:126 464:31 486:24 487:106 488:251 489:235 490:188 491:100 514:96 515:251 516:251 517:228 518:59 542:255 543:253 544:253 545:213 546:36 569:100 570:253 571:251 572:251 573:85 574:23 596:32 597:127 598:253 599:235 600:126 601:15 624:104 625:251 626:253 627:240 628:79 652:83 653:193 654:253 655:220
+0 153:92 154:191 155:178 156:253 157:242 158:141 159:104 160:29 180:26 181:253 182:252 183:252 184:252 185:253 186:252 187:252 188:252 189:108 190:19 206:57 207:123 208:222 209:253 210:252 211:252 212:252 213:168 214:224 215:252 216:252 217:253 218:84 233:176 234:243 235:252 236:252 237:253 238:252 239:252 240:252 242:19 243:153 244:252 245:253 246:209 247:25 259:10 260:128 261:255 262:253 263:244 264:225 265:114 266:194 267:253 268:178 272:163 273:254 274:253 275:168 287:85 288:252 289:253 290:189 291:56 294:19 295:133 296:9 300:38 301:253 302:252 303:168 314:19 315:191 316:252 317:194 318:19 329:253 330:252 331:234 332:22 342:107 343:252 344:252 345:13 357:253 358:252 359:252 360:128 370:169 371:253 372:241 385:141 386:253 387:253 388:140 397:19 398:225 399:252 400:139 413:66 414:252 415:252 416:139 425:29 426:252 427:252 428:52 441:29 442:252 443:252 444:139 453:29 454:252 455:252 456:28 469:29 470:252 471:252 472:40 481:141 482:253 483:253 484:91 497:154 498:253 499:168 509:66 510:252 511:252 512:165 525:253 526:252 527:168 537:19 538:224 539:252 540:252 552:126 553:253 554:252 555:80 566:169 567:252 568:252 569:214 570:38 579:126 580:249 581:253 582:151 583:6 594:26 595:223 596:253 597:254 598:253 599:128 600:29 604:13 605:41 606:216 607:253 608:253 609:226 610:38 623:122 624:252 625:253 626:252 627:252 628:252 629:169 630:169 631:169 632:206 633:253 634:252 635:252 636:202 637:38 651:19 652:56 653:168 654:224 655:252 656:252 657:253 658:252 659:252 660:252 661:253 662:233 663:130 664:6 682:94 683:139 684:190 685:153 686:252 687:164 688:139 689:28 690:22
+1 128:53 129:250 130:255 131:25 156:167 157:253 158:253 159:25 182:3 183:123 184:247 185:253 186:253 187:25 210:9 211:253 212:253 213:253 214:253 215:25 238:9 239:253 240:253 241:253 242:253 243:25 266:9 267:253 268:253 269:253 270:180 271:13 294:9 295:253 296:253 297:253 298:104 322:9 323:253 324:253 325:253 326:104 350:15 351:253 352:253 353:253 354:104 378:184 379:253 380:253 381:228 382:68 406:184 407:253 408:253 409:182 433:103 434:251 435:253 436:253 437:12 461:106 462:253 463:253 464:253 465:8 488:24 489:238 490:253 491:253 492:253 493:8 516:27 517:253 518:253 519:253 520:253 521:8 544:27 545:253 546:253 547:253 548:253 549:8 572:27 573:253 574:253 575:253 576:177 577:4 600:160 601:253 602:253 603:253 604:87 628:202 629:253 630:253 631:219 632:54 656:81 657:253 658:247 659:51
+0 122:63 123:176 124:253 125:253 126:159 127:113 128:63 150:140 151:253 152:252 153:252 154:252 155:252 156:241 157:100 158:66 177:54 178:227 179:253 180:252 181:252 182:252 183:252 184:253 185:252 186:239 187:181 188:57 204:38 205:224 206:252 207:253 208:226 209:246 210:252 211:252 212:253 213:252 214:252 215:252 216:252 217:108 218:3 232:57 233:252 234:252 235:253 236:27 237:88 238:112 239:112 240:112 241:112 242:142 243:252 244:252 245:253 246:152 247:31 260:198 261:253 262:253 263:79 270:32 271:153 272:253 273:255 274:253 275:196 287:76 288:246 289:252 290:127 299:3 300:106 301:253 302:252 303:214 304:28 315:194 316:252 317:252 318:112 329:143 330:252 331:252 332:193 343:225 344:252 345:217 346:37 357:38 358:234 359:252 360:223 370:63 371:240 372:252 373:84 386:146 387:252 388:223 398:114 399:253 400:228 401:47 414:147 415:253 416:253 417:112 426:159 427:252 428:195 442:225 443:252 444:252 445:112 454:253 455:252 456:195 470:225 471:252 472:230 473:25 482:159 483:252 484:202 485:10 497:92 498:243 499:252 500:208 510:113 511:252 512:252 513:161 524:79 525:253 526:252 527:220 528:37 538:114 539:253 540:253 541:253 542:174 543:63 550:26 551:128 552:253 553:255 554:253 555:133 566:12 567:228 568:252 569:252 570:252 571:241 572:100 573:85 574:76 576:85 577:131 578:231 579:252 580:252 581:253 582:129 583:6 595:97 596:208 597:252 598:252 599:253 600:252 601:252 602:246 603:197 604:253 605:252 606:252 607:252 608:220 609:133 610:6 624:19 625:99 626:239 627:253 628:252 629:252 630:252 631:252 632:253 633:252 634:245 635:223 636:99 654:63 655:112 656:112 657:221 658:252 659:252 660:253 661:127 662:87
+0 153:12 154:136 155:254 156:255 157:195 158:115 159:3 180:6 181:175 182:253 183:196 184:160 185:252 186:253 187:15 208:130 209:253 210:234 211:4 213:27 214:205 215:232 216:40 235:54 236:246 237:253 238:68 242:24 243:243 244:106 262:3 263:134 264:235 265:99 266:4 271:132 272:247 273:77 290:56 291:253 292:62 299:23 300:233 301:129 318:179 319:183 320:4 328:182 329:220 345:21 346:232 347:59 356:95 357:232 358:21 373:128 374:183 385:228 386:85 401:187 402:124 413:228 414:186 429:187 430:124 441:228 442:104 457:187 458:124 469:169 470:184 485:187 486:124 497:203 498:150 513:187 514:124 524:10 525:220 526:39 541:187 542:155 552:111 553:201 569:129 570:228 571:7 579:12 580:181 581:76 598:234 599:166 600:9 606:24 607:209 608:106 626:139 627:250 628:167 629:11 630:2 631:11 632:11 633:129 634:227 635:90 636:11 655:95 656:247 657:253 658:178 659:253 660:253 661:244 662:86 684:47 685:175 686:253 687:232 688:149 689:40
+1 128:255 129:253 130:57 156:253 157:251 158:225 159:56 183:169 184:254 185:253 186:254 187:84 211:168 212:253 213:251 214:253 215:83 238:85 239:253 240:254 241:253 242:169 266:85 267:251 268:253 269:251 270:56 294:141 295:253 296:254 297:253 322:253 323:251 324:253 325:251 350:254 351:253 352:254 353:253 378:253 379:251 380:253 381:251 406:254 407:253 408:254 409:196 433:114 434:253 435:251 436:253 437:83 461:169 462:254 463:253 464:226 465:56 489:168 490:253 491:251 492:168 516:85 517:253 518:254 519:253 544:85 545:251 546:253 547:251 572:254 573:253 574:254 575:253 600:253 601:251 602:253 603:251 628:254 629:253 630:254 631:253 656:139 657:251 658:253 659:138
+0 151:23 152:167 153:208 154:254 155:255 156:129 157:19 179:151 180:253 181:253 182:253 183:253 184:253 185:209 186:26 207:181 208:253 209:253 210:253 211:227 212:181 213:253 214:207 215:22 235:227 236:253 237:253 238:253 239:92 240:38 241:226 242:253 243:129 244:2 263:193 264:253 265:253 266:248 267:62 269:50 270:253 271:253 272:45 291:170 292:253 293:253 294:135 297:12 298:208 299:253 300:119 318:16 319:232 320:253 321:253 322:21 326:60 327:253 328:185 346:164 347:253 348:253 349:224 350:14 354:14 355:217 356:247 357:62 373:3 374:193 375:253 376:250 377:64 383:199 384:253 385:179 401:67 402:253 403:253 404:205 411:98 412:253 413:188 429:151 430:253 431:245 432:43 439:63 440:250 441:188 457:151 458:253 459:243 468:244 469:222 470:22 485:151 486:253 487:217 496:244 497:253 498:115 512:3 513:195 514:253 515:134 524:156 525:253 526:150 541:140 542:253 543:134 552:239 553:253 554:139 569:44 570:253 571:134 579:53 580:246 581:237 582:32 597:8 598:200 599:229 600:40 606:25 607:225 608:253 609:188 626:120 627:250 628:230 629:58 630:17 632:12 633:42 634:213 635:253 636:238 637:84 655:151 656:253 657:253 658:217 659:179 660:206 661:253 662:253 663:196 664:118 683:18 684:58 685:145 686:152 687:253 688:214 689:145 690:74 691:7
+1 130:24 131:150 132:233 133:38 156:14 157:89 158:253 159:254 160:254 161:71 183:78 184:203 185:254 186:254 187:254 188:232 189:77 190:54 191:8 209:12 210:155 211:240 212:254 213:223 214:76 215:254 216:254 217:254 218:254 219:68 235:3 236:101 237:216 238:254 239:227 240:122 241:26 242:110 243:254 244:254 245:254 246:184 247:100 262:46 263:222 264:254 265:254 266:179 267:48 270:181 271:254 272:254 273:146 274:6 288:2 289:145 290:248 291:254 292:182 293:111 294:4 297:3 298:250 299:254 300:206 301:3 315:6 316:144 317:254 318:254 319:171 325:125 326:254 327:252 328:80 342:6 343:142 344:254 345:179 346:95 347:4 352:61 353:246 354:254 355:150 370:64 371:254 372:177 373:14 380:124 381:254 382:246 383:32 398:108 399:97 400:15 407:24 408:226 409:254 410:116 435:177 436:255 437:254 438:5 463:196 464:254 465:99 466:1 490:3 491:199 492:254 493:79 518:129 519:254 520:254 521:23 546:178 547:254 548:192 549:8 550:3 551:43 573:11 574:198 575:254 576:128 577:66 578:130 579:225 595:137 596:202 597:106 598:84 599:84 600:84 601:112 602:254 603:254 604:254 605:254 606:212 607:151 623:172 624:254 625:254 626:254 627:254 628:254 629:254 630:254 631:254 632:254 633:162 634:75 651:12 652:106 653:177 654:254 655:254 656:254 657:235 658:135 659:100 660:17 661:2
+0 125:120 126:253 127:253 128:63 151:38 152:131 153:246 154:252 155:252 156:203 157:15 179:222 180:252 181:252 182:252 183:252 184:166 185:38 205:4 206:107 207:253 208:252 209:252 210:252 211:252 212:253 213:224 214:137 215:26 233:107 234:252 235:253 236:252 237:220 238:128 239:252 240:253 241:252 242:252 243:239 244:140 261:170 262:253 263:255 264:168 267:79 268:192 269:253 270:253 271:253 272:253 273:255 274:90 288:51 289:243 290:252 291:215 292:33 296:12 297:74 298:233 299:252 300:252 301:253 302:195 303:19 316:166 317:252 318:252 319:31 326:43 327:149 328:195 329:253 330:252 331:177 332:19 343:57 344:234 345:252 346:252 357:237 358:252 359:252 360:180 361:13 371:85 372:252 373:252 374:173 385:50 386:237 387:252 388:252 389:112 399:226 400:253 401:240 402:63 414:163 415:253 416:253 417:112 426:38 427:234 428:252 429:176 442:85 443:252 444:252 445:158 454:113 455:252 456:252 457:84 470:19 471:209 472:252 473:252 482:207 483:252 484:252 485:84 498:10 499:203 500:252 501:236 510:253 511:252 512:252 513:84 526:85 527:252 528:252 529:112 538:114 539:253 540:253 541:146 553:51 554:159 555:253 556:240 557:63 566:75 567:243 568:252 569:249 570:146 579:57 580:85 581:238 582:252 583:252 584:99 595:116 596:252 597:252 598:252 599:198 600:197 601:165 602:57 603:57 604:57 605:182 606:197 607:234 608:252 609:253 610:233 611:164 612:19 623:28 624:84 625:180 626:252 627:253 628:252 629:252 630:252 631:252 632:253 633:252 634:252 635:252 636:252 637:225 638:71 653:13 654:112 655:253 656:252 657:252 658:252 659:252 660:253 661:252 662:252 663:157 664:112
+1 127:155 128:253 129:126 155:253 156:251 157:141 158:4 183:253 184:251 185:251 186:31 211:253 212:251 213:251 214:31 239:253 240:251 241:251 242:31 267:255 268:253 269:253 270:31 293:8 294:131 295:253 296:251 297:235 298:27 321:64 322:251 323:253 324:251 325:126 349:64 350:251 351:253 352:251 353:126 377:64 378:251 379:253 380:251 381:126 405:64 406:253 407:255 408:221 433:182 434:251 435:253 436:200 460:64 461:236 462:251 463:253 464:62 487:8 488:158 489:251 490:251 491:169 492:8 515:32 516:251 517:251 518:251 519:158 543:32 544:253 545:253 546:253 547:159 571:32 572:251 573:251 574:251 575:39 599:32 600:251 601:251 602:251 627:32 628:251 629:251 630:251 631:100 655:32 656:251 657:251 658:251
+0 101:88 102:127 103:5 126:19 127:58 128:20 129:14 130:217 131:19 152:7 153:146 154:247 155:253 156:235 157:27 158:84 159:81 180:126 181:253 182:164 183:19 184:15 187:156 188:9 208:214 209:222 210:34 215:234 216:58 235:59 236:254 237:116 243:235 244:58 263:141 264:251 265:72 271:151 272:140 291:224 292:233 299:136 300:223 319:254 320:218 327:136 328:253 347:254 348:135 355:136 356:253 374:23 375:255 376:114 383:137 384:231 402:98 403:254 404:122 411:136 412:155 430:98 431:254 432:106 439:166 440:155 458:98 459:254 460:128 467:234 468:193 486:98 487:254 488:135 494:61 495:248 496:118 515:255 516:238 517:18 521:13 522:224 523:254 524:58 543:201 544:253 545:128 546:2 548:5 549:150 550:253 551:167 552:9 571:18 572:226 573:253 574:49 575:31 576:156 577:253 578:228 579:13 600:147 601:253 602:243 603:241 604:254 605:227 606:43 628:5 629:126 630:245 631:253 632:231 633:46
+0 127:37 128:141 129:156 130:156 131:194 132:194 133:47 153:11 154:132 155:239 156:253 157:253 158:253 159:253 160:254 161:181 180:25 181:172 182:253 183:235 184:167 185:78 186:93 187:174 188:254 189:247 190:54 207:26 208:210 209:253 210:237 211:90 216:201 217:253 218:78 235:192 236:253 237:237 238:58 244:156 245:253 246:78 262:141 263:254 264:235 265:53 269:19 270:5 272:156 273:254 274:78 289:46 290:254 291:253 292:92 296:17 297:226 298:217 299:49 300:148 301:253 302:78 317:165 318:254 319:239 320:24 324:20 325:253 326:253 327:58 328:18 329:115 330:24 344:37 345:248 346:254 347:91 352:2 353:117 354:250 355:163 356:91 372:77 373:253 374:254 375:39 382:196 383:253 384:173 400:159 401:254 402:218 403:15 410:77 411:254 412:255 413:61 428:234 429:253 430:113 438:21 439:226 440:254 441:135 455:25 456:240 457:253 458:68 467:195 468:254 469:135 483:79 484:253 485:253 495:195 496:254 497:135 511:79 512:253 513:253 514:76 523:195 524:254 525:99 540:212 541:254 542:209 543:9 550:10 551:209 552:196 553:15 568:54 569:253 570:254 571:137 572:36 576:2 577:20 578:168 579:253 580:60 596:28 597:235 598:254 599:253 600:199 601:124 602:79 603:79 604:167 605:253 606:253 607:185 608:30 625:15 626:117 627:217 628:253 629:253 630:253 631:254 632:253 633:240 634:109 635:12 655:27 656:126 657:208 658:253 659:193 660:147 661:40
+0 154:32 155:134 156:218 157:254 158:254 159:254 160:217 161:84 176:44 177:208 178:215 179:156 180:35 181:119 182:236 183:246 184:136 185:91 186:69 187:151 188:249 189:246 190:78 203:44 204:230 205:254 206:254 207:254 208:254 209:254 210:196 211:48 216:60 217:224 218:210 219:24 231:118 232:254 233:202 234:19 235:201 236:254 237:181 238:9 245:35 246:233 247:168 259:193 260:223 261:34 263:59 264:163 265:236 266:15 274:140 275:205 276:8 286:60 287:254 288:176 293:38 302:54 303:237 304:80 314:59 315:254 316:93 331:131 332:200 342:59 343:240 344:24 359:79 360:214 370:59 371:234 387:67 388:248 389:54 398:59 399:234 416:235 417:58 426:60 427:235 443:79 444:255 445:59 454:59 455:251 456:66 471:79 472:250 473:54 482:59 483:254 484:108 499:146 500:214 510:5 511:203 512:187 513:3 526:4 527:188 528:199 539:118 540:254 541:57 554:96 555:254 556:117 567:16 568:237 569:224 570:14 581:14 582:187 583:206 584:8 596:88 597:252 598:186 599:16 608:16 609:187 610:252 611:125 625:100 626:254 627:237 628:94 629:24 635:13 636:214 637:254 638:166 653:3 654:57 655:215 656:248 657:241 658:235 659:197 660:137 661:137 662:137 663:231 664:238 665:155 666:25 684:57 685:155 686:246 687:254 688:254 689:254 690:254 691:147 692:36
+1 124:102 125:252 126:252 127:41 152:102 153:250 154:250 155:202 180:102 181:250 182:250 183:232 184:91 208:102 209:250 210:250 211:212 212:29 236:102 237:252 238:252 239:254 240:150 264:102 265:250 266:250 267:252 268:149 292:102 293:250 294:250 295:252 296:149 320:102 321:250 322:250 323:252 324:231 325:80 349:152 350:252 351:254 352:252 353:100 377:151 378:250 379:252 380:250 381:100 405:151 406:250 407:252 408:250 409:100 433:151 434:250 435:252 436:250 437:100 461:123 462:243 463:254 464:252 465:100 490:202 491:252 492:250 493:100 518:80 519:252 520:250 521:190 522:30 547:252 548:250 549:250 550:49 575:255 576:252 577:252 578:252 579:214 580:31 603:171 604:250 605:250 606:250 607:252 608:190 609:40 631:20 632:160 633:250 634:250 635:252 636:250 637:100 660:20 661:170 662:250 663:212 664:49 665:20
+0 124:20 125:121 126:197 127:253 128:64 151:23 152:200 153:252 154:252 155:252 156:184 157:6 178:25 179:197 180:252 181:252 182:252 183:252 184:253 185:228 186:107 187:15 205:26 206:196 207:252 208:252 209:252 210:252 211:252 212:253 213:252 214:252 215:219 216:178 217:21 233:186 234:252 235:238 236:94 237:67 238:224 239:217 240:53 241:109 242:245 243:252 244:252 245:213 246:63 260:98 261:242 262:252 263:101 266:39 267:31 270:109 271:128 272:241 273:252 274:207 275:97 287:17 288:230 289:252 290:241 291:56 300:109 301:252 302:252 303:229 304:17 314:13 315:192 316:252 317:243 318:96 328:25 329:127 330:252 331:252 332:120 342:121 343:252 344:252 345:165 357:125 358:252 359:252 360:246 361:70 370:190 371:252 372:252 373:39 385:26 386:210 387:252 388:252 389:119 398:255 399:253 400:159 414:22 415:209 416:253 417:183 426:253 427:252 428:103 443:34 444:252 445:252 454:253 455:252 456:26 471:27 472:252 473:252 482:253 483:252 484:168 485:13 499:70 500:252 501:209 510:147 511:252 512:252 513:75 526:68 527:233 528:252 529:119 538:121 539:252 540:252 541:189 542:40 552:15 553:82 554:231 555:252 556:214 557:31 566:38 567:135 568:248 569:252 570:231 571:145 572:41 573:41 574:41 575:41 576:20 577:24 578:37 579:83 580:194 581:252 582:252 583:212 584:33 596:83 597:213 598:252 599:252 600:252 601:252 602:252 603:252 604:204 605:213 606:243 607:252 608:252 609:252 610:212 611:34 625:34 626:140 627:238 628:248 629:252 630:252 631:252 632:253 633:252 634:252 635:241 636:238 637:238 638:75 656:82 657:119 658:119 659:119 660:120 661:119 662:119 663:19
+1 127:20 128:254 129:255 130:37 155:19 156:253 157:253 158:134 183:19 184:253 185:253 186:246 187:125 211:76 212:253 213:253 214:253 215:158 239:207 240:253 241:253 242:253 243:158 267:207 268:253 269:253 270:253 271:158 294:48 295:223 296:253 297:253 298:243 299:106 322:141 323:253 324:253 325:253 326:113 349:65 350:237 351:253 352:253 353:253 354:36 377:76 378:253 379:253 380:253 381:253 382:36 405:76 406:253 407:253 408:253 409:253 410:36 433:76 434:253 435:253 436:253 437:118 438:4 460:4 461:148 462:253 463:253 464:253 465:103 488:10 489:253 490:253 491:253 492:253 493:103 516:10 517:253 518:253 519:253 520:173 521:7 544:10 545:253 546:253 547:253 548:168 572:143 573:253 574:253 575:239 576:49 600:198 601:253 602:253 603:234 615:140 628:198 629:253 630:253 631:234 656:198 657:253 658:253 659:234
+0 235:40 236:37 238:7 239:77 240:137 241:136 242:136 243:136 244:136 245:40 246:6 261:16 262:135 263:254 264:233 266:152 267:215 268:96 269:140 270:155 271:118 272:230 273:254 274:158 275:68 288:19 289:164 290:254 291:114 294:235 295:140 301:99 302:230 303:254 304:186 305:14 315:70 316:226 317:242 318:121 322:104 323:195 324:38 330:33 331:179 332:253 333:140 342:41 343:241 344:198 345:43 359:24 360:209 361:223 370:164 371:250 372:66 388:136 389:253 398:254 399:158 416:136 417:215 426:255 427:76 442:5 443:127 444:246 445:133 454:254 455:122 469:5 470:150 471:247 472:91 473:9 482:254 483:165 495:13 496:79 497:194 498:216 499:84 510:111 511:251 512:87 519:16 520:25 521:40 522:107 523:186 524:213 525:117 526:25 538:14 539:185 540:235 541:142 542:23 546:91 547:157 548:231 549:207 550:126 551:49 569:143 570:195 571:255 572:254 573:254 574:244 575:157 576:76 599:39 600:39 601:39 602:33
+1 128:166 129:255 130:187 131:6 156:165 157:253 158:253 159:13 183:15 184:191 185:253 186:253 187:13 211:49 212:253 213:253 214:253 215:13 239:141 240:253 241:253 242:169 243:4 266:4 267:189 268:253 269:249 270:53 294:69 295:253 296:253 297:246 322:69 323:253 324:253 325:246 350:118 351:253 352:253 353:124 378:206 379:253 380:231 381:21 405:66 406:241 407:253 408:199 433:105 434:253 435:253 436:89 460:3 461:228 462:253 463:252 464:86 488:111 489:253 490:253 491:205 516:166 517:253 518:253 519:75 543:43 544:249 545:253 546:193 547:9 570:4 571:160 572:253 573:253 574:184 598:37 599:253 600:253 601:253 602:88 626:140 627:253 628:253 629:186 630:18 654:14 655:253 656:253 657:27
+1 128:117 129:128 155:2 156:199 157:127 183:81 184:254 185:87 211:116 212:254 213:48 239:175 240:234 241:18 266:5 267:230 268:168 294:80 295:255 296:142 322:80 323:255 324:142 350:80 351:251 352:57 378:129 379:239 406:164 407:209 433:28 434:245 435:159 461:64 462:254 463:144 489:84 490:254 491:80 517:143 518:254 519:30 544:3 545:225 546:200 572:48 573:254 574:174 600:48 601:254 602:174 628:93 629:254 630:129 656:53 657:234 658:41
+1 129:159 130:142 156:11 157:220 158:141 184:78 185:254 186:141 212:111 213:254 214:109 240:196 241:221 242:15 267:26 268:221 269:159 295:63 296:254 297:159 323:178 324:254 325:93 350:7 351:191 352:254 353:97 378:42 379:255 380:254 381:41 406:42 407:254 408:195 409:10 434:141 435:255 436:78 461:11 462:202 463:254 464:59 489:86 490:254 491:254 492:59 517:142 518:254 519:248 520:52 545:142 546:254 547:195 573:142 574:254 575:164 601:142 602:254 603:77 629:142 630:254 631:131 657:77 658:172 659:5
+0 124:66 125:254 126:254 127:58 128:60 129:59 130:59 131:50 151:73 152:233 153:253 154:253 155:148 156:254 157:253 158:253 159:232 160:73 179:156 180:253 181:253 182:253 183:117 184:255 185:253 186:253 187:253 188:223 189:176 190:162 205:37 206:116 207:246 208:253 209:180 210:18 211:4 212:18 213:109 214:241 215:253 216:253 217:253 218:236 219:28 233:235 234:253 235:253 236:245 237:107 242:109 243:170 244:253 245:253 246:253 247:174 261:235 262:253 263:253 264:233 271:15 272:156 273:253 274:253 275:223 276:72 287:10 288:156 289:250 290:253 291:253 292:67 300:99 301:253 302:253 303:253 304:127 305:5 315:118 316:253 317:253 318:253 319:204 320:26 328:68 329:223 330:253 331:253 332:253 333:57 342:32 343:191 344:253 345:253 346:253 347:97 357:156 358:253 359:253 360:253 361:57 370:59 371:253 372:253 373:253 374:253 375:97 385:36 386:224 387:253 388:253 389:57 398:60 399:254 400:255 401:254 402:156 413:37 414:226 415:254 416:254 417:58 426:59 427:253 428:253 429:253 430:154 441:156 442:253 443:253 444:253 445:57 454:59 455:253 456:253 457:253 458:154 469:156 470:253 471:253 472:253 473:57 482:59 483:253 484:253 485:253 486:246 487:90 496:16 497:171 498:253 499:253 500:231 501:49 510:59 511:253 512:253 513:253 514:253 515:156 516:91 524:99 525:253 526:253 527:222 528:71 538:59 539:253 540:253 541:253 542:253 543:253 544:245 545:109 551:145 552:194 553:253 554:253 555:174 566:9 567:38 568:174 569:251 570:253 571:253 572:253 573:241 574:215 575:215 576:217 577:215 578:215 579:250 580:253 581:253 582:221 583:26 597:235 598:253 599:253 600:253 601:253 602:253 603:253 604:254 605:253 606:253 607:253 608:253 609:204 610:26 625:108 626:116 627:200 628:253 629:253 630:253 631:253 632:254 633:253 634:253 635:253 636:199 637:44 655:36 656:57 657:118 658:253 659:253 660:58 661:57 662:57 663:57 664:35
+1 129:101 130:222 131:84 157:225 158:252 159:84 184:89 185:246 186:208 187:19 212:128 213:252 214:195 239:79 240:253 241:252 242:195 267:141 268:255 269:253 270:133 294:26 295:240 296:253 297:252 298:55 322:60 323:252 324:253 325:154 326:12 349:7 350:178 351:252 352:253 353:27 377:57 378:252 379:252 380:253 381:27 405:57 406:253 407:253 408:204 409:15 433:104 434:252 435:252 436:94 460:19 461:209 462:252 463:252 488:101 489:252 490:252 491:157 516:225 517:252 518:252 519:112 544:226 545:253 546:240 547:63 572:225 573:252 574:223 600:225 601:252 602:223 628:225 629:252 630:242 631:75 656:146 657:252 658:236 659:50
+0 124:41 125:254 126:254 127:157 128:34 129:34 130:218 131:255 132:206 133:34 134:18 151:53 152:238 153:252 154:252 155:252 156:252 157:252 158:252 159:252 160:252 161:252 162:162 163:26 178:66 179:220 180:252 181:252 182:252 183:209 184:153 185:223 186:252 187:252 188:252 189:252 190:252 191:98 206:166 207:252 208:252 209:252 210:252 211:141 213:85 214:230 215:252 216:252 217:252 218:252 219:98 234:166 235:252 236:252 237:252 238:252 239:141 242:73 243:102 244:252 245:252 246:252 247:98 262:166 263:252 264:252 265:252 266:191 267:30 271:5 272:97 273:252 274:252 275:220 276:51 289:123 290:245 291:252 292:252 293:202 294:14 300:56 301:252 302:252 303:252 304:65 316:18 317:154 318:252 319:252 320:241 328:56 329:252 330:252 331:252 332:65 343:21 344:146 345:252 346:252 347:252 348:241 356:56 357:252 358:252 359:252 360:65 371:67 372:252 373:252 374:252 375:252 376:241 384:56 385:252 386:252 387:252 388:65 399:67 400:252 401:252 402:252 403:252 404:116 412:56 413:252 414:252 415:252 416:65 427:67 428:252 429:252 430:252 431:252 432:20 440:56 441:252 442:252 443:252 444:65 455:67 456:252 457:252 458:252 459:87 460:4 468:56 469:252 470:252 471:124 472:11 483:67 484:252 485:252 486:252 487:54 494:19 495:236 496:245 497:252 498:252 499:98 511:67 512:252 513:252 514:252 515:97 516:5 521:39 522:219 523:252 524:252 525:252 526:252 527:98 539:67 540:252 541:252 542:252 543:252 544:102 545:89 546:89 547:89 548:89 549:203 550:252 551:252 552:252 553:252 554:209 555:64 567:67 568:252 569:252 570:252 571:252 572:252 573:252 574:252 575:252 576:252 577:252 578:252 579:252 580:226 581:130 582:68 595:67 596:252 597:252 598:252 599:252 600:252 601:252 602:252 603:252 604:252 605:252 606:252 607:239 608:77 623:17 624:65 625:163 626:252 627:252 628:252 629:252 630:252 631:252 632:252 633:252 634:96 635:59 653:17 654:176 655:252 656:252 657:252 658:252 659:155 660:32 661:32 662:6
+0 96:56 97:247 98:121 124:24 125:242 126:245 127:122 153:231 154:253 155:253 156:104 157:12 181:90 182:253 183:253 184:254 185:221 186:120 187:120 188:85 206:67 207:75 208:36 209:11 210:56 211:222 212:254 213:253 214:253 215:253 216:245 217:207 218:36 233:86 234:245 235:249 236:105 239:44 240:224 241:230 242:253 243:253 244:253 245:253 246:214 247:10 260:8 261:191 262:253 263:143 269:29 270:119 271:119 272:158 273:253 274:253 275:94 288:15 289:253 290:226 291:48 300:4 301:183 302:253 303:248 304:56 316:42 317:253 318:178 329:179 330:253 331:184 332:14 344:164 345:253 346:178 357:179 358:253 359:163 371:61 372:254 373:254 374:179 384:76 385:254 386:254 387:164 399:60 400:253 401:253 402:178 411:29 412:206 413:253 414:253 415:40 427:60 428:253 429:253 430:178 439:120 440:253 441:253 442:245 443:13 455:60 456:253 457:253 458:178 467:120 468:253 469:239 470:63 483:60 484:253 485:253 486:178 494:14 495:238 496:253 497:179 511:18 512:190 513:253 514:231 515:70 521:43 522:184 523:253 524:253 525:74 540:86 541:253 542:253 543:239 544:134 545:8 548:56 549:163 550:253 551:253 552:213 553:35 568:16 569:253 570:253 571:253 572:253 573:240 574:239 575:239 576:247 577:253 578:253 579:210 580:27 596:4 597:59 598:204 599:253 600:253 601:253 602:253 603:253 604:254 605:253 606:250 607:110 626:31 627:122 628:253 629:253 630:253 631:253 632:255 633:217 634:98
+0 125:19 126:164 127:253 128:255 129:253 130:118 131:59 132:36 153:78 154:251 155:251 156:253 157:251 158:251 159:251 160:199 161:45 180:14 181:198 182:251 183:251 184:253 185:251 186:251 187:251 188:251 189:204 190:26 208:5 209:117 210:251 211:251 212:243 213:212 214:239 215:251 216:251 217:251 218:218 236:95 237:251 238:251 239:251 240:120 242:175 243:251 244:251 245:251 246:231 263:97 264:237 265:251 266:251 267:251 270:67 271:240 272:251 273:251 274:243 275:108 290:8 291:163 292:251 293:251 294:240 295:81 299:68 300:251 301:251 302:251 303:179 304:9 317:13 318:145 319:251 320:251 321:226 322:80 327:39 328:251 329:251 330:251 331:251 332:115 345:144 346:251 347:251 348:251 349:173 355:18 356:167 357:251 358:251 359:251 360:115 373:233 374:251 375:251 376:251 377:173 384:98 385:251 386:251 387:251 388:115 400:176 401:253 402:253 403:216 404:179 412:99 413:253 414:253 415:253 416:116 427:55 428:210 429:251 430:251 431:96 440:98 441:251 442:251 443:214 444:62 455:117 456:251 457:251 458:251 459:96 467:28 468:204 469:251 470:237 471:53 482:55 483:241 484:251 485:251 486:160 487:7 494:28 495:222 496:251 497:251 498:231 510:59 511:251 512:251 513:251 514:153 520:23 521:98 522:204 523:251 524:251 525:251 526:156 538:59 539:251 540:251 541:251 542:153 546:85 547:155 548:179 549:251 550:251 551:251 552:251 553:154 554:15 566:59 567:251 568:251 569:251 570:236 571:214 572:214 573:214 574:234 575:251 576:253 577:251 578:251 579:248 580:156 581:15 594:41 595:209 596:251 597:251 598:251 599:251 600:251 601:251 602:251 603:251 604:253 605:251 606:196 607:146 623:54 624:115 625:241 626:251 627:251 628:251 629:251 630:251 631:251 632:253 633:187 634:35 653:83 654:251 655:251 656:251 657:251 658:251 659:101 660:57 661:31
+1 129:232 130:255 131:107 156:58 157:244 158:253 159:106 184:95 185:253 186:253 187:106 212:95 213:253 214:253 215:106 240:95 241:253 242:249 243:69 268:144 269:253 270:192 295:97 296:233 297:253 298:66 323:195 324:253 325:253 326:5 350:38 351:232 352:253 353:182 354:2 377:10 378:160 379:253 380:231 381:53 405:42 406:253 407:253 408:158 433:141 434:253 435:253 436:115 460:75 461:245 462:253 463:183 464:4 487:1 488:147 489:253 490:251 491:58 515:20 516:253 517:253 518:180 543:202 544:253 545:226 546:27 571:243 572:253 573:212 598:85 599:251 600:253 601:173 626:209 627:253 628:244 629:57 654:169 655:253 656:174
+1 127:63 128:128 129:2 155:63 156:254 157:123 183:63 184:254 185:179 211:63 212:254 213:179 239:63 240:254 241:179 267:142 268:254 269:179 295:187 296:254 297:158 323:187 324:254 325:55 350:68 351:235 352:254 353:55 378:181 379:254 380:254 381:55 406:181 407:254 408:202 409:14 434:181 435:254 436:186 462:181 463:254 464:146 490:181 491:254 492:62 518:181 519:254 520:62 546:181 547:254 548:62 574:181 575:255 576:62 602:181 603:254 604:241 605:52 630:181 631:254 632:222 633:30 658:181 659:224 660:34
+1 130:131 131:255 132:184 133:15 157:99 158:247 159:253 160:182 161:15 185:124 186:253 187:253 188:253 189:38 212:9 213:171 214:253 215:253 216:140 217:1 240:47 241:253 242:253 243:251 244:117 267:43 268:219 269:253 270:253 271:153 295:78 296:253 297:253 298:253 299:84 323:97 324:253 325:253 326:244 327:74 350:69 351:243 352:253 353:253 354:183 377:10 378:168 379:253 380:253 381:215 382:34 405:31 406:253 407:253 408:253 409:129 433:107 434:253 435:253 436:242 437:67 460:24 461:204 462:253 463:253 464:187 488:95 489:253 490:253 491:201 492:25 516:239 517:253 518:253 519:176 543:119 544:251 545:253 546:253 547:138 570:30 571:212 572:253 573:252 574:165 575:8 598:193 599:253 600:253 601:222 626:193 627:253 628:253 629:189 654:193 655:253 656:201 657:27
+0 125:57 126:255 127:253 128:198 129:85 153:168 154:253 155:251 156:253 157:251 158:169 159:56 180:86 181:253 182:254 183:253 184:254 185:253 186:254 187:253 188:57 208:197 209:251 210:253 211:251 212:253 213:251 214:253 215:251 216:225 217:56 235:169 236:255 237:253 238:226 239:56 241:114 242:254 243:253 244:254 245:84 262:57 263:224 264:253 265:251 266:56 270:139 271:251 272:253 273:83 290:141 291:253 292:255 293:84 298:57 299:225 300:254 301:196 318:253 319:251 320:253 321:83 327:168 328:253 329:83 345:169 346:254 347:253 348:169 355:169 356:254 357:253 358:169 373:168 374:253 375:251 376:56 383:168 384:253 385:251 386:56 401:169 402:254 403:84 412:254 413:253 429:168 430:253 431:83 440:253 441:251 456:29 457:197 458:254 459:84 467:169 468:254 469:196 484:85 485:251 486:253 487:83 494:57 495:224 496:253 497:83 512:57 513:225 514:254 515:139 521:57 522:141 523:253 524:254 525:84 541:168 542:253 543:251 544:169 545:56 547:114 548:169 549:224 550:253 551:251 552:253 553:83 569:169 570:254 571:253 572:254 573:253 574:254 575:253 576:254 577:253 578:254 579:253 580:226 581:56 597:56 598:253 599:251 600:253 601:251 602:253 603:251 604:253 605:251 606:253 607:251 608:56 626:169 627:225 628:254 629:253 630:254 631:253 632:254 633:253 634:226 635:56 655:56 656:253 657:251 658:253 659:251 660:84 661:83 662:56
+0 127:12 128:105 129:224 130:255 131:247 132:22 155:131 156:254 157:254 158:243 159:252 160:76 182:131 183:225 184:254 185:224 186:48 187:136 208:13 209:109 210:252 211:254 212:254 213:254 214:197 215:76 235:9 236:181 237:254 238:254 239:240 240:229 241:237 242:254 243:252 244:152 245:21 262:9 263:143 264:254 265:254 266:226 267:36 269:22 270:138 271:254 272:254 273:188 289:13 290:181 291:254 292:254 293:250 294:64 298:2 299:53 300:236 301:252 302:131 317:102 318:254 319:254 320:254 321:111 328:56 329:243 330:251 331:42 344:30 345:186 346:254 347:254 348:206 349:29 357:199 358:254 359:91 372:92 373:254 374:254 375:237 376:13 385:134 386:254 387:91 400:133 401:254 402:254 403:126 413:134 414:250 415:17 428:187 429:254 430:237 431:23 441:200 442:183 456:187 457:254 458:213 467:2 468:134 469:252 470:101 484:183 485:254 486:133 495:14 496:254 497:234 498:34 512:92 513:254 514:161 522:84 523:204 524:254 525:56 540:92 541:254 542:229 549:85 550:252 551:252 552:188 553:11 568:56 569:252 570:229 575:3 576:53 577:235 578:253 579:166 597:224 598:245 599:130 600:68 601:68 602:134 603:214 604:254 605:254 606:159 625:141 626:254 627:254 628:254 629:254 630:254 631:254 632:233 633:95 634:3 653:14 654:152 655:254 656:254 657:254 658:186 659:157 660:53
+1 130:226 131:247 132:55 157:99 158:248 159:254 160:230 161:30 185:125 186:254 187:254 188:254 189:38 213:125 214:254 215:254 216:212 217:24 240:18 241:223 242:254 243:252 244:118 268:24 269:254 270:254 271:239 295:27 296:195 297:254 298:254 299:93 323:78 324:254 325:254 326:246 327:74 351:158 352:254 353:254 354:185 378:41 379:239 380:254 381:254 382:43 405:22 406:218 407:254 408:254 409:167 410:9 433:32 434:254 435:254 436:254 437:130 460:24 461:187 462:254 463:254 464:234 465:16 488:189 489:254 490:254 491:254 492:128 515:64 516:247 517:254 518:255 519:219 520:42 543:139 544:254 545:254 546:222 547:40 570:30 571:213 572:254 573:235 574:45 598:194 599:254 600:254 601:223 626:194 627:254 628:254 629:190 654:194 655:254 656:202 657:27
+1 130:166 131:253 132:124 133:53 158:140 159:251 160:251 161:180 185:125 186:246 187:251 188:251 189:51 212:32 213:190 214:251 215:251 216:251 217:103 240:21 241:174 242:251 243:251 244:251 268:73 269:176 270:253 271:253 272:201 296:149 297:251 298:251 299:251 300:71 323:27 324:228 325:251 326:251 327:157 328:10 351:180 352:253 353:251 354:251 355:142 377:27 378:180 379:231 380:253 381:251 382:96 383:41 405:89 406:253 407:253 408:255 409:211 410:25 433:217 434:251 435:251 436:253 437:107 460:21 461:221 462:251 463:251 464:242 465:92 487:32 488:190 489:251 490:251 491:251 492:103 515:202 516:251 517:251 518:251 519:122 542:53 543:255 544:253 545:253 546:221 547:51 570:180 571:253 572:251 573:251 574:142 598:180 599:253 600:251 601:251 602:142 626:180 627:253 628:251 629:157 630:82 654:180 655:253 656:147 657:10
+1 129:17 130:206 131:229 132:44 157:2 158:125 159:254 160:123 185:95 186:254 187:254 188:123 212:78 213:240 214:254 215:254 216:123 240:100 241:254 242:254 243:254 244:123 267:2 268:129 269:254 270:254 271:220 272:20 295:9 296:254 297:254 298:254 299:123 322:22 323:179 324:254 325:254 326:254 327:49 350:83 351:254 352:254 353:254 354:183 355:19 378:136 379:254 380:254 381:254 382:139 404:3 405:111 406:252 407:254 408:254 409:232 410:45 432:67 433:254 434:254 435:254 436:216 437:40 459:14 460:192 461:254 462:254 463:254 464:140 486:23 487:192 488:254 489:254 490:254 491:246 514:77 515:254 516:254 517:255 518:241 519:100 541:65 542:235 543:254 544:254 545:254 546:172 568:30 569:238 570:254 571:254 572:254 573:219 574:26 596:34 597:254 598:254 599:254 600:216 601:41 624:34 625:254 626:254 627:254 628:188 652:12 653:170 654:254 655:254 656:82
+1 130:218 131:253 132:124 157:84 158:236 159:251 160:251 184:63 185:236 186:251 187:251 188:122 212:73 213:251 214:251 215:251 216:173 240:202 241:251 242:251 243:251 244:71 267:53 268:255 269:253 270:253 271:253 272:72 295:180 296:253 297:251 298:251 299:188 300:30 323:180 324:253 325:251 326:251 327:142 350:47 351:211 352:253 353:251 354:235 355:82 377:27 378:211 379:251 380:253 381:251 382:215 405:89 406:253 407:253 408:255 409:253 410:164 433:217 434:251 435:251 436:253 437:168 438:15 460:21 461:221 462:251 463:251 464:253 465:107 487:32 488:190 489:251 490:251 491:251 492:221 493:61 515:73 516:251 517:251 518:251 519:251 520:180 543:255 544:253 545:253 546:253 547:201 570:105 571:253 572:251 573:251 574:251 575:71 598:180 599:253 600:251 601:246 602:137 603:10 626:180 627:253 628:251 629:215 654:180 655:253 656:251 657:86
+1 124:102 125:180 126:1 152:140 153:254 154:130 180:140 181:254 182:204 208:140 209:254 210:204 236:72 237:254 238:204 264:25 265:231 266:250 267:135 292:11 293:211 294:254 295:222 321:101 322:254 323:250 324:15 349:96 350:254 351:254 352:95 377:2 378:251 379:254 380:95 405:2 406:251 407:254 408:95 433:96 434:254 435:254 436:95 461:53 462:253 463:254 464:139 490:250 491:254 492:235 493:27 518:201 519:254 520:254 521:128 546:80 547:254 548:254 549:139 574:65 575:254 576:254 577:139 602:150 603:254 604:254 605:139 630:229 631:254 632:254 633:43 658:52 659:196 660:168 661:9
+0 128:87 129:208 130:249 155:27 156:212 157:254 158:195 182:118 183:225 184:254 185:254 186:232 187:147 188:46 209:115 210:248 211:254 212:254 213:254 214:254 215:254 216:230 217:148 218:12 236:18 237:250 238:254 239:245 240:226 241:254 242:254 243:254 244:254 245:254 246:148 263:92 264:205 265:254 266:250 267:101 268:20 269:194 270:254 271:254 272:254 273:254 274:229 275:53 291:152 292:254 293:254 294:94 297:14 298:124 299:187 300:254 301:254 302:254 303:213 318:95 319:252 320:254 321:206 322:15 327:3 328:6 329:51 330:231 331:254 332:94 345:50 346:246 347:254 348:254 349:20 358:200 359:254 360:96 372:21 373:184 374:254 375:254 376:147 377:2 386:200 387:254 388:96 400:177 401:254 402:254 403:218 404:33 413:16 414:211 415:254 416:96 427:11 428:219 429:254 430:251 431:92 441:84 442:254 443:232 444:44 455:101 456:254 457:254 458:141 469:162 470:254 471:231 472:42 483:235 484:254 485:227 486:42 496:51 497:238 498:254 499:213 511:235 512:254 513:199 524:160 525:254 526:229 527:52 539:235 540:254 541:199 549:10 550:84 551:150 552:253 553:254 554:147 567:235 568:254 569:213 570:20 575:17 576:63 577:158 578:254 579:254 580:254 581:155 582:12 595:122 596:248 597:254 598:204 599:98 600:42 601:177 602:180 603:200 604:254 605:254 606:253 607:213 608:82 609:10 624:203 625:254 626:254 627:254 628:254 629:254 630:254 631:254 632:251 633:219 634:94 652:35 653:221 654:254 655:254 656:254 657:254 658:254 659:217 660:95
+1 126:134 127:230 154:133 155:231 156:10 182:133 183:253 184:96 210:133 211:253 212:96 238:133 239:253 240:183 266:133 267:253 268:217 294:133 295:253 296:217 322:133 323:253 324:217 350:133 351:253 352:217 378:133 379:253 380:217 406:134 407:254 408:218 434:133 435:253 436:159 462:133 463:253 464:199 490:156 491:253 492:96 518:254 519:247 520:73 546:254 547:248 548:74 573:99 574:254 575:245 576:64 600:89 601:230 602:254 603:125 627:140 628:251 629:253 630:243 631:10 655:114 656:242 657:195 658:69
+1 125:29 126:85 127:255 128:139 153:197 154:251 155:253 156:251 181:254 182:253 183:254 184:253 209:253 210:251 211:253 212:251 237:254 238:253 239:254 240:253 265:253 266:251 267:253 268:138 293:254 294:253 295:254 296:196 321:253 322:251 323:253 324:196 349:254 350:253 351:254 352:84 377:253 378:251 379:253 380:196 405:254 406:253 407:254 408:253 433:253 434:251 435:253 436:251 461:254 462:253 463:254 464:253 489:253 490:251 491:253 492:251 517:254 518:253 519:254 520:253 545:253 546:251 547:253 548:251 573:254 574:253 575:254 576:253 601:253 602:251 603:253 604:251 629:57 630:225 631:254 632:253 658:56 659:253 660:251
+1 125:149 126:255 127:254 128:58 153:215 154:253 155:183 156:2 180:41 181:232 182:253 183:181 208:92 209:253 210:253 211:181 236:92 237:253 238:253 239:181 264:92 265:253 266:253 267:181 292:92 293:253 294:253 295:181 320:92 321:253 322:253 323:181 348:92 349:253 350:253 351:181 376:92 377:253 378:253 379:181 404:92 405:253 406:253 407:181 432:92 433:253 434:253 435:181 460:92 461:253 462:253 463:181 488:31 489:228 490:253 491:181 517:198 518:253 519:228 520:54 545:33 546:226 547:253 548:195 549:7 574:199 575:253 576:253 577:75 602:34 603:218 604:253 605:228 606:117 607:14 608:12 631:33 632:219 633:253 634:253 635:253 636:211 660:32 661:123 662:149 663:230 664:41
+1 130:79 131:203 132:141 157:51 158:240 159:240 160:140 185:88 186:252 187:252 188:140 213:197 214:252 215:252 216:140 241:197 242:252 243:252 244:140 268:147 269:253 270:253 271:253 295:38 296:234 297:252 298:242 299:89 323:113 324:252 325:252 326:223 350:16 351:207 352:252 353:252 354:129 377:16 378:203 379:253 380:252 381:220 382:37 405:29 406:253 407:255 408:253 409:56 432:19 433:181 434:252 435:253 436:176 437:6 460:166 461:252 462:252 463:228 464:52 487:10 488:203 489:252 490:252 491:126 514:63 515:178 516:252 517:252 518:173 542:114 543:253 544:253 545:225 570:238 571:252 572:252 573:99 596:7 597:135 598:253 599:252 600:176 601:19 624:29 625:252 626:253 627:252 628:55 652:13 653:189 654:253 655:204 656:25
+1 126:94 127:254 128:75 154:166 155:253 156:231 182:208 183:253 184:147 210:208 211:253 212:116 238:208 239:253 240:168 266:146 267:254 268:222 294:166 295:253 296:116 322:208 323:253 324:116 350:166 351:253 352:158 378:145 379:253 380:231 406:209 407:254 408:169 434:187 435:253 436:168 462:93 463:253 464:116 490:93 491:253 492:116 518:93 519:253 520:116 546:94 547:254 548:179 549:11 574:93 575:253 576:246 577:101 602:145 603:253 604:255 605:92 630:93 631:253 632:246 633:59 658:93 659:253 660:74
+0 127:46 128:105 129:254 130:254 131:224 132:59 133:59 134:9 155:196 156:254 157:253 158:253 159:253 160:253 161:253 162:128 182:96 183:235 184:254 185:253 186:253 187:253 188:253 189:253 190:247 191:122 208:4 209:101 210:244 211:253 212:254 213:234 214:241 215:253 216:253 217:253 218:253 219:186 220:18 236:96 237:253 238:253 239:253 240:232 241:83 242:109 243:170 244:253 245:253 246:253 247:253 248:116 264:215 265:253 266:253 267:253 268:196 271:40 272:253 273:253 274:253 275:253 276:116 290:8 291:141 292:247 293:253 294:253 295:237 296:29 299:6 300:38 301:171 302:253 303:253 304:116 317:13 318:146 319:253 320:253 321:253 322:253 323:57 329:156 330:253 331:253 332:116 345:40 346:253 347:253 348:253 349:253 350:178 351:27 357:156 358:253 359:253 360:116 372:136 373:204 374:253 375:253 376:253 377:192 378:27 385:156 386:253 387:253 388:116 399:28 400:195 401:254 402:254 403:254 404:250 405:135 412:99 413:255 414:254 415:254 416:117 427:118 428:253 429:253 430:253 431:253 432:142 439:19 440:170 441:253 442:253 443:216 444:62 454:42 455:212 456:253 457:253 458:253 459:253 460:38 466:124 467:188 468:253 469:253 470:253 471:174 482:59 483:253 484:253 485:253 486:237 487:93 488:3 491:31 492:40 493:130 494:247 495:253 496:253 497:253 498:204 499:13 510:59 511:253 512:253 513:253 514:154 518:54 519:218 520:254 521:253 522:253 523:253 524:253 525:253 526:38 538:59 539:253 540:253 541:253 542:215 543:156 544:156 545:156 546:209 547:253 548:255 549:253 550:253 551:253 552:192 553:97 554:15 566:55 567:242 568:253 569:253 570:253 571:253 572:253 573:253 574:253 575:253 576:254 577:253 578:253 579:204 580:23 595:118 596:253 597:253 598:253 599:253 600:253 601:253 602:253 603:253 604:254 605:216 606:174 607:13 623:54 624:116 625:243 626:253 627:253 628:253 629:253 630:253 631:146 632:117 633:62 653:53 654:132 655:253 656:253 657:192 658:57 659:13
+1 125:42 126:232 127:254 128:58 153:86 154:253 155:253 156:58 181:86 182:253 183:253 184:58 209:206 210:253 211:253 212:58 237:215 238:253 239:253 240:58 265:215 266:253 267:253 268:58 293:215 294:253 295:253 296:58 321:215 322:253 323:253 324:58 349:215 350:253 351:253 352:58 377:215 378:253 379:253 380:58 405:215 406:253 407:253 408:58 433:188 434:253 435:253 436:85 461:86 462:253 463:253 464:200 465:12 489:29 490:223 491:253 492:253 493:151 518:209 519:253 520:253 521:194 546:128 547:253 548:253 549:200 550:8 574:32 575:213 576:253 577:253 578:152 579:6 603:32 604:221 605:253 606:253 607:153 608:5 632:90 633:215 634:253 635:253 636:151 661:59 662:253 663:253 664:84
+1 156:60 157:229 158:38 184:187 185:254 186:78 211:121 212:252 213:254 214:78 239:197 240:254 241:206 242:6 267:197 268:254 269:202 294:27 295:218 296:233 297:62 322:117 323:254 324:195 350:203 351:254 352:195 377:64 378:244 379:254 380:195 405:79 406:254 407:255 408:161 433:79 434:254 435:254 436:65 461:79 462:254 463:241 464:52 489:79 490:254 491:189 517:79 518:254 519:189 545:79 546:254 547:189 573:79 574:254 575:189 601:79 602:254 603:194 604:5 629:35 630:219 631:254 632:72 658:34 659:223 660:195 687:129 688:195
+1 101:11 102:150 103:72 129:37 130:251 131:71 157:63 158:251 159:71 185:217 186:251 187:71 213:217 214:251 215:71 240:145 241:253 242:253 243:72 267:42 268:206 269:251 270:251 271:71 295:99 296:251 297:251 298:251 299:71 323:253 324:251 325:251 326:251 327:71 350:130 351:253 352:251 353:251 354:251 355:71 377:110 378:253 379:255 380:253 381:253 382:253 383:72 405:109 406:251 407:253 408:251 409:251 410:188 411:30 433:109 434:251 435:253 436:251 437:246 438:123 460:16 461:170 462:251 463:253 464:251 465:215 488:37 489:251 490:251 491:253 492:251 493:86 516:218 517:253 518:253 519:255 520:253 521:35 543:84 544:236 545:251 546:251 547:253 548:168 549:15 571:144 572:251 573:251 574:251 575:190 576:15 599:144 600:251 601:251 602:251 603:180 626:53 627:221 628:251 629:251 630:251 631:180
+0 127:45 128:254 129:254 130:254 131:148 132:24 133:9 154:43 155:254 156:252 157:252 158:252 159:252 160:252 161:121 162:13 181:58 182:237 183:254 184:252 185:252 186:252 187:252 188:252 189:252 190:68 208:69 209:224 210:252 211:254 212:252 213:252 214:252 215:252 216:252 217:252 218:135 219:17 235:75 236:216 237:252 238:252 239:254 240:231 241:168 242:252 243:252 244:252 245:252 246:252 247:45 262:77 263:212 264:252 265:252 266:252 267:242 268:93 269:32 270:114 271:177 272:252 273:252 274:252 275:158 276:12 289:75 290:212 291:252 292:252 293:252 294:252 295:231 299:116 300:252 301:252 302:252 303:252 304:21 316:69 317:216 318:252 319:252 320:252 321:252 322:252 323:62 327:116 328:252 329:252 330:252 331:252 332:21 344:93 345:252 346:252 347:252 348:252 349:252 350:62 355:21 356:158 357:252 358:252 359:252 360:21 371:64 372:239 373:252 374:252 375:252 376:252 377:252 378:21 384:139 385:252 386:252 387:252 388:21 398:5 399:87 400:254 401:254 402:254 403:254 404:237 405:41 411:11 412:150 413:254 414:254 415:254 416:22 425:5 426:85 427:252 428:252 429:252 430:252 431:222 432:55 439:116 440:252 441:252 442:252 443:214 444:18 453:24 454:252 455:252 456:252 457:252 458:252 459:91 466:26 467:153 468:252 469:252 470:252 471:45 481:24 482:252 483:252 484:252 485:252 486:252 487:91 492:18 493:93 494:151 495:252 496:252 497:252 498:184 499:28 509:24 510:252 511:252 512:252 513:252 514:252 515:164 516:116 517:116 518:116 519:117 520:141 521:252 522:252 523:252 524:252 525:252 526:68 537:24 538:252 539:252 540:252 541:252 542:252 543:252 544:252 545:252 546:252 547:254 548:252 549:252 550:252 551:252 552:252 553:163 554:31 565:9 566:121 567:252 568:252 569:252 570:252 571:252 572:252 573:252 574:252 575:254 576:252 577:252 578:252 579:178 580:91 581:33 594:13 595:119 596:252 597:252 598:252 599:252 600:252 601:252 602:252 603:254 604:252 605:252 606:184 607:37 623:13 624:121 625:252 626:252 627:252 628:252 629:252 630:252 631:254 632:214 633:45 634:28 652:8 653:21 654:21 655:169 656:252 657:252 658:41 659:22 660:18
+0 125:218 126:253 127:253 128:255 129:149 130:62 151:42 152:144 153:236 154:251 155:251 156:253 157:251 158:236 159:144 160:144 179:99 180:251 181:251 182:251 183:225 184:253 185:251 186:251 187:251 188:251 189:166 190:16 206:79 207:253 208:251 209:251 210:204 211:41 212:143 213:205 214:251 215:251 216:251 217:253 218:169 219:15 233:79 234:231 235:253 236:251 237:225 238:41 241:41 242:226 243:251 244:251 245:253 246:251 247:164 260:37 261:253 262:253 263:255 264:253 265:35 271:79 272:232 273:255 274:253 275:227 276:42 288:140 289:251 290:251 291:253 292:168 293:15 300:77 301:253 302:251 303:251 304:142 315:21 316:221 317:251 318:251 319:164 320:15 329:227 330:251 331:251 332:236 333:61 342:32 343:190 344:251 345:251 346:251 357:73 358:251 359:251 360:251 361:71 370:73 371:251 372:251 373:251 374:251 385:73 386:251 387:251 388:251 389:71 398:73 399:253 400:253 401:253 402:201 413:73 414:253 415:253 416:253 417:72 426:176 427:251 428:251 429:251 430:71 441:73 442:251 443:251 444:251 445:71 454:253 455:251 456:251 457:157 458:10 469:73 470:251 471:251 472:251 473:71 482:253 483:251 484:251 485:142 497:150 498:251 499:251 500:204 501:41 510:124 511:251 512:251 513:220 514:180 524:130 525:253 526:251 527:225 528:41 538:73 539:253 540:253 541:253 542:253 543:73 544:73 545:10 549:42 550:73 551:150 552:253 553:255 554:253 555:216 566:31 567:189 568:251 569:251 570:251 571:253 572:251 573:159 574:144 575:144 576:145 577:206 578:251 579:251 580:251 581:253 582:168 583:92 595:20 596:195 597:251 598:251 599:253 600:251 601:251 602:251 603:251 604:253 605:251 606:251 607:251 608:225 609:164 610:15 624:21 625:142 626:220 627:253 628:251 629:251 630:251 631:251 632:253 633:251 634:251 635:204 636:41 654:51 655:72 656:174 657:251 658:251 659:251 660:253 661:147 662:71 663:41
+0 127:60 128:96 129:96 130:48 153:16 154:171 155:228 156:253 157:251 158:220 159:51 160:32 181:127 182:251 183:251 184:253 185:251 186:251 187:251 188:251 189:80 207:24 208:182 209:236 210:251 211:211 212:189 213:236 214:251 215:251 216:251 217:242 218:193 234:100 235:194 236:251 237:251 238:211 239:35 241:71 242:173 243:251 244:251 245:253 246:240 247:158 248:19 261:64 262:253 263:255 264:253 265:205 266:19 271:40 272:218 273:255 274:253 275:253 276:91 288:16 289:186 290:251 291:253 292:247 293:110 300:39 301:233 302:251 303:251 304:188 315:16 316:189 317:251 318:251 319:205 320:110 329:48 330:220 331:251 332:220 333:48 343:72 344:251 345:251 346:251 347:158 358:51 359:251 360:251 361:232 371:190 372:251 373:251 374:251 375:59 386:32 387:251 388:251 389:251 398:96 399:253 400:253 401:253 402:95 414:32 415:253 416:253 417:193 426:214 427:251 428:251 429:204 430:23 442:52 443:251 444:251 445:94 454:253 455:251 456:251 457:109 469:48 470:221 471:251 472:219 473:47 482:253 483:251 484:251 485:70 497:234 498:251 499:251 500:188 510:253 511:251 512:251 513:188 523:40 524:158 525:253 526:251 527:172 528:70 539:191 540:253 541:253 542:253 543:96 544:24 549:12 550:174 551:253 552:253 553:255 554:221 567:71 568:251 569:251 570:251 571:253 572:205 573:190 574:190 575:190 576:191 577:197 578:251 579:251 580:231 581:221 582:93 595:16 596:126 597:236 598:251 599:253 600:251 601:251 602:251 603:251 604:253 605:251 606:251 607:140 608:47 625:67 626:188 627:189 628:188 629:188 630:188 631:188 632:189 633:188 634:109 635:4
+0 126:32 127:202 128:255 129:253 130:253 131:175 132:21 152:84 153:144 154:190 155:251 156:253 157:251 158:251 159:251 160:174 176:6 177:37 178:166 179:218 180:236 181:251 182:251 183:251 184:253 185:251 186:251 187:251 188:251 189:156 204:115 205:251 206:251 207:253 208:251 209:251 210:251 211:251 212:253 213:251 214:251 215:251 216:251 217:180 231:105 232:241 233:251 234:251 235:253 236:251 237:251 238:251 239:122 240:72 241:71 242:71 243:148 244:251 245:180 258:73 259:253 260:253 261:253 262:253 263:202 264:253 265:253 266:143 286:31 287:189 288:251 289:251 290:251 291:31 292:189 293:251 294:142 314:63 315:236 316:251 317:251 318:96 320:124 321:246 322:142 330:21 331:166 332:21 342:73 343:251 344:251 345:251 346:71 349:217 350:142 357:32 358:190 359:251 360:142 370:73 371:251 372:251 373:251 374:71 377:217 378:142 385:73 386:251 387:251 388:142 398:73 399:253 400:253 401:253 402:72 405:156 406:103 413:73 414:253 415:253 416:253 417:72 426:73 427:251 428:251 429:251 430:174 441:73 442:251 443:251 444:251 445:71 454:73 455:251 456:251 457:251 458:251 469:73 470:251 471:251 472:251 473:71 482:42 483:205 484:251 485:251 486:251 487:79 497:73 498:251 499:251 500:251 501:71 511:41 512:226 513:251 514:251 515:232 516:77 525:73 526:251 527:251 528:251 529:71 540:166 541:253 542:253 543:255 544:253 545:227 546:73 547:21 553:125 554:253 555:253 556:143 568:16 569:169 570:251 571:253 572:251 573:251 574:251 575:174 576:105 579:63 580:144 581:253 582:251 583:251 584:142 597:15 598:35 599:253 600:251 601:251 602:251 603:251 604:243 605:217 606:217 607:231 608:251 609:253 610:251 611:220 612:20 627:143 628:142 629:236 630:251 631:251 632:253 633:251 634:251 635:251 636:251 637:253 638:251 639:137 657:61 658:71 659:200 660:253 661:251 662:251 663:251 664:251 665:201 666:71 667:10
+1 130:218 131:170 132:108 157:32 158:227 159:252 160:232 185:129 186:252 187:252 188:252 212:1 213:253 214:252 215:252 216:168 240:144 241:253 242:252 243:236 244:62 268:144 269:253 270:252 271:215 296:144 297:253 298:252 299:112 323:21 324:206 325:253 326:252 327:71 351:99 352:253 353:255 354:119 378:63 379:242 380:252 381:253 382:35 406:94 407:252 408:252 409:154 410:10 433:145 434:237 435:252 436:252 461:255 462:253 463:253 464:108 487:11 488:155 489:253 490:252 491:179 492:15 514:11 515:150 516:252 517:253 518:200 519:20 542:73 543:252 544:252 545:253 546:97 569:47 570:233 571:253 572:253 596:1 597:149 598:252 599:252 600:252 624:1 625:252 626:252 627:246 628:132 652:1 653:169 654:252 655:132
+1 130:116 131:255 132:123 157:29 158:213 159:253 160:122 185:189 186:253 187:253 188:122 213:189 214:253 215:253 216:122 241:189 242:253 243:253 244:122 267:2 268:114 269:243 270:253 271:186 272:19 295:100 296:253 297:253 298:253 299:48 323:172 324:253 325:253 326:253 327:48 351:172 352:253 353:253 354:182 355:19 378:133 379:251 380:253 381:175 382:4 405:107 406:251 407:253 408:253 409:65 432:26 433:194 434:253 435:253 436:214 437:40 459:105 460:205 461:253 462:253 463:125 464:40 487:139 488:253 489:253 490:253 491:81 514:41 515:231 516:253 517:253 518:159 519:16 541:65 542:155 543:253 544:253 545:172 546:4 569:124 570:253 571:253 572:253 573:98 597:124 598:253 599:253 600:214 601:41 624:22 625:207 626:253 627:253 628:139 653:124 654:253 655:162 656:9
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_linear_regression_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_linear_regression_data.txt
new file mode 100755
index 0000000000000000000000000000000000000000..9aaaa4297548b90527e4c8c9c62a2720a510a55a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_linear_regression_data.txt
@@ -0,0 +1,501 @@
+-9.490009878824548 1:0.4551273600657362 2:0.36644694351969087 3:-0.38256108933468047 4:-0.4458430198517267 5:0.33109790358914726 6:0.8067445293443565 7:-0.2624341731773887 8:-0.44850386111659524 9:-0.07269284838169332 10:0.5658035575800715
+0.2577820163584905 1:0.8386555657374337 2:-0.1270180511534269 3:0.499812362510895 4:-0.22686625128130267 5:-0.6452430441812433 6:0.18869982177936828 7:-0.5804648622673358 8:0.651931743775642 9:-0.6555641246242951 10:0.17485476357259122
+-4.438869807456516 1:0.5025608135349202 2:0.14208069682973434 3:0.16004976900412138 4:0.505019897181302 5:-0.9371635223468384 6:-0.2841601610457427 7:0.6355938616712786 8:-0.1646249064941625 9:0.9480713629917628 10:0.42681251564645817
+-19.782762789614537 1:-0.0388509668871313 2:-0.4166870051763918 3:0.8997202693189332 4:0.6409836467726933 5:0.273289095712564 6:-0.26175701211620517 7:-0.2794902492677298 8:-0.1306778297187794 9:-0.08536581111046115 10:-0.05462315824828923
+-7.966593841555266 1:-0.06195495876886281 2:0.6546448480299902 3:-0.6979368909424835 4:0.6677324708883314 5:-0.07938725467767771 6:-0.43885601665437957 7:-0.608071585153688 8:-0.6414531182501653 9:0.7313735926547045 10:-0.026818676347611925
+-7.896274316726144 1:-0.15805658673794265 2:0.26573958270655806 3:0.3997172901343442 4:-0.3693430998846541 5:0.14324061105995334 6:-0.25797542063247825 7:0.7436291919296774 8:0.6114618853239959 9:0.2324273700703574 10:-0.25128128782199144
+-8.464803554195287 1:0.39449745853945895 2:0.817229160415142 3:-0.6077058562362969 4:0.6182496334554788 5:0.2558665508269453 6:-0.07320145794330979 7:-0.38884168866510227 8:0.07981886851873865 9:0.27022202891277614 10:-0.7474843534024693
+2.1214592666251364 1:-0.005346215048158909 2:-0.9453716674280683 3:-0.9270309666195007 4:-0.032312290091389695 5:0.31010676221964206 6:-0.20846743965751569 7:0.8803449313707621 8:-0.23077831216541722 9:0.29246395759528565 10:0.5409312755478819
+1.0720117616524107 1:0.7880855916368177 2:0.19767407429003536 3:0.9520689432368168 4:-0.845829774129496 5:0.5502413918543512 6:-0.44235539500246457 7:0.7984106594591154 8:-0.2523277127589152 9:-0.1373808897290778 10:-0.3353514432305029
+-13.772441561702871 1:-0.3697050572653644 2:-0.11452811582755928 3:-0.807098168238352 4:0.4903066124307711 5:-0.6582805242342049 6:0.6107814398427647 7:-0.7204208094262783 8:-0.8141063661170889 9:-0.9459402662357332 10:0.09666938346350307
+-5.082010756207233 1:-0.43560342773870375 2:0.9349906440170221 3:0.8090021580031235 4:-0.3121157071110545 5:-0.9718883630945336 6:0.6191882496201251 7:0.0429886073795116 8:0.670311110015402 9:0.16692329718223786 10:0.37649213869502973
+7.887786536531237 1:0.11276440263810383 2:-0.7684997525607482 3:0.1770172737885798 4:0.7902845707138706 5:0.2529503304079441 6:-0.23483801763662826 7:0.8072501895004851 8:0.6673992021927047 9:-0.4796127376677324 10:0.9244724404994455
+14.323146365332388 1:-0.2049276879687938 2:0.1470694373531216 3:-0.48366999792166787 4:0.643491115907358 5:0.3183669486383729 6:0.22821350958477082 7:-0.023605251086149304 8:-0.2770587742156372 9:0.47596326458377436 10:0.7107229819632654
+-20.057482615789212 1:-0.3205057828114841 2:0.51605972926996 3:0.45215640988181516 4:0.01712446974606241 5:0.5508198371849293 6:-0.2478254241316491 7:0.7256483175955235 8:0.39418662792516 9:-0.6797384914236382 10:0.6001217520150142
+-0.8995693247765151 1:0.4508991072414843 2:0.589749448443134 3:0.6464818311502738 4:0.7005669004769028 5:0.9699584106930381 6:-0.7417466269908464 7:0.22818964839784495 8:0.08574936236270037 9:-0.6945765138377225 10:0.06915201979238828
+-19.16829262296376 1:0.09798746565879424 2:-0.34288007110901964 3:0.440249350802451 4:-0.22440768392359534 5:-0.9695067570891225 6:-0.7942032659310758 7:-0.792286205517398 8:-0.6535487038528798 9:0.7952676470618951 10:-0.1622831617066689
+5.601801561245534 1:0.6949189734965766 2:-0.32697929564739403 3:-0.15359663581829275 4:-0.8951865090520432 5:0.2057889391931318 6:-0.6676656789571533 7:-0.03553655732400762 8:0.14550349954571096 9:0.034600542078191854 10:0.4223352065067103
+-3.2256352187273354 1:0.35278245969741096 2:0.7022211035026023 3:0.5686638754605697 4:-0.4202155290448111 5:-0.26102723928249216 6:0.010688215941416779 7:-0.4311544807877927 8:0.9500151672991208 9:0.14380635780710693 10:-0.7549354840975826
+1.5299675726687754 1:-0.13079299081883855 2:0.0983382230287082 3:0.15347083875928424 4:0.45507300685816965 5:0.1921083467305864 6:0.6361110540492223 7:0.7675261182370992 8:-0.2543488202081907 9:0.2927051050236915 10:0.680182444769418
+-0.250102447941961 1:-0.8062832278617296 2:0.8266289890474885 3:0.22684501241708888 4:0.1726291966578266 5:-0.6778773666126594 6:0.9993906921393696 7:0.1789490173139363 8:0.5584053824232391 9:0.03495894704368174 10:-0.8505720014852347
+12.792267926563595 1:-0.008461200645088818 2:-0.648273596036564 3:-0.005334477339629995 4:0.3781469006858833 5:0.30565234666790686 6:-0.2822867492866177 7:0.10175120738413801 8:0.5342432888482425 9:0.05146513075475534 10:-0.6459729964194652
+6.082192787194888 1:0.42519013450094767 2:0.09441503345243984 3:-0.07898439043103522 4:-0.32207498048636474 5:-0.9180071861219266 6:0.5951317320731633 7:0.41000814588717693 8:-0.3926260640533046 9:0.2789036768568971 10:0.13163692286014528
+-7.481405271455238 1:0.03324842612749346 2:0.07055844751995122 3:-0.47199515597021113 4:-0.682690342465275 5:0.3983414713797069 6:-0.2136729393256811 7:-0.09066563475481249 8:-0.4640338194317184 9:-0.03513782089224482 10:-0.1711809802758364
+6.739533816100517 1:0.1774546460228057 2:-0.6783644553523549 3:-0.47871398278230504 4:0.02272121490463097 5:-0.5047649289302389 6:0.26479596144873896 7:-0.32045436544054096 8:0.3113047940487379 9:0.6269418147567556 10:0.9710114516962312
+3.780807062175497 1:0.01715676997104909 2:0.8975962429865936 3:-0.46594560920034134 4:0.2873623499953055 5:0.8894362304584083 6:0.17973981232418468 7:0.49105791400707743 8:-0.7359842740294882 9:0.38941133808001127 10:-0.7151884777228046
+4.564039393483412 1:0.07478785545033317 2:-0.8672651994084235 3:0.450599300176334 4:0.35104802298560056 5:0.6797318185095045 6:-0.03891997518827006 7:-0.33208695871398675 8:0.6166574577055226 9:0.5730212324012205 10:-0.4194925751047054
+-0.3195679646035633 1:0.054527683864544096 2:-0.15591931640565093 3:0.9266742559542833 4:0.888522581905147 5:0.6576203900699167 6:0.6417770212400336 7:0.7509788029052338 8:-0.3104974571382815 9:0.7234744267051683 10:-0.15869049651427103
+11.290452658023497 1:0.20173310976772196 2:0.8657502566551409 3:0.9325160601080682 4:0.24570884032596263 5:-0.6546108813337841 6:-0.14020032028377583 7:-0.8825687891702743 8:-0.21420166926412865 9:-0.8600275184792756 10:-0.7990574622230739
+-4.003499192090455 1:0.8325875503351796 2:-0.5956350140619129 3:0.12598048009007923 4:0.12340188733473134 5:-0.839435659309717 6:-0.16623481818728414 7:0.12028795301041662 8:-0.7994713170657952 9:0.2216721974907896 10:0.8407561415075087
+-19.872991038068406 1:-0.9325810772922609 2:-0.6411471147334535 3:0.9949216290375054 4:0.483048267470493 5:-0.8736297429070232 6:-0.36222771685582544 7:0.26397860162786957 8:0.45527588775737704 9:-0.9424989711186325 10:0.6251162293059616
+10.502762149373098 1:-0.2307778924009991 2:0.6977871128979924 3:0.022830408261390822 4:0.6257738824362347 5:0.9770979848265122 6:0.09985730624684575 7:-0.9755858424230182 8:-0.689969833240031 9:-0.7294587311376761 10:0.3496326193951331
+-14.328978509075442 1:0.37929821892417404 2:0.8402056881660709 3:-0.1806835799958202 4:0.766314307210441 5:0.865876513623024 6:-0.7113501219432434 7:-0.0932956557986735 8:-0.7042025810921411 9:0.47530696925672267 10:-0.4629102077669889
+-16.26143027545273 1:-0.9309578475799722 2:0.7591795880911123 3:0.06296957473213705 4:0.786790093290086 5:-0.9527998391625465 6:-0.08573982501921895 7:-0.3812232026687308 8:-0.6890669703685022 9:0.25415911467755015 10:-0.07664746267502509
+11.772544195529013 1:0.3614756404325046 2:0.14508027508253818 3:0.23042774014795753 4:0.4164348685332022 5:0.4109091750657461 6:0.03853098236933272 7:0.38911994885223145 8:-0.5031309357181766 9:-0.596467768575587 10:0.17884522225228028
+14.697703557439503 1:0.24508864174863 2:0.7576193329655578 3:0.09030511120334461 4:0.9537528991778741 5:-0.7224092160621338 6:-0.34089385162121943 7:0.6924170720838818 8:0.32912306214891784 9:-0.4064624712125904 10:-0.5344662061201593
+-13.976130931152703 1:0.5891192531479754 2:0.29862103742464274 3:-0.36153976712796343 4:-0.6552669564323226 5:-0.22672513691161766 6:0.3001336202535376 7:0.34490251346382617 8:0.2072633053920192 9:-0.5659371284058774 10:0.49599636156628835
+-14.762758252931127 1:0.31302496164254223 2:-0.6062773982342133 3:-0.9874007658402217 4:-0.6214904627601421 5:-0.11421073677207683 6:-0.5850843421161205 7:0.1250679146774638 8:-0.7108170726393621 9:-0.6888351241194393 10:0.6077343683084389
+-3.300641320608255 1:-0.1407178879203672 2:0.12960233233004925 3:-0.4236196478321872 4:0.7903078296084356 5:-0.8755754953628643 6:-0.2062360260394529 7:-0.045680124889026175 8:0.783182093429277 9:-0.02995737262668463 10:-0.33382351650328435
+-15.72351561304857 1:-0.1802575775708093 2:-0.991006951265341 3:-0.9107951763247621 4:0.9069820084047908 5:-0.12691921206803047 6:-0.7087012119383593 7:-0.9179510577925369 8:0.18480349982718325 9:-0.4478459144114004 10:-0.5560585660624608
+-22.949825936196074 1:0.4797855980916854 2:0.01997502546020402 3:-0.8827928315487465 4:0.2755107907750989 5:0.015544482147298977 6:0.9652687138748801 7:0.6622667860970648 8:-0.7708138539912186 9:0.17728148663006627 10:0.47818190728952925
+12.092431628826905 1:0.1358843437335564 2:0.03643446587894239 3:-0.31070823939673287 4:0.5283033206569152 5:0.3469111543845367 6:-0.5162518174930761 7:0.24270234207184016 8:0.7352292800096338 9:0.8860322286740037 10:0.6748068653962045
+-23.51088409032297 1:-0.4683538422180036 2:0.1469540185936138 3:0.9113612952591796 4:-0.9838482669789823 5:0.4506466371133697 6:0.6456121712599778 7:0.8264783725578371 8:0.562664168655115 9:-0.8299281852090683 10:0.40690300256653256
+5.998186124881712 1:-0.9781302074883151 2:0.32984303335155785 3:0.7303430847899663 4:0.841481297188956 5:0.05580773881989276 6:0.7130788298702062 7:-0.218087116119847 8:-0.9889494995220598 9:0.9182854134226501 10:-0.7501751701020942
+9.852316338642547 1:0.146854160091757 2:-0.3611508707370965 3:0.3517016971654914 4:0.6187697988029395 5:-0.010768583697787548 6:0.5236725885871243 7:0.5945666964145524 8:-0.009180562740628506 9:-0.44474762415618274 10:0.41852743519493685
+-5.313930756588526 1:-0.6304209277071555 2:-0.37010359785263813 3:-0.3194739026510125 4:-0.750533359080716 5:0.45500303301733114 6:-0.012727544364283805 7:-0.43941651856862274 8:0.927108876532093 9:-0.24164903158058149 10:0.44134972919002124
+-4.2775224863223915 1:-0.35785764991284363 2:0.942797043714243 3:0.4539569191274251 4:-0.6944903010994341 5:-0.08357221983075225 6:0.4433049548665855 7:-0.5488972050023557 8:-0.24014623658145773 9:-0.6178118485382511 10:-0.4575463952834564
+-10.57769830424322 1:0.22693864400257335 2:-0.041639691095668674 3:0.9948726461115123 4:-0.7450471554938383 5:-0.1114847126717804 6:-0.27881184842402673 7:0.029766812446276214 8:-0.3727649352432578 9:-0.7791732805568077 10:0.9425576681069683
+-0.8430338600258201 1:0.4607090007225536 2:-0.6079961642969514 3:-0.5671626932935381 4:0.12784576080614185 5:-0.30766031989910236 6:-0.21232963505711555 7:0.3310463755850872 8:-0.6807682731528943 9:0.7826634145951483 10:0.0608057623636995
+13.450586257053727 1:-0.2697769964284986 2:0.07743737732312428 3:-0.8459687499864881 4:0.6091901514177853 5:-0.9464815428211699 6:0.15780407422581533 7:-0.28552052619478996 8:-0.27500859181806403 9:-0.7207541548282903 10:0.05215593729084533
+20.358241877831016 1:0.29768927445620164 2:-0.5379390525163252 3:0.6591913001003027 4:0.6635992348010928 5:0.3786594651413009 6:-0.7217135278882543 7:0.9634013908615768 8:0.03961253903778861 9:0.1335121312144949 10:0.7933944303463509
+9.800993960518852 1:0.39896823489212285 2:0.30948413101894023 3:0.08568060094378493 4:-0.7454513450113371 5:0.8054125831421357 6:-0.24464240413169347 7:-0.18294406588625112 8:-0.883455504399858 9:0.2468431033653562 10:-0.708151566382103
+-21.432387764165806 1:-0.4785033857256795 2:0.520350718059089 3:-0.2988515012130126 4:-0.46260150057299754 5:0.5394344995663083 6:0.39320468081626836 7:0.1890560923345248 8:0.13123799325264507 9:0.43613839380760355 10:0.39541998419731494
+-4.090570760187878 1:0.3909705814857716 2:0.9830271975811611 3:0.672523651785939 4:0.0035177223850744177 5:0.567082732451311 6:-0.2620454326881394 7:0.46622578556708105 8:0.646246879249865 9:0.4263175536668733 10:0.8982696975276223
+3.7459201216906926 1:-0.9480167656870653 2:-4.888270196095057E-4 3:0.48226844071577646 4:-0.23706663537631645 5:0.22420266627462127 6:0.2981747607694978 7:0.3893425967975348 8:0.6302701381298614 9:-0.21909113816064196 10:0.8371697958140494
+9.767952084958061 1:-0.2300790371078303 2:-0.4457883630748676 3:0.28710853302295325 4:0.7112839743052013 5:-0.8765858382640623 6:-0.6470779468607217 7:0.4369262584371727 8:-0.7175412028407337 9:0.5506733477278882 10:0.5393007189573547
+6.9802839308913365 1:0.21769855012808215 2:0.8653818331675485 3:0.2322943113578111 4:0.3760591265797468 5:0.06554014167292377 6:0.6866096712933549 7:0.866929973115441 8:-0.6462263417217329 9:0.2507247465275353 10:-0.7005877782050307
+16.014720800069103 1:0.6058055248984549 2:0.048517868234337014 3:-0.15744912875924877 4:0.32598079708869365 5:-0.587791997223768 6:-0.4636187312118474 7:0.7771908559246068 8:-0.349403853888719 9:0.229800030145503 10:-0.674614818934488
+8.417571532985823 1:-0.21164946152466801 2:-0.9981936663594053 3:0.8611869575187896 4:0.11100891297254312 5:-0.7406067304729631 6:-0.7613837395522254 7:-0.9617573325708704 8:0.5697426971647488 9:-0.5830879716990833 10:0.5951448538064159
+-12.491442077546413 1:-0.19172117564625735 2:-0.12421304883392126 3:0.7095605786791346 4:0.6401582292398038 5:-0.9347790209840108 6:0.6592209285686903 7:0.702282297844389 8:-0.22765902007749528 9:-0.17746922342943816 10:0.7196663432778121
+-8.605713514762092 1:0.36490454976480846 2:0.6991204480538957 3:0.6546945560337121 4:-0.032324845758738174 5:0.2453935969836043 6:0.5363119225093116 7:0.6266741350524205 8:-0.2132266305382322 9:-0.308105870487996 10:-0.08219413867616465
+-10.35591860037468 1:-0.014204168485027147 2:-0.7077035677144325 3:0.024004217785642767 4:0.818971992516166 5:0.9081305263471056 6:0.808854493237229 7:-0.6474336785461867 8:-0.32559288177031465 9:-0.32850453072496055 10:-0.7035310416695784
+3.605002621628445 1:0.6085817977516599 2:0.8101072412357928 3:0.7697891508923966 4:-0.5738750389864677 5:-0.734314989863889 6:-0.7879014492215499 7:0.6884442838920775 8:-0.46131231930402383 9:-0.7730585954271005 10:-0.7819874019145132
+12.30435312415091 1:0.3283668768730639 2:-0.18316686990068187 3:0.3955614099142126 4:0.8450470350842108 5:0.3958042901611589 6:0.6578475571960676 7:-0.4395488558075096 8:0.15720430113495376 9:-0.5318362828977672 10:0.45585285255232044
+9.020048819638827 1:-0.5986521145193395 2:0.3266542215286443 3:-0.09911773729611917 4:-0.21478254478908676 5:0.6546175049764293 6:-0.1414796368932345 7:0.25802631337510085 8:-0.6773828562539816 9:-0.22038193899258718 10:-0.17465737306657902
+14.854262978981406 1:0.5293763924477841 2:-0.24658868331583683 3:0.8268631648872109 4:0.8969207203400265 5:0.03933229861213983 6:-0.6212951181360529 7:-0.36695460282178205 8:-0.5468014636386027 9:-0.3419492829414976 10:-0.8273314086998671
+5.658665647926016 1:0.9543096383762801 2:0.13230023957687176 3:-0.3071929861496465 4:-0.3646067841449696 5:0.6979929890816723 6:-0.20721664168809228 7:0.6676482547655365 8:0.944757051233543 9:0.024377296173674567 10:-0.9413728609667691
+-6.930603551528371 1:0.09198647857985232 2:-0.3685113649452161 3:-0.2361728930325453 4:0.3674268130607439 5:0.27385598384498344 6:-0.7151900241735676 7:0.3310154476154119 8:-0.24328111897361682 9:0.2511378679668912 10:-0.35825141175578934
+13.361196783041926 1:0.11676665169094824 2:-0.49968608916548307 3:0.9941342810313298 4:-0.17858967215374988 5:0.1993744673440312 6:0.14596837574280297 7:-0.8245495433125194 8:-0.5637934691545672 9:-0.8589185806222286 10:-0.4923216901915597
+-3.280508467210429 1:-0.9917770074538397 2:-0.1547651813493751 3:0.621733177563484 4:0.7303326279246298 5:-0.0786900332560696 6:0.9107127797641994 7:0.7104513024299466 8:-0.32858522942354407 9:0.17013652749847386 10:0.27656984316288824
+11.13509519160867 1:0.6874932143640391 2:-0.46610293161038907 3:0.8744681017967024 4:0.40900365224695956 5:-0.49770054448432055 6:-0.0635770754462921 7:-0.5705387648707747 8:-0.577988250149829 9:-0.8099463063934682 10:0.42132700180827354
+-11.857350365429426 1:-0.24607974991258308 2:-0.943388538022258 3:0.8679112109377674 4:0.7779951176637694 5:-0.5802336023276593 6:-0.9093352471884992 7:0.29337797938742316 8:0.498519874589175 9:0.3493034812120912 10:-0.07307210651399076
+11.421632138263703 1:0.3911519359353859 2:-0.8154393787235621 3:0.47194271125243237 4:0.14014792298759593 5:-0.3589345913619957 6:0.7887695409762479 7:0.49962792312858895 8:-0.6402670146359797 9:-0.2314041601683119 10:-0.798901341175887
+5.194792012146463 1:0.810279303469398 2:-0.9772756877199589 3:-0.20925958437085557 4:0.8797562461102444 5:0.3211532423260066 6:0.25250279470783754 7:0.14387831263435813 8:-0.021466789385169882 9:0.18909293657271564 10:-0.5981349964027893
+12.242677118499806 1:0.3565715672082048 2:0.7366743237221687 3:0.1922233582434527 4:-0.3551925780624561 5:0.5290849503909634 6:0.7744214641246749 7:0.7277215028580597 8:-0.590440215391044 9:0.7427328184290733 10:-0.6231904162251609
+3.496172341296411 1:0.5028717258135624 2:-0.5838871888624848 3:-0.5540116561110324 4:0.8502487679795261 5:-0.7983061034328727 6:-0.3853123296389005 7:-0.1493800684643869 8:0.6008798629354264 9:-0.32299062155495406 10:-0.5827019502242026
+-15.437384793431217 1:0.41994681418237345 2:0.7106426870657483 3:-0.45211033467567696 4:-0.7272406549392239 5:-0.35736594496490737 6:0.4764507578985955 7:-0.5249912641281373 8:0.8562010912051132 9:0.45927621623833637 10:-0.3701817429794385
+5.490036861541498 1:0.8414999442459015 2:0.9273442862476728 3:-0.054654787893199774 4:-0.23126134156257327 5:-0.9155048245317694 6:0.25750538376376975 7:-0.8470916763665326 8:0.9105674676753848 9:0.5026028522378054 10:-0.06650501561108468
+-1.074065343287859 1:0.37484830603001607 2:-0.9858854245832975 3:0.007159356555897611 4:0.8172796295244154 5:0.519147377529164 6:0.8211049991970722 7:0.9901658817979146 8:-0.026519560032641998 9:-0.2328762488733862 10:0.43161994187258035
+2.0482082496444622 1:0.24940246021565793 2:0.47248358864259177 3:0.23833814894291105 4:-0.3449172512379757 5:0.7412869866239866 6:0.1351422898741914 7:-0.003784141556894216 8:-0.6321917152754075 9:0.8246267827865776 10:0.5057520480449009
+16.709794859608397 1:-0.5977424405191092 2:-0.13991362149785713 3:0.613487896720806 4:-0.37681525320218157 5:-0.4369592282569783 6:0.4702242879506955 7:0.07498463532645339 8:-0.9942304127133292 9:0.41304209196175257 10:0.6799250665519481
+4.598881854940949 1:-0.41212838137243835 2:0.6737124633791323 3:0.8376369191216593 4:0.2848328781926128 5:-0.17960265353296 6:0.0035488712665472377 7:-0.8355355482928055 8:-0.7439716673142398 9:-0.009043467128117433 10:0.7423272515054122
+9.566038608555402 1:-0.662329643040616 2:0.4727113884417973 3:-0.15734218732411365 4:-0.3950754785173889 5:0.13837083076070011 6:0.633261314089351 7:0.9929998062307679 8:-0.4639028424346423 9:-0.073992579817449 10:0.3413166410117088
+1.629198477883475 1:-0.2875719791707101 2:0.9395753700232541 3:-0.45090801750966314 4:-0.384528069378699 5:-0.35937736478702753 6:0.9597102694501136 7:-0.6898325123180971 8:-0.11436012866371303 9:-0.5330550575952768 10:0.24688769932037258
+-7.374620970147229 1:0.16864051681940984 2:-0.08391828256018252 3:-0.8184503043836224 4:0.5461252511055263 5:0.7264676659099087 6:-0.9654384426822686 7:-0.8537533138667612 8:0.9189716013058653 9:-0.03449322582531389 10:0.5490329745887035
+-0.5741704240890674 1:0.9392753294760656 2:-0.5579682000156501 3:-0.8083270703362093 4:-0.7022804026958895 5:-0.30426803430649896 6:0.8211432527140852 7:-0.8101343265051797 8:-0.0945946325760949 9:0.49546915718101814 10:0.5184327698839013
+12.583032451116004 1:0.20496323995364651 2:0.5082017540304999 3:0.2428646053751764 4:0.7101854338863274 5:-0.9619925264660094 6:0.4610134502825909 7:-0.5620669052678122 8:0.6766614078376236 9:-0.7169693435782278 10:-0.14362322382035164
+-10.489157123372898 1:-0.7441633083637054 2:0.07069898351187809 3:-0.47119552972566336 4:-0.43970155900871344 5:0.43192289605353973 6:-0.0798550143899397 7:0.2111188135787776 8:0.9101748615761336 9:-0.4079984876629721 10:-0.8101424982394589
+-3.811365493249739 1:0.7250263461647963 2:0.22182621035333838 3:-0.12735342714215725 4:0.26222861719040624 5:0.3928174057935714 6:0.817131411734006 7:-0.056109765698795 8:0.7908779197353637 9:-0.06768319505245768 10:0.4107045608924882
+-7.604636483513961 1:0.876751634787073 2:0.04037085575852295 3:0.18142385658771398 4:0.38350565074271903 5:-0.30937664332011905 6:-0.9544807672006823 7:0.008643477632712449 8:-0.27676843472226276 9:-0.12938540988602476 10:-0.2929762262661819
+-1.9889499615051784 1:-0.4243149295090465 2:0.22578711943818686 3:0.662530786460152 4:0.28592235843136105 5:0.4170345231441832 6:0.9290881132120887 7:0.5332443368002588 8:-0.33248958421809927 9:0.16273139830495942 10:0.6899022585936985
+-1.99891354174786 1:-0.1732078452611825 2:0.2361029542296429 3:-0.8455867017505336 4:0.31638672033240867 5:-0.648387667144986 6:-0.7647886103837449 7:0.6910155501192978 8:-0.2665663102538198 9:-0.5980899570876459 10:-0.9165896495676276
+9.74348630903265 1:0.18934450539532244 2:-0.715110505416745 3:-0.453777527810155 4:0.2743741252197758 5:-0.8439310405443103 6:-0.533835190276116 7:-0.5911710854054728 8:0.21026462628920695 9:-0.45849607678093585 10:0.1327074179200407
+20.221961806051706 1:0.624731930687735 2:-0.39914395421723015 3:0.781887900750925 4:0.5442619051596436 5:0.16651193067479153 6:0.9064846121246533 7:-0.3643159594276202 8:-0.5182065337246469 9:-0.6785628247191553 10:0.7111152852903913
+20.456947955410897 1:-0.21923785332346513 2:0.11340668617783778 3:0.7397883986253251 4:-0.11748081084695605 5:0.06314872700777197 6:-0.7124574845946587 7:0.18043581960897104 8:-0.09023925260092103 9:-0.7256417560118238 10:-0.5038088673851804
+12.241006086129564 1:-0.15271598143132215 2:0.9038942665552285 3:-0.6168514099878155 4:-0.12219038322317011 5:0.5402785935596728 6:0.4059744401803913 7:0.258870596734184 8:0.3190881033039108 9:0.2372469007313076 10:0.367188299614863
+3.980473021620311 1:-0.9025895351376971 2:-0.03333947011476446 3:-0.8220776066161464 4:0.449117985679933 5:0.9970519437779266 6:0.27430911004640457 7:0.039081352882204046 8:-0.8621514950929796 9:-0.569587565933642 10:-0.9118346349929578
+-13.420594775890757 1:0.3697979495309094 2:0.07383664120111888 3:0.7199366131785143 4:0.2118625428869032 5:-0.9015976323216077 6:-0.5298395275757712 7:-0.9517419542156635 8:0.39554920787574743 9:-0.3721957439110324 10:-0.4750272836396878
+-1.052659359353786 1:0.02106845330888185 2:0.7571245678782959 3:0.8034228830223251 4:0.32968340513846917 5:-0.6510386482911554 6:0.2710115488605187 7:-0.1319580272290235 8:0.932600992666184 9:0.8260461527035414 10:-0.8507648952138052
+9.813440129324034 1:0.41048687946340134 2:0.9384639988086239 3:0.4569555844323441 4:-0.3084729082645552 5:-0.7299010284877061 6:-0.6925012997779212 7:-0.6798013915257548 8:-0.504368104320321 9:-0.6234398059664716 10:0.8633407902005543
+-2.8942782378157714 1:0.5546381825677706 2:0.7959405841824887 3:0.584699836289184 4:-0.5726371777829862 5:-0.2827976152663936 6:0.138034013875719 7:-0.2935080791661324 8:-0.5323479091625714 9:0.6837641044797451 10:0.5986680812032501
+8.562937733537664 1:0.14753220510180776 2:-0.31591341855048327 3:-0.748545617199091 4:0.3251888821665734 5:0.8228589483149358 6:0.046659706976506676 7:-0.35049927996132624 8:0.2953170004605874 9:-0.6429374177050204 10:0.4624083116836044
+13.413187970975178 1:-0.7229883396779724 2:0.8876940454894067 3:-0.033794226589695775 4:0.46700071356381523 5:0.1599557295166274 6:-0.8944619785248653 7:-0.1258464584151997 8:-0.8797551785991506 9:-0.14408879184669354 10:0.11020655997336015
+-5.491389764900794 1:-0.366507395597937 2:0.630480481240723 3:-0.16600801981741609 4:0.09842042773854076 5:0.30129535029579047 6:0.14102166298628882 7:-0.28131788612036623 8:0.49635295715686234 9:0.0625636989631968 10:-0.41748132718912
+-10.29566593602992 1:-0.7898597726154271 2:-0.05425577320946573 3:0.5992645759265662 4:-0.4716868549309716 5:-0.020137302700854676 6:0.6216515277233232 7:-0.7295510954484412 8:-0.41443875567123967 9:-0.610576632050404 10:-0.9515988311377204
+7.084732852050431 1:0.9990215581592679 2:-0.9868954542412269 3:0.49133473382040704 4:0.7697599878561228 5:-0.34668939907967267 6:0.9777705993519483 7:0.4449043102759509 8:0.9812971199646168 9:0.6666598587737487 10:0.14398842572598514
+0.23715467505851734 1:0.21628799185444336 2:-0.4526390568867018 3:0.6558486691929235 4:0.13730688681492142 5:0.23076986155942736 6:0.7020484017619715 7:-0.12077999528458938 8:0.8306084972447003 9:-0.49337323198621563 10:-0.8270028152572872
+1.1552619549601455 1:-0.48202394020369277 2:-0.6274878708695264 3:-0.27623674153600697 4:-0.5312153415813432 5:-0.030820182786174044 6:-0.5893370965577813 7:0.6666315120904487 8:-0.36482991729570036 9:0.6065771813692735 10:0.05831057330788525
+-0.20433879835596253 1:-0.4702220250018212 2:0.9123705796362889 3:-0.2045657170490376 4:-0.18922063450309534 5:-0.31431213362503163 6:0.4150130060120387 7:0.34016193625941127 8:0.8391374136299805 9:0.6884250315764333 10:-0.7916408854251566
+-9.751622607785082 1:-0.0014232315621649505 2:-0.1284246813729939 3:0.5228953023175369 4:0.9688522449007109 5:-0.7857721219549156 6:-0.7812922263391038 7:-0.5916136652814756 8:0.793988610184206 9:0.7982949061274296 10:-0.592785473963741
+-22.837460416919342 1:-0.17363144173810174 2:-0.3340314573781735 3:0.9351424971322297 4:-0.6430601902397572 5:-0.13363305808148818 6:-0.42446359566938585 7:-0.4093070316761178 8:-0.9302259781839204 9:0.47004365892170585 10:-0.6231289889808045
+-3.6318714209289436 1:-0.8296410705737971 2:-0.6056572341069668 3:-0.2975417404042737 4:0.07134138175064741 5:-0.8966463747179154 6:-0.4112675899658855 7:0.7908013478009401 8:0.407396254566472 9:0.9227769302156879 10:0.12418427404473764
+-3.8909712376010583 1:-0.6552751548581366 2:-0.5641921108932855 3:-0.6340486345063014 4:-0.5441069121131075 5:0.908720622198947 6:-0.026054643814348077 7:0.03706191653058433 8:-0.6672524338819317 9:0.7958274915288801 10:-0.19029619970124023
+-10.600130341909033 1:-0.7457695999520562 2:-0.3739453132549577 3:0.01327423342620393 4:-0.08482897201178563 5:0.84573456086082 6:0.6279927575103963 7:0.014494803555804125 8:0.9420647557771027 9:-0.13484113287285893 10:0.3037405853352888
+-12.094351278535258 1:0.9982796018306028 2:0.8354271779265348 3:0.46284321795736116 4:0.07693347919601745 5:-0.4753440408996932 6:-0.47098252868073787 7:0.4810729184846003 8:-0.6136990339205741 9:-0.6715833036640317 10:-0.6247058955319091
+9.936399360181602 1:0.7330323083522969 2:0.47204204993669197 3:0.3850471475752122 4:0.21483460195167958 5:0.3806220122265147 6:0.6336993433402796 7:-0.47987416364572 8:-0.195509010865196 9:-0.6561820282562041 10:-0.45300480439842894
+-4.706701061062994 1:-0.847895844561626 2:-0.29946646506145114 3:0.25432868082106497 4:0.1352958872054535 5:-0.8803017974303002 6:-0.3675110562764785 7:0.10888496324899721 8:0.9620737605396772 9:-0.031046632561323895 10:-0.09466883461500908
+5.101614991255809 1:-0.5174248135588373 2:0.14394061894828014 3:0.5613709266711013 4:-0.5678634944326011 5:0.930216209978763 6:-0.6204727890080077 7:0.4133141749872311 8:0.6262685035917408 9:0.03382924477926896 10:-0.15231139191832854
+-8.772667465932606 1:-0.2117605577769197 2:-0.4283897136887762 3:0.44686767473401035 4:-0.5507826261358746 5:0.237124956028401 6:0.6413157520982717 7:0.2409214827604571 8:-0.8505503638033114 9:-0.9811997368468401 10:-0.9499963936664035
+-11.615775265015627 1:0.8782018665273386 2:-0.9751473570197167 3:0.6307050068521085 4:0.7012721336851997 5:0.21318736263512283 6:0.024885128053773853 7:-0.4580644243558505 8:0.1318650007251434 9:-0.9306090092992167 10:-0.5688746770986652
+19.64829023536192 1:0.14426537998360645 2:0.3557716894181753 3:-0.8577143134654568 4:0.5288643233801469 5:0.9231529738221469 6:0.975999712077738 7:0.24700404691888678 8:0.10206517527052283 9:-0.10041951294847062 10:-0.9412918491876225
+2.7409415438025486 1:-0.7404936009304737 2:-0.9792071376296605 3:-0.49510748520932113 4:0.9538460112904268 5:-0.5075114153141447 6:-0.5890791308058669 7:-0.775366087491284 8:0.4983912525892249 9:-0.2976197956132913 10:0.6791258030468514
+-4.394658158733604 1:-0.41628618754613345 2:-0.1472602552309057 3:0.44136102233464025 4:0.011882653940414434 5:-0.6559502840386595 6:-0.4022529016339016 7:0.048402312931387526 8:0.8753776623326166 9:-0.8528247288266961 10:0.6593783978826002
+1.1915739133607073 1:-0.7840827624854878 2:-0.4860418508208426 3:-0.7418773161179972 4:0.129874781837924 5:-0.22631682294184796 6:0.47794208013755024 7:0.5532183426143056 8:0.11879859459306741 9:0.09927630694484524 10:-0.19268618891399636
+2.156192215438919 1:0.44325986644475646 2:-0.6057278708888592 3:0.3943381582091723 4:0.6560336238050575 5:-0.9651308100517204 6:-0.2358219003943678 7:-0.04143043460232465 8:0.8623951169233035 9:-0.4933545255502605 10:0.8990427200454263
+-1.1009750789589774 1:-0.4515707618788496 2:-0.745936099912899 3:0.41307003181926794 4:0.6127760492402428 5:0.9250878169732681 6:-0.6778628527469126 7:0.42794190420905753 8:0.4943969797578971 9:0.7762709104958854 10:-0.6932349268610041
+10.04434496594037 1:-0.0995467494040092 2:-0.7766769414838959 3:-0.6608009972582911 4:0.7075788021090594 5:0.5208396359138381 6:-0.09724033794207299 7:-0.743087245352148 8:0.765372791789753 9:0.3788699859744704 10:-0.6898257995055466
+8.038039859115667 1:-0.5321510657026671 2:0.5571925538006008 3:0.747268102801854 4:0.09079641165917596 5:0.25861122989509266 6:-0.9948187479498878 7:-0.9665136866462685 8:-0.3904629432867681 9:-0.9975425877998279 10:0.32024289816988416
+5.14371929922303 1:-0.4829199170694627 2:-0.5713285263827719 3:-0.43889652467111184 4:0.18478247261988967 5:-0.27374063120041225 6:-0.8069125377696931 7:-0.15497746743367058 8:0.32448521325998714 9:-0.39397735035206227 10:0.08184957956614292
+-1.6848276484379352 1:-0.39250489761445895 2:0.02730338852529557 3:0.9916055514435305 4:-0.07571433435055064 5:0.19024527726403728 6:0.6385182319185971 7:0.32480605537471297 8:0.5807543325220577 9:-0.35642510103381153 10:-0.9060482769392468
+-11.640549677888826 1:0.03707410390488852 2:0.2527049166981137 3:0.4114872952854447 4:-0.8508977901757795 5:-0.42791544663481895 6:-0.9864047295390463 7:0.6023685964407528 8:0.12018443688097036 9:-0.36816249877130414 10:-0.9583147535652901
+11.672104494601319 1:-0.2416258355340175 2:0.6737553249072334 3:0.9041602191361382 4:-0.2123232797997281 5:-0.008255188002961988 6:-0.5151894064136904 7:-0.7341877977528246 8:0.624625272218277 9:-0.6261434804192929 10:-0.5710586715741532
+-2.2960192492344627 1:-0.7457768645184579 2:-0.5954998103421847 3:0.5428846769211537 4:-0.6176587961491775 5:0.46222150678166574 6:0.7852238239427731 7:-0.3614580530629148 8:-0.325840253127059 9:-0.5660596710348922 10:-0.8060263366626401
+5.428302298615722 1:0.8774286357993033 2:-0.23110126319781088 3:0.6264134914476072 4:-0.143015582616014 5:0.350109539755298 6:-0.147747167834422 7:0.05020570422182824 8:-0.5469605849960337 9:0.951112567977048 10:-0.34800121380288185
+-17.32672073267595 1:0.31374599099683476 2:-0.36270498808879115 3:0.7456203273799138 4:0.046239858938568856 5:-0.030136501929084014 6:-0.06596637210739509 7:-0.46829487815816484 8:-0.2054839116368734 9:-0.7006480295111763 10:-0.6886047709544985
+7.314490512652487 1:0.8745354279105222 2:-0.9270067504840309 3:0.965218170323435 4:0.12808957052353698 5:-0.5309399625085234 6:-0.5968520990090951 7:-0.667403236513185 8:0.08154410986660832 9:0.33025488397543934 10:0.03406708067839537
+4.687373993408297 1:0.6731426721418288 2:-0.7111023070261273 3:-0.9849054116048603 4:-0.12831346258317322 5:-0.04095946352836921 6:0.6967001556166801 7:0.8479895229743999 8:-0.35600791972899404 9:0.5005979045264868 10:0.6421341979636503
+-6.82923852156868 1:-0.04849233571020073 2:-0.8505855619911602 3:0.2927180954190314 4:0.5780268040086791 5:-0.22322207765417268 6:-0.8436513934568071 7:-0.3906240514635124 8:0.7258714963093444 9:-0.21695043530813085 10:0.8049335285918169
+-8.24622879369294 1:0.12154833675098842 2:-0.26446415445316673 3:-0.06653791221669247 4:-0.7920694887292259 5:0.6128791496627621 6:-0.6927179137980173 7:-0.24584418172709932 8:0.3557416365779935 9:0.22868636757755234 10:-0.8288196322549064
+-5.090863544403131 1:-0.1535668648046895 2:-0.59868738365189 3:-0.8822518703008675 4:-0.1790505106198006 5:0.9467581256591948 6:-0.0661313762905984 7:0.31263046332923694 8:-0.03628894224569357 9:0.8969599435828515 10:-0.05386674051170348
+-15.780685032623301 1:-0.2568492063716883 2:0.7740976197426315 3:-0.7829158104387535 4:0.8578846037465748 5:-0.6111039318672586 6:-0.26939268282639306 7:0.3659136640533909 8:-0.8205938562638555 9:-0.24945505706767923 10:-0.935948184861368
+-3.9916779937384743 1:0.22925954469403154 2:0.18159238246979537 3:0.05607027262862396 4:-0.3376037702047998 5:-0.10630000583678934 6:-0.7148277241201622 7:-0.08327294541727137 8:0.6532439360618307 9:0.34352364313237294 10:-0.21028242388807916
+8.798748248458631 1:0.509058184822212 2:-0.17532831457577935 3:-0.6387880909085213 4:-0.966194650702529 5:0.7829797328120436 6:0.5470735549914605 7:-0.38312745239682333 8:-0.8890923931840893 9:0.6823342859396513 10:0.9231260597729121
+14.341273640964873 1:0.6996156678090684 2:0.8612833977834464 3:0.9131301694042417 4:0.5199385192744859 5:-0.32605907950755086 6:-0.9816465962348846 7:-0.5939885763232406 8:-0.7730924566676425 9:0.6367821449954114 10:0.10873812383881054
+9.75855501262469 1:0.2933324921347933 2:-0.4652534314332506 3:-0.2940640558090537 4:0.9883453215038367 5:-0.042460731786114314 6:-0.15438550895912062 7:-0.11182397625560592 8:0.7425954283250873 9:0.5063859049644963 10:0.3012211854180429
+7.695200921242407 1:0.3554353390157281 2:0.08707592690448718 3:-0.10815435665633877 4:0.05524046679762784 5:0.8000157491787581 6:0.3756193347272323 7:-0.18659830666742527 8:-0.08168623764933125 9:-0.2551379303720174 10:0.8560030587463281
+26.903524792043335 1:-0.4672678144441864 2:0.868381965588082 3:-0.04748335609643428 4:-0.0908285508827269 5:-0.22436865911994275 6:-0.953965287326564 7:0.40644848732968164 8:-0.33391575325981115 9:0.008337907338700212 10:-0.45597904754961416
+9.87318781117539 1:0.7310287890171705 2:-0.38300115058116324 3:0.5492682498036086 4:0.552016070316655 5:0.3715022458396897 6:-0.3888040017277252 7:0.21348231125683648 8:0.23219558685722874 9:-0.6271161253492459 10:-0.009137052604519136
+7.6930514050666625 1:0.48603550488592284 2:-0.9218820771919889 3:0.17678612698428053 4:0.5110501870908806 5:0.5817010201164554 6:0.4488707800038747 7:0.4977618637956498 8:0.1683214570038094 9:0.17237242672259323 10:-0.5276084644007359
+3.155413914311745 1:0.04582517188512947 2:-0.9060800653779759 3:0.049786270132956556 4:-0.4236784487542993 5:0.6293910028372613 6:-0.7370237043436467 7:-0.4599678991281728 8:0.5317111095323057 9:0.0029525239228334055 10:0.9294876800738165
+-10.18815737519111 1:-0.9023553189306839 2:0.08434165073970856 3:0.7163931103395633 4:0.41749986495957914 5:-0.8190972970472759 6:-0.9996126872234177 7:0.1779075727741255 8:0.18212754689351862 9:0.24628508239298963 10:0.667589863190412
+18.585731475373457 1:-0.8399129036462931 2:-0.10024819268489127 3:-0.5011350892733817 4:-0.7299256348863585 5:-0.9412022985072928 6:-0.245064895931544 7:-0.1032512650854267 8:0.9943863256441088 9:-0.6429371028855466 10:0.062299742931960056
+8.998359297106072 1:-0.16850226855111905 2:0.7262839202089402 3:-0.04876255055071854 4:0.8948164957242868 5:-0.10720585418953132 6:0.2622719447841948 7:0.26433837506661373 8:-0.5143449147399106 9:0.17444585508955002 10:-0.813182163328944
+13.032424230011074 1:0.4014766166181287 2:-0.1710502754125871 3:-0.309850483152607 4:0.255642456909988 5:0.9949117714165621 6:0.12553772251510864 7:0.6412602805648968 8:-0.6225679446416825 9:-0.15867011477056936 10:-0.4970695349036196
+-6.931030745953174 1:0.5151452174260762 2:0.10077064818539072 3:0.9515221270405545 4:-0.21551878535257907 5:0.29152528087481366 6:-0.10995497026133605 7:-0.7872786530801681 8:0.9909149980139627 9:-0.6044617953251021 10:0.4135285912642448
+15.538062451207367 1:-0.493569696351595 2:0.7280914440594639 3:-0.5399160539735497 4:0.5688018985826291 5:0.8296550361854862 6:-0.3519274619833537 7:-0.5536583684230114 8:-0.9648774930921231 9:-0.2649670832738824 10:-0.2337289004188019
+9.499729032920945 1:0.22017490770298553 2:0.7693082799289328 3:-0.7645745307823122 4:-0.4243400515554365 5:-0.7065281515163817 6:-0.9488470141298047 7:-0.7888781431404843 8:-0.38027758953310964 9:0.11329243985448345 10:-0.5636550498916204
+-0.6039115764951412 1:0.3128791250125589 2:0.4690308315665288 3:-0.9819748103687955 4:0.28931283693913223 5:-0.6283983933456656 6:-0.10795935596621975 7:0.7785831799196448 8:0.4453768248295542 9:0.4055410615499917 10:-0.581108383985806
+9.682301463907875 1:0.5039970331368235 2:-0.008965105921562966 3:-0.5415225380115054 4:0.4677111860370293 5:-0.3854089758945243 6:-0.8468317339287676 7:-0.29258253017713587 8:0.7361173598968789 9:0.5722561668394952 10:0.8524030171340933
+-2.8752191903928064 1:-0.45407356732165205 2:0.6563221064539377 3:-0.8938366926767671 4:0.6028173420234533 5:0.6792881349943096 6:-0.6295604812779405 7:-0.21641416912497213 8:-0.8703620515028858 9:-0.3397362922228042 10:-0.0966947467107604
+-28.046018037776633 1:0.9493308195854675 2:0.3285214661535252 3:0.749300278016316 4:-0.006681618268088219 5:0.2936055273341429 6:0.0044706790416966236 7:0.5006172205470896 8:0.38751814960349473 9:0.6069735922707928 10:-0.794612882855285
+2.8752582614589373 1:-0.9443232811926943 2:0.3153126492983107 3:0.6423843271417344 4:-0.09528333043829118 5:-0.2318773828230698 6:0.32597909562645766 7:0.42808555740416065 8:0.2895959316734451 9:-0.5106491076955746 10:-0.2917418155655722
+-2.203945173593806 1:-0.13844025039418084 2:-0.024638102806725293 3:0.6114514176076162 4:-0.6939316676972749 5:-0.036549673716341324 6:0.0942395290460385 7:0.7943411369475493 8:0.7025693796408046 9:-0.21822635487138853 10:-0.6045250179827362
+-5.070655299509993 1:-0.8035156105848074 2:-0.5344928236067734 3:0.6105404604447127 4:-0.7538635525543969 5:0.9836765037886612 6:-0.5700253195942724 7:0.9232380985458313 8:-0.26374377078100464 9:0.9079431466301682 10:0.8404281771949533
+-2.540181413836895 1:0.220453181647285 2:-0.03105792440486077 3:-0.17131282366411926 4:-0.41800060634660485 5:-0.1477564564540963 6:0.055537469469941536 7:-0.8092076926316594 8:-0.29815112444525727 9:-0.20030580647762464 10:0.337865838755971
+19.341342586351033 1:-0.32052868280788616 2:0.954507993011956 3:0.38642226954792824 4:0.9240442034609888 5:-0.46077559741256824 6:-0.7991393493824104 7:0.9396232321156679 8:-0.2486930151964184 9:-0.6256485833035617 10:0.14861843824730103
+0.31398559122529757 1:-0.4684215762946897 2:0.07873308388585198 3:-0.3589594894052015 4:0.14284662079329458 5:-0.8936272055527841 6:0.5647217242826741 7:0.49613233215723507 8:-0.501698787526992 9:-0.46710107378968724 10:0.898517179577361
+12.243117462926584 1:-0.8147610562690222 2:0.21104006948075482 3:0.42405323019132957 4:-0.667965573810795 5:-0.267026607469405 6:0.7949752815579358 7:-0.07664414977654532 8:-0.6023087644686556 9:-0.659375887511856 10:0.459735946423397
+-4.623091296763939 1:0.08405646515942733 2:-0.40009448092691446 3:-0.39831245310544094 4:0.8794137836499942 5:-0.04788565812369017 6:-0.06763019434549333 7:0.41324877265674065 8:0.39746868847324146 9:-0.986729367280818 10:0.7001677710291752
+-5.782162271139417 1:0.29127970805530157 2:0.6712715787317827 3:0.27575757044478477 4:0.31525054647682804 5:0.6905016168465983 6:-0.5195319089267731 7:-0.06598129860341295 8:-0.5142554034519407 9:-0.11546331150946942 10:-0.2934524891698944
+-9.892155927826222 1:-0.7048583334456604 2:-0.46598491327111247 3:-0.02034722477413209 4:-0.663294196316965 5:0.4485329128582778 6:0.4553619594861118 7:0.7154814909138205 8:0.7532937661147989 9:0.020693077287389894 10:-0.23131986644633207
+0.5422764698408844 1:-0.1513298744027669 2:-0.4591544337339648 3:-0.7192219559850723 4:0.21236658135317632 5:0.12050445497328166 6:-0.42411528242712127 7:-0.15103925528861595 8:0.9032115729799512 9:-0.9228817525021624 10:0.2604090001033641
+4.187800872274017 1:0.3084355607627949 2:0.7029638272178733 3:0.34098344122299573 4:-0.5818421369891376 5:0.1332356708082485 6:0.22671316744441716 7:-0.6750469752494854 8:-0.4065302428716193 9:-0.48213803977370073 10:0.17918596677210186
+4.487701812297124 1:0.8352061350259052 2:0.2757393215770836 3:0.9310504392364667 4:0.519503546762708 5:0.5270245209143005 6:-0.08181154800488488 7:0.5148324302455536 8:-0.6680946101511949 9:0.7574060703813035 10:-0.4721334895419935
+-5.150140984417682 1:0.8113709439821006 2:0.21939305063309278 3:0.02109986546311826 4:0.07450107676582762 5:0.723883853128624 6:0.5392035186380486 7:-0.1382740221237464 8:0.9990201540159807 9:0.10429329766137108 10:-0.1365266408862309
+-6.544633229269576 1:-0.08278037549320039 2:0.6982730989138761 3:0.9090685953368327 4:0.6754092061339365 5:0.5889199822482736 6:0.020678619551471433 7:0.47605785660672084 8:-0.49926771127869873 9:-0.28380077002944093 10:0.5282319276258469
+7.216836352055753 1:-0.8510680074642156 2:0.42611818262128476 3:0.593607821624947 4:0.5635067468583634 5:0.2121930523769171 6:0.2708063180622071 7:-0.31491113345871735 8:0.005990053407278095 9:0.8985259402559085 10:-0.44549339042232794
+20.874246167942125 1:-0.53010692413621 2:-0.9897084749945524 3:-0.9083978261828305 4:-0.15581655583739495 5:0.9974035542095165 6:0.9894717992956665 7:-0.7287287537245402 8:0.06425127137526943 9:-0.06684164745938337 10:-0.3600621883071937
+-6.556192430758147 1:-0.7655958349167471 2:-0.08083170734199419 3:-0.8540636958251198 4:-0.09994429443696973 5:0.1734809016500265 6:-0.29563180244063325 7:0.2158497607364409 8:-0.6071644305523003 9:0.8063426715403785 10:0.47092299197899345
+7.252748885335252 1:-0.36403312429467216 2:0.1237451136826817 3:-0.5756427605741237 4:0.7612833636750866 5:0.9350628314096134 6:-0.012087843264624754 7:-0.03742573515965031 8:-0.05210460803183037 9:-0.5333214800203341 10:-0.013320030179712505
+-9.2679651250406 1:-0.5057250557539077 2:-0.41655319851679495 3:0.1897431234740683 4:-0.038318717640150046 5:0.9136495575471062 6:-0.8890525036858237 7:0.40859501498633377 8:-0.8746985847539293 9:-0.005836984002720369 10:0.7838036026237987
+-15.732088272239245 1:-0.8546867577633044 2:-0.3003980324850013 3:0.49649883896876834 4:0.710496747220617 5:0.5848510480601048 6:0.5714826756665468 7:0.5487975165953451 8:0.5654333402837335 9:0.863539315599626 10:-0.9699410102494574
+-0.20412431312519014 1:0.13323548063028934 2:-0.3030177580658542 3:-0.6358920925969869 4:0.3729380701923921 5:-0.8131818118430312 6:0.11567152703716288 7:-0.3645508535812394 8:-0.5487213252460876 9:0.5605886387366952 10:-0.8400308993051686
+10.445759684895373 1:-0.92707280355555 2:-0.9307772570299944 3:-0.11971873660640964 4:0.5140245291069254 5:0.5751145648836897 6:-0.43850910073502347 7:-0.7872208869913662 8:-0.3087975452145404 9:-0.4645849758749403 10:-0.1563641826381328
+3.349708377102383 1:-0.6334394121009499 2:-0.9008086683014112 3:-0.2678892493467009 4:0.7626514243443427 5:0.6406493676995701 6:0.3669245573649391 7:-0.052050629941784665 8:0.6713394117904852 9:-0.11458974566378233 10:-0.25949626043219576
+-23.487440120936512 1:-0.5195354431261132 2:0.8080357948412571 3:0.8498613208566037 4:0.044766977500795946 5:-0.9031972948753286 6:0.284006053218262 7:0.9640004956647206 8:-0.04090127960289358 9:0.44190479952918427 10:-0.7359820144913463
+-11.827072996392571 1:0.9409739656166973 2:0.17053032210347996 3:-0.5735271206214345 4:0.2713064952443933 5:-0.11725988807909005 6:0.34413389399753047 7:-0.2987734110474076 8:-0.5436538528015331 9:-0.06578668798680076 10:0.7901644743575837
+-3.650649176738987 1:0.9665344025238449 2:0.1395514751689353 3:0.954697162791015 4:0.2093601878355722 5:-0.42841737775246336 6:-0.02877209657213764 7:-0.8382526163632971 8:-0.03773878779258388 9:-0.3751775119106411 10:0.6477987464528951
+0.21915863046310957 1:0.25143109618049353 2:-0.06463696557011112 3:-0.3324862332340037 4:-0.7184623449423757 5:-0.8897217937178385 6:-0.7336278194091297 7:0.8547631637534296 8:-0.7582613025929346 9:0.9080481791309838 10:0.9427850135311773
+4.813247597584681 1:-0.4564689661727537 2:-0.4315414033069003 3:0.09676404446694242 4:0.6024645727173434 5:0.20466090997530606 6:-0.09432916868838737 7:0.6402934161890248 8:0.741842551426011 9:-0.343937669190693 10:0.308871619426873
+-3.0700825038127206 1:0.660084046469162 2:-0.02317305725931229 3:0.7567569356692221 4:0.2528834502236612 5:-0.3935091635208898 6:-0.9965507922509653 7:0.9065754202428946 8:0.6988037588300844 9:0.05145737657924321 10:0.4697377584426863
+9.762542323725354 1:-0.036129448543738896 2:-0.8252508992030534 3:-0.752854859129851 4:-0.9027424488033049 5:-0.4763092428375775 6:0.4832492121777574 7:-0.2935697977919014 8:-0.9197908986231211 9:0.8914359296658816 10:0.8688484670974876
+6.690913813146277 1:-0.7649833946109403 2:0.0419327356721928 3:0.5420954694310764 4:-0.7373259510045522 5:-0.9187577877864708 6:0.6431180783847401 7:-0.6272529754533058 8:-0.43356486537110106 9:0.16848266440424364 10:0.3129700315745716
+21.325049167466855 1:-0.36392795201361383 2:0.846518905511275 3:-0.26361421923150097 4:0.5140384860444887 5:-0.9147771624497878 6:-0.22044646197773576 7:0.14099760779666948 8:-0.546631395802236 9:-0.4345465263406878 10:-0.2759894364167672
+0.41237529640734055 1:0.05016964684797287 2:0.21708512805176072 3:-0.9444942733586354 4:-0.6118772896807114 5:-0.18053631846913665 6:-0.06752556529755416 7:-0.0081819952134361 8:-0.7774039956687315 9:-0.5548994336153177 10:0.7510833121912588
+-15.056482974542433 1:0.6012054064354875 2:-0.6127014811673221 3:-0.8356741843949218 4:0.19830469052767397 5:-0.07726493085289698 6:-0.5756891943805014 7:-0.49010583357941884 8:0.7493759119974515 9:-0.7828994218436376 10:0.6154265137741459
+-2.109441044710089 1:-0.5757976103755722 2:0.3686657403505862 3:0.5418762444017706 4:-0.5896052565388463 5:-0.1000712585735879 6:-0.8114188394866342 7:-0.5863884932327266 8:0.28289838755838015 9:0.5378646921099333 10:0.5063780890366179
+-5.249715067336168 1:0.6828022788286754 2:0.9044668986252975 3:-0.6010464361571437 4:0.8416122052398811 5:-0.9846446498408039 6:-0.3777762313579811 7:0.5763775880953983 8:-0.07608009385213488 9:-0.29576023599575474 10:0.8845728751981716
+6.907770824878343 1:-0.9751352215365647 2:-0.8297271715190588 3:-0.07240311280415779 4:0.4796310183582191 5:0.358213469979769 6:0.4628020211207058 7:-0.9753405605972942 8:-0.765583403709019 9:0.5623611232648877 10:-0.14725965272406616
+-9.299021854126096 1:0.8784076266914045 2:-0.8314918563417382 3:0.8701529449600536 4:-0.8070129727442199 5:0.07396877198841345 6:0.0040889707225901795 7:0.40529205456687145 8:0.6412485325027342 9:0.1443450351498905 10:0.404997568726581
+10.95643670126225 1:-0.37321642594676097 2:0.47766490569544473 3:0.9201313123144423 4:-0.649393433578801 5:-0.9084894063674787 6:-0.2547160991750408 7:0.7674649994523459 8:0.646056370118979 9:0.6014100713287893 10:-0.15130291862509182
+-2.6397202393123336 1:0.3285252466844373 2:-0.2714281159811125 3:-0.5869561846815805 4:-0.5643935541712441 5:-0.7285201267315389 6:0.6502951074428092 7:0.8611880383193904 8:0.6380425291162128 9:0.5118538704085516 10:0.4012684110865874
+12.521131042032012 1:0.4843931319727355 2:0.06440853455169626 3:-0.6151259240105509 4:-0.4180928328467284 5:-0.4607061773323424 6:0.8104775289268906 7:0.3284199695768064 8:0.8425028998495565 9:-0.34822319854822825 10:0.1969239149176112
+-16.151349351277112 1:0.7787909191620395 2:-0.5219981442072688 3:-0.7230569918898555 4:-0.05707801168212101 5:-0.8134225824740247 6:0.09950684183685454 7:0.6261274830059296 8:-0.9502006765164366 9:-0.6724983095526844 10:-0.600347212281825
+-5.039628433467326 1:0.7680701397575322 2:0.7956844224408437 3:0.4131717201035916 4:-0.3127895385265915 5:0.7226571953995224 6:-0.06845863083031967 7:-0.1007291660029832 8:-0.9130249132342207 9:-0.4605180615474036 10:0.42093879298156
+9.007596502870785 1:-0.6562175566238462 2:0.08420074013217049 3:0.589801949672486 4:-0.11964901133703987 5:-0.8145711913860048 6:0.43854302140351065 7:0.5992967124729605 8:0.253745043289755 9:-0.10742030998120033 10:-0.5993228348160153
+-12.41094640284016 1:0.31035917086763765 2:-0.8053417167237813 3:0.5754655536186164 4:-0.3645388095106201 5:-0.9135176753316416 6:-0.8690739610562535 7:-0.14039224825138197 8:-0.7112835675593987 9:0.25762942117230825 10:-0.9483300117501923
+-12.130353212287929 1:-0.41404309625298485 2:-0.7695984204591535 3:-0.44569447239245275 4:-0.3168863099965644 5:-0.26669244730409036 6:-0.33484042698895755 7:-0.41062396946367685 8:-0.09075804785640385 9:0.8511367190902208 10:0.021918606255194595
+-15.375857723312297 1:-0.9794952880997945 2:-0.9547237660069134 3:0.2460912345929791 4:0.3174335823329406 5:-0.23758562926743054 6:-0.113610303129287 7:0.18292675847568063 8:-0.9656446754474337 9:-0.58300134324846 10:-0.6689602908128025
+-6.397510534969392 1:0.440780662587545 2:-0.03737991637410243 3:0.9506435891605849 4:0.8177486462589998 5:-0.2917628929963241 6:0.42365289098031034 7:-0.4280555544979745 8:-0.18388426864865903 9:0.5057230088452542 10:-0.1699163749308643
+-9.789294452221961 1:-0.25066699970459694 2:0.1772977344415987 3:0.5913498268900952 4:0.6293756431864681 5:-0.6430441015863757 6:-0.7238519180293621 7:0.13639541626580498 8:-0.6620281401715837 9:-0.9515237061912034 10:-0.4333426289849791
+-13.15333560636553 1:0.3491978525665129 2:-0.4067353159374012 3:-0.8677040612253524 4:-0.5757086910974862 5:-0.3186886816681207 6:-0.06609938943414573 7:-0.5419747642754873 8:0.9632759660044383 9:0.2673520823110991 10:0.36463236596724546
+2.2307697392937795 1:0.12285527276472785 2:0.8938323722714365 3:-0.16995870341610209 4:-0.3298643049714254 5:0.16781582791954253 6:0.42381594687105895 7:0.9245288214717629 8:-0.08709025093361689 9:-0.14227085487682722 10:-0.2888302862659746
+5.892885365508635 1:0.10116053019915738 2:-0.41641547074900154 3:-0.3750004290914961 4:-0.5619470211369917 5:0.33343039544460384 6:0.46766042657994733 7:-0.6937940929321615 8:0.7044604392055189 9:0.8879353764416567 10:-0.5490902425042639
+-16.692207021311106 1:0.9117919458569854 2:0.628599902089868 3:-0.29426892743208954 4:-0.7936280881977256 5:0.8429787263741186 6:0.7932494418330283 7:0.31956207523432667 8:0.9890773145202636 9:-0.7936494627564858 10:0.9917688731048739
+10.454641756541454 1:0.3490213088098768 2:0.6103387992494194 3:0.6753935651135747 4:-0.39560763769937934 5:-0.3042308221531884 6:-0.9283481899557042 7:-0.7904038212853011 8:0.37488335848537346 9:-0.296477977723397 10:0.30894819444660304
+0.08978797103855778 1:-0.13445409764877803 2:-0.6404150831493631 3:-0.24740260669490133 4:0.031151119464385646 5:0.9207882173498612 6:-0.6146471129497393 7:-0.9736175690408087 8:-0.2673180325645341 9:0.5800384183301572 10:0.479811220263183
+1.7362099941626894 1:0.5171681395917551 2:0.6177735922313075 3:0.6446678302226738 4:-0.5731769722311459 5:-0.2686270617709168 6:-0.6048534221658814 7:0.7002124303669326 8:-0.1479765297345712 9:0.009254061109394307 10:-0.31519081920853287
+-1.0349488340235453 1:0.612980711993536 2:0.05771318707554962 3:-0.10821368362160744 4:-0.8755559420458141 5:0.42566546089913326 6:-0.7966341558699277 7:-0.45253617234374466 8:-0.8289517557653971 9:-0.8968075137250837 10:-0.6325457096866376
+0.10157453780074743 1:0.9143592240573388 2:0.06419631741815457 3:-0.9961326744227916 4:-0.47174548800139715 5:-0.0821464027819967 6:-0.5495006555498168 7:-0.5627911401420294 8:-0.43426056724099005 9:0.892026786364895 10:-0.23546485121284055
+-12.92222310337042 1:0.218687524173371 2:0.013626751799176162 3:-0.8372219908323961 4:0.6197296846266354 5:0.7429130827811232 6:0.48009972886541896 7:-0.35667717521227904 8:0.18337067878780533 9:-0.22935396092245197 10:0.4076715024284059
+22.923352376063196 1:-0.7522075505725567 2:-0.20686029838909326 3:-0.1386664769095396 4:0.157117595808127 5:0.9462377653889174 6:0.9182504509330662 7:0.18170057747293833 8:0.27735387813088863 9:-0.6355799944714868 10:0.9764849106195284
+-6.132450015997121 1:0.2822534275343054 2:0.2625905791399692 3:-0.02565260641304068 4:0.4891221076432757 5:-0.6426178913585772 6:-0.8999539149461033 7:0.12659507663825287 8:0.5889572439755832 9:0.49107548332672857 10:0.47595749470452
+-9.173693798406978 1:0.4430245286298278 2:0.9923116639471541 3:-0.5609082824097824 4:-0.36542266258313916 5:-0.5814039716882617 6:0.20413852042674874 7:0.6097541611931963 8:0.5743002479324253 9:0.4735459963431561 10:-0.053969823043886755
+-5.814408490931223 1:-0.9206287328000513 2:-0.48298486023273157 3:-0.8477202041890262 4:0.5801385102362351 5:0.7146074564553095 6:-0.5987672678579339 7:0.6829077928212723 8:-0.3522788540815065 9:0.7729595638821951 10:0.5264904880591215
+6.474329501040298 1:0.6914309300550991 2:-0.4507700505202725 3:0.713821440501512 4:0.41599059910235847 5:0.507160951750409 6:0.8635615811574222 7:-0.6235518270244333 8:-0.5336201820384283 9:-0.7989630679361768 10:0.837293162455248
+6.984517471584806 1:0.16745919469723392 2:0.018033079961716103 3:-0.7339201095541323 4:0.17042828693740697 5:0.4493471632580528 6:-0.8938445962323078 7:-0.3206968104792325 8:-0.616617071238893 9:0.9327878222034172 10:-0.6575294247048245
+-12.479280211451497 1:0.9769767754725367 2:0.7706430863248943 3:-0.4506244622476816 4:0.12921761745628713 5:-0.0697804449658812 6:-0.7702703569987461 7:0.017734558413919688 8:0.7216294158911261 9:0.42547357862241886 10:-0.9001915116155741
+2.8363866587728186 1:0.11478724114928918 2:-0.4679790550082039 3:0.2344912687736711 4:0.5524878060045462 5:0.5252859884051309 6:0.5080674087215156 7:0.5010449021825665 8:0.048046765816400105 9:0.06654581719548891 10:-0.5801934713347348
+4.186809777233374 1:-0.02335342201396018 2:0.9035437912091193 3:-0.9283585631882163 4:0.454351316397237 5:-0.6948564428085262 6:0.11495485234890368 7:-0.23683956078769963 8:0.6442534752881419 9:-0.013866407845647188 10:0.23369602940650736
+2.8235031660626415 1:0.5609344938188046 2:0.3449103464885612 3:0.03972169049525687 4:0.31858762565827137 5:0.4409953589124853 6:0.22836189275697016 7:-0.1497811991899889 8:-0.23248048920679265 9:-0.30066618281100177 10:-0.9247232456911632
+6.96223432848425 1:-0.8160398553437558 2:-0.8212180893749699 3:0.7728655115832999 4:0.02387973088796369 5:-0.043499804905828166 6:-0.6997726250046865 7:-0.8686633773265577 8:-0.12597318402253976 9:0.967018116368416 10:0.5951339624149812
+4.669684795838683 1:-0.32226903644852833 2:0.5465858078942492 3:0.5228467793266189 4:-0.013157722224545143 5:0.5810668818928995 6:-0.1372653090293532 7:0.6446157527288279 8:-0.06005754873230629 9:0.014302180040152379 10:0.43474245441042636
+16.112744845653285 1:0.37257742858083365 2:0.19398954512844124 3:-0.11860882189887478 4:0.6492510749703395 5:-0.41273736981203313 6:0.18643017041815835 7:0.29136917186214384 8:0.47602883023389 9:0.7126916980867937 10:0.48462508659691483
+-9.196003366226202 1:-0.7263358951920722 2:-0.8503799288093836 3:-0.3120563620589105 4:0.3925562655164563 5:0.027666662972283484 6:-0.35173134138805406 7:-0.32703527910354757 8:0.3060102722285065 9:0.8609161725740202 10:0.33394557004432923
+1.242972458167591 1:-0.9029238804456814 2:-0.6392681059531908 3:0.8940879647942577 4:-0.8807357173896475 5:-0.13628130467470512 6:-0.5487534785116224 7:-0.40270307148061346 8:0.09152108686997096 9:-0.20745066734844642 10:-0.20624830574384978
+3.453659210660726 1:0.2710596844435682 2:0.6510497900145247 3:-0.2899158136103117 4:-0.13531811694554707 5:0.6965847786422426 6:0.9105343028780231 7:-0.007340232468413754 8:0.7672537187738411 9:0.3538906829188173 10:0.35387524540947646
+-0.48115211266405217 1:-0.17943755364759517 2:-0.1384979591151625 3:0.8425773648797268 4:-0.43234064993405097 5:0.919754442523921 6:0.8390197802990036 7:0.43890653121452683 8:-0.7647648217789051 9:0.14770258954363835 10:-0.6681813635676657
+6.965069440749298 1:-0.9158261471030473 2:0.5228494114644282 3:-0.07760531122743153 4:0.6154296244963067 5:0.5231830145381096 6:0.4892535590799165 7:0.1987053183082137 8:0.9995670294711712 9:-0.2020375688074112 10:-0.7853579334836087
+-1.6896486293598596 1:0.4638529147853421 2:0.0953805943546191 3:0.8506904243225251 4:-0.028262644692445438 5:-0.9462342015500664 6:-0.6934738957112123 7:0.601125018257533 8:-0.04871041957758315 9:-0.015245062056267411 10:0.6119856200040805
+-1.763729644326212 1:0.5376618752928528 2:0.8062119856717131 3:0.44996834959923593 4:0.9917728248530817 5:0.5974717482179492 6:-0.406972851600659 7:-0.8523198502065281 8:-0.3076377139692321 9:0.9099974915864462 10:-0.43374966692373484
+9.012829566937228 1:0.6885456531832366 2:-0.0631164354373237 3:0.8394182300770314 4:0.7207913383891218 5:0.4715324450375691 6:-0.34417503908167757 7:-0.31448279255342126 8:-0.020591617987411936 9:-0.37668573574418107 10:-0.6528048324896532
+-15.951512565794573 1:-0.6112828771933607 2:0.4867007149846869 3:0.863494046941478 4:-0.7292072742454481 5:0.6338749652624007 6:0.5980798993978542 7:-0.5119002889878654 8:0.8394383182101366 9:-0.1412423080445726 10:-0.15838730884968655
+-0.29622788243318465 1:-0.9436253326661384 2:0.2907259958032098 3:-0.1530538226933904 4:-0.6174176535420375 5:0.8209632215649141 6:0.5060548803172731 7:0.8212448453211292 8:0.33506684706740386 9:-0.5408309869188785 10:-0.8105966349150977
+-7.683213587039055 1:0.2525015766703558 2:0.6417869320191234 3:-0.7569571597336913 4:0.5265130776924394 5:-0.03992944660560949 6:0.18292946303778823 7:0.4286344960738724 8:0.9158523573288766 9:0.5039796366711773 10:0.27660486075533797
+3.9061298856792797 1:-0.6501789225392032 2:-0.6040685518173872 3:-0.6448094322678659 4:-0.2019498832769746 5:-0.5302977370883424 6:-0.010754341856880067 7:0.8791702222974846 8:-0.2283571791337704 9:0.4726320486679656 10:0.3413255179758332
+12.928385148211825 1:0.7793178379505685 2:-0.5207562047491976 3:0.37253320760898934 4:0.7540757518052998 5:-0.679378421540417 6:-0.11966022036636881 7:-0.4317798870297489 8:-0.004211291952602059 9:0.39024653887361693 10:0.45391057946097146
+5.787566514603203 1:-0.20596730554338039 2:-0.8840796727164746 3:-0.749416279057892 4:-0.5511023306046077 5:0.9941631901218697 6:-0.09907966722992234 7:0.701617914811792 8:0.9696055014561289 9:-0.7083648075748707 10:0.5781111533720358
+5.701262468657861 1:-0.7066995012593675 2:-0.6756815056791965 3:-0.5720277255842998 4:-0.09218662060241067 5:0.21494136076896653 6:-0.37012884573008153 7:-0.6828277646796448 8:-0.10038134655965236 9:-0.46253754509583356 10:-0.20813933595648115
+0.9473494330088033 1:0.6876806675510589 2:-0.9530860102792402 3:-0.4043172626863887 4:0.6696455505098386 5:0.17863581804857254 6:0.1944646561635497 7:-0.5283662172535679 8:0.4872263841818012 9:-0.2882651789318431 10:-0.06293411605141874
+-2.6834375589185675 1:-0.22376759986120187 2:0.36555755546798885 3:-0.5223502955721961 4:-0.20702347869224624 5:-0.7745351063999764 6:0.22879328233099971 7:-0.5440007473902635 8:-0.6959483071829207 9:-0.131433881760733 10:0.2764225554693165
+-3.2766108642276146 1:0.0304613976530983 2:-0.3148062986719251 3:0.24950420590071953 4:0.7152023826801459 5:0.9656885739650887 6:-0.3210562623763835 7:-0.7305896664502614 8:-0.49074917893875836 9:0.7802670253347352 10:0.8667409958355992
+-1.1838791995691869 1:0.06642047806096318 2:0.5336148776806793 3:-0.6199614859883396 4:-0.15342280723497237 5:0.8407250402808968 6:0.7060811811107444 7:-0.2913182140909305 8:-0.5925203360011633 9:0.22644925021629692 10:0.42395071889002467
+-1.5856680515554806 1:-0.8724712788102853 2:0.11445744032031424 3:0.5483166457680566 4:0.9469521544884028 5:0.2541682828467746 6:-0.436750733871873 7:-0.9001249399695319 8:-0.7555793441458385 9:0.06946992897983018 10:0.9724148045760346
+-13.039928064104615 1:-0.558607026518148 2:-0.7356765018678253 3:-0.7547644426290201 4:-0.24898664843938745 5:-0.3606374046883567 6:0.5836652368902306 7:0.8497678666873467 8:0.21331875915717635 9:0.3558733809635668 10:0.9642603628738968
+-17.428674570939506 1:0.8562209225926345 2:0.7077202100653552 3:0.7449487615498371 4:0.4648122665228682 5:0.20867633509077188 6:0.08516406450475422 7:0.22426604902631664 8:-0.5503074163123833 9:-0.40653248591627533 10:-0.34680731694527833
+13.886853032969585 1:-0.6354915752033683 2:-0.9132338112681755 3:-0.4816479770266455 4:0.5448417181244594 5:-0.6250746297187781 6:0.7410618768880199 7:-0.18029029550083675 8:0.777358236920447 9:0.9625064189449102 10:0.048040935468046
+15.61684729251139 1:0.2980237970192188 2:-0.8160931971814265 3:-0.29649852157138445 4:0.3896688599904572 5:-0.17552110506337826 6:0.8721328328445139 7:0.48984799668438916 8:0.9984496052876473 9:0.9665885195526289 10:0.8966559812150274
+10.33625540376971 1:0.09939495068155724 2:0.9790332181038015 3:0.9483428886275702 4:-0.5717299810793317 5:0.4876405069057712 6:0.163962913892302 7:-0.4095537988924203 8:0.8608269751255508 9:0.010028680058212114 10:0.9095786494455713
+9.706032970113723 1:0.7687898546315146 2:-0.9825109379412285 3:-0.5423211794439926 4:-0.3099509487314134 5:-0.11561305536236333 6:0.9012327035409926 7:0.5257495475790148 8:-0.33804422025989433 9:-0.144428735681567 10:0.28019332199039604
+6.189043888072968 1:0.13246655756059478 2:-0.751192382628302 3:0.2233421456265161 4:-0.1933575076984373 5:0.8681727702736863 6:-0.7656847407654899 7:0.1033145549916572 8:0.33909210370257403 9:-0.22241363302770267 10:-0.14479004187830435
+-8.680225911784335 1:-0.07718769939880432 2:0.6702228057326558 3:0.6647810334933819 4:-0.05115658747070784 5:-0.850780588302118 6:-0.040961453376221924 7:-0.8407690297644956 8:0.33775829053563156 9:-0.45421556034898547 10:0.8238500771967823
+-9.42898793151394 1:0.8925906426831107 2:-0.6771269725125597 3:-0.11635105688280678 4:-0.7266044201050157 5:-0.6902918845825077 6:-0.5911234800910024 7:0.49395074569300657 8:0.43660804414878274 9:0.8736983081269782 10:-0.8001177058312081
+8.486245765579415 1:0.5614295382716652 2:0.3972427851719582 3:-0.276268504977494 4:0.7803448249454739 5:-0.358957923558495 6:0.3477822689529795 7:-0.7944805581842691 8:0.8356932134547437 9:-0.4783293647580624 10:-0.2522633417723845
+-1.8722161156986976 1:0.11831037290857482 2:-0.7309091607574014 3:-0.7339122716951587 4:0.2046641765436359 5:-0.9914679283125301 6:0.13518339528098555 7:-0.9760821540963867 8:-0.6080636193563043 9:0.3890502262427238 10:0.33864957953815145
+0.5122357093733743 1:-0.9555852441641726 2:0.4754771858792488 3:0.3743376249200432 4:-0.2651772997462427 5:-0.7915484529586028 6:-0.7575915279708862 7:-0.10432268807273859 8:0.021604934223709238 9:-0.6458011732912265 10:0.40773716196391674
+-18.845922472898582 1:-0.6031480148285926 2:-0.8736524730197766 3:-0.311456616524979 4:0.420921703897325 5:-0.2904011177124777 6:0.6683252350591937 7:-0.3436202976676894 8:0.5023604359385605 9:-0.33056149241985633 10:0.5168854058825227
+6.492106438811399 1:0.7824832256885428 2:0.6105456307389117 3:-0.0436873997963223 4:0.46730493583332855 5:0.2057529813440686 6:0.5738310686722767 7:0.6307964411259019 8:0.6208424783086652 9:0.8931894299284251 10:0.7164648197763028
+-1.6472226859532182 1:0.8854767145642171 2:-0.8175744681485637 3:-0.14894858038610903 4:0.9667400540136402 5:-0.3575837217508149 6:-0.9211342680517054 7:-0.956785876301889 8:0.6558217028031554 9:0.8014538160668165 10:-0.9475520920917395
+0.185861229793925 1:-0.8181719548530746 2:0.9990094335332504 3:-0.8195848911987829 4:0.6991933015233858 5:0.07295718417836583 6:0.5968996100546737 7:0.4871410306452193 8:0.2980483098540927 9:0.779953293728507 10:-0.7978867112395516
+-5.973450525185694 1:-0.975435413991927 2:-0.7832951303253313 3:0.5098999023442101 4:0.46795978867990007 5:0.2538986807863044 6:-0.8182887550010198 7:0.8335391734637112 8:0.4286082996234335 9:-0.1726765956719154 10:0.7649845978453362
+-12.773226999251197 1:-0.383327656965585 2:-0.9439560491389036 3:0.25039001869622446 4:-0.9342091044843222 5:0.8711023711291135 6:-0.6027135241543655 7:0.9456874780319795 8:-0.243290468946338 9:0.625765915285031 10:0.5160550067618355
+24.290551295953957 1:-0.8368553572749229 2:-0.5859456648150321 3:0.873779532007048 4:0.7462623178738954 5:-0.08133011570245352 6:0.36767541461776676 7:-0.33129619282275047 8:0.6104289727615573 9:0.9416581563055089 10:0.18201841676606856
+14.490247980976621 1:-0.4765937762114507 2:0.16430711839945555 3:-0.526776940706293 4:-0.6802269991653915 5:0.40748236413299344 6:-0.500290139207977 7:-0.31915972151663885 8:-0.4586068416002418 9:-0.15572660263944127 10:-0.32925702602833073
+8.377230871265601 1:0.44141613060964846 2:0.1582267687752743 3:0.8760950367284166 4:0.40434058393690364 5:-0.7063758409891474 6:-0.616055773516162 7:0.996372393127579 8:0.6142084876085476 9:-0.528320587432094 10:-0.2815909691094802
+-3.2987560995836653 1:-0.4600479783378091 2:-0.04201794336103326 3:-0.8934505203905587 4:-0.44991326751905536 5:-0.5220579476363783 6:0.46060949186328703 7:0.9169289030735643 8:-0.022458426893944283 9:0.08100795210565637 10:0.5726732415540354
+0.3422568955736137 1:-0.9888686059817204 2:0.22752298580182706 3:-0.5048696915520232 4:-0.059433420464226616 5:0.7823831512651716 6:0.9865977573980389 7:0.9164100011124972 8:-0.3638554550863984 9:0.3038282907667611 10:0.4652367033461571
+-8.24116881862084 1:0.7565819250331731 2:-0.3733277500524168 3:-0.8841150081071696 4:-0.922282989989148 5:-0.041520813551309876 6:0.8615967014876558 7:0.8474207144091339 8:-0.7518437864641427 9:0.45076605239968837 10:-0.48912984167595375
+-4.367083147104942 1:-0.276459380002813 2:-0.957555271384241 3:-0.3761632810202544 4:-0.3897414804149022 5:-0.3133861519856074 6:0.0777990809172171 7:0.6638552243422928 8:-0.3477312155364247 9:0.5934885465182675 10:-0.5238903641193555
+1.9280240152322783 1:-0.40051093785549696 2:0.5070348672240661 3:0.7506759969575532 4:0.5042104954516786 5:0.9959688260926507 6:0.4657024999761399 7:0.910611131925299 8:0.9836517468598804 9:-0.6263172749113686 10:0.16955852322929155
+8.918138317441574 1:-0.22407391224687023 2:0.5545084933214972 3:0.6335932367683528 4:-0.2786481116648991 5:-0.9549992830441785 6:-0.5577873948545062 7:-0.960657200286197 8:0.3709573488946196 9:-0.9191180485753339 10:0.5033478020271929
+-5.657796797481157 1:0.6359910361030725 2:-0.1742637774815281 3:0.39699327107265137 4:-0.9841991491194473 5:-0.622093571871533 6:-0.5433497301426455 7:-0.6731178481686009 8:0.930615153085582 9:-0.3065877908950827 10:-0.5456093749639228
+8.697079562319692 1:0.4815820396629933 2:0.1173457441514223 3:0.7313645402039386 4:0.3354835387237334 5:-0.10300554535074702 6:0.5116687640761355 7:-0.8850803659104614 8:0.10654026377571157 9:-0.864976708975602 10:0.01345035085413615
+0.033954684723234596 1:0.6703241653088159 2:-0.13447915740201166 3:0.026022550037831937 4:-0.5145659862194116 5:-0.6963587636078901 6:0.652083884947352 7:0.22644722530715278 8:0.2671580129293405 9:0.9659035105360283 10:0.9547989197693989
+7.359108382166921 1:-0.6855762478384229 2:-0.7543318537260015 3:0.4772611975128618 4:-0.5588002332845741 5:-0.24271386844336496 6:-0.28595644325868896 7:0.8732728098501104 8:-0.8026384804471058 9:0.7589508830210041 10:-0.9992933613402135
+4.953597303754355 1:0.8915633023548608 2:0.04688596266450751 3:-0.26866754730613374 4:0.16694236975718102 5:0.23465297255622608 6:0.36488427850844407 7:-0.06717041145276781 8:0.9470029805221898 9:0.32483835237272674 10:-0.7892521260150298
+0.683536559775105 1:-0.32176084249781556 2:0.5446298870866526 3:0.4095848716057642 4:-0.42579711490120187 5:0.4482850543749355 6:-0.0982243826242506 7:-0.9190317048427039 8:0.06234509402976718 9:0.21327512416175054 10:-0.38023673796734525
+-28.571478869743427 1:-0.4597184465402242 2:-0.5489429386926741 3:0.33422914572951634 4:-0.15992695377395516 5:-0.7310003311728188 6:0.18241063863467488 7:-0.48385214010599453 8:0.08139879039334552 9:-0.8401239538877046 10:-0.8896372220209929
+-19.884560774273424 1:0.4619217451285318 2:0.28157115824800005 3:-0.3829811521605375 4:0.5802544015450464 5:0.1117061271473403 6:-0.8926034502584623 7:-0.34862293810401956 8:0.2733254857260612 9:0.6514176550598809 10:-0.02758604919357066
+-17.494200356883344 1:-0.4218585945316018 2:0.15566399304488754 3:-0.164665303422032 4:-0.8579743106885072 5:0.5651453461779163 6:-0.6582935645654426 7:-0.40838717556437576 8:-0.19258926475033356 9:0.9864284520934183 10:0.7156150246487265
+-15.86200932757056 1:-0.6341453831788726 2:-0.9259180639727085 3:0.302702923864538 4:0.749555004323947 5:-0.7932989575334761 6:-0.5620972938631934 7:0.020542041027870717 8:0.11610338700447698 9:-0.7912600154897766 10:0.5108307672038874
+9.027804254487519 1:0.1746878011084212 2:-0.5872807344913673 3:0.6018547246457264 4:0.5106104933121229 5:0.7329523371170135 6:-0.40058771577765895 7:-0.48753463550174025 8:0.34308791976318 9:0.3407668956765344 10:0.5964472848798394
+15.949172086880687 1:-0.7790584545657173 2:-0.017224094786103317 3:-0.0974907790179953 4:-0.10287391996036166 5:0.6007953354774878 6:-0.7032497754397848 7:-0.36068070856329437 8:0.021391994204512432 9:-0.6509100388083549 10:-0.5410899936281377
+-6.151586699415245 1:-0.5318094974022525 2:-0.830796057445983 3:0.603828597318087 4:0.6660892552257192 5:-0.18529748408390523 6:-0.47166833767648986 7:0.592915541856605 8:0.9944601563352204 9:-0.6981606574244703 10:0.34942553665003584
+2.010398523297265 1:-0.9293899922307269 2:-0.07588009904844029 3:-0.8500855420709359 4:0.12191867923536615 5:-0.528778681165414 6:0.3117086447237414 7:-0.4222963938187163 8:-0.03247894950300623 9:-0.05387792412717962 10:0.4053568741659812
+-6.749023248121471 1:-0.9875370165216966 2:0.7137693455001415 3:-0.2510160963160164 4:0.8732150877079123 5:0.49658934612905314 6:-0.9817012857861731 7:-0.2045309437850289 8:0.7562713668333418 9:-0.6787434327188155 10:-0.6147932888026117
+4.452639829999693 1:-0.35256148944834176 2:0.7581152951164591 3:-0.37755890552299265 4:0.9480813371197343 5:-0.3419340388717347 6:0.3487602851799074 7:-0.5576726724270562 8:0.4899696188087421 9:0.563074979676983 10:0.7865891460062227
+-4.938733988900586 1:-0.4108386466193119 2:0.3287655432069885 3:-0.5853553038038923 4:-0.6480591422742821 5:-0.4787998161299789 6:-0.5828003484675421 7:0.42835744317623003 8:0.8378098987706633 9:-0.5645180498703375 10:0.28981512694646705
+-3.373242544176224 1:0.04989033652617936 2:0.6575826440927308 3:-0.24028051935833128 4:-0.6649808138961095 5:-0.6530198970442704 6:-0.19331254127919362 7:-0.6743004878881749 8:-0.7214986105015062 9:-0.30648035516261385 10:-0.6455097687924254
+-3.2843694575334834 1:-0.3548536057581908 2:0.7350125943559394 3:-0.3635282827378974 4:-0.8552820154885781 5:0.9140879208466111 6:0.21870365067770892 7:-0.17738543429561382 8:-0.052851966578491005 9:-0.36066059517759097 10:-0.9020765799355679
+-3.277146077677404 1:0.910961221014513 2:0.4302525202590246 3:0.11079959840001119 4:-0.3614188274820125 5:0.5080231397310961 6:0.013940825892631237 7:0.33583012240022403 8:0.5008797094229163 9:-0.663083147090173 10:-0.0865028013627418
+-0.202246147968096 1:-0.4929308143227653 2:0.8374300027105082 3:0.08763999085193186 4:-0.499738438136623 5:0.5926071511295365 6:-0.5135396038023627 7:0.6946715869746543 8:-0.5184428793490325 9:0.21753085495829239 10:-0.33796308746585235
+-7.1237150573506955 1:-0.8506203499039495 2:-0.6581804183622855 3:0.6484205342724825 4:0.013914696389758285 5:-0.6214530117645831 6:-0.011163110491807293 7:-0.6025372583334574 8:-0.0371573886520411 9:-0.7933455929226487 10:-0.38653838674273455
+6.298226129171093 1:0.7304191211928768 2:0.8128475475660479 3:-0.03161148630216015 4:-0.6018899317958344 5:0.19277055729934367 6:0.3002272616310928 7:0.949169758830406 8:-0.1011823256970481 9:0.16093341376629966 10:0.9596833606094763
+14.906594657519511 1:0.5053240355803015 2:0.6775698974866082 3:-0.6194771000646291 4:-0.02876927004033525 5:-0.5481504206112477 6:-0.9239150546263386 7:0.471216755072994 8:-0.0027794620943384363 9:-0.8954411386878227 10:0.8991742143686698
+2.1710965297686267 1:0.4578509053930304 2:0.9270194505165124 3:0.22470373699901236 4:0.21526179917432753 5:0.5299563895862103 6:-0.5824108997775908 7:0.03801922095671095 8:-0.5164033454609385 9:0.4370246809487237 10:0.6514133050988229
+15.05806598279517 1:0.48645077410559057 2:0.7821442063987365 3:0.1943681666933883 4:0.8289246958621577 5:-0.08034311437806041 6:0.03709694472527203 7:-0.895481297246602 8:-0.42921579749551664 9:0.5447075872378688 10:0.844397849728866
+-0.4683784136986876 1:-0.5083135683360327 2:0.626070365769088 3:-0.8737725909401557 4:0.725622293853621 5:0.0018794384199978253 6:-0.9343604622552886 7:0.6655593328822609 8:0.47501755618845753 9:0.8388618477210947 10:-0.5143806767304449
+5.823027255871114 1:0.08635467091841886 2:0.6314532702073175 3:0.8862069437865836 4:0.6542025864928516 5:-0.6846784290231471 6:0.048487096050569445 7:0.30828004933669395 8:-0.49438881988995687 9:0.5706936923061823 10:0.037705651885639346
+7.03779380408974 1:-0.07193682621291098 2:-0.5816975957307158 3:-0.8426927090342973 4:-0.37504851992255306 5:0.4473129018316815 6:0.3101938194888525 7:0.6160050428837607 8:-0.913998555949695 9:0.40461966540531313 10:-0.7581141330823786
+-9.770500546345563 1:-0.31358873581579894 2:0.11771478839130278 3:-0.3404842110585631 4:-0.0604362797252429 5:0.2159524972176814 6:-0.24737863017398087 7:-0.8541428610709716 8:-0.06753562283135062 9:-0.11567537916769255 10:-0.5606246203677223
+20.000154367451547 1:-0.344717847914646 2:0.8454969480099985 3:-0.58856299370874 4:0.5884510299634649 5:0.49162879631128553 6:0.7958075013181658 7:0.7781911267315837 8:-0.6780885011989877 9:0.9797694629597928 10:-0.1872163682079866
+-6.239848349456753 1:0.9132793720646253 2:0.1680340663118458 3:0.01740115925682284 4:-0.26580395408599133 5:0.28551914590761074 6:-0.9939706142381568 7:-0.8740927279520219 8:-0.8731218126652498 9:-0.10993630739903892 10:-0.3069565039708746
+-4.173072569004537 1:0.7864835254860851 2:-0.5614522227484218 3:-0.7718396381376464 4:0.49508673889127985 5:0.24030155936964714 6:0.8080778221819038 7:0.05395496402881128 8:-0.3045148076729973 9:-0.6134406357458853 10:0.7447268183581948
+-11.328415936777782 1:-0.10183127796258096 2:0.5689039487721601 3:-0.07015335898840225 4:0.23254189629731292 5:-0.3226974656715038 6:0.2859450214054784 7:-0.4916677058012495 8:-0.27564895614732055 9:-0.9416483232894219 10:-0.7472248333434015
+8.719164753818454 1:-0.8231424386390782 2:-0.03953537069863633 3:-0.3271580541537027 4:0.892192314973022 5:-0.6759017192358232 6:-0.419591686354591 7:-0.23967385135363606 8:0.936992531568956 9:-0.12946409158671512 10:-0.9082863469271643
+22.31738046492344 1:0.37030851555335365 2:-0.06654751559177563 3:-0.5759425437665169 4:0.9179952251152963 5:0.8628921839116359 6:0.8421952184405965 7:0.9625804174561126 8:-0.03075332253237728 9:0.12227386374957994 10:-0.6243390357793757
+-1.189108450798179 1:0.5681776913545951 2:0.46049028271139436 3:-0.366463711956754 4:0.025856437432560275 5:0.7547565372954261 6:0.5506193192167212 7:-0.6279807084274867 8:-0.38698884324386107 9:-0.9885778854008227 10:0.7814740172261654
+2.8767042393531965 1:-0.6841229745503388 2:0.6252203895646273 3:-0.6737644654353572 4:-0.7321040107741059 5:0.3162570540986238 6:0.6211089085315002 7:-0.33984617437403464 8:0.1227089818682312 9:0.04586594421613177 10:-0.4679977358965799
+2.783332151730615 1:-0.39148258540779013 2:-0.3037233649803406 3:0.7955133548911926 4:-0.1729544208044842 5:-0.18247049275020033 6:-0.1315085429729259 7:-4.447133918370483E-4 8:-0.805837119503338 9:0.11574866650006688 10:0.8517519041042676
+-8.99205564094827 1:-0.45501536967706535 2:-0.35829694693457914 3:0.775695048377375 4:-0.25331195582275745 5:0.15524612858817055 6:0.7400717904631442 7:0.8382485596668376 8:-0.5619009369436814 9:0.4386801597659249 10:0.09960232210246622
+-9.808386702564658 1:-0.987404834666963 2:-0.6732308850750186 3:0.5528285725528492 4:-0.8796302275267409 5:0.30705569958232193 6:0.8635312232105203 7:-0.14033675947074187 8:0.5516086773506235 9:-0.7487899106678442 10:0.8851518933134919
+4.948281656077033 1:0.4331269064492329 2:0.4628446087354616 3:0.33730748244242537 4:0.3473124014683382 5:-0.1707966473106064 6:0.8558057784524846 7:0.1390312032172829 8:-0.7918343112673001 9:-0.85993782695915 10:0.33563174747577107
+10.791261476321019 1:-0.5417345768902055 2:-0.06334901799780424 3:0.027652223245870466 4:-0.9881487640651161 5:-0.19441123027957707 6:0.40295156581142355 7:-0.8315553696517317 8:0.11405283165483926 9:0.5377980570161418 10:-0.24581620554740824
+-0.7287230169119936 1:0.33985587202063283 2:0.6841261099887705 3:-0.9441564997438197 4:0.28660913255058906 5:-0.7597915572726905 6:-0.8535957517473378 7:0.609134673753593 8:0.29636368731717977 9:0.05791523580926916 10:0.5589907965230858
+-26.805483428483072 1:0.4572552704218824 2:-0.576096954000229 3:-0.20809839485012915 4:0.9140086345619809 5:-0.5922981637492224 6:-0.8969369345510854 7:0.3741080343476908 8:-0.01854004246308416 9:0.07834089512221243 10:0.3838413057880994
+-16.71909683360509 1:-0.24375714099465773 2:-0.11915875769929496 3:-0.3741442802364221 4:-0.3812947578178094 5:-0.7032156297055756 6:-0.18339122712542388 7:-0.8634662520461855 8:-0.714561692659166 9:0.020558676493369177 10:0.22804428969949986
+-8.822357870425154 1:0.39332200105884363 2:0.5652370435795515 3:0.6220479966351453 4:-0.018976695481651484 5:-0.6868425195058918 6:0.2029750380170401 7:-0.5550873767310935 8:0.16864133648532342 9:-0.008843355054633628 10:0.6472547984399621
+0.36392761004065594 1:-0.9059630492963144 2:-0.41039282402227384 3:-0.006673269562094131 4:-0.4989314017618798 5:-0.17726034513032318 6:0.037764439388023874 7:0.30703957185016595 8:-0.09040426404909185 9:0.38661451965066274 10:0.1630571642147851
+7.415902871490132 1:0.188586850708651 2:-0.33013604761672566 3:0.6667976416858177 4:0.8537064956198137 5:0.03971370422819254 6:-0.43229195778759966 7:-0.9607154505216515 8:0.8413204878098277 9:0.40010565279599897 10:0.7306602852367441
+-4.129456164370826 1:-0.7967510984807558 2:0.545111159425699 3:0.16038228447433012 4:0.6311115528116698 5:-0.01985759480036542 6:-0.9516543115476572 7:0.18022912194075458 8:-0.2177157123823752 9:-0.5433158910016767 10:-0.4603867691069983
+-9.211066571082247 1:-0.3611235296125135 2:0.1402619601475985 3:-0.23132525512647795 4:0.5534401725834837 5:-0.34978585787763206 6:-0.24147682088922773 7:0.8089009287617064 8:-0.09075864922490862 9:-0.05759391404550773 10:0.3371306765964468
+6.52392916461972 1:0.19122050285976044 2:-0.625453376800498 3:-0.26804961781489856 4:0.9669297468261109 5:0.9142504122291741 6:0.7678963028488108 7:-0.6852943621882759 8:0.5898129788981794 9:-0.6580947533327339 10:0.46875109532259396
+-12.46765638103286 1:0.35148385951742633 2:-0.5206883134357769 3:0.35436280451876345 4:-0.8837833467474128 5:0.3433887284719144 6:0.3914771858025621 7:-0.17813796710416252 8:0.6553344538056296 9:0.3721548243590813 10:0.9442185832979726
+-4.937258492902948 1:0.9150659354384785 2:-0.17085510578573548 3:0.8233227233543232 4:0.2539669132090434 5:0.18955049451212935 6:-0.2833188558310358 7:-0.48483747414616496 8:0.8917378487725669 9:-0.13169122011498646 10:0.9815059855284158
+-0.5233425797210233 1:0.4238363705720569 2:-0.18363058784066522 3:0.2949874786744968 4:0.12235592695567354 5:-0.9746310186182559 6:-0.8990867637441311 7:-0.8580982328464586 8:-0.7930887027205957 9:0.16757307988090275 10:0.988861929608575
+-11.904986902675114 1:-0.3692990475534952 2:0.32166293883244323 3:0.3401547722249436 4:0.10009747375878408 5:0.7598877208920192 6:0.2853003389082669 7:0.22880221701675074 8:0.4521491122351502 9:0.33222018268933895 10:-0.9500018867461919
+8.324969054805921 1:-0.48086111720736513 2:0.3705524122401185 3:0.43635448766342133 4:0.6544321903349255 5:0.059000747296945155 6:0.3328036763371236 7:0.9609146376298034 8:0.5943082361322021 9:-0.3074246170581105 10:-0.6763916655761453
+0.21701641918233017 1:-0.29449708766806304 2:0.040640346437143426 3:-0.6524819533513639 4:0.37482287233702394 5:-0.29800608396043216 6:-0.537030944860492 7:0.2862394027536084 8:-0.3783043133672048 9:-0.5292179323972728 10:-0.09583783955916791
+-6.84977373580439 1:0.825136109631339 2:-0.5722868691442817 3:0.11048134523744757 4:-0.5946054293068455 5:0.28061485657354823 6:0.9135611623885838 7:0.35590421873954603 8:0.8943562249941011 9:0.4183378981109729 10:0.5714160298247304
+-11.039347808253828 1:-0.9620263418414967 2:0.22669065740934724 3:-0.7378036492234086 4:-0.4460191511609126 5:-0.2594476006347024 6:-0.989879976130936 7:0.762096015449097 8:0.6983868222083149 9:0.8729993459982626 10:0.3426647417451305
+-5.882860061103163 1:0.5247178959769465 2:-0.6217169944869176 3:-0.13640714414758315 4:0.6608201052790283 5:0.5789945243704264 6:-0.12686057623612612 7:0.7277882307863026 8:-0.47949544949858236 9:0.9781208432412936 10:-0.8980068284379361
+23.52945433069272 1:-0.12339549394875426 2:-0.6769524283089239 3:0.9324962870874394 4:0.28956947294105206 5:-0.2957355479338608 6:0.7504385350771912 7:-0.8769262306643106 8:0.41591311300668155 9:-0.7694611231426498 10:0.9885110924181837
+19.043184423383824 1:-0.13783178628851878 2:-0.853631844645959 3:-0.12792415583066052 4:0.6936898387576049 5:0.8488563282318959 6:-0.6530521292304581 7:0.27832187660440666 8:0.09838048719062442 9:-0.5913230087557231 10:0.260839433107553
+6.83105883806984 1:-0.9085282656519695 2:0.65203708247844 3:-0.687580071985604 4:-0.045008726377529173 5:0.4762107922777967 6:0.15939259525248506 7:-0.46363191848939334 8:-0.25856682230410266 9:0.313842004143269 10:0.5042938214484851
+-9.409197719620593 1:-0.34356198962701945 2:-0.06381545064099514 3:-0.9332814619122063 4:-0.2629675367181199 5:-0.03876014002851913 6:-0.4606936151803749 7:0.49272969757318563 8:0.5550196351479111 9:-0.1758425343811718 10:0.20285868144226837
+-1.3101852978323116 1:-0.3740821549570985 2:-0.9788976137554464 3:-0.6078739734947245 4:-0.8007745980271539 5:0.7381298546055934 6:0.7407750458109124 7:-0.7711351008178868 8:-0.9895256155202141 9:0.35793767138197174 10:0.6589909255086295
+0.5180809608973377 1:0.19289850282287446 2:0.6301214514538145 3:-0.15311307199521518 4:-0.8607670552113709 5:-0.46422067276745316 6:-0.29812862604449464 7:0.519464836430044 8:-0.9480450997338103 9:0.973503038633444 10:-0.7843880226794626
+1.9947872601406775 1:-0.15799682110486057 2:0.22645891561571352 3:0.3141842574216682 4:-0.36086019480721676 5:-0.1429373936064291 6:0.8097261636650581 7:0.11764088861630029 8:-0.9151998265501957 9:0.6536711690904891 10:-0.17232697113157425
+12.352290000973428 1:0.8176113135335772 2:0.39342616792621987 3:0.44011948797971234 4:-0.4412435869837865 5:-0.24509203724837314 6:0.8636655043434542 7:-0.4251583124505798 8:0.2068056615503988 9:-0.3501114760443049 10:-0.23701353324739483
+-2.891643319177732 1:0.7722403010820704 2:0.7994121584045861 3:0.18520464815273208 4:0.7273575609391227 5:-0.3758589216283552 6:-0.7598404862373955 7:0.5748649410179301 8:0.6897988099260968 9:0.5638920860629713 10:-0.992567809902162
+4.803737144054077 1:-0.7367711178556622 2:0.07370548192399351 3:-0.5510509754264419 4:0.11949095653894504 5:-0.7723751845800411 6:0.6450480728551136 7:-0.9508825019800493 8:-0.3250395411575804 9:-0.24913562167143777 10:-0.3617439870343031
+5.051689886526102 1:-0.09854955786627007 2:0.5298224514703289 3:-0.014996634675966236 4:-0.4462048687049027 5:0.22912790083984547 6:-0.513533454471272 7:0.1452771069237353 8:0.371152210841464 9:0.9204732090987018 10:0.7472990716905279
+3.8591142298280476 1:0.7532169023970261 2:0.8291433156934658 3:0.9255891263525324 4:0.3248663809949248 5:0.9905320652281553 6:-0.10383453745167626 7:0.8519246838852608 8:0.6024015353989258 9:-0.06958036249881938 10:0.5862142389541998
+11.30005914221598 1:0.026411858067972194 2:-0.6968445330429607 3:-0.8194566946165238 4:-0.12780659247925996 5:0.8406393783194903 6:-0.24617182945415128 7:0.30199973460219853 8:0.6062457235841974 9:-0.19314055910416927 10:-0.48313233883372964
+-10.288657252388708 1:-0.7388306404020344 2:0.07753617971873439 3:-0.5735498713988352 4:0.2183581175474576 5:-0.873572721679176 6:-0.8788755575751708 7:0.7087858362905568 8:0.7126712562404713 9:-0.7607334319316799 10:-0.4627367552114916
+4.895250842405817 1:0.9772954128558484 2:0.6020087399988574 3:0.16946626176056134 4:-0.011334492807484997 5:-0.5391845039589362 6:-0.4315843612118535 7:0.9065130011032458 8:-0.4860160207844919 9:0.0921755607946162 10:-0.022200673265013515
+1.0479421939727227 1:-0.055436367433274514 2:-0.6710483362647659 3:0.9222786043047919 4:-0.22005981623386184 5:-0.8141845044113469 6:-0.31766631447334226 7:0.6067696845798944 8:-0.1445661385071555 9:0.9172271611227454 10:-0.8079554780561127
+-9.754451457291598 1:0.533713237587885 2:0.6499588942067549 3:-0.49188790503368285 4:-0.6925119436487435 5:0.3345265979579788 6:-0.8117849521672496 7:0.9312055115656304 8:0.3273803451149724 9:0.7567478475677727 10:-0.6256676928549367
+5.869027126482974 1:0.7273823383600513 2:-0.2519813990388706 3:-0.8239584025397881 4:-0.13749750031735974 5:0.6142824732416132 6:0.6251630800232315 7:-0.6138240706157267 8:0.7210396245391326 9:-0.41832155201953714 10:-0.8965988320689853
+9.14234252751227 1:0.7295320896113133 2:0.6150271212503227 3:-0.9785024737101733 4:0.30006672036705506 5:0.11703528191771406 6:0.2971639460196238 7:-0.7920108995168815 8:0.32649036066184567 9:0.03522428067355543 10:-0.1766251898148803
+-5.643698771141404 1:0.27360638280623983 2:-0.6124401810442446 3:0.24950528730210886 4:0.09920211684887548 5:0.7187490549286091 6:0.6212724115415782 7:0.5864634211269566 8:0.114951165007104 9:0.44859258949094283 10:-0.3768352371578665
+12.781643819428492 1:0.9144335582094396 2:-0.4579872615218674 3:-0.6521934534632468 4:0.4462086111316512 5:0.240360283350179 6:0.23974046479581124 7:0.4840439971437822 8:-0.7250363120037027 9:-0.29769496257362094 10:-0.3382859512018359
+8.393556738722923 1:-0.8263387132502396 2:0.9434824094966923 3:0.1607861709872136 4:0.15217100448798782 5:-0.6517945935711484 6:-3.354731073326178E-4 7:0.07846631386981562 8:0.687844846942889 9:0.9277854407325892 10:-0.8855380268588307
+-15.348871155379253 1:-0.5734707274250155 2:-0.2526008551945753 3:0.23752094195309925 4:-0.7074613963298721 5:0.4674168537545218 6:-0.3198997855552628 7:-0.10415974108745596 8:0.5616912699671224 9:0.43742425558560694 10:0.19732530755184596
+13.138260063721448 1:-0.9415220143797984 2:0.6015431361268124 3:0.38898046240229545 4:-0.5750448371021175 5:-0.5803995196333898 6:0.11772198725731342 7:0.7512685244060366 8:-0.6683465740662857 9:0.9515652825318053 10:-0.32405935964523547
+-26.736207182601724 1:-0.47083104147202404 2:0.28748860067800597 3:0.007399318769021113 4:-0.8189013750589702 5:-0.5156633937248272 6:-0.9906928746525896 7:-0.8848419810272337 8:0.2197280161306785 9:0.12855082514870197 10:-0.7862803985146845
+-20.212077258958672 1:0.5609065808412279 2:-0.9201904391147984 3:0.908305865183735 4:0.9255146658282842 5:0.6871419344095282 6:0.4201876217923466 7:-0.42906289792612684 8:0.5787691868233418 9:0.7260522064761288 10:0.28251641556690554
+-0.44652227528840105 1:0.37640618494870504 2:-0.20012451052963542 3:0.9420894309510319 4:0.4218728633972739 5:0.5551974480349577 6:0.07615991810462619 7:-0.12409220462011294 8:-0.22212591926375946 9:0.21160498862483723 10:-0.6092792830633924
+-1.9481059746438067 1:-0.43820030250217457 2:-0.6836588417639442 3:0.733018205278934 4:-0.6564348753121718 5:0.7333385435136448 6:-0.5577457688360317 7:-0.31035811050608975 8:-0.7189201447768139 9:-0.7629842028723994 10:0.7179459779331092
+1.1951162998609508 1:0.19541555859727744 2:-0.4796785506546435 3:0.14123852670749248 4:0.7161847585887089 5:-0.2502765085719578 6:0.8815667909545981 7:-0.6418691905513725 8:0.49600147195728783 9:-0.3091837674381053 10:0.4320162841463153
+-8.99125390483227 1:-0.01183888602092864 2:-0.5901829024081027 3:-0.4343074406380647 4:-0.40450313056290166 5:0.05269590196351448 6:0.733631212862198 7:0.9575176715505025 8:0.5974628692830348 9:-0.20284241796038271 10:0.9577348510907686
+-7.955533026930219 1:0.6104830760481679 2:0.5915483572646505 3:0.3275427350991458 4:0.48361434056132424 5:-0.9466590639056058 6:-0.24662428438925743 7:0.9856361456534972 8:0.9434155212648045 9:0.3466736921968707 10:0.12927980558284102
+-12.500773785355054 1:0.5733321361720694 2:0.39154119830075085 3:-0.9347116355607772 4:0.0920586614926524 5:-0.6959457183810456 6:0.2136579936466858 7:0.17595268059814395 8:0.8828168055200465 9:0.18934277314853398 10:0.7565908584660754
+-11.43180236554046 1:0.082018621904135 2:0.9074181204118958 3:0.46125595008850273 4:0.40328845936169966 5:0.7803064691948824 6:0.20802011482729377 7:-0.41368899649077284 8:-0.8997565495498339 9:-0.1880483213318005 10:-0.15538597634233264
+-5.055293333055445 1:0.4442675297698402 2:0.19045719972922193 3:0.4877438951288897 4:0.7984474402420494 5:0.3251350777349489 6:-0.18676050499673869 7:-0.2701840041572374 8:0.4486609996458524 9:0.5403637876036615 10:-0.8971614841211264
+1.0276485382241776 1:0.7953696703382547 2:-0.3245779681908927 3:-0.3507435626548021 4:0.9510986059491036 5:-0.8655491074076527 6:0.20729233888498677 7:-0.43078300089533594 8:0.19504657032168216 9:-0.3173814102187291 10:-0.042479969052890754
+9.690201571311908 1:0.16852987139559206 2:-0.2514893273405625 3:-0.9993240281686275 4:-0.2166013247997891 5:0.33294165754921234 6:-0.5824203831560628 7:-0.15253642946648616 8:0.3547892367555441 9:-0.047604356104869794 10:0.9229112136183077
+2.2591036039970347 1:-0.9919593184325572 2:0.6323551392201245 3:-0.20815293136790447 4:-0.002395046469600759 5:-0.5015903362190326 6:-0.16698803749234048 7:0.7901657583805675 8:0.33755402936964973 9:-0.3707337678548108 10:0.6995480653730146
+1.5130881908855742 1:0.973710432688613 2:0.6518972988019702 3:-0.16491318496856833 4:-0.6066757853095415 5:0.8762371591845273 6:-0.9056066630820714 7:-0.3388079327070965 8:0.3934146060660142 9:-0.8756168865642253 10:0.9522427911640303
+4.023618949132531 1:-0.14974626191548301 2:-0.5874962377709136 3:0.6780439909311404 4:-0.37291203746764356 5:0.08104034602232169 6:-0.4706923395029945 7:-0.8924577368048239 8:-0.3363784341297067 9:-0.4139746050396018 10:-0.5107600309932907
+-2.8674162893420965 1:-0.7554383289076523 2:-0.1355597928418868 3:-0.3891904246986413 4:0.43949832438341785 5:-0.43859957095446833 6:0.37548094528561093 7:-0.5228633291549518 8:0.24169710795100352 9:0.7131753590746546 10:0.03458176767001042
+4.661164232198611 1:-0.12738868751385546 2:0.9446285809821182 3:-0.17981416859193433 4:-0.7535879975625193 5:-0.08594548726529161 6:-0.9983154486609989 7:-0.7272748852665216 8:-0.8197811039616518 9:0.5177610923333253 10:-0.6180731281817853
+-0.12347625601866746 1:0.10820547757674692 2:0.1825421454873002 3:-0.3412486258429426 4:-0.14925445930975534 5:-0.6594599831395103 6:0.9552502376248448 7:-0.7875626067291472 8:0.3854984181307912 9:0.014303876202374832 10:-0.7300443667550689
+14.546296184422973 1:0.2459523985646046 2:0.9434777073825811 3:0.2112745925235362 4:0.7730688005214974 5:-0.13727994893203732 6:0.6140037510172511 7:0.7545298281668846 8:0.7814551909982614 9:0.0026683642139069264 10:0.5633973602849358
+-19.66731861537172 1:0.9353590082406811 2:0.8768609458072838 3:0.9618210554140587 4:0.12103715737151921 5:-0.7691766106953688 6:-0.4220229608873225 7:-0.18117247651928658 8:-0.14333978019692784 9:-0.31512358142857066 10:0.4022153556528465
+18.84119697288412 1:0.4423204637505467 2:-0.4364821709544735 3:0.3935363893778452 4:-0.7750286735195999 5:-0.6981814766625978 6:0.6889512553826111 7:0.3646791168217727 8:0.0023536025493677837 9:-0.08378048150085249 10:-0.05659381771155503
+17.40329212914592 1:0.9155980216177384 2:-0.35593866074295355 3:0.44775710780914824 4:-0.42914421567532357 5:-0.2734430718503955 6:-0.8937042912745483 7:-0.3143761936611371 8:0.07805814979426184 9:-0.31386151509289784 10:0.6202932236456253
+-19.402336030214553 1:0.462288625222409 2:-0.902975525942725 3:0.7442695642729447 4:0.3802724233363486 5:0.4068685903786069 6:-0.5054707879424198 7:-0.8686166000900748 8:-0.014710838968344575 9:-0.1362606460134499 10:0.8444452252816472
+-3.855123203007599 1:0.5072557393175969 2:0.4626973233672753 3:-0.20910077161652119 4:0.9431415515135266 5:-0.1293690767585638 6:-0.2033835058111637 7:0.501429131658198 8:0.175133281735671 9:-0.6091682952201736 10:0.543010689352589
+1.493768355655548 1:-0.7772812666041105 2:-0.7743738591348672 3:-0.2848754060915175 4:0.3336846848765145 5:0.6219572132443736 6:-0.11144657683793624 7:0.7606913325884337 8:0.8547085151723017 9:-0.31728444617771134 10:-0.4668474022688931
+-17.803626188664516 1:0.5176340000264179 2:0.23048377874011128 3:0.6162746928601832 4:0.16908590014785418 5:0.9695207469685181 6:-0.34713218673384705 7:0.8526833760069625 8:0.9895592279649763 9:0.8805561957342884 10:-0.43452438291417894
+1.4060200157931342 1:-0.41964471941333525 2:0.7738486114171979 3:-0.0964606192284374 4:-0.25351781452566025 5:-0.21065389913054244 6:-0.40490416354122916 7:-0.7696501777959646 8:-0.7710488116813146 9:-0.6777228721053572 10:-0.09381158095961428
+-17.026492264209548 1:0.8367805314799452 2:0.1559190443625338 3:0.048200110551483544 4:-0.7340083467235765 5:0.2661150265782781 6:0.3881661781792165 7:0.9485287302765621 8:0.7201540574376382 9:0.8509234862656003 10:0.9658114866648093
+8.729450606651499 1:0.6404862166906327 2:0.16516090922657822 3:0.29013117743588057 4:0.37056732180613317 5:-0.3376494575302882 6:0.9012625630650577 7:-0.42150978319487 8:-0.05630249989686087 9:0.706104255632954 10:0.01935884085365225
+-5.516822117602276 1:-0.5718348423045241 2:-0.2145777722920088 3:-0.09307467998835195 4:-0.7311274103678378 5:0.5272184003067053 6:-0.00528176138162495 7:0.2852826178935919 8:0.6180999884045897 9:-0.7526372151008776 10:0.20416472532830543
+13.001541259752251 1:-0.5137703877272299 2:-0.15452359837207896 3:-0.25657600903152744 4:-0.9773110735601165 5:0.0718147980090178 6:0.18965211809311744 7:0.7795354990363292 8:0.21976898743223638 9:-0.20364089221752524 10:0.33822332985943304
+18.443388694564348 1:-0.9278344397401963 2:0.2678538727090136 3:-0.46932389854374734 4:0.8494176173177825 5:0.45765527018197694 6:0.20546395745879287 7:-0.199860294349123 8:0.47798730134403256 9:-0.2279771893187592 10:-0.30836118564314274
+8.952089112152663 1:-0.7371671220953286 2:0.8160149639986789 3:-0.026630089188139028 4:0.5931015267817183 5:-0.12216243475451294 6:0.161290795125286 7:0.7423016751095652 8:-0.5212872902985852 9:5.606147011660845E-5 10:-0.409626733921443
+-3.7062463981908027 1:0.5633514321449928 2:0.9914900963311462 3:0.1867799930236702 4:-0.15960235736142847 5:0.1204791067384241 6:-0.7733281422620872 7:-0.887447048141158 8:0.7931515335800692 9:0.732289882696125 10:-0.034992898370363124
+-10.58331129986813 1:0.6627003739767989 2:0.10688718810947728 3:-0.49230090744757216 4:0.8936580036513948 5:0.012227929286241057 6:-0.1442038886014838 7:0.9203452040795139 8:-0.20719832624131262 9:0.29561869366253335 10:-0.08597725084864649
+9.818996211259908 1:0.580133516885796 2:0.07422424429848573 3:0.33438634998226924 4:0.26054797992533696 5:-0.8771304726537796 6:-0.9710990591964794 7:-0.1869287393875041 8:-0.6167738073093247 9:0.34401921428837245 10:0.6737600514607418
+-11.87816749996684 1:-0.7193071334885193 2:0.5247127705364141 3:-0.02978727198197606 4:0.18353223007701058 5:0.40350110058596944 6:-0.36002841871228686 7:-0.20781535546501528 8:0.5517883176456557 9:-0.9938027872744732 10:0.6245061418135955
+-12.198096564661412 1:0.27542314155961156 2:0.3459734388741733 3:-0.38737776987446937 4:0.6244101669171684 5:-0.7801218302490938 6:0.20444733666197523 7:-0.5667599464182904 8:-0.9462131580071358 9:0.5576565405741785 10:-0.9307557040059242
+-3.6610413123521357 1:0.045569951437504086 2:0.32203961277046145 3:-0.04228927426053675 4:-0.9435304938416831 5:0.3750509710699601 6:0.21298970117620142 7:0.5491054691791977 8:0.33695088608872203 9:-0.9923500858828505 10:-0.6402707119893463
+3.782742149409224 1:0.7795250611996376 2:0.43296979846218275 3:-0.6481485005937841 4:0.3235717281667645 5:-0.8067382770768907 6:-0.06740397503468509 7:-0.2835017205434338 8:-0.5875853498478532 9:-0.25699561837680585 10:0.7813561594373908
+-17.065399625876015 1:-0.01772446594568744 2:0.563282914714494 3:0.14232420381013955 4:0.031667902604941345 5:-0.7815348482900619 6:0.3657733497576803 7:0.7208326162626688 8:-0.7863253120180662 9:0.3329194167867533 10:0.6175752945608013
+16.23248797654815 1:0.2615647748812251 2:-0.6631801348538622 3:0.6420349382574477 4:-0.31980528388089846 5:0.38021930887251365 6:-0.060298437830818896 7:-0.8911652782989568 8:0.3424617259589986 9:-0.8515350749364614 10:-0.42354709676980207
+-5.015963911416578 1:-0.07890564237014686 2:-0.09864377281008885 3:-0.13139943914680408 4:0.6610949669857866 5:0.06777579108221987 6:-0.26586245727222835 7:0.17443498956808612 8:-0.3129854922817781 9:-0.37913757211269505 10:0.7627186373372121
+22.647750304177556 1:-0.03666997412165163 2:0.49691867674483814 3:-0.45898559472166967 4:-0.09932248891016404 5:0.05692910907689508 6:-0.5006743461081364 7:0.9992936758550379 8:0.8252525466172065 9:0.9431711015127009 10:-0.4891497061921315
+-3.731112242951253 1:0.44353490207818513 2:0.23112032838224117 3:0.4697682541445527 4:-0.7507514828346664 5:-0.06323257550543837 6:0.0997091431243109 7:0.9394036761509628 8:0.4103869738859962 9:0.6564209227640914 10:-0.5427466755921158
+0.6761872737225261 1:-0.30051626190360503 2:-0.26699232020158803 3:0.8668758741279379 4:-0.40325291744583347 5:-0.9756425738484267 6:-0.5116398654634617 7:0.16424789009043073 8:0.8034099442414044 9:0.8554935001446193 10:0.42747702930667497
+8.449247195197387 1:-0.6588765973399024 2:0.2502285196526799 3:-0.20481547024283087 4:0.3770725284683252 5:-0.169707887761277 6:-0.0804075502584003 7:-0.3580757176408007 8:-0.6042549664471129 9:0.360349278976142 10:0.15899650901110962
+27.111027963108548 1:0.7106841652047162 2:0.6853699382312817 3:-0.8076297545289823 4:0.7932321056591545 5:-0.8011085095234463 6:-0.7017292726737878 7:0.10568649778064154 8:-0.40755358264969255 9:-0.061008981132773865 10:0.08895972651409556
+27.78383192005107 1:-0.8378790218922778 2:-0.6651002504721837 3:0.021049638665430415 4:0.32994334871293196 5:-0.7981304887988308 6:-0.2947962117284566 7:0.9739408711845776 8:0.9442893181893954 9:0.010541491359981059 10:0.8332791453382604
+15.700710963871254 1:-0.538773982400854 2:-0.5966426806845984 3:0.14570292467314627 4:-0.5937791901212952 5:0.7779251136963325 6:0.9962962075803357 7:-0.4774083823748394 8:-0.02528476957876369 9:-0.17305036341254398 10:-0.6013841506503688
+-12.558575788856189 1:0.03250364930617211 2:-0.6723950859659307 3:0.7090474884514901 4:0.25034305882632735 5:0.7036774024093582 6:0.622650236684523 7:0.5776881238206741 8:0.7999754726258337 9:0.21332972563833508 10:0.33849062947231645
+6.2776776518215955 1:-0.009605588630256623 2:0.5786496865369053 3:0.9208276908400748 4:-0.9477397424337148 5:0.6306053656362194 6:0.5396434662389846 7:-0.9841930450269964 8:0.5492682920407823 9:-0.020767248025529206 10:-0.8684655435686472
+6.424586997399564 1:0.861374923392324 2:0.8356037964367176 3:-0.7173479824827564 4:-0.6309584820438245 5:0.16136758138471285 6:-0.7485184163431866 7:-0.006053583829132236 8:-0.8762221084691306 9:0.19195377669247726 10:0.07259634302552964
+-9.64772485466405 1:0.7568015336230662 2:-0.4221524485756756 3:0.011711847664269248 4:0.7387065048724242 5:-0.04347512566745104 6:0.06642100869974654 7:-0.6993705848315939 8:0.16312217088045422 9:-0.11975577990989916 10:-0.6188717473788392
+3.8183706502283647 1:-0.7226937936463145 2:-0.5462756960199258 3:-0.39158419906610664 4:0.014310440945434433 5:-0.9950315917350652 6:-0.1844037449550875 7:0.9023517651879036 8:0.7948752060508435 9:-0.6792702010973877 10:0.40730074403235617
+1.1585019476700562 1:0.5575546848694 2:0.8997032130006739 3:0.6088643323129037 4:0.4872893656051758 5:-0.03977520372748922 6:0.3202565433572042 7:-0.31231768645537206 8:-0.6861153669592381 9:-0.08561643820383291 10:0.522243657731251
+-8.18651039877047 1:-0.809069379967462 2:-0.04827229852445103 3:0.19963602092982624 4:0.2568971171641006 5:-0.0015346733366310428 6:-0.6104625526166494 7:0.7746715041233412 8:-0.7343750018341593 9:-0.49272635466510106 10:-0.8115191199688623
+-3.377690136019927 1:-0.9408187510685164 2:0.9654993263332854 3:-0.16725010447984268 4:0.2574069587853294 5:-0.6930506968932861 6:0.11124762075550176 7:0.39145805505914866 8:0.2906495128462767 9:-0.27454907309824916 10:0.9001175309434777
+12.692571815413245 1:0.7404426710258791 2:0.9060576634778448 3:0.7023712021897308 4:-0.9808126157768493 5:0.03447666475715194 6:-0.4146339211599541 7:-0.7329651749553896 8:-0.2696019807317358 9:-0.9885367164723897 10:-0.8540304023043486
+2.5111054050889354 1:0.7448154454968356 2:-0.7532143233138027 3:-0.9724617436335079 4:0.662620399592766 5:0.45517204589358307 6:0.37409736074838684 7:0.337245076577648 8:0.50951903847353 9:0.2590369923587328 10:-0.3248257475117191
+-8.300340493749207 1:0.5504850435404609 2:0.5077232940244447 3:0.778859307357816 4:0.2601916883813373 5:-0.0032275666062382413 6:0.039752927221862855 7:0.19468432568826755 8:-0.2859531554546477 9:-0.4113477962970582 10:0.43272011953041667
+5.904938653193952 1:0.6622293273002955 2:0.6428891633785236 3:0.6999663090423285 4:0.9132698742913088 5:-0.3960072336866507 6:-0.14500922264286054 7:-0.4390171033743564 8:0.002067106212897185 9:-0.6079874251539117 10:-0.7131416109696531
+5.004048239623824 1:0.7212309895357449 2:0.3425199843383353 3:-0.7290323633040705 4:-0.5563097960397918 5:-0.7577898297822001 6:0.647883070472203 7:-0.23710559062843073 8:0.34398507133293954 9:-0.5440251617348038 10:-0.2971638032112218
+6.21255598077158 1:0.2498685983586959 2:-0.2586857335205359 3:-0.6380810501916263 4:0.17008841621855852 5:0.9485802018202867 6:-0.2580306792121272 7:0.032916516140567786 8:0.32950951532163675 9:-0.9291915084526683 10:0.8454021164786922
+-3.741044592262687 1:0.763300390779396 2:-0.1832552896771813 3:-0.39361907876758573 4:0.9050768615040607 5:-0.8850093869496836 6:0.9302208653737598 7:-0.12972094056755412 8:-0.459442486378308 9:0.5044112394875107 10:0.1399067554681861
+7.378402183384303 1:-0.27686808475610114 2:0.12735524561214606 3:0.5216635958678004 4:-0.9418584785460469 5:0.20441570818728771 6:-0.35073421178920583 7:0.7847501694079704 8:0.3222999552829353 9:0.21025696511089764 10:-0.5813710201294744
+-7.1500991588127265 1:-0.1945259148773102 2:-0.4089845159829022 3:-0.1971859124232922 4:0.9531447983295496 5:0.07996455700202221 6:0.17013529724757648 7:-0.2442095218739362 8:-0.8564146371721229 9:-0.5843910532907555 10:-0.33846471424918767
+-4.288417758202577 1:0.020710986120182184 2:-0.7450564238727908 3:0.3674992023059285 4:0.46737461414601555 5:0.9411702705113052 6:-0.7257365059912877 7:0.5813280037560231 8:-0.01567531846894843 9:0.24734195293533467 10:0.6516001002566887
+5.916426037500391 1:0.8260000862135342 2:-0.11324162495165968 3:0.13061304369435334 4:0.5762591624576425 5:0.548049763999644 6:-0.9751599851764361 7:0.02828821483057764 8:-0.4113286027346803 9:0.8912856976307486 10:-0.8470910204808244
+2.431004294471012 1:0.14088576701299083 2:-0.45104190898994734 3:0.29891134031619115 4:0.955503074037666 5:0.15962522624750242 6:0.7664481093046553 7:0.051697815479792686 8:-0.3471787155014081 9:-0.8007151537631465 10:-0.5598899500902301
+-16.08565904102149 1:0.3946137229565083 2:0.8443779319638349 3:0.5116855547320893 4:-0.5319339991982652 5:0.26564506849312797 6:0.18905397829944448 7:0.1976357098053687 8:0.15505612242632538 9:-0.935633748308776 10:-0.9782957013204887
+18.058440348477184 1:0.8402487524597533 2:-0.6200725197687718 3:-0.6158487677192792 4:0.0709328308135515 5:0.7501256905495493 6:0.38092209802839583 7:-0.8192579128383128 8:-0.9304002828581583 9:-0.6570300818845025 10:-0.5252554781538985
+-1.0026720160736349 1:0.46122079684901474 2:-0.7609201036934166 3:-0.9372178059537293 4:-0.25391036498391006 5:-0.7487429157699828 6:0.38024314675291637 7:0.21886059803198576 8:0.027516853267765207 9:0.33483464322377765 10:0.618580130027746
+-2.6688695419207162 1:-0.8775911623423445 2:-0.6647410420697879 3:0.05948516302547313 4:0.7278526664475804 5:-0.011366224409705028 6:0.33475665968289436 7:-0.6386120399761575 8:0.39609772177595115 9:-0.7872076290319412 10:-0.6195857302948329
+-13.867087895158768 1:-0.9114780602695882 2:0.7997695296649912 3:0.8337252417804881 4:-0.7927267913881113 5:0.6863829853181673 6:0.4162562153517635 7:0.2659922421074139 8:-0.551994669040742 9:-0.6403900338772157 10:-0.8680387717518072
+7.826011095515239 1:-0.2881951904396949 2:-0.19317071325391022 3:-0.06581062483451183 4:-0.6074074436315555 5:-0.9434740067975405 6:0.9426572655575483 7:-0.1812629432036228 8:0.39425575292939863 9:0.5065890539615039 10:0.8969825696966649
+1.4213836206303339 1:0.6996840540120932 2:0.1283999569152492 3:-0.2537375462472613 4:0.24772110606788456 5:0.9040210381745799 6:0.47062010977660207 7:0.9697678931927365 8:-0.9215764371674713 9:-0.27541598110075793 10:0.44277003247067803
+-0.973650798730175 1:-0.2121645467631068 2:-0.6770222508071349 3:-0.5733067523949165 4:0.27979529516037105 5:0.7128588235545461 6:-0.9208763636184307 7:0.14128337151047532 8:-0.002851660400375433 9:0.6943908711123281 10:-0.9201922993121072
+-0.17500848560451965 1:-0.6015070903427717 2:0.7815998200409671 3:-0.9932006200204946 4:-0.3303953411379028 5:-0.3329917860768894 6:-0.2822852019877604 7:0.6834785385197197 8:-0.6458607648553825 9:-0.06171476054995373 10:0.11421513352405444
+-15.310980589416289 1:-0.35290763483001486 2:-0.7263565311032778 3:-0.8688987069582226 4:-0.991098319894185 5:0.7029028082332363 6:-0.20251284356518684 7:-0.10928416773360117 8:0.307764663956116 9:0.6423143148384418 10:-0.15527637175127107
+3.260298266762908 1:-0.7817510582064782 2:0.45336200757318257 3:-0.15365670773321338 4:0.5063951567230205 5:-0.7102867196895872 6:-0.48050036620725955 7:0.9838016675169072 8:0.07854601230194436 9:-0.18953694857147863 10:0.19370072527454107
+3.846123583197846 1:0.6665586449040093 2:-0.2894063530813835 3:0.29965348483445386 4:0.23590344101670313 5:-0.7456743720187828 6:-0.4680876353446175 7:0.8106301610699425 8:0.691280702194663 9:-0.6060141408622055 10:0.34018639920235194
+-10.945919657782932 1:0.7669971723591666 2:0.38702771863552776 3:-0.6664311930513411 4:-0.2817072090916286 5:-0.16955916900934387 6:-0.9425831315444453 7:0.5685476711649924 8:-0.20782258743798265 9:0.015213591474494637 10:0.8183723865760859
+9.820049725467145 1:0.9582163993327679 2:0.7503905881505508 3:0.6255110430336392 4:0.6522701954798096 5:0.09248037700932144 6:-0.2833482854986902 7:-0.9841968940607242 8:-0.9343780716625845 9:-0.605526104070818 10:0.6000165028195326
+11.398715935456183 1:0.6605086903456443 2:0.14675454515266395 3:-0.7880053589830274 4:-0.8570785944515658 5:-0.4317693974151271 6:-0.12244918233307645 7:0.9808241653220866 8:0.5455853515046201 9:0.6870972425676756 10:0.7427686762232875
+-7.846310147695936 1:0.4355817642106965 2:0.7659504362110916 3:-0.3784171977305315 4:-0.5675896574776877 5:-0.20116390539973938 6:0.8775467546326667 7:-0.2824903364469842 8:0.7470660314619617 9:0.8967783051712528 10:0.7133700339519966
+-1.3847391232663768 1:0.3707613476850027 2:0.6931092598460797 3:-0.7701621508103305 4:-0.5679366502518555 5:-0.7234356749703683 6:-0.8059255104944509 7:-0.8307993875388229 8:0.6133975694770035 9:-0.7399749904168824 10:-0.1534990394513953
+16.93981662267873 1:0.6552665678625891 2:0.023248457840923775 3:-0.6850641408327465 4:0.7129790774369389 5:0.04166304042825364 6:-0.7160289667702797 7:-0.4733073680976494 8:0.2720897719417634 9:0.05850741911975099 10:0.34427554125371174
+2.8497179990245116 1:0.6664937514484015 2:0.3343796939204209 3:0.2611910348746209 4:-0.13658810351647 5:-0.5821801257591224 6:0.9854683468621908 7:-0.21396555404689188 8:-0.5923272173716836 9:-0.5674796199927252 10:-0.5681633547764235
+4.981807952389501 1:0.7517426071091595 2:0.7029291090701855 3:0.7126619831046563 4:-0.9982007415355478 5:-0.7743343367502893 6:-0.9048858749551119 7:-0.8243783842398396 8:0.4936163270697016 9:-0.6835495591484724 10:0.8412758607464845
+8.508637575729951 1:0.6837354268578517 2:-0.435346907350056 3:0.6597448795477736 4:0.8870204157376871 5:-0.6938576101541436 6:0.9199495715292882 7:0.33119640706964293 8:-0.6181273221979411 9:0.12929034268333317 10:0.6855150395247027
+14.369378079132883 1:-0.9489372180887643 2:-0.6577177233364067 3:0.543899463531252 4:0.5411152154119976 5:0.43733244485250733 6:0.5927084968109424 7:0.6100068837998656 8:0.9392735722529637 9:-0.9806701698603073 10:0.3984176141500082
+-6.456944198081549 1:0.8380442392342373 2:0.05166133486184443 3:-0.25864153418691704 4:-0.9506672344106888 5:0.5227275493542325 6:-0.03899736644563956 7:0.7660133053649136 8:-0.9375236703284806 9:-0.37213210747743175 10:0.0560768367274771
+-10.041353112580456 1:0.5293717914660876 2:-0.35874932480194044 3:0.14403824250820763 4:-0.4106496629336782 5:-0.794648717231762 6:-0.4369956159772408 7:0.8273613210141495 8:0.9212255384858874 9:0.00409867676727993 10:-0.23796544184855795
+-6.606325361718908 1:0.2765102732490652 2:0.10184669160432525 3:-0.9406443798496789 4:-0.46661976112717896 5:-0.5836573778289609 6:0.1308554421925976 7:0.05232199712543473 8:-0.4965370542771641 9:-0.3695836654343949 10:0.4874427445939513
+-15.359544879832677 1:-0.8253830145927283 2:0.29683545543963885 3:-0.9790356574071053 4:0.33749594518426473 5:-0.449483349548623 6:0.1740013774913005 7:0.5737323257916764 8:0.20159372721320645 9:-0.1812760896634873 10:-0.17652712339895738
+2.1801769966756845 1:0.3664130766917151 2:-0.1929450967547921 3:-0.7834945448457515 4:-0.03806442314852432 5:-0.6167622313628849 6:0.34919852301325394 7:-0.785891329691004 8:-0.5704062599527768 9:0.9846140894872721 10:-0.548571249100203
+-2.7006646885251415 1:-0.48505178676353067 2:0.06347121974094883 3:-0.3704723119141229 4:0.7407080276548548 5:0.06713252857406937 6:-0.2103524488773294 7:-0.9402467715192988 8:-0.8555624501612784 9:0.6244760190429901 10:-0.9038885681517279
+0.2105613019270259 1:-0.17125223509187282 2:-0.23328463772140529 3:-0.6497773470047024 4:0.33111604806115524 5:0.7944287248398398 6:0.5163977380074081 7:-0.025715995643062595 8:0.11762566041047462 9:0.9938658554834845 10:0.5363394203614278
+-0.6433952980357234 1:-0.905126800719938 2:0.5826442985002787 3:-0.8207546276288018 4:-0.0773547002692121 5:-0.6420058913410687 6:-0.9290787206193325 7:0.21829202840889095 8:-0.7752845890678082 9:0.4533233304372326 10:0.5457315861825041
+5.622874731146287 1:0.5486636398086722 2:-0.21867854114956642 3:0.13260110994566032 4:-0.024868470628895967 5:0.9246597814546305 6:0.07490395250443149 7:-0.21327567620097132 8:-0.33970581204395867 9:-0.19408398882121713 10:0.9757334811378136
+-18.27521356600463 1:-0.489685764918109 2:0.6832314342743568 3:0.9115808714640257 4:-4.680515344936964E-4 5:0.03760860984717218 6:0.4344127744883004 7:-0.30019645809377127 8:-0.48339658188341783 9:-0.5488933834939806 10:-0.4735052851773165
+5.518650144654079 1:-0.16881374315243192 2:0.22747702179774354 3:-0.8555270909193926 4:-0.6914231522703247 5:0.03618437407657238 6:-0.8404831131806643 7:0.16378525699004887 8:-0.333895928854854 9:0.23026574917978326 10:0.9409087845740918
+2.5599738684677646 1:-0.24371170373626905 2:-0.1752613047793694 3:-0.7930324885557696 4:0.17288443448968627 5:0.7233942014077801 6:0.47222694561171963 7:0.7878187692414558 8:-0.6520011755878357 9:-0.9952507460157223 10:-0.32951026378415094
+-8.508663400554862 1:0.9194236423060742 2:0.9517284917259223 3:-0.18723709334016392 4:-0.24913001260985546 5:0.8818286401027424 6:0.13661210218384512 7:-0.40792517201812983 8:-0.33132907984544957 9:-0.49137388288628703 10:-0.3273925353006979
+-10.233439586953153 1:0.0960128812383454 2:-0.8611756848964027 3:0.11807312551418647 4:-0.24570750746947145 5:-0.047826307143366886 6:-0.717269426008625 7:-0.2841658181308486 8:-0.31500935950449516 9:0.23183474949267713 10:-0.512986169560546
+-6.3459370724834265 1:0.9537835418930307 2:0.4598546399405288 3:-0.257013655072986 4:-0.29185820894937575 5:-0.6843688281544562 6:0.8343952028925479 7:-0.9656517094615942 8:-0.447440560943553 9:-0.9510349521362857 10:0.5918946980259567
+1.114406550703455 1:-0.5721838436595965 2:0.1201917297381252 3:-0.5253701290141362 4:-0.5874011312890843 5:0.7893580092022578 6:-0.18012813622584134 7:0.4781905737504004 8:-4.6732390143988667E-4 9:-0.7965374182885014 10:-0.8515444146742359
+8.688243146888663 1:0.2245581140502393 2:-0.0697600364101425 3:-0.7661833153629154 4:-0.2289151515902894 5:-0.5643191391300282 6:0.08069861795512168 7:-0.9670317635091523 8:0.14826752863715287 9:0.9325364047311011 10:0.4071178661803092
+14.896035572185347 1:0.20630949870309911 2:-0.5738578325975092 3:0.5664829389128903 4:0.3732752326637825 5:0.04079303403038881 6:-0.6604984910400766 7:0.15136076091734352 8:-0.6244939282579305 9:-0.5236288549540624 10:0.47284992666739023
+4.396558596072123 1:0.5565602414172521 2:0.1444095747909111 3:0.028227502879770272 4:0.38297378287943773 5:-0.26739745457451725 6:-0.708209627997985 7:0.7604483272526881 8:0.8072075261139096 9:0.11460574885028274 10:-0.07669406807610635
+1.7457141275341528 1:0.3668576517164046 2:-0.5352200081463954 3:0.5853385976871426 4:-0.4482551060006992 5:-0.5676795208498786 6:0.8043295590331514 7:-0.02160829797068753 8:0.42281303847010454 9:0.027894531623162466 10:-0.541120112980032
+-15.334767479922341 1:-0.036676500783341615 2:0.804758241454594 3:-0.0642091078911513 4:0.1402705435750966 5:-0.9215322030628859 6:0.7951173116514345 7:-0.994819896842561 8:0.2382406912119326 9:0.6634166177958731 10:0.7623222578718651
+5.017247792012723 1:-0.5925393497160352 2:0.48506599831456443 3:-0.5079795649118319 4:0.6668553329827696 5:-0.1103174867779837 6:0.7048535526809607 7:-0.9819230894106692 8:0.19609620625274982 9:0.5173985272313828 10:-0.11269849619148875
+6.201510810634532 1:-0.6802942101330738 2:0.898957584078176 3:0.853293387559251 4:0.6089336185656065 5:-0.9352626288322801 6:0.3208583332890447 7:-0.964481544931127 8:-0.8294773786068643 9:-0.8817311989413614 10:0.5165364663580934
+19.174935630244647 1:-0.20026105252200788 2:0.7276178994821614 3:0.7748716685190951 4:-0.7423420145576229 5:0.13147770471985032 6:-0.8382015712894606 7:0.021760992104270294 8:-0.24586987823702944 9:-0.05958177281299326 10:0.47347236224860834
+-14.822152909751189 1:0.7255660700197897 2:-0.22751988933383926 3:-0.08409197084114317 4:0.072750455428638 5:0.1841692073989072 6:-0.33838406658716513 7:-0.44701963574290526 8:0.5031210959133143 9:0.09640858549693743 10:0.9857351194637847
+-6.310082095945472 1:-0.7692076133438608 2:0.8533601511731044 3:0.676268298275629 4:-0.783895030001512 5:-0.8195462819549715 6:0.3963101354895673 7:-0.6254922461977397 8:-0.7521135990258581 9:-0.8032003997516024 10:0.8388672800826487
+8.853802632714807 1:0.46950948246522195 2:-0.6148693581037883 3:0.028739220735170656 4:-0.024281643566285815 5:-0.3495458137792231 6:-0.12347196435522867 7:0.5253894065203333 8:0.5100713458262918 9:0.63975795701667 10:0.08644353314625053
+-10.293714040655924 1:-0.17971950768550893 2:-0.6621720204354751 3:0.888036885802737 4:-0.04977483590350751 5:-0.8964991391283221 6:0.6873490822438724 7:0.42369087852118836 8:0.48972554317650663 9:0.8617233178519317 10:-0.8348331836605276
+0.23985611568891863 1:0.050526696983213215 2:0.8544297176525815 3:0.8586358519997579 4:-0.021299752441110487 5:0.2606696929560939 6:-0.39446486150105997 7:-0.4166234435381613 8:-0.6097643266459343 9:0.46633996256010146 10:-0.22521646199731027
+21.57719950299147 1:-0.5878491135126271 2:0.802134056970349 3:-0.5471017580843434 4:0.6067966843473331 5:-0.691712219323007 6:0.7814323754276735 7:0.31689445927290016 8:-0.1668780061940922 9:0.5285692389527452 10:0.8027091025203246
+-0.7836538830323514 1:0.5766794801558166 2:0.8281463568384935 3:0.5087453132796032 4:0.5212853344036532 5:0.6294700781054074 6:-0.9385097739886943 7:-0.13127371407538302 8:0.9845390503404141 9:-0.7224166213906742 10:-0.11155327354295896
+6.710413649604831 1:-0.6919803228062729 2:-0.6526904017578161 3:-0.34211291948607014 4:0.9094842803341618 5:-0.9454398661995895 6:0.3780766512494227 7:0.5823385348738088 8:0.8817830051841733 9:-0.514843382774189 10:0.32579701113259296
+5.384747201245483 1:-0.9661857672086316 2:-0.519769534339731 3:-0.4466396856529564 4:-0.4370113024678448 5:-0.6397400687811474 6:0.08225309277403725 7:-0.25936524603970756 8:-0.1711463274766858 9:-0.42848099098115755 10:-0.8096854737357237
+7.688509532916731 1:0.3892872094452817 2:-0.13306620868059982 3:-0.932974891205117 4:-0.8921357494146682 5:0.4806996560679244 6:-0.21500288444218696 7:-0.8911268070046585 8:-0.9510264953215406 9:0.1899740993687098 10:-0.43944320580463536
+2.2546997585565296 1:-0.5963883101717473 2:-0.01115153603404151 3:0.8781871380140298 4:0.7736250964135891 5:-0.7325745711528668 6:0.2518631794989008 7:0.5760249284318746 8:0.8690107952725199 9:0.02320853138646095 10:0.08570951531344417
+5.597710012706039 1:-0.5323512235815979 2:0.03366944321271936 3:0.5143537675853551 4:0.28471250955283445 5:0.4012202634439719 6:0.12032039285431151 7:-0.08108716844967812 8:0.30231384371011294 9:0.03259115565303028 10:0.9567467516929173
+-12.977848725392104 1:-0.5908891529017144 2:-0.7678208242918028 3:0.8512434510178621 4:-0.14910196410347298 5:0.6250260229199651 6:0.5393378705290228 7:-0.9573580597625002 8:-0.864881502860934 9:0.4175735160503429 10:0.4872169215922426
+10.35887243981476 1:-0.09126023790482862 2:0.18852634121926526 3:-0.13523918100503107 4:0.8333842692409983 5:-0.6015442103644761 6:0.5347736461652235 7:-0.823489760471118 8:0.5562688292037381 9:-0.807478561291906 10:-0.666881464988351
+0.4250502150408626 1:0.7771717566171905 2:-0.8729202752916785 3:-0.25782888805127024 4:-0.13605474993771205 5:0.5911781118120025 6:-0.8444023967853633 7:0.6787302541469229 8:-0.5444299313083194 9:0.356121883138657 10:-0.8845333845080687
+-0.8743487925900991 1:-0.9087681208947878 2:-0.292625136739453 3:-0.35113758823291774 4:-0.705933223571676 5:-0.6882289471031144 6:0.8350131255297044 7:-0.7659016065609232 8:0.11400114955653207 9:-0.9466143658505732 10:-0.5033643125229932
+-5.615143641864686 1:-0.6688289820084299 2:-0.4623159855015393 3:0.012827807007503855 4:-0.44521264878006117 5:-0.5563111031201406 6:-0.6065295981983794 7:0.3806712426786838 8:-0.11317152118817408 9:0.507896127467435 10:-0.8487801189674464
+-0.1829397047693725 1:0.09377558075225512 2:0.5774384503027374 3:-0.7104684187448009 4:-0.07285914169135976 5:-0.8797920488335114 6:0.6099615504974201 7:-0.8047440624324915 8:-0.6877856114263066 9:0.5843004021777447 10:0.5190581455348131
+18.479680552020344 1:0.9635517137863321 2:0.9954507816218203 3:0.11959899129360774 4:0.3753283274192787 5:-0.9386713095183621 6:0.0926833703812433 7:0.48003949462701323 8:0.9432769781973132 9:-0.9637036991931129 10:-0.4064407447273508
+1.3850645873427236 1:0.14476184437006356 2:-0.11280617018445871 3:-0.4385084538142101 4:-0.5961619435136434 5:0.419554626795412 6:-0.5047767472761191 7:0.457180284958592 8:-0.9129360314541999 9:-0.6320022059786656 10:-0.44989608519659363
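
The labeled points above are in LIBSVM format: a numeric target followed by 1-indexed index:value feature pairs, here with ten features per row and continuous labels, which matches the sample linear-regression dataset shipped under data/mllib/ (the file header for this hunk falls outside this excerpt, so the exact path used below is an assumption). A minimal sketch of reading such a file with PySpark's built-in "libsvm" data source and fitting a regularized linear model follows; the path and parameters are illustrative, not taken from the distribution's own example scripts.

from pyspark.sql import SparkSession
from pyspark.ml.regression import LinearRegression

spark = SparkSession.builder.appName("libsvm-regression-sample").getOrCreate()

# Load the LIBSVM-formatted rows shown above into a DataFrame with
# 'label' (double) and 'features' (sparse vector) columns.
# The path is assumed; point it at wherever the sample file lives.
data = spark.read.format("libsvm").load("data/mllib/sample_linear_regression_data.txt")

# Fit an elastic-net-regularized least-squares model as a quick check
# that the ten-feature rows parse as expected.
lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
model = lr.fit(data)
print("coefficients:", model.coefficients)
print("intercept:", model.intercept)

spark.stop()
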
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_movielens_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_movielens_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f0eee19875f763186338fbc53673ef8725071856
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_movielens_data.txt
@@ -0,0 +1,1501 @@
+0::2::3
+0::3::1
+0::5::2
+0::9::4
+0::11::1
+0::12::2
+0::15::1
+0::17::1
+0::19::1
+0::21::1
+0::23::1
+0::26::3
+0::27::1
+0::28::1
+0::29::1
+0::30::1
+0::31::1
+0::34::1
+0::37::1
+0::41::2
+0::44::1
+0::45::2
+0::46::1
+0::47::1
+0::48::1
+0::50::1
+0::51::1
+0::54::1
+0::55::1
+0::59::2
+0::61::2
+0::64::1
+0::67::1
+0::68::1
+0::69::1
+0::71::1
+0::72::1
+0::77::2
+0::79::1
+0::83::1
+0::87::1
+0::89::2
+0::91::3
+0::92::4
+0::94::1
+0::95::2
+0::96::1
+0::98::1
+0::99::1
+1::2::2
+1::3::1
+1::4::2
+1::6::1
+1::9::3
+1::12::1
+1::13::1
+1::14::1
+1::16::1
+1::19::1
+1::21::3
+1::27::1
+1::28::3
+1::33::1
+1::36::2
+1::37::1
+1::40::1
+1::41::2
+1::43::1
+1::44::1
+1::47::1
+1::50::1
+1::54::1
+1::56::2
+1::57::1
+1::58::1
+1::60::1
+1::62::4
+1::63::1
+1::67::1
+1::68::4
+1::70::2
+1::72::1
+1::73::1
+1::74::2
+1::76::1
+1::77::3
+1::78::1
+1::81::1
+1::82::1
+1::85::3
+1::86::2
+1::88::2
+1::91::1
+1::92::2
+1::93::1
+1::94::2
+1::96::1
+1::97::1
+2::4::3
+2::6::1
+2::8::5
+2::9::1
+2::10::1
+2::12::3
+2::13::1
+2::15::2
+2::18::2
+2::19::4
+2::22::1
+2::26::1
+2::28::1
+2::34::4
+2::35::1
+2::37::5
+2::38::1
+2::39::5
+2::40::4
+2::47::1
+2::50::1
+2::52::2
+2::54::1
+2::55::1
+2::57::2
+2::58::2
+2::59::1
+2::61::1
+2::62::1
+2::64::1
+2::65::1
+2::66::3
+2::68::1
+2::71::3
+2::76::1
+2::77::1
+2::78::1
+2::80::1
+2::83::5
+2::85::1
+2::87::2
+2::88::1
+2::89::4
+2::90::1
+2::92::4
+2::93::5
+3::0::1
+3::1::1
+3::2::1
+3::7::3
+3::8::3
+3::9::1
+3::14::1
+3::15::1
+3::16::1
+3::18::4
+3::19::1
+3::24::3
+3::26::1
+3::29::3
+3::33::1
+3::34::3
+3::35::1
+3::36::3
+3::37::1
+3::38::2
+3::43::1
+3::44::1
+3::46::1
+3::47::1
+3::51::5
+3::52::3
+3::56::1
+3::58::1
+3::60::3
+3::62::1
+3::65::2
+3::66::1
+3::67::1
+3::68::2
+3::70::1
+3::72::2
+3::76::3
+3::79::3
+3::80::4
+3::81::1
+3::83::1
+3::84::1
+3::86::1
+3::87::2
+3::88::4
+3::89::1
+3::91::1
+3::94::3
+4::1::1
+4::6::1
+4::8::1
+4::9::1
+4::10::1
+4::11::1
+4::12::1
+4::13::1
+4::14::2
+4::15::1
+4::17::1
+4::20::1
+4::22::1
+4::23::1
+4::24::1
+4::29::4
+4::30::1
+4::31::1
+4::34::1
+4::35::1
+4::36::1
+4::39::2
+4::40::3
+4::41::4
+4::43::2
+4::44::1
+4::45::1
+4::46::1
+4::47::1
+4::49::2
+4::50::1
+4::51::1
+4::52::4
+4::54::1
+4::55::1
+4::60::3
+4::61::1
+4::62::4
+4::63::3
+4::65::1
+4::67::2
+4::69::1
+4::70::4
+4::71::1
+4::73::1
+4::78::1
+4::84::1
+4::85::1
+4::87::3
+4::88::3
+4::89::2
+4::96::1
+4::97::1
+4::98::1
+4::99::1
+5::0::1
+5::1::1
+5::4::1
+5::5::1
+5::8::1
+5::9::3
+5::10::2
+5::13::3
+5::15::1
+5::19::1
+5::20::3
+5::21::2
+5::23::3
+5::27::1
+5::28::1
+5::29::1
+5::31::1
+5::36::3
+5::38::2
+5::39::1
+5::42::1
+5::48::3
+5::49::4
+5::50::3
+5::51::1
+5::52::1
+5::54::1
+5::55::5
+5::56::3
+5::58::1
+5::60::1
+5::61::1
+5::64::3
+5::65::2
+5::68::4
+5::70::1
+5::71::1
+5::72::1
+5::74::1
+5::79::1
+5::81::2
+5::84::1
+5::85::1
+5::86::1
+5::88::1
+5::90::4
+5::91::2
+5::95::2
+5::99::1
+6::0::1
+6::1::1
+6::2::3
+6::5::1
+6::6::1
+6::9::1
+6::10::1
+6::15::2
+6::16::2
+6::17::1
+6::18::1
+6::20::1
+6::21::1
+6::22::1
+6::24::1
+6::25::5
+6::26::1
+6::28::1
+6::30::1
+6::33::1
+6::38::1
+6::39::1
+6::43::4
+6::44::1
+6::45::1
+6::48::1
+6::49::1
+6::50::1
+6::53::1
+6::54::1
+6::55::1
+6::56::1
+6::58::4
+6::59::1
+6::60::1
+6::61::3
+6::63::3
+6::66::1
+6::67::3
+6::68::1
+6::69::1
+6::71::2
+6::73::1
+6::75::1
+6::77::1
+6::79::1
+6::81::1
+6::84::1
+6::85::3
+6::86::1
+6::87::1
+6::88::1
+6::89::1
+6::91::2
+6::94::1
+6::95::2
+6::96::1
+7::1::1
+7::2::2
+7::3::1
+7::4::1
+7::7::1
+7::10::1
+7::11::2
+7::14::2
+7::15::1
+7::16::1
+7::18::1
+7::21::1
+7::22::1
+7::23::1
+7::25::5
+7::26::1
+7::29::4
+7::30::1
+7::31::3
+7::32::1
+7::33::1
+7::35::1
+7::37::2
+7::39::3
+7::40::2
+7::42::2
+7::44::1
+7::45::2
+7::47::4
+7::48::1
+7::49::1
+7::53::1
+7::54::1
+7::55::1
+7::56::1
+7::59::1
+7::61::2
+7::62::3
+7::63::2
+7::66::1
+7::67::3
+7::74::1
+7::75::1
+7::76::3
+7::77::1
+7::81::1
+7::82::1
+7::84::2
+7::85::4
+7::86::1
+7::92::2
+7::96::1
+7::97::1
+7::98::1
+8::0::1
+8::2::4
+8::3::2
+8::4::2
+8::5::1
+8::7::1
+8::9::1
+8::11::1
+8::15::1
+8::18::1
+8::19::1
+8::21::1
+8::29::5
+8::31::3
+8::33::1
+8::35::1
+8::36::1
+8::40::2
+8::44::1
+8::45::1
+8::50::1
+8::51::1
+8::52::5
+8::53::5
+8::54::1
+8::55::1
+8::56::1
+8::58::4
+8::60::3
+8::62::4
+8::64::1
+8::67::3
+8::69::1
+8::71::1
+8::72::3
+8::77::3
+8::78::1
+8::79::1
+8::83::1
+8::85::5
+8::86::1
+8::88::1
+8::90::1
+8::92::2
+8::95::4
+8::96::3
+8::97::1
+8::98::1
+8::99::1
+9::2::3
+9::3::1
+9::4::1
+9::5::1
+9::6::1
+9::7::5
+9::9::1
+9::12::1
+9::14::3
+9::15::1
+9::19::1
+9::21::1
+9::22::1
+9::24::1
+9::25::1
+9::26::1
+9::30::3
+9::32::4
+9::35::2
+9::36::2
+9::37::2
+9::38::1
+9::39::1
+9::43::3
+9::49::5
+9::50::3
+9::53::1
+9::54::1
+9::58::1
+9::59::1
+9::60::1
+9::61::1
+9::63::3
+9::64::3
+9::68::1
+9::69::1
+9::70::3
+9::71::1
+9::73::2
+9::75::1
+9::77::2
+9::81::2
+9::82::1
+9::83::1
+9::84::1
+9::86::1
+9::87::4
+9::88::1
+9::90::3
+9::94::2
+9::95::3
+9::97::2
+9::98::1
+10::0::3
+10::2::4
+10::4::3
+10::7::1
+10::8::1
+10::10::1
+10::13::2
+10::14::1
+10::16::2
+10::17::1
+10::18::1
+10::21::1
+10::22::1
+10::24::1
+10::25::3
+10::28::1
+10::35::1
+10::36::1
+10::37::1
+10::38::1
+10::39::1
+10::40::4
+10::41::2
+10::42::3
+10::43::1
+10::49::3
+10::50::1
+10::51::1
+10::52::1
+10::55::2
+10::56::1
+10::58::1
+10::63::1
+10::66::1
+10::67::2
+10::68::1
+10::75::1
+10::77::1
+10::79::1
+10::86::1
+10::89::3
+10::90::1
+10::97::1
+10::98::1
+11::0::1
+11::6::2
+11::9::1
+11::10::1
+11::11::1
+11::12::1
+11::13::4
+11::16::1
+11::18::5
+11::19::4
+11::20::1
+11::21::1
+11::22::1
+11::23::5
+11::25::1
+11::27::5
+11::30::5
+11::32::5
+11::35::3
+11::36::2
+11::37::2
+11::38::4
+11::39::1
+11::40::1
+11::41::1
+11::43::2
+11::45::1
+11::47::1
+11::48::5
+11::50::4
+11::51::3
+11::59::1
+11::61::1
+11::62::1
+11::64::1
+11::66::4
+11::67::1
+11::69::5
+11::70::1
+11::71::3
+11::72::3
+11::75::3
+11::76::1
+11::77::1
+11::78::1
+11::79::5
+11::80::3
+11::81::4
+11::82::1
+11::86::1
+11::88::1
+11::89::1
+11::90::4
+11::94::2
+11::97::3
+11::99::1
+12::2::1
+12::4::1
+12::6::1
+12::7::3
+12::8::1
+12::14::1
+12::15::2
+12::16::4
+12::17::5
+12::18::2
+12::21::1
+12::22::2
+12::23::3
+12::24::1
+12::25::1
+12::27::5
+12::30::2
+12::31::4
+12::35::5
+12::38::1
+12::41::1
+12::44::2
+12::45::1
+12::50::4
+12::51::1
+12::52::1
+12::53::1
+12::54::1
+12::56::2
+12::57::1
+12::60::1
+12::63::1
+12::64::5
+12::66::3
+12::67::1
+12::70::1
+12::72::1
+12::74::1
+12::75::1
+12::77::1
+12::78::1
+12::79::3
+12::82::2
+12::83::1
+12::84::1
+12::85::1
+12::86::1
+12::87::1
+12::88::1
+12::91::3
+12::92::1
+12::94::4
+12::95::2
+12::96::1
+12::98::2
+13::0::1
+13::3::1
+13::4::2
+13::5::1
+13::6::1
+13::12::1
+13::14::2
+13::15::1
+13::17::1
+13::18::3
+13::20::1
+13::21::1
+13::22::1
+13::26::1
+13::27::1
+13::29::3
+13::31::1
+13::33::1
+13::40::2
+13::43::2
+13::44::1
+13::45::1
+13::49::1
+13::51::1
+13::52::2
+13::53::3
+13::54::1
+13::62::1
+13::63::2
+13::64::1
+13::68::1
+13::71::1
+13::72::3
+13::73::1
+13::74::3
+13::77::2
+13::78::1
+13::79::2
+13::83::3
+13::85::1
+13::86::1
+13::87::2
+13::88::2
+13::90::1
+13::93::4
+13::94::1
+13::98::1
+13::99::1
+14::1::1
+14::3::3
+14::4::1
+14::5::1
+14::6::1
+14::7::1
+14::9::1
+14::10::1
+14::11::1
+14::12::1
+14::13::1
+14::14::3
+14::15::1
+14::16::1
+14::17::1
+14::20::1
+14::21::1
+14::24::1
+14::25::2
+14::27::1
+14::28::1
+14::29::5
+14::31::3
+14::34::1
+14::36::1
+14::37::2
+14::39::2
+14::40::1
+14::44::1
+14::45::1
+14::47::3
+14::48::1
+14::49::1
+14::51::1
+14::52::5
+14::53::3
+14::54::1
+14::55::1
+14::56::1
+14::62::4
+14::63::5
+14::67::3
+14::68::1
+14::69::3
+14::71::1
+14::72::4
+14::73::1
+14::76::5
+14::79::1
+14::82::1
+14::83::1
+14::88::1
+14::93::3
+14::94::1
+14::95::2
+14::96::4
+14::98::1
+15::0::1
+15::1::4
+15::2::1
+15::5::2
+15::6::1
+15::7::1
+15::13::1
+15::14::1
+15::15::1
+15::17::2
+15::19::2
+15::22::2
+15::23::2
+15::25::1
+15::26::3
+15::27::1
+15::28::2
+15::29::1
+15::32::1
+15::33::2
+15::34::1
+15::35::2
+15::36::1
+15::37::1
+15::39::1
+15::42::1
+15::46::5
+15::48::2
+15::50::2
+15::51::1
+15::52::1
+15::58::1
+15::62::1
+15::64::3
+15::65::2
+15::72::1
+15::73::1
+15::74::1
+15::79::1
+15::80::1
+15::81::1
+15::82::2
+15::85::1
+15::87::1
+15::91::2
+15::96::1
+15::97::1
+15::98::3
+16::2::1
+16::5::3
+16::6::2
+16::7::1
+16::9::1
+16::12::1
+16::14::1
+16::15::1
+16::19::1
+16::21::2
+16::29::4
+16::30::2
+16::32::1
+16::34::1
+16::36::1
+16::38::1
+16::46::1
+16::47::3
+16::48::1
+16::49::1
+16::50::1
+16::51::5
+16::54::5
+16::55::1
+16::56::2
+16::57::1
+16::60::1
+16::63::2
+16::65::1
+16::67::1
+16::72::1
+16::74::1
+16::80::1
+16::81::1
+16::82::1
+16::85::5
+16::86::1
+16::90::5
+16::91::1
+16::93::1
+16::94::3
+16::95::2
+16::96::3
+16::98::3
+16::99::1
+17::2::1
+17::3::1
+17::6::1
+17::10::4
+17::11::1
+17::13::2
+17::17::5
+17::19::1
+17::20::5
+17::22::4
+17::28::1
+17::29::1
+17::33::1
+17::34::1
+17::35::2
+17::37::1
+17::38::1
+17::45::1
+17::46::5
+17::47::1
+17::49::3
+17::51::1
+17::55::5
+17::56::3
+17::57::1
+17::58::1
+17::59::1
+17::60::1
+17::63::1
+17::66::1
+17::68::4
+17::69::1
+17::70::1
+17::72::1
+17::73::3
+17::78::1
+17::79::1
+17::82::2
+17::84::1
+17::90::5
+17::91::3
+17::92::1
+17::93::1
+17::94::4
+17::95::2
+17::97::1
+18::1::1
+18::4::3
+18::5::2
+18::6::1
+18::7::1
+18::10::1
+18::11::4
+18::12::2
+18::13::1
+18::15::1
+18::18::1
+18::20::1
+18::21::2
+18::22::1
+18::23::2
+18::25::1
+18::26::1
+18::27::1
+18::28::5
+18::29::1
+18::31::1
+18::32::1
+18::36::1
+18::38::5
+18::39::5
+18::40::1
+18::42::1
+18::43::1
+18::44::4
+18::46::1
+18::47::1
+18::48::1
+18::51::2
+18::55::1
+18::56::1
+18::57::1
+18::62::1
+18::63::1
+18::66::3
+18::67::1
+18::70::1
+18::75::1
+18::76::3
+18::77::1
+18::80::3
+18::81::3
+18::82::1
+18::83::5
+18::84::1
+18::97::1
+18::98::1
+18::99::2
+19::0::1
+19::1::1
+19::2::1
+19::4::1
+19::6::2
+19::11::1
+19::12::1
+19::14::1
+19::23::1
+19::26::1
+19::31::1
+19::32::4
+19::33::1
+19::34::1
+19::37::1
+19::38::1
+19::41::1
+19::43::1
+19::45::1
+19::48::1
+19::49::1
+19::50::2
+19::53::2
+19::54::3
+19::55::1
+19::56::2
+19::58::1
+19::61::1
+19::62::1
+19::63::1
+19::64::1
+19::65::1
+19::69::2
+19::72::1
+19::74::3
+19::76::1
+19::78::1
+19::79::1
+19::81::1
+19::82::1
+19::84::1
+19::86::1
+19::87::2
+19::90::4
+19::93::1
+19::94::4
+19::95::2
+19::96::1
+19::98::4
+20::0::1
+20::1::1
+20::2::2
+20::4::2
+20::6::1
+20::8::1
+20::12::1
+20::21::2
+20::22::5
+20::24::2
+20::25::1
+20::26::1
+20::29::2
+20::30::2
+20::32::2
+20::39::1
+20::40::1
+20::41::2
+20::45::2
+20::48::1
+20::50::1
+20::51::3
+20::53::3
+20::55::1
+20::57::2
+20::60::1
+20::61::1
+20::64::1
+20::66::1
+20::70::2
+20::72::1
+20::73::2
+20::75::4
+20::76::1
+20::77::4
+20::78::1
+20::79::1
+20::84::2
+20::85::2
+20::88::3
+20::89::1
+20::90::3
+20::91::1
+20::92::2
+20::93::1
+20::94::4
+20::97::1
+21::0::1
+21::2::4
+21::3::1
+21::7::2
+21::11::1
+21::12::1
+21::13::1
+21::14::3
+21::17::1
+21::19::1
+21::20::1
+21::21::1
+21::22::1
+21::23::1
+21::24::1
+21::27::1
+21::29::5
+21::30::2
+21::38::1
+21::40::2
+21::43::3
+21::44::1
+21::45::1
+21::46::1
+21::48::1
+21::51::1
+21::53::5
+21::54::1
+21::55::1
+21::56::1
+21::58::3
+21::59::3
+21::64::1
+21::66::1
+21::68::1
+21::71::1
+21::73::1
+21::74::4
+21::80::1
+21::81::1
+21::83::1
+21::84::1
+21::85::3
+21::87::4
+21::89::2
+21::92::2
+21::96::3
+21::99::1
+22::0::1
+22::3::2
+22::5::2
+22::6::2
+22::9::1
+22::10::1
+22::11::1
+22::13::1
+22::14::1
+22::16::1
+22::18::3
+22::19::1
+22::22::5
+22::25::1
+22::26::1
+22::29::3
+22::30::5
+22::32::4
+22::33::1
+22::35::1
+22::36::3
+22::37::1
+22::40::1
+22::41::3
+22::44::1
+22::45::2
+22::48::1
+22::51::5
+22::55::1
+22::56::2
+22::60::3
+22::61::1
+22::62::4
+22::63::1
+22::65::1
+22::66::1
+22::68::4
+22::69::4
+22::70::3
+22::71::1
+22::74::5
+22::75::5
+22::78::1
+22::80::3
+22::81::1
+22::82::1
+22::84::1
+22::86::1
+22::87::3
+22::88::5
+22::90::2
+22::92::3
+22::95::2
+22::96::2
+22::98::4
+22::99::1
+23::0::1
+23::2::1
+23::4::1
+23::6::2
+23::10::4
+23::12::1
+23::13::4
+23::14::1
+23::15::1
+23::18::4
+23::22::2
+23::23::4
+23::24::1
+23::25::1
+23::26::1
+23::27::5
+23::28::1
+23::29::1
+23::30::4
+23::32::5
+23::33::2
+23::36::3
+23::37::1
+23::38::1
+23::39::1
+23::43::1
+23::48::5
+23::49::5
+23::50::4
+23::53::1
+23::55::5
+23::57::1
+23::59::1
+23::60::1
+23::61::1
+23::64::4
+23::65::5
+23::66::2
+23::67::1
+23::68::3
+23::69::1
+23::72::1
+23::73::3
+23::77::1
+23::82::2
+23::83::1
+23::84::1
+23::85::1
+23::87::3
+23::88::1
+23::95::2
+23::97::1
+24::4::1
+24::6::3
+24::7::1
+24::10::2
+24::12::1
+24::15::1
+24::19::1
+24::24::1
+24::27::3
+24::30::5
+24::31::1
+24::32::3
+24::33::1
+24::37::1
+24::39::1
+24::40::1
+24::42::1
+24::43::3
+24::45::2
+24::46::1
+24::47::1
+24::48::1
+24::49::1
+24::50::1
+24::52::5
+24::57::1
+24::59::4
+24::63::4
+24::65::1
+24::66::1
+24::67::1
+24::68::3
+24::69::5
+24::71::1
+24::72::4
+24::77::4
+24::78::1
+24::80::1
+24::82::1
+24::84::1
+24::86::1
+24::87::1
+24::88::2
+24::89::1
+24::90::5
+24::91::1
+24::92::1
+24::94::2
+24::95::1
+24::96::5
+24::98::1
+24::99::1
+25::1::3
+25::2::1
+25::7::1
+25::9::1
+25::12::3
+25::16::3
+25::17::1
+25::18::1
+25::20::1
+25::22::1
+25::23::1
+25::26::2
+25::29::1
+25::30::1
+25::31::2
+25::33::4
+25::34::3
+25::35::2
+25::36::1
+25::37::1
+25::40::1
+25::41::1
+25::43::1
+25::47::4
+25::50::1
+25::51::1
+25::53::1
+25::56::1
+25::58::2
+25::64::2
+25::67::2
+25::68::1
+25::70::1
+25::71::4
+25::73::1
+25::74::1
+25::76::1
+25::79::1
+25::82::1
+25::84::2
+25::85::1
+25::91::3
+25::92::1
+25::94::1
+25::95::1
+25::97::2
+26::0::1
+26::1::1
+26::2::1
+26::3::1
+26::4::4
+26::5::2
+26::6::3
+26::7::5
+26::13::3
+26::14::1
+26::16::1
+26::18::3
+26::20::1
+26::21::3
+26::22::5
+26::23::5
+26::24::5
+26::27::1
+26::31::1
+26::35::1
+26::36::4
+26::40::1
+26::44::1
+26::45::2
+26::47::1
+26::48::1
+26::49::3
+26::50::2
+26::52::1
+26::54::4
+26::55::1
+26::57::3
+26::58::1
+26::61::1
+26::62::2
+26::66::1
+26::68::4
+26::71::1
+26::73::4
+26::76::1
+26::81::3
+26::85::1
+26::86::3
+26::88::5
+26::91::1
+26::94::5
+26::95::1
+26::96::1
+26::97::1
+27::0::1
+27::9::1
+27::10::1
+27::18::4
+27::19::3
+27::20::1
+27::22::2
+27::24::2
+27::25::1
+27::27::3
+27::28::1
+27::29::1
+27::31::1
+27::33::3
+27::40::1
+27::42::1
+27::43::1
+27::44::3
+27::45::1
+27::51::3
+27::52::1
+27::55::3
+27::57::1
+27::59::1
+27::60::1
+27::61::1
+27::64::1
+27::66::3
+27::68::1
+27::70::1
+27::71::2
+27::72::1
+27::75::3
+27::78::1
+27::80::3
+27::82::1
+27::83::3
+27::86::1
+27::87::2
+27::90::1
+27::91::1
+27::92::1
+27::93::1
+27::94::2
+27::95::1
+27::98::1
+28::0::3
+28::1::1
+28::2::4
+28::3::1
+28::6::1
+28::7::1
+28::12::5
+28::13::2
+28::14::1
+28::15::1
+28::17::1
+28::19::3
+28::20::1
+28::23::3
+28::24::3
+28::27::1
+28::29::1
+28::33::1
+28::34::1
+28::36::1
+28::38::2
+28::39::2
+28::44::1
+28::45::1
+28::49::4
+28::50::1
+28::52::1
+28::54::1
+28::56::1
+28::57::3
+28::58::1
+28::59::1
+28::60::1
+28::62::3
+28::63::1
+28::65::1
+28::75::1
+28::78::1
+28::81::5
+28::82::4
+28::83::1
+28::85::1
+28::88::2
+28::89::4
+28::90::1
+28::92::5
+28::94::1
+28::95::2
+28::98::1
+28::99::1
+29::3::1
+29::4::1
+29::5::1
+29::7::2
+29::9::1
+29::10::3
+29::11::1
+29::13::3
+29::14::1
+29::15::1
+29::17::3
+29::19::3
+29::22::3
+29::23::4
+29::25::1
+29::29::1
+29::31::1
+29::32::4
+29::33::2
+29::36::2
+29::38::3
+29::39::1
+29::42::1
+29::46::5
+29::49::3
+29::51::2
+29::59::1
+29::61::1
+29::62::1
+29::67::1
+29::68::3
+29::69::1
+29::70::1
+29::74::1
+29::75::1
+29::79::2
+29::80::1
+29::81::2
+29::83::1
+29::85::1
+29::86::1
+29::90::4
+29::93::1
+29::94::4
+29::97::1
+29::99::1
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_multiclass_classification_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_multiclass_classification_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a0d7f90113919c88d893b361d485d53916a2706f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_multiclass_classification_data.txt
@@ -0,0 +1,150 @@
+1 1:-0.222222 2:0.5 3:-0.762712 4:-0.833333 
+1 1:-0.555556 2:0.25 3:-0.864407 4:-0.916667 
+1 1:-0.722222 2:-0.166667 3:-0.864407 4:-0.833333 
+1 1:-0.722222 2:0.166667 3:-0.694915 4:-0.916667 
+0 1:0.166667 2:-0.416667 3:0.457627 4:0.5 
+1 1:-0.833333 3:-0.864407 4:-0.916667 
+2 1:-1.32455e-07 2:-0.166667 3:0.220339 4:0.0833333 
+2 1:-1.32455e-07 2:-0.333333 3:0.0169491 4:-4.03573e-08 
+1 1:-0.5 2:0.75 3:-0.830508 4:-1 
+0 1:0.611111 3:0.694915 4:0.416667 
+0 1:0.222222 2:-0.166667 3:0.423729 4:0.583333 
+1 1:-0.722222 2:-0.166667 3:-0.864407 4:-1 
+1 1:-0.5 2:0.166667 3:-0.864407 4:-0.916667 
+2 1:-0.222222 2:-0.333333 3:0.0508474 4:-4.03573e-08 
+2 1:-0.0555556 2:-0.833333 3:0.0169491 4:-0.25 
+2 1:-0.166667 2:-0.416667 3:-0.0169491 4:-0.0833333 
+1 1:-0.944444 3:-0.898305 4:-0.916667 
+2 1:-0.277778 2:-0.583333 3:-0.0169491 4:-0.166667 
+0 1:0.111111 2:-0.333333 3:0.38983 4:0.166667 
+2 1:-0.222222 2:-0.166667 3:0.0847457 4:-0.0833333 
+0 1:0.166667 2:-0.333333 3:0.559322 4:0.666667 
+1 1:-0.611111 2:0.0833333 3:-0.864407 4:-0.916667 
+2 1:-0.333333 2:-0.583333 3:0.0169491 4:-4.03573e-08 
+0 1:0.555555 2:-0.166667 3:0.661017 4:0.666667 
+2 1:0.166667 3:0.186441 4:0.166667 
+2 1:0.111111 2:-0.75 3:0.152542 4:-4.03573e-08 
+2 1:0.166667 2:-0.25 3:0.118644 4:-4.03573e-08 
+0 1:-0.0555556 2:-0.833333 3:0.355932 4:0.166667 
+0 1:-0.277778 2:-0.333333 3:0.322034 4:0.583333 
+2 1:-0.222222 2:-0.5 3:-0.152542 4:-0.25 
+2 1:-0.111111 3:0.288136 4:0.416667 
+2 1:-0.0555556 2:-0.25 3:0.186441 4:0.166667 
+2 1:0.333333 2:-0.166667 3:0.355932 4:0.333333 
+1 1:-0.611111 2:0.25 3:-0.898305 4:-0.833333 
+0 1:0.166667 2:-0.333333 3:0.559322 4:0.75 
+0 1:0.111111 2:-0.25 3:0.559322 4:0.416667 
+0 1:0.833333 2:-0.166667 3:0.898305 4:0.666667 
+2 1:-0.277778 2:-0.166667 3:0.186441 4:0.166667 
+0 1:-0.666667 2:-0.583333 3:0.186441 4:0.333333 
+1 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1 
+1 1:-0.166667 2:0.666667 3:-0.932203 4:-0.916667 
+0 1:0.0555554 2:-0.333333 3:0.288136 4:0.416667 
+1 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1 
+1 1:-0.833333 2:0.166667 3:-0.864407 4:-0.833333 
+0 1:0.0555554 2:0.166667 3:0.491525 4:0.833333 
+0 1:0.722222 2:-0.333333 3:0.728813 4:0.5 
+2 1:-0.166667 2:-0.416667 3:0.0508474 4:-0.25 
+2 1:0.5 3:0.254237 4:0.0833333 
+0 1:0.111111 2:-0.583333 3:0.355932 4:0.5 
+1 1:-0.944444 2:-0.166667 3:-0.898305 4:-0.916667 
+2 1:0.277778 2:-0.25 3:0.220339 4:-4.03573e-08 
+0 1:0.666667 2:-0.25 3:0.79661 4:0.416667 
+0 1:0.111111 2:0.0833333 3:0.694915 4:1 
+0 1:0.444444 3:0.59322 4:0.833333 
+2 1:-0.0555556 2:0.166667 3:0.186441 4:0.25 
+1 1:-0.833333 2:0.333333 3:-1 4:-0.916667 
+1 1:-0.555556 2:0.416667 3:-0.830508 4:-0.75 
+2 1:-0.333333 2:-0.5 3:0.152542 4:-0.0833333 
+1 1:-1 2:-0.166667 3:-0.966102 4:-1 
+1 1:-0.333333 2:0.25 3:-0.898305 4:-0.916667 
+2 1:0.388889 2:-0.333333 3:0.288136 4:0.0833333 
+2 1:0.277778 2:-0.166667 3:0.152542 4:0.0833333 
+0 1:0.333333 2:0.0833333 3:0.59322 4:0.666667 
+1 1:-0.777778 3:-0.79661 4:-0.916667 
+1 1:-0.444444 2:0.416667 3:-0.830508 4:-0.916667 
+0 1:0.222222 2:-0.166667 3:0.627119 4:0.75 
+1 1:-0.555556 2:0.5 3:-0.79661 4:-0.916667 
+1 1:-0.555556 2:0.5 3:-0.694915 4:-0.75 
+2 1:-1.32455e-07 2:-0.25 3:0.254237 4:0.0833333 
+1 1:-0.5 2:0.25 3:-0.830508 4:-0.916667 
+0 1:0.166667 3:0.457627 4:0.833333 
+2 1:0.444444 2:-0.0833334 3:0.322034 4:0.166667 
+0 1:0.111111 2:0.166667 3:0.559322 4:0.916667 
+1 1:-0.611111 2:0.25 3:-0.79661 4:-0.583333 
+0 1:0.388889 3:0.661017 4:0.833333 
+1 1:-0.722222 2:0.166667 3:-0.79661 4:-0.916667 
+1 1:-0.722222 2:-0.0833334 3:-0.79661 4:-0.916667 
+1 1:-0.555556 2:0.166667 3:-0.830508 4:-0.916667 
+2 1:-0.666667 2:-0.666667 3:-0.220339 4:-0.25 
+2 1:-0.611111 2:-0.75 3:-0.220339 4:-0.25 
+2 1:0.0555554 2:-0.833333 3:0.186441 4:0.166667 
+0 1:-0.166667 2:-0.416667 3:0.38983 4:0.5 
+0 1:0.611111 2:0.333333 3:0.728813 4:1 
+2 1:0.0555554 2:-0.25 3:0.118644 4:-4.03573e-08 
+1 1:-0.666667 2:-0.166667 3:-0.864407 4:-0.916667 
+1 1:-0.833333 2:-0.0833334 3:-0.830508 4:-0.916667 
+0 1:0.611111 2:-0.166667 3:0.627119 4:0.25 
+0 1:0.888889 2:0.5 3:0.932203 4:0.75 
+2 1:0.222222 2:-0.333333 3:0.220339 4:0.166667 
+1 1:-0.555556 2:0.25 3:-0.864407 4:-0.833333 
+0 1:-1.32455e-07 2:-0.166667 3:0.322034 4:0.416667 
+0 1:-1.32455e-07 2:-0.5 3:0.559322 4:0.0833333 
+1 1:-0.611111 3:-0.932203 4:-0.916667 
+1 1:-0.333333 2:0.833333 3:-0.864407 4:-0.916667 
+0 1:-0.166667 2:-0.333333 3:0.38983 4:0.916667 
+2 1:-0.333333 2:-0.666667 3:-0.0847458 4:-0.25 
+2 1:-0.0555556 2:-0.416667 3:0.38983 4:0.25 
+1 1:-0.388889 2:0.416667 3:-0.830508 4:-0.916667 
+0 1:0.444444 2:-0.0833334 3:0.38983 4:0.833333 
+1 1:-0.611111 2:0.333333 3:-0.864407 4:-0.916667 
+0 1:0.111111 2:-0.416667 3:0.322034 4:0.416667 
+0 1:0.166667 2:-0.0833334 3:0.525424 4:0.416667 
+2 1:0.333333 2:-0.0833334 3:0.152542 4:0.0833333 
+0 1:-0.0555556 2:-0.166667 3:0.288136 4:0.416667 
+0 1:-0.166667 2:-0.416667 3:0.38983 4:0.5 
+1 1:-0.611111 2:0.166667 3:-0.830508 4:-0.916667 
+0 1:0.888889 2:-0.166667 3:0.728813 4:0.833333 
+2 1:-0.277778 2:-0.25 3:-0.118644 4:-4.03573e-08 
+2 1:-0.222222 2:-0.333333 3:0.186441 4:-4.03573e-08 
+0 1:0.333333 2:-0.583333 3:0.627119 4:0.416667 
+0 1:0.444444 2:-0.0833334 3:0.491525 4:0.666667 
+2 1:-0.222222 2:-0.25 3:0.0847457 4:-4.03573e-08 
+1 1:-0.611111 2:0.166667 3:-0.79661 4:-0.75 
+2 1:-0.277778 2:-0.166667 3:0.0508474 4:-4.03573e-08 
+0 1:1 2:0.5 3:0.830508 4:0.583333 
+2 1:-0.333333 2:-0.666667 3:-0.0508475 4:-0.166667 
+2 1:-0.277778 2:-0.416667 3:0.0847457 4:-4.03573e-08 
+0 1:0.888889 2:-0.333333 3:0.932203 4:0.583333 
+2 1:-0.111111 2:-0.166667 3:0.0847457 4:0.166667 
+2 1:0.111111 2:-0.583333 3:0.322034 4:0.166667 
+0 1:0.333333 2:0.0833333 3:0.59322 4:1 
+0 1:0.222222 2:-0.166667 3:0.525424 4:0.416667 
+1 1:-0.555556 2:0.5 3:-0.830508 4:-0.833333 
+0 1:-0.111111 2:-0.166667 3:0.38983 4:0.416667
+0 1:0.888889 2:-0.5 3:1 4:0.833333 
+1 1:-0.388889 2:0.583333 3:-0.898305 4:-0.75 
+2 1:0.111111 2:0.0833333 3:0.254237 4:0.25 
+0 1:0.333333 2:-0.166667 3:0.423729 4:0.833333 
+1 1:-0.388889 2:0.166667 3:-0.762712 4:-0.916667 
+0 1:0.333333 2:-0.0833334 3:0.559322 4:0.916667 
+2 1:-0.333333 2:-0.75 3:0.0169491 4:-4.03573e-08 
+1 1:-0.222222 2:1 3:-0.830508 4:-0.75 
+1 1:-0.388889 2:0.583333 3:-0.762712 4:-0.75 
+2 1:-0.611111 2:-1 3:-0.152542 4:-0.25 
+2 1:-1.32455e-07 2:-0.333333 3:0.254237 4:-0.0833333 
+2 1:-0.5 2:-0.416667 3:-0.0169491 4:0.0833333 
+1 1:-0.888889 2:-0.75 3:-0.898305 4:-0.833333 
+1 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1 
+2 1:-0.555556 2:-0.583333 3:-0.322034 4:-0.166667 
+2 1:-0.166667 2:-0.5 3:0.0169491 4:-0.0833333 
+1 1:-0.555556 2:0.0833333 3:-0.762712 4:-0.666667 
+1 1:-0.777778 3:-0.898305 4:-0.916667 
+0 1:0.388889 2:-0.166667 3:0.525424 4:0.666667 
+0 1:0.222222 3:0.38983 4:0.583333 
+2 1:0.333333 2:-0.0833334 3:0.254237 4:0.166667 
+2 1:-0.388889 2:-0.166667 3:0.186441 4:0.166667 
+0 1:-0.222222 2:-0.583333 3:0.355932 4:0.583333 
+1 1:-0.611111 2:-0.166667 3:-0.79661 4:-0.916667 
+1 1:-0.944444 2:-0.25 3:-0.864407 4:-0.916667 
+1 1:-0.388889 2:0.166667 3:-0.830508 4:-0.75 
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_svm_data.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_svm_data.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7ab30bd93ceafb717a3e3b87d49a77ba277ef5e2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/sample_svm_data.txt
@@ -0,0 +1,322 @@
+1 0 2.52078447201548 0 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 12.72816758217773 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 0 0 0 0 4.745052855503306 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 0 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 0 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 0 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 0 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 0 0 2.004684436494304 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 6.857275130999357 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 0 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 10.4087817597473 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 0 0 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 12.72816758217773 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 4.745052855503306 0 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+1 0 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 0 2.52078447201548 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+0 0 2.52078447201548 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 2.061393766919624 0 0 2.004684436494304 0 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 4.745052855503306 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 2.52078447201548 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 0 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 0 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 0 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 6.857275130999357 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 4.745052855503306 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 0 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 4.745052855503306 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 0 0 0 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+0 0 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 2.061393766919624 2.619965104088255 4.745052855503306 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 0 2.52078447201548 0 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 0 0 0 0 0
+0 0 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 0 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 6.857275130999357 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+0 0 2.52078447201548 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 0 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 0 2.52078447201548 0 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 0 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 0 2.52078447201548 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 4.745052855503306 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 0 0 0 0 0 0 0 2.122974378789621 0 0 0 0 12.72816758217773 10.4087817597473 12.72816758217773 17.97228742438751
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 4.745052855503306 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 0 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 0 2.52078447201548 2.061393766919624 0 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 0 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 4.745052855503306 2.004684436494304 0 0 0 0 0 0 0 0 0 0
+1 0 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 0 0 0 6.857275130999357 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 4.745052855503306 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 6.857275130999357 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 0 4.745052855503306 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 0 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 4.745052855503306 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+0 0 2.52078447201548 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 4.745052855503306 2.004684436494304 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 0 2.52078447201548 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 0 0 0 2.000347299268466 2.122974378789621 0 0 6.857275130999357 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 4.745052855503306 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 2.619965104088255 0 0 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 0 0 2.061393766919624 0 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 2.061393766919624 0 0 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 6.857275130999357 0 0 0 0 0
+1 2.857738033247042 0 2.061393766919624 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 2.000347299268466 0 0 0 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 2.52078447201548 2.061393766919624 2.619965104088255 0 0 2.000347299268466 0 0 0 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 0 0 0 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 4.745052855503306 0 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 2.52078447201548 0 0 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 2.857738033247042 2.52078447201548 0 2.619965104088255 0 2.004684436494304 0 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 2.061393766919624 0 0 2.004684436494304 0 2.122974378789621 0 0 0 0 0 10.4087817597473 0 0
+0 2.857738033247042 0 2.061393766919624 2.619965104088255 0 2.004684436494304 0 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+0 2.857738033247042 0 0 2.619965104088255 0 2.004684436494304 2.000347299268466 2.122974378789621 0 0 0 0 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 2.000347299268466 0 2.228387042742021 2.228387042742023 0 0 0 0 0 0
+1 0 2.52078447201548 0 2.619965104088255 0 0 0 0 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
+1 2.857738033247042 0 0 2.619965104088255 0 0 0 2.122974378789621 2.228387042742021 2.228387042742023 0 2.055002875864414 0 0 0 0
diff --git a/spark-2.4.0-bin-hadoop2.7/data/mllib/streaming_kmeans_data_test.txt b/spark-2.4.0-bin-hadoop2.7/data/mllib/streaming_kmeans_data_test.txt
new file mode 100644
index 0000000000000000000000000000000000000000..649a0d6cf4e227809688d11ec03fba798ed7c0dc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/mllib/streaming_kmeans_data_test.txt
@@ -0,0 +1,2 @@
+(1.0), [1.7, 0.4, 0.9]
+(2.0), [2.2, 1.8, 0.0]
diff --git a/spark-2.4.0-bin-hadoop2.7/data/streaming/AFINN-111.txt b/spark-2.4.0-bin-hadoop2.7/data/streaming/AFINN-111.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0f6fb8ebaa0bffc148ef49c83d34d72b2a6e27d8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/data/streaming/AFINN-111.txt
@@ -0,0 +1,2477 @@
+abandon	-2
+abandoned	-2
+abandons	-2
+abducted	-2
+abduction	-2
+abductions	-2
+abhor	-3
+abhorred	-3
+abhorrent	-3
+abhors	-3
+abilities	2
+ability	2
+aboard	1
+absentee	-1
+absentees	-1
+absolve	2
+absolved	2
+absolves	2
+absolving	2
+absorbed	1
+abuse	-3
+abused	-3
+abuses	-3
+abusive	-3
+accept	1
+accepted	1
+accepting	1
+accepts	1
+accident	-2
+accidental	-2
+accidentally	-2
+accidents	-2
+accomplish	2
+accomplished	2
+accomplishes	2
+accusation	-2
+accusations	-2
+accuse	-2
+accused	-2
+accuses	-2
+accusing	-2
+ache	-2
+achievable	1
+aching	-2
+acquit	2
+acquits	2
+acquitted	2
+acquitting	2
+acrimonious	-3
+active	1
+adequate	1
+admire	3
+admired	3
+admires	3
+admiring	3
+admit	-1
+admits	-1
+admitted	-1
+admonish	-2
+admonished	-2
+adopt	1
+adopts	1
+adorable	3
+adore	3
+adored	3
+adores	3
+advanced	1
+advantage	2
+advantages	2
+adventure	2
+adventures	2
+adventurous	2
+affected	-1
+affection	3
+affectionate	3
+afflicted	-1
+affronted	-1
+afraid	-2
+aggravate	-2
+aggravated	-2
+aggravates	-2
+aggravating	-2
+aggression	-2
+aggressions	-2
+aggressive	-2
+aghast	-2
+agog	2
+agonise	-3
+agonised	-3
+agonises	-3
+agonising	-3
+agonize	-3
+agonized	-3
+agonizes	-3
+agonizing	-3
+agree	1
+agreeable	2
+agreed	1
+agreement	1
+agrees	1
+alarm	-2
+alarmed	-2
+alarmist	-2
+alarmists	-2
+alas	-1
+alert	-1
+alienation	-2
+alive	1
+allergic	-2
+allow	1
+alone	-2
+amaze	2
+amazed	2
+amazes	2
+amazing	4
+ambitious	2
+ambivalent	-1
+amuse	3
+amused	3
+amusement	3
+amusements	3
+anger	-3
+angers	-3
+angry	-3
+anguish	-3
+anguished	-3
+animosity	-2
+annoy	-2
+annoyance	-2
+annoyed	-2
+annoying	-2
+annoys	-2
+antagonistic	-2
+anti	-1
+anticipation	1
+anxiety	-2
+anxious	-2
+apathetic	-3
+apathy	-3
+apeshit	-3
+apocalyptic	-2
+apologise	-1
+apologised	-1
+apologises	-1
+apologising	-1
+apologize	-1
+apologized	-1
+apologizes	-1
+apologizing	-1
+apology	-1
+appalled	-2
+appalling	-2
+appease	2
+appeased	2
+appeases	2
+appeasing	2
+applaud	2
+applauded	2
+applauding	2
+applauds	2
+applause	2
+appreciate	2
+appreciated	2
+appreciates	2
+appreciating	2
+appreciation	2
+apprehensive	-2
+approval	2
+approved	2
+approves	2
+ardent	1
+arrest	-2
+arrested	-3
+arrests	-2
+arrogant	-2
+ashame	-2
+ashamed	-2
+ass	-4
+assassination	-3
+assassinations	-3
+asset	2
+assets	2
+assfucking	-4
+asshole	-4
+astonished	2
+astound	3
+astounded	3
+astounding	3
+astoundingly	3
+astounds	3
+attack	-1
+attacked	-1
+attacking	-1
+attacks	-1
+attract	1
+attracted	1
+attracting	2
+attraction	2
+attractions	2
+attracts	1
+audacious	3
+authority	1
+avert	-1
+averted	-1
+averts	-1
+avid	2
+avoid	-1
+avoided	-1
+avoids	-1
+await	-1
+awaited	-1
+awaits	-1
+award	3
+awarded	3
+awards	3
+awesome	4
+awful	-3
+awkward	-2
+axe	-1
+axed	-1
+backed	1
+backing	2
+backs	1
+bad	-3
+badass	-3
+badly	-3
+bailout	-2
+bamboozle	-2
+bamboozled	-2
+bamboozles	-2
+ban	-2
+banish	-1
+bankrupt	-3
+bankster	-3
+banned	-2
+bargain	2
+barrier	-2
+bastard	-5
+bastards	-5
+battle	-1
+battles	-1
+beaten	-2
+beatific	3
+beating	-1
+beauties	3
+beautiful	3
+beautifully	3
+beautify	3
+belittle	-2
+belittled	-2
+beloved	3
+benefit	2
+benefits	2
+benefitted	2
+benefitting	2
+bereave	-2
+bereaved	-2
+bereaves	-2
+bereaving	-2
+best	3
+betray	-3
+betrayal	-3
+betrayed	-3
+betraying	-3
+betrays	-3
+better	2
+bias	-1
+biased	-2
+big	1
+bitch	-5
+bitches	-5
+bitter	-2
+bitterly	-2
+bizarre	-2
+blah	-2
+blame	-2
+blamed	-2
+blames	-2
+blaming	-2
+bless	2
+blesses	2
+blessing	3
+blind	-1
+bliss	3
+blissful	3
+blithe	2
+block	-1
+blockbuster	3
+blocked	-1
+blocking	-1
+blocks	-1
+bloody	-3
+blurry	-2
+boastful	-2
+bold	2
+boldly	2
+bomb	-1
+boost	1
+boosted	1
+boosting	1
+boosts	1
+bore	-2
+bored	-2
+boring	-3
+bother	-2
+bothered	-2
+bothers	-2
+bothersome	-2
+boycott	-2
+boycotted	-2
+boycotting	-2
+boycotts	-2
+brainwashing	-3
+brave	2
+breakthrough	3
+breathtaking	5
+bribe	-3
+bright	1
+brightest	2
+brightness	1
+brilliant	4
+brisk	2
+broke	-1
+broken	-1
+brooding	-2
+bullied	-2
+bullshit	-4
+bully	-2
+bullying	-2
+bummer	-2
+buoyant	2
+burden	-2
+burdened	-2
+burdening	-2
+burdens	-2
+calm	2
+calmed	2
+calming	2
+calms	2
+can't stand	-3
+cancel	-1
+cancelled	-1
+cancelling	-1
+cancels	-1
+cancer	-1
+capable	1
+captivated	3
+care	2
+carefree	1
+careful	2
+carefully	2
+careless	-2
+cares	2
+cashing in	-2
+casualty	-2
+catastrophe	-3
+catastrophic	-4
+cautious	-1
+celebrate	3
+celebrated	3
+celebrates	3
+celebrating	3
+censor	-2
+censored	-2
+censors	-2
+certain	1
+chagrin	-2
+chagrined	-2
+challenge	-1
+chance	2
+chances	2
+chaos	-2
+chaotic	-2
+charged	-3
+charges	-2
+charm	3
+charming	3
+charmless	-3
+chastise	-3
+chastised	-3
+chastises	-3
+chastising	-3
+cheat	-3
+cheated	-3
+cheater	-3
+cheaters	-3
+cheats	-3
+cheer	2
+cheered	2
+cheerful	2
+cheering	2
+cheerless	-2
+cheers	2
+cheery	3
+cherish	2
+cherished	2
+cherishes	2
+cherishing	2
+chic	2
+childish	-2
+chilling	-1
+choke	-2
+choked	-2
+chokes	-2
+choking	-2
+clarifies	2
+clarity	2
+clash	-2
+classy	3
+clean	2
+cleaner	2
+clear	1
+cleared	1
+clearly	1
+clears	1
+clever	2
+clouded	-1
+clueless	-2
+cock	-5
+cocksucker	-5
+cocksuckers	-5
+cocky	-2
+coerced	-2
+collapse	-2
+collapsed	-2
+collapses	-2
+collapsing	-2
+collide	-1
+collides	-1
+colliding	-1
+collision	-2
+collisions	-2
+colluding	-3
+combat	-1
+combats	-1
+comedy	1
+comfort	2
+comfortable	2
+comforting	2
+comforts	2
+commend	2
+commended	2
+commit	1
+commitment	2
+commits	1
+committed	1
+committing	1
+compassionate	2
+compelled	1
+competent	2
+competitive	2
+complacent	-2
+complain	-2
+complained	-2
+complains	-2
+comprehensive	2
+conciliate	2
+conciliated	2
+conciliates	2
+conciliating	2
+condemn	-2
+condemnation	-2
+condemned	-2
+condemns	-2
+confidence	2
+confident	2
+conflict	-2
+conflicting	-2
+conflictive	-2
+conflicts	-2
+confuse	-2
+confused	-2
+confusing	-2
+congrats	2
+congratulate	2
+congratulation	2
+congratulations	2
+consent	2
+consents	2
+consolable	2
+conspiracy	-3
+constrained	-2
+contagion	-2
+contagions	-2
+contagious	-1
+contempt	-2
+contemptuous	-2
+contemptuously	-2
+contend	-1
+contender	-1
+contending	-1
+contentious	-2
+contestable	-2
+controversial	-2
+controversially	-2
+convince	1
+convinced	1
+convinces	1
+convivial	2
+cool	1
+cool stuff	3
+cornered	-2
+corpse	-1
+costly	-2
+courage	2
+courageous	2
+courteous	2
+courtesy	2
+cover-up	-3
+coward	-2
+cowardly	-2
+coziness	2
+cramp	-1
+crap	-3
+crash	-2
+crazier	-2
+craziest	-2
+crazy	-2
+creative	2
+crestfallen	-2
+cried	-2
+cries	-2
+crime	-3
+criminal	-3
+criminals	-3
+crisis	-3
+critic	-2
+criticism	-2
+criticize	-2
+criticized	-2
+criticizes	-2
+criticizing	-2
+critics	-2
+cruel	-3
+cruelty	-3
+crush	-1
+crushed	-2
+crushes	-1
+crushing	-1
+cry	-1
+crying	-2
+cunt	-5
+curious	1
+curse	-1
+cut	-1
+cute	2
+cuts	-1
+cutting	-1
+cynic	-2
+cynical	-2
+cynicism	-2
+damage	-3
+damages	-3
+damn	-4
+damned	-4
+damnit	-4
+danger	-2
+daredevil	2
+daring	2
+darkest	-2
+darkness	-1
+dauntless	2
+dead	-3
+deadlock	-2
+deafening	-1
+dear	2
+dearly	3
+death	-2
+debonair	2
+debt	-2
+deceit	-3
+deceitful	-3
+deceive	-3
+deceived	-3
+deceives	-3
+deceiving	-3
+deception	-3
+decisive	1
+dedicated	2
+defeated	-2
+defect	-3
+defects	-3
+defender	2
+defenders	2
+defenseless	-2
+defer	-1
+deferring	-1
+defiant	-1
+deficit	-2
+degrade	-2
+degraded	-2
+degrades	-2
+dehumanize	-2
+dehumanized	-2
+dehumanizes	-2
+dehumanizing	-2
+deject	-2
+dejected	-2
+dejecting	-2
+dejects	-2
+delay	-1
+delayed	-1
+delight	3
+delighted	3
+delighting	3
+delights	3
+demand	-1
+demanded	-1
+demanding	-1
+demands	-1
+demonstration	-1
+demoralized	-2
+denied	-2
+denier	-2
+deniers	-2
+denies	-2
+denounce	-2
+denounces	-2
+deny	-2
+denying	-2
+depressed	-2
+depressing	-2
+derail	-2
+derailed	-2
+derails	-2
+deride	-2
+derided	-2
+derides	-2
+deriding	-2
+derision	-2
+desirable	2
+desire	1
+desired	2
+desirous	2
+despair	-3
+despairing	-3
+despairs	-3
+desperate	-3
+desperately	-3
+despondent	-3
+destroy	-3
+destroyed	-3
+destroying	-3
+destroys	-3
+destruction	-3
+destructive	-3
+detached	-1
+detain	-2
+detained	-2
+detention	-2
+determined	2
+devastate	-2
+devastated	-2
+devastating	-2
+devoted	3
+diamond	1
+dick	-4
+dickhead	-4
+die	-3
+died	-3
+difficult	-1
+diffident	-2
+dilemma	-1
+dipshit	-3
+dire	-3
+direful	-3
+dirt	-2
+dirtier	-2
+dirtiest	-2
+dirty	-2
+disabling	-1
+disadvantage	-2
+disadvantaged	-2
+disappear	-1
+disappeared	-1
+disappears	-1
+disappoint	-2
+disappointed	-2
+disappointing	-2
+disappointment	-2
+disappointments	-2
+disappoints	-2
+disaster	-2
+disasters	-2
+disastrous	-3
+disbelieve	-2
+discard	-1
+discarded	-1
+discarding	-1
+discards	-1
+disconsolate	-2
+disconsolation	-2
+discontented	-2
+discord	-2
+discounted	-1
+discouraged	-2
+discredited	-2
+disdain	-2
+disgrace	-2
+disgraced	-2
+disguise	-1
+disguised	-1
+disguises	-1
+disguising	-1
+disgust	-3
+disgusted	-3
+disgusting	-3
+disheartened	-2
+dishonest	-2
+disillusioned	-2
+disinclined	-2
+disjointed	-2
+dislike	-2
+dismal	-2
+dismayed	-2
+disorder	-2
+disorganized	-2
+disoriented	-2
+disparage	-2
+disparaged	-2
+disparages	-2
+disparaging	-2
+displeased	-2
+dispute	-2
+disputed	-2
+disputes	-2
+disputing	-2
+disqualified	-2
+disquiet	-2
+disregard	-2
+disregarded	-2
+disregarding	-2
+disregards	-2
+disrespect	-2
+disrespected	-2
+disruption	-2
+disruptions	-2
+disruptive	-2
+dissatisfied	-2
+distort	-2
+distorted	-2
+distorting	-2
+distorts	-2
+distract	-2
+distracted	-2
+distraction	-2
+distracts	-2
+distress	-2
+distressed	-2
+distresses	-2
+distressing	-2
+distrust	-3
+distrustful	-3
+disturb	-2
+disturbed	-2
+disturbing	-2
+disturbs	-2
+dithering	-2
+dizzy	-1
+dodging	-2
+dodgy	-2
+does not work	-3
+dolorous	-2
+dont like	-2
+doom	-2
+doomed	-2
+doubt	-1
+doubted	-1
+doubtful	-1
+doubting	-1
+doubts	-1
+douche	-3
+douchebag	-3
+downcast	-2
+downhearted	-2
+downside	-2
+drag	-1
+dragged	-1
+drags	-1
+drained	-2
+dread	-2
+dreaded	-2
+dreadful	-3
+dreading	-2
+dream	1
+dreams	1
+dreary	-2
+droopy	-2
+drop	-1
+drown	-2
+drowned	-2
+drowns	-2
+drunk	-2
+dubious	-2
+dud	-2
+dull	-2
+dumb	-3
+dumbass	-3
+dump	-1
+dumped	-2
+dumps	-1
+dupe	-2
+duped	-2
+dysfunction	-2
+eager	2
+earnest	2
+ease	2
+easy	1
+ecstatic	4
+eerie	-2
+eery	-2
+effective	2
+effectively	2
+elated	3
+elation	3
+elegant	2
+elegantly	2
+embarrass	-2
+embarrassed	-2
+embarrasses	-2
+embarrassing	-2
+embarrassment	-2
+embittered	-2
+embrace	1
+emergency	-2
+empathetic	2
+emptiness	-1
+empty	-1
+enchanted	2
+encourage	2
+encouraged	2
+encouragement	2
+encourages	2
+endorse	2
+endorsed	2
+endorsement	2
+endorses	2
+enemies	-2
+enemy	-2
+energetic	2
+engage	1
+engages	1
+engrossed	1
+enjoy	2
+enjoying	2
+enjoys	2
+enlighten	2
+enlightened	2
+enlightening	2
+enlightens	2
+ennui	-2
+enrage	-2
+enraged	-2
+enrages	-2
+enraging	-2
+enrapture	3
+enslave	-2
+enslaved	-2
+enslaves	-2
+ensure	1
+ensuring	1
+enterprising	1
+entertaining	2
+enthral	3
+enthusiastic	3
+entitled	1
+entrusted	2
+envies	-1
+envious	-2
+envy	-1
+envying	-1
+erroneous	-2
+error	-2
+errors	-2
+escape	-1
+escapes	-1
+escaping	-1
+esteemed	2
+ethical	2
+euphoria	3
+euphoric	4
+eviction	-1
+evil	-3
+exaggerate	-2
+exaggerated	-2
+exaggerates	-2
+exaggerating	-2
+exasperated	2
+excellence	3
+excellent	3
+excite	3
+excited	3
+excitement	3
+exciting	3
+exclude	-1
+excluded	-2
+exclusion	-1
+exclusive	2
+excuse	-1
+exempt	-1
+exhausted	-2
+exhilarated	3
+exhilarates	3
+exhilarating	3
+exonerate	2
+exonerated	2
+exonerates	2
+exonerating	2
+expand	1
+expands	1
+expel	-2
+expelled	-2
+expelling	-2
+expels	-2
+exploit	-2
+exploited	-2
+exploiting	-2
+exploits	-2
+exploration	1
+explorations	1
+expose	-1
+exposed	-1
+exposes	-1
+exposing	-1
+extend	1
+extends	1
+exuberant	4
+exultant	3
+exultantly	3
+fabulous	4
+fad	-2
+fag	-3
+faggot	-3
+faggots	-3
+fail	-2
+failed	-2
+failing	-2
+fails	-2
+failure	-2
+failures	-2
+fainthearted	-2
+fair	2
+faith	1
+faithful	3
+fake	-3
+fakes	-3
+faking	-3
+fallen	-2
+falling	-1
+falsified	-3
+falsify	-3
+fame	1
+fan	3
+fantastic	4
+farce	-1
+fascinate	3
+fascinated	3
+fascinates	3
+fascinating	3
+fascist	-2
+fascists	-2
+fatalities	-3
+fatality	-3
+fatigue	-2
+fatigued	-2
+fatigues	-2
+fatiguing	-2
+favor	2
+favored	2
+favorite	2
+favorited	2
+favorites	2
+favors	2
+fear	-2
+fearful	-2
+fearing	-2
+fearless	2
+fearsome	-2
+fed up	-3
+feeble	-2
+feeling	1
+felonies	-3
+felony	-3
+fervent	2
+fervid	2
+festive	2
+fiasco	-3
+fidgety	-2
+fight	-1
+fine	2
+fire	-2
+fired	-2
+firing	-2
+fit	1
+fitness	1
+flagship	2
+flees	-1
+flop	-2
+flops	-2
+flu	-2
+flustered	-2
+focused	2
+fond	2
+fondness	2
+fool	-2
+foolish	-2
+fools	-2
+forced	-1
+foreclosure	-2
+foreclosures	-2
+forget	-1
+forgetful	-2
+forgive	1
+forgiving	1
+forgotten	-1
+fortunate	2
+frantic	-1
+fraud	-4
+frauds	-4
+fraudster	-4
+fraudsters	-4
+fraudulence	-4
+fraudulent	-4
+free	1
+freedom	2
+frenzy	-3
+fresh	1
+friendly	2
+fright	-2
+frightened	-2
+frightening	-3
+frikin	-2
+frisky	2
+frowning	-1
+frustrate	-2
+frustrated	-2
+frustrates	-2
+frustrating	-2
+frustration	-2
+ftw	3
+fuck	-4
+fucked	-4
+fucker	-4
+fuckers	-4
+fuckface	-4
+fuckhead	-4
+fucking	-4
+fucktard	-4
+fud	-3
+fuked	-4
+fuking	-4
+fulfill	2
+fulfilled	2
+fulfills	2
+fuming	-2
+fun	4
+funeral	-1
+funerals	-1
+funky	2
+funnier	4
+funny	4
+furious	-3
+futile	2
+gag	-2
+gagged	-2
+gain	2
+gained	2
+gaining	2
+gains	2
+gallant	3
+gallantly	3
+gallantry	3
+generous	2
+genial	3
+ghost	-1
+giddy	-2
+gift	2
+glad	3
+glamorous	3
+glamourous	3
+glee	3
+gleeful	3
+gloom	-1
+gloomy	-2
+glorious	2
+glory	2
+glum	-2
+god	1
+goddamn	-3
+godsend	4
+good	3
+goodness	3
+grace	1
+gracious	3
+grand	3
+grant	1
+granted	1
+granting	1
+grants	1
+grateful	3
+gratification	2
+grave	-2
+gray	-1
+great	3
+greater	3
+greatest	3
+greed	-3
+greedy	-2
+green wash	-3
+green washing	-3
+greenwash	-3
+greenwasher	-3
+greenwashers	-3
+greenwashing	-3
+greet	1
+greeted	1
+greeting	1
+greetings	2
+greets	1
+grey	-1
+grief	-2
+grieved	-2
+gross	-2
+growing	1
+growth	2
+guarantee	1
+guilt	-3
+guilty	-3
+gullibility	-2
+gullible	-2
+gun	-1
+ha	2
+hacked	-1
+haha	3
+hahaha	3
+hahahah	3
+hail	2
+hailed	2
+hapless	-2
+haplessness	-2
+happiness	3
+happy	3
+hard	-1
+hardier	2
+hardship	-2
+hardy	2
+harm	-2
+harmed	-2
+harmful	-2
+harming	-2
+harms	-2
+harried	-2
+harsh	-2
+harsher	-2
+harshest	-2
+hate	-3
+hated	-3
+haters	-3
+hates	-3
+hating	-3
+haunt	-1
+haunted	-2
+haunting	1
+haunts	-1
+havoc	-2
+healthy	2
+heartbreaking	-3
+heartbroken	-3
+heartfelt	3
+heaven	2
+heavenly	4
+heavyhearted	-2
+hell	-4
+help	2
+helpful	2
+helping	2
+helpless	-2
+helps	2
+hero	2
+heroes	2
+heroic	3
+hesitant	-2
+hesitate	-2
+hid	-1
+hide	-1
+hides	-1
+hiding	-1
+highlight	2
+hilarious	2
+hindrance	-2
+hoax	-2
+homesick	-2
+honest	2
+honor	2
+honored	2
+honoring	2
+honour	2
+honoured	2
+honouring	2
+hooligan	-2
+hooliganism	-2
+hooligans	-2
+hope	2
+hopeful	2
+hopefully	2
+hopeless	-2
+hopelessness	-2
+hopes	2
+hoping	2
+horrendous	-3
+horrible	-3
+horrific	-3
+horrified	-3
+hostile	-2
+huckster	-2
+hug	2
+huge	1
+hugs	2
+humerous	3
+humiliated	-3
+humiliation	-3
+humor	2
+humorous	2
+humour	2
+humourous	2
+hunger	-2
+hurrah	5
+hurt	-2
+hurting	-2
+hurts	-2
+hypocritical	-2
+hysteria	-3
+hysterical	-3
+hysterics	-3
+idiot	-3
+idiotic	-3
+ignorance	-2
+ignorant	-2
+ignore	-1
+ignored	-2
+ignores	-1
+ill	-2
+illegal	-3
+illiteracy	-2
+illness	-2
+illnesses	-2
+imbecile	-3
+immobilized	-1
+immortal	2
+immune	1
+impatient	-2
+imperfect	-2
+importance	2
+important	2
+impose	-1
+imposed	-1
+imposes	-1
+imposing	-1
+impotent	-2
+impress	3
+impressed	3
+impresses	3
+impressive	3
+imprisoned	-2
+improve	2
+improved	2
+improvement	2
+improves	2
+improving	2
+inability	-2
+inaction	-2
+inadequate	-2
+incapable	-2
+incapacitated	-2
+incensed	-2
+incompetence	-2
+incompetent	-2
+inconsiderate	-2
+inconvenience	-2
+inconvenient	-2
+increase	1
+increased	1
+indecisive	-2
+indestructible	2
+indifference	-2
+indifferent	-2
+indignant	-2
+indignation	-2
+indoctrinate	-2
+indoctrinated	-2
+indoctrinates	-2
+indoctrinating	-2
+ineffective	-2
+ineffectively	-2
+infatuated	2
+infatuation	2
+infected	-2
+inferior	-2
+inflamed	-2
+influential	2
+infringement	-2
+infuriate	-2
+infuriated	-2
+infuriates	-2
+infuriating	-2
+inhibit	-1
+injured	-2
+injury	-2
+injustice	-2
+innovate	1
+innovates	1
+innovation	1
+innovative	2
+inquisition	-2
+inquisitive	2
+insane	-2
+insanity	-2
+insecure	-2
+insensitive	-2
+insensitivity	-2
+insignificant	-2
+insipid	-2
+inspiration	2
+inspirational	2
+inspire	2
+inspired	2
+inspires	2
+inspiring	3
+insult	-2
+insulted	-2
+insulting	-2
+insults	-2
+intact	2
+integrity	2
+intelligent	2
+intense	1
+interest	1
+interested	2
+interesting	2
+interests	1
+interrogated	-2
+interrupt	-2
+interrupted	-2
+interrupting	-2
+interruption	-2
+interrupts	-2
+intimidate	-2
+intimidated	-2
+intimidates	-2
+intimidating	-2
+intimidation	-2
+intricate	2
+intrigues	1
+invincible	2
+invite	1
+inviting	1
+invulnerable	2
+irate	-3
+ironic	-1
+irony	-1
+irrational	-1
+irresistible	2
+irresolute	-2
+irresponsible	2
+irreversible	-1
+irritate	-3
+irritated	-3
+irritating	-3
+isolated	-1
+itchy	-2
+jackass	-4
+jackasses	-4
+jailed	-2
+jaunty	2
+jealous	-2
+jeopardy	-2
+jerk	-3
+jesus	1
+jewel	1
+jewels	1
+jocular	2
+join	1
+joke	2
+jokes	2
+jolly	2
+jovial	2
+joy	3
+joyful	3
+joyfully	3
+joyless	-2
+joyous	3
+jubilant	3
+jumpy	-1
+justice	2
+justifiably	2
+justified	2
+keen	1
+kill	-3
+killed	-3
+killing	-3
+kills	-3
+kind	2
+kinder	2
+kiss	2
+kudos	3
+lack	-2
+lackadaisical	-2
+lag	-1
+lagged	-2
+lagging	-2
+lags	-2
+lame	-2
+landmark	2
+laugh	1
+laughed	1
+laughing	1
+laughs	1
+laughting	1
+launched	1
+lawl	3
+lawsuit	-2
+lawsuits	-2
+lazy	-1
+leak	-1
+leaked	-1
+leave	-1
+legal	1
+legally	1
+lenient	1
+lethargic	-2
+lethargy	-2
+liar	-3
+liars	-3
+libelous	-2
+lied	-2
+lifesaver	4
+lighthearted	1
+like	2
+liked	2
+likes	2
+limitation	-1
+limited	-1
+limits	-1
+litigation	-1
+litigious	-2
+lively	2
+livid	-2
+lmao	4
+lmfao	4
+loathe	-3
+loathed	-3
+loathes	-3
+loathing	-3
+lobby	-2
+lobbying	-2
+lol	3
+lonely	-2
+lonesome	-2
+longing	-1
+loom	-1
+loomed	-1
+looming	-1
+looms	-1
+loose	-3
+looses	-3
+loser	-3
+losing	-3
+loss	-3
+lost	-3
+lovable	3
+love	3
+loved	3
+lovelies	3
+lovely	3
+loving	2
+lowest	-1
+loyal	3
+loyalty	3
+luck	3
+luckily	3
+lucky	3
+lugubrious	-2
+lunatic	-3
+lunatics	-3
+lurk	-1
+lurking	-1
+lurks	-1
+mad	-3
+maddening	-3
+made-up	-1
+madly	-3
+madness	-3
+mandatory	-1
+manipulated	-1
+manipulating	-1
+manipulation	-1
+marvel	3
+marvelous	3
+marvels	3
+masterpiece	4
+masterpieces	4
+matter	1
+matters	1
+mature	2
+meaningful	2
+meaningless	-2
+medal	3
+mediocrity	-3
+meditative	1
+melancholy	-2
+menace	-2
+menaced	-2
+mercy	2
+merry	3
+mess	-2
+messed	-2
+messing up	-2
+methodical	2
+mindless	-2
+miracle	4
+mirth	3
+mirthful	3
+mirthfully	3
+misbehave	-2
+misbehaved	-2
+misbehaves	-2
+misbehaving	-2
+mischief	-1
+mischiefs	-1
+miserable	-3
+misery	-2
+misgiving	-2
+misinformation	-2
+misinformed	-2
+misinterpreted	-2
+misleading	-3
+misread	-1
+misreporting	-2
+misrepresentation	-2
+miss	-2
+missed	-2
+missing	-2
+mistake	-2
+mistaken	-2
+mistakes	-2
+mistaking	-2
+misunderstand	-2
+misunderstanding	-2
+misunderstands	-2
+misunderstood	-2
+moan	-2
+moaned	-2
+moaning	-2
+moans	-2
+mock	-2
+mocked	-2
+mocking	-2
+mocks	-2
+mongering	-2
+monopolize	-2
+monopolized	-2
+monopolizes	-2
+monopolizing	-2
+moody	-1
+mope	-1
+moping	-1
+moron	-3
+motherfucker	-5
+motherfucking	-5
+motivate	1
+motivated	2
+motivating	2
+motivation	1
+mourn	-2
+mourned	-2
+mournful	-2
+mourning	-2
+mourns	-2
+mumpish	-2
+murder	-2
+murderer	-2
+murdering	-3
+murderous	-3
+murders	-2
+myth	-1
+n00b	-2
+naive	-2
+nasty	-3
+natural	1
+naïve	-2
+needy	-2
+negative	-2
+negativity	-2
+neglect	-2
+neglected	-2
+neglecting	-2
+neglects	-2
+nerves	-1
+nervous	-2
+nervously	-2
+nice	3
+nifty	2
+niggas	-5
+nigger	-5
+no	-1
+no fun	-3
+noble	2
+noisy	-1
+nonsense	-2
+noob	-2
+nosey	-2
+not good	-2
+not working	-3
+notorious	-2
+novel	2
+numb	-1
+nuts	-3
+obliterate	-2
+obliterated	-2
+obnoxious	-3
+obscene	-2
+obsessed	2
+obsolete	-2
+obstacle	-2
+obstacles	-2
+obstinate	-2
+odd	-2
+offend	-2
+offended	-2
+offender	-2
+offending	-2
+offends	-2
+offline	-1
+oks	2
+ominous	3
+once-in-a-lifetime	3
+opportunities	2
+opportunity	2
+oppressed	-2
+oppressive	-2
+optimism	2
+optimistic	2
+optionless	-2
+outcry	-2
+outmaneuvered	-2
+outrage	-3
+outraged	-3
+outreach	2
+outstanding	5
+overjoyed	4
+overload	-1
+overlooked	-1
+overreact	-2
+overreacted	-2
+overreaction	-2
+overreacts	-2
+oversell	-2
+overselling	-2
+oversells	-2
+oversimplification	-2
+oversimplified	-2
+oversimplifies	-2
+oversimplify	-2
+overstatement	-2
+overstatements	-2
+overweight	-1
+oxymoron	-1
+pain	-2
+pained	-2
+panic	-3
+panicked	-3
+panics	-3
+paradise	3
+paradox	-1
+pardon	2
+pardoned	2
+pardoning	2
+pardons	2
+parley	-1
+passionate	2
+passive	-1
+passively	-1
+pathetic	-2
+pay	-1
+peace	2
+peaceful	2
+peacefully	2
+penalty	-2
+pensive	-1
+perfect	3
+perfected	2
+perfectly	3
+perfects	2
+peril	-2
+perjury	-3
+perpetrator	-2
+perpetrators	-2
+perplexed	-2
+persecute	-2
+persecuted	-2
+persecutes	-2
+persecuting	-2
+perturbed	-2
+pesky	-2
+pessimism	-2
+pessimistic	-2
+petrified	-2
+phobic	-2
+picturesque	2
+pileup	-1
+pique	-2
+piqued	-2
+piss	-4
+pissed	-4
+pissing	-3
+piteous	-2
+pitied	-1
+pity	-2
+playful	2
+pleasant	3
+please	1
+pleased	3
+pleasure	3
+poised	-2
+poison	-2
+poisoned	-2
+poisons	-2
+pollute	-2
+polluted	-2
+polluter	-2
+polluters	-2
+pollutes	-2
+poor	-2
+poorer	-2
+poorest	-2
+popular	3
+positive	2
+positively	2
+possessive	-2
+postpone	-1
+postponed	-1
+postpones	-1
+postponing	-1
+poverty	-1
+powerful	2
+powerless	-2
+praise	3
+praised	3
+praises	3
+praising	3
+pray	1
+praying	1
+prays	1
+prblm	-2
+prblms	-2
+prepared	1
+pressure	-1
+pressured	-2
+pretend	-1
+pretending	-1
+pretends	-1
+pretty	1
+prevent	-1
+prevented	-1
+preventing	-1
+prevents	-1
+prick	-5
+prison	-2
+prisoner	-2
+prisoners	-2
+privileged	2
+proactive	2
+problem	-2
+problems	-2
+profiteer	-2
+progress	2
+prominent	2
+promise	1
+promised	1
+promises	1
+promote	1
+promoted	1
+promotes	1
+promoting	1
+propaganda	-2
+prosecute	-1
+prosecuted	-2
+prosecutes	-1
+prosecution	-1
+prospect	1
+prospects	1
+prosperous	3
+protect	1
+protected	1
+protects	1
+protest	-2
+protesters	-2
+protesting	-2
+protests	-2
+proud	2
+proudly	2
+provoke	-1
+provoked	-1
+provokes	-1
+provoking	-1
+pseudoscience	-3
+punish	-2
+punished	-2
+punishes	-2
+punitive	-2
+pushy	-1
+puzzled	-2
+quaking	-2
+questionable	-2
+questioned	-1
+questioning	-1
+racism	-3
+racist	-3
+racists	-3
+rage	-2
+rageful	-2
+rainy	-1
+rant	-3
+ranter	-3
+ranters	-3
+rants	-3
+rape	-4
+rapist	-4
+rapture	2
+raptured	2
+raptures	2
+rapturous	4
+rash	-2
+ratified	2
+reach	1
+reached	1
+reaches	1
+reaching	1
+reassure	1
+reassured	1
+reassures	1
+reassuring	2
+rebellion	-2
+recession	-2
+reckless	-2
+recommend	2
+recommended	2
+recommends	2
+redeemed	2
+refuse	-2
+refused	-2
+refusing	-2
+regret	-2
+regretful	-2
+regrets	-2
+regretted	-2
+regretting	-2
+reject	-1
+rejected	-1
+rejecting	-1
+rejects	-1
+rejoice	4
+rejoiced	4
+rejoices	4
+rejoicing	4
+relaxed	2
+relentless	-1
+reliant	2
+relieve	1
+relieved	2
+relieves	1
+relieving	2
+relishing	2
+remarkable	2
+remorse	-2
+repulse	-1
+repulsed	-2
+rescue	2
+rescued	2
+rescues	2
+resentful	-2
+resign	-1
+resigned	-1
+resigning	-1
+resigns	-1
+resolute	2
+resolve	2
+resolved	2
+resolves	2
+resolving	2
+respected	2
+responsible	2
+responsive	2
+restful	2
+restless	-2
+restore	1
+restored	1
+restores	1
+restoring	1
+restrict	-2
+restricted	-2
+restricting	-2
+restriction	-2
+restricts	-2
+retained	-1
+retard	-2
+retarded	-2
+retreat	-1
+revenge	-2
+revengeful	-2
+revered	2
+revive	2
+revives	2
+reward	2
+rewarded	2
+rewarding	2
+rewards	2
+rich	2
+ridiculous	-3
+rig	-1
+rigged	-1
+right direction	3
+rigorous	3
+rigorously	3
+riot	-2
+riots	-2
+risk	-2
+risks	-2
+rob	-2
+robber	-2
+robed	-2
+robing	-2
+robs	-2
+robust	2
+rofl	4
+roflcopter	4
+roflmao	4
+romance	2
+rotfl	4
+rotflmfao	4
+rotflol	4
+ruin	-2
+ruined	-2
+ruining	-2
+ruins	-2
+sabotage	-2
+sad	-2
+sadden	-2
+saddened	-2
+sadly	-2
+safe	1
+safely	1
+safety	1
+salient	1
+sappy	-1
+sarcastic	-2
+satisfied	2
+save	2
+saved	2
+scam	-2
+scams	-2
+scandal	-3
+scandalous	-3
+scandals	-3
+scapegoat	-2
+scapegoats	-2
+scare	-2
+scared	-2
+scary	-2
+sceptical	-2
+scold	-2
+scoop	3
+scorn	-2
+scornful	-2
+scream	-2
+screamed	-2
+screaming	-2
+screams	-2
+screwed	-2
+screwed up	-3
+scumbag	-4
+secure	2
+secured	2
+secures	2
+sedition	-2
+seditious	-2
+seduced	-1
+self-confident	2
+self-deluded	-2
+selfish	-3
+selfishness	-3
+sentence	-2
+sentenced	-2
+sentences	-2
+sentencing	-2
+serene	2
+severe	-2
+sexy	3
+shaky	-2
+shame	-2
+shamed	-2
+shameful	-2
+share	1
+shared	1
+shares	1
+shattered	-2
+shit	-4
+shithead	-4
+shitty	-3
+shock	-2
+shocked	-2
+shocking	-2
+shocks	-2
+shoot	-1
+short-sighted	-2
+short-sightedness	-2
+shortage	-2
+shortages	-2
+shrew	-4
+shy	-1
+sick	-2
+sigh	-2
+significance	1
+significant	1
+silencing	-1
+silly	-1
+sincere	2
+sincerely	2
+sincerest	2
+sincerity	2
+sinful	-3
+singleminded	-2
+skeptic	-2
+skeptical	-2
+skepticism	-2
+skeptics	-2
+slam	-2
+slash	-2
+slashed	-2
+slashes	-2
+slashing	-2
+slavery	-3
+sleeplessness	-2
+slick	2
+slicker	2
+slickest	2
+sluggish	-2
+slut	-5
+smart	1
+smarter	2
+smartest	2
+smear	-2
+smile	2
+smiled	2
+smiles	2
+smiling	2
+smog	-2
+sneaky	-1
+snub	-2
+snubbed	-2
+snubbing	-2
+snubs	-2
+sobering	1
+solemn	-1
+solid	2
+solidarity	2
+solution	1
+solutions	1
+solve	1
+solved	1
+solves	1
+solving	1
+somber	-2
+some kind	0
+son-of-a-bitch	-5
+soothe	3
+soothed	3
+soothing	3
+sophisticated	2
+sore	-1
+sorrow	-2
+sorrowful	-2
+sorry	-1
+spam	-2
+spammer	-3
+spammers	-3
+spamming	-2
+spark	1
+sparkle	3
+sparkles	3
+sparkling	3
+speculative	-2
+spirit	1
+spirited	2
+spiritless	-2
+spiteful	-2
+splendid	3
+sprightly	2
+squelched	-1
+stab	-2
+stabbed	-2
+stable	2
+stabs	-2
+stall	-2
+stalled	-2
+stalling	-2
+stamina	2
+stampede	-2
+startled	-2
+starve	-2
+starved	-2
+starves	-2
+starving	-2
+steadfast	2
+steal	-2
+steals	-2
+stereotype	-2
+stereotyped	-2
+stifled	-1
+stimulate	1
+stimulated	1
+stimulates	1
+stimulating	2
+stingy	-2
+stolen	-2
+stop	-1
+stopped	-1
+stopping	-1
+stops	-1
+stout	2
+straight	1
+strange	-1
+strangely	-1
+strangled	-2
+strength	2
+strengthen	2
+strengthened	2
+strengthening	2
+strengthens	2
+stressed	-2
+stressor	-2
+stressors	-2
+stricken	-2
+strike	-1
+strikers	-2
+strikes	-1
+strong	2
+stronger	2
+strongest	2
+struck	-1
+struggle	-2
+struggled	-2
+struggles	-2
+struggling	-2
+stubborn	-2
+stuck	-2
+stunned	-2
+stunning	4
+stupid	-2
+stupidly	-2
+suave	2
+substantial	1
+substantially	1
+subversive	-2
+success	2
+successful	3
+suck	-3
+sucks	-3
+suffer	-2
+suffering	-2
+suffers	-2
+suicidal	-2
+suicide	-2
+suing	-2
+sulking	-2
+sulky	-2
+sullen	-2
+sunshine	2
+super	3
+superb	5
+superior	2
+support	2
+supported	2
+supporter	1
+supporters	1
+supporting	1
+supportive	2
+supports	2
+survived	2
+surviving	2
+survivor	2
+suspect	-1
+suspected	-1
+suspecting	-1
+suspects	-1
+suspend	-1
+suspended	-1
+suspicious	-2
+swear	-2
+swearing	-2
+swears	-2
+sweet	2
+swift	2
+swiftly	2
+swindle	-3
+swindles	-3
+swindling	-3
+sympathetic	2
+sympathy	2
+tard	-2
+tears	-2
+tender	2
+tense	-2
+tension	-1
+terrible	-3
+terribly	-3
+terrific	4
+terrified	-3
+terror	-3
+terrorize	-3
+terrorized	-3
+terrorizes	-3
+thank	2
+thankful	2
+thanks	2
+thorny	-2
+thoughtful	2
+thoughtless	-2
+threat	-2
+threaten	-2
+threatened	-2
+threatening	-2
+threatens	-2
+threats	-2
+thrilled	5
+thwart	-2
+thwarted	-2
+thwarting	-2
+thwarts	-2
+timid	-2
+timorous	-2
+tired	-2
+tits	-2
+tolerant	2
+toothless	-2
+top	2
+tops	2
+torn	-2
+torture	-4
+tortured	-4
+tortures	-4
+torturing	-4
+totalitarian	-2
+totalitarianism	-2
+tout	-2
+touted	-2
+touting	-2
+touts	-2
+tragedy	-2
+tragic	-2
+tranquil	2
+trap	-1
+trapped	-2
+trauma	-3
+traumatic	-3
+travesty	-2
+treason	-3
+treasonous	-3
+treasure	2
+treasures	2
+trembling	-2
+tremulous	-2
+tricked	-2
+trickery	-2
+triumph	4
+triumphant	4
+trouble	-2
+troubled	-2
+troubles	-2
+true	2
+trust	1
+trusted	2
+tumor	-2
+twat	-5
+ugly	-3
+unacceptable	-2
+unappreciated	-2
+unapproved	-2
+unaware	-2
+unbelievable	-1
+unbelieving	-1
+unbiased	2
+uncertain	-1
+unclear	-1
+uncomfortable	-2
+unconcerned	-2
+unconfirmed	-1
+unconvinced	-1
+uncredited	-1
+undecided	-1
+underestimate	-1
+underestimated	-1
+underestimates	-1
+underestimating	-1
+undermine	-2
+undermined	-2
+undermines	-2
+undermining	-2
+undeserving	-2
+undesirable	-2
+uneasy	-2
+unemployment	-2
+unequal	-1
+unequaled	2
+unethical	-2
+unfair	-2
+unfocused	-2
+unfulfilled	-2
+unhappy	-2
+unhealthy	-2
+unified	1
+unimpressed	-2
+unintelligent	-2
+united	1
+unjust	-2
+unlovable	-2
+unloved	-2
+unmatched	1
+unmotivated	-2
+unprofessional	-2
+unresearched	-2
+unsatisfied	-2
+unsecured	-2
+unsettled	-1
+unsophisticated	-2
+unstable	-2
+unstoppable	2
+unsupported	-2
+unsure	-1
+untarnished	2
+unwanted	-2
+unworthy	-2
+upset	-2
+upsets	-2
+upsetting	-2
+uptight	-2
+urgent	-1
+useful	2
+usefulness	2
+useless	-2
+uselessness	-2
+vague	-2
+validate	1
+validated	1
+validates	1
+validating	1
+verdict	-1
+verdicts	-1
+vested	1
+vexation	-2
+vexing	-2
+vibrant	3
+vicious	-2
+victim	-3
+victimize	-3
+victimized	-3
+victimizes	-3
+victimizing	-3
+victims	-3
+vigilant	3
+vile	-3
+vindicate	2
+vindicated	2
+vindicates	2
+vindicating	2
+violate	-2
+violated	-2
+violates	-2
+violating	-2
+violence	-3
+violent	-3
+virtuous	2
+virulent	-2
+vision	1
+visionary	3
+visioning	1
+visions	1
+vitality	3
+vitamin	1
+vitriolic	-3
+vivacious	3
+vociferous	-1
+vulnerability	-2
+vulnerable	-2
+walkout	-2
+walkouts	-2
+wanker	-3
+want	1
+war	-2
+warfare	-2
+warm	1
+warmth	2
+warn	-2
+warned	-2
+warning	-3
+warnings	-3
+warns	-2
+waste	-1
+wasted	-2
+wasting	-2
+wavering	-1
+weak	-2
+weakness	-2
+wealth	3
+wealthy	2
+weary	-2
+weep	-2
+weeping	-2
+weird	-2
+welcome	2
+welcomed	2
+welcomes	2
+whimsical	1
+whitewash	-3
+whore	-4
+wicked	-2
+widowed	-1
+willingness	2
+win	4
+winner	4
+winning	4
+wins	4
+winwin	3
+wish	1
+wishes	1
+wishing	1
+withdrawal	-3
+woebegone	-2
+woeful	-3
+won	3
+wonderful	4
+woo	3
+woohoo	3
+wooo	4
+woow	4
+worn	-1
+worried	-3
+worry	-3
+worrying	-3
+worse	-3
+worsen	-3
+worsened	-3
+worsening	-3
+worsens	-3
+worshiped	3
+worst	-3
+worth	2
+worthless	-2
+worthy	2
+wow	4
+wowow	4
+wowww	4
+wrathful	-3
+wreck	-2
+wrong	-2
+wronged	-2
+wtf	-4
+yeah	1
+yearning	1
+yeees	2
+yes	1
+youthful	2
+yucky	-2
+yummy	3
+zealot	-2
+zealots	-2
+zealous	2
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/example.scala b/spark-2.4.0-bin-hadoop2.7/example.scala
new file mode 100755
index 0000000000000000000000000000000000000000..3a231ae77cf247ce74fe7596dc2d1382a3677800
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/example.scala
@@ -0,0 +1,37 @@
+import java.time.format._
+import java.time._
+import cern.nxcals.data.access.builders._
+import cern.nxcals.common.utils.TimeUtils
+
+println("Working on 1 day of data:")
+
+val start = LocalDateTime.parse("2018-05-01T00:00:00", DateTimeFormatter.ISO_DATE_TIME).toInstant(ZoneOffset.UTC)
+val end = LocalDateTime.parse("2018-05-02T00:00:00", DateTimeFormatter.ISO_DATE_TIME).toInstant(ZoneOffset.UTC)
+
+val tgmData = DevicePropertyQuery.builder(spark).system("CMW").startTime(start).endTime(end).fields().entity().parameter("CPS.TGM/FULL-TELEGRAM.STRC").buildDataset()
+val data = DevicePropertyQuery.builder(spark).system("CMW").startTime(start).endTime(end).fields().entity().parameter("FTN.QFO415S/Acquisition").buildDataset()
+
+println("Calculating some basic information about current:")
+data.describe("current").show
+
+println("Showing current sum for all *EAST* users:")
+
+data.where("selector like '%EAST%'").agg(sum('current)).show
+
+println("Showing current sum for destination *TOF* using join:")
+
+val tgmFiltered = tgmData.where("DEST like '%TOF%'")
+
+tgmFiltered.count
+
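+// Join on cyclestamp so each filtered telegram is matched with the corresponding current acquisition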
+tgmFiltered.join(data, "cyclestamp").agg(sum('current)).show
+
+println("Showing access to nested types, here array elements")
+tgmData.select("SPCON.elements").as[Array[String]].show
+
+println("Counting cycles")
+
+tgmData.groupBy("USER").count().show
+
+println("Showing max and min dates for TGM data")
+tgmData.agg(min('cyclestamp), max('cyclestamp)).selectExpr("`min(cyclestamp)` as min","`max(cyclestamp)` as max").withColumn("mindate",from_unixtime(expr("min/1000000000"))).withColumn("maxdate", from_unixtime(expr("max/1000000000"))).show
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/jars/scopt_2.11-3.7.0.jar b/spark-2.4.0-bin-hadoop2.7/examples/jars/scopt_2.11-3.7.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..eac19a2583b91d9f40a2b759d225ccafc6ba6168
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/examples/jars/scopt_2.11-3.7.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4027bd8522e383ff7fee106b84f4c600e153304e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
new file mode 100644
index 0000000000000000000000000000000000000000..362bd4435ecb3ea400d18bf73030c1d64f9895fc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.sql.SparkSession;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.Random;
+import java.util.regex.Pattern;
+
+/**
+ * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.classification.LogisticRegression.
+ */
+public final class JavaHdfsLR {
+
+  private static final int D = 10;   // Number of dimensions
+  private static final Random rand = new Random(42);
+
+  static void showWarning() {
+    String warning = "WARN: This is a naive implementation of Logistic Regression " +
+            "and is given as an example!\n" +
+            "Please use org.apache.spark.ml.classification.LogisticRegression " +
+            "for more conventional use.";
+    System.err.println(warning);
+  }
+
+  static class DataPoint implements Serializable {
+    DataPoint(double[] x, double y) {
+      this.x = x;
+      this.y = y;
+    }
+
+    double[] x;
+    double y;
+  }
+
+  static class ParsePoint implements Function<String, DataPoint> {
+    private static final Pattern SPACE = Pattern.compile(" ");
+
+    @Override
+    public DataPoint call(String line) {
+      String[] tok = SPACE.split(line);
+      double y = Double.parseDouble(tok[0]);
+      double[] x = new double[D];
+      for (int i = 0; i < D; i++) {
+        x[i] = Double.parseDouble(tok[i + 1]);
+      }
+      return new DataPoint(x, y);
+    }
+  }
+
+  static class VectorSum implements Function2<double[], double[], double[]> {
+    @Override
+    public double[] call(double[] a, double[] b) {
+      double[] result = new double[D];
+      for (int j = 0; j < D; j++) {
+        result[j] = a[j] + b[j];
+      }
+      return result;
+    }
+  }
+
+  static class ComputeGradient implements Function<DataPoint, double[]> {
+    private final double[] weights;
+
+    ComputeGradient(double[] weights) {
+      this.weights = weights;
+    }
+
+    @Override
+    public double[] call(DataPoint p) {
+      double[] gradient = new double[D];
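+      // Gradient of the logistic loss for labels y in {-1, +1}: (1 / (1 + exp(-y * (w . x))) - 1) * y * x[i]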
+      for (int i = 0; i < D; i++) {
+        double dot = dot(weights, p.x);
+        gradient[i] = (1 / (1 + Math.exp(-p.y * dot)) - 1) * p.y * p.x[i];
+      }
+      return gradient;
+    }
+  }
+
+  public static double dot(double[] a, double[] b) {
+    double x = 0;
+    for (int i = 0; i < D; i++) {
+      x += a[i] * b[i];
+    }
+    return x;
+  }
+
+  public static void printWeights(double[] a) {
+    System.out.println(Arrays.toString(a));
+  }
+
+  public static void main(String[] args) {
+
+    if (args.length < 2) {
+      System.err.println("Usage: JavaHdfsLR <file> <iters>");
+      System.exit(1);
+    }
+
+    showWarning();
+
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaHdfsLR")
+      .getOrCreate();
+
+    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();
+    JavaRDD<DataPoint> points = lines.map(new ParsePoint()).cache();
+    int ITERATIONS = Integer.parseInt(args[1]);
+
+    // Initialize w to a random value
+    double[] w = new double[D];
+    for (int i = 0; i < D; i++) {
+      w[i] = 2 * rand.nextDouble() - 1;
+    }
+
+    System.out.print("Initial w: ");
+    printWeights(w);
+
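+    // Batch gradient descent with unit step size: each iteration subtracts the gradient summed over all points.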
+    for (int i = 1; i <= ITERATIONS; i++) {
+      System.out.println("On iteration " + i);
+
+      double[] gradient = points.map(
+        new ComputeGradient(w)
+      ).reduce(new VectorSum());
+
+      for (int j = 0; j < D; j++) {
+        w[j] -= gradient[j];
+      }
+
+    }
+
+    System.out.print("Final w: ");
+    printWeights(w);
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
new file mode 100644
index 0000000000000000000000000000000000000000..cf12de390f608cad953ab6f2db5a1dd56d488d2c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaLogQuery.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+import scala.Tuple3;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Executes a roll-up-style query against Apache logs.
+ *
+ * Usage: JavaLogQuery [logFile]
+ */
+public final class JavaLogQuery {
+
+  public static final List<String> exampleApacheLogs = Arrays.asList(
+    "10.10.10.10 - \"FRED\" [18/Jan/2013:17:56:07 +1100] \"GET http://images.com/2013/Generic.jpg " +
+      "HTTP/1.1\" 304 315 \"http://referall.com/\" \"Mozilla/4.0 (compatible; MSIE 7.0; " +
+      "Windows NT 5.1; GTB7.4; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648; " +
+      ".NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 1.0.3705; .NET CLR 1.1.4322; .NET CLR " +
+      "3.5.30729; Release=ARP)\" \"UD-1\" - \"image/jpeg\" \"whatever\" 0.350 \"-\" - \"\" 265 923 934 \"\" " +
+      "62.24.11.25 images.com 1358492167 - Whatup",
+    "10.10.10.10 - \"FRED\" [18/Jan/2013:18:02:37 +1100] \"GET http://images.com/2013/Generic.jpg " +
+      "HTTP/1.1\" 304 306 \"http:/referall.com\" \"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; " +
+      "GTB7.4; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648; .NET CLR " +
+      "3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 1.0.3705; .NET CLR 1.1.4322; .NET CLR  " +
+      "3.5.30729; Release=ARP)\" \"UD-1\" - \"image/jpeg\" \"whatever\" 0.352 \"-\" - \"\" 256 977 988 \"\" " +
+      "0 73.23.2.15 images.com 1358492557 - Whatup");
+
+  public static final Pattern apacheLogRegex = Pattern.compile(
+    "^([\\d.]+) (\\S+) (\\S+) \\[([\\w\\d:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) ([\\d\\-]+) \"([^\"]+)\" \"([^\"]+)\".*");
+
+  /** Tracks the total query count and the aggregate number of bytes for a particular group. */
+  public static class Stats implements Serializable {
+
+    private final int count;
+    private final int numBytes;
+
+    public Stats(int count, int numBytes) {
+      this.count = count;
+      this.numBytes = numBytes;
+    }
+    public Stats merge(Stats other) {
+      return new Stats(count + other.count, numBytes + other.numBytes);
+    }
+
+    public String toString() {
+      return String.format("bytes=%s\tn=%s", numBytes, count);
+    }
+  }
+
+  public static Tuple3<String, String, String> extractKey(String line) {
+    Matcher m = apacheLogRegex.matcher(line);
+    if (m.find()) {
+      String ip = m.group(1);
+      String user = m.group(3);
+      String query = m.group(5);
+      if (!user.equalsIgnoreCase("-")) {
+        return new Tuple3<>(ip, user, query);
+      }
+    }
+    return new Tuple3<>(null, null, null);
+  }
+
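+  /** Builds per-line statistics: one query and the byte count from the log line, or zero bytes if the line does not match the pattern. */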
+  public static Stats extractStats(String line) {
+    Matcher m = apacheLogRegex.matcher(line);
+    if (m.find()) {
+      int bytes = Integer.parseInt(m.group(7));
+      return new Stats(1, bytes);
+    } else {
+      return new Stats(1, 0);
+    }
+  }
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaLogQuery")
+      .getOrCreate();
+
+    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
+
+    JavaRDD<String> dataSet = (args.length == 1) ? jsc.textFile(args[0]) : jsc.parallelize(exampleApacheLogs);
+
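+    // Key each log line by (ip, user, query) and merge the per-line Stats to get hit and byte totals per key.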
+    JavaPairRDD<Tuple3<String, String, String>, Stats> extracted =
+        dataSet.mapToPair(s -> new Tuple2<>(extractKey(s), extractStats(s)));
+
+    JavaPairRDD<Tuple3<String, String, String>, Stats> counts = extracted.reduceByKey(Stats::merge);
+
+    List<Tuple2<Tuple3<String, String, String>, Stats>> output = counts.collect();
+    for (Tuple2<?,?> t : output) {
+      System.out.println(t._1() + "\t" + t._2());
+    }
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
new file mode 100644
index 0000000000000000000000000000000000000000..b5b4703932f0f92d33e2e060db85acdc3bd4aec8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import com.google.common.collect.Iterables;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Computes the PageRank of URLs from an input file. The input file should
+ * be in the format:
+ * URL         neighbor URL
+ * URL         neighbor URL
+ * URL         neighbor URL
+ * ...
+ * where a URL and its neighbors are separated by space(s).
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.graphx.lib.PageRank
+ *
+ * Example Usage:
+ * <pre>
+ * bin/run-example JavaPageRank data/mllib/pagerank_data.txt 10
+ * </pre>
+ */
+public final class JavaPageRank {
+  private static final Pattern SPACES = Pattern.compile("\\s+");
+
+  static void showWarning() {
+    String warning = "WARN: This is a naive implementation of PageRank " +
+            "and is given as an example! \n" +
+            "Please use the PageRank implementation found in " +
+            "org.apache.spark.graphx.lib.PageRank for more conventional use.";
+    System.err.println(warning);
+  }
+
+  private static class Sum implements Function2<Double, Double, Double> {
+    @Override
+    public Double call(Double a, Double b) {
+      return a + b;
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
+      System.exit(1);
+    }
+
+    showWarning();
+
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaPageRank")
+      .getOrCreate();
+
+    // Loads the input file. It should be in the format:
+    //     URL         neighbor URL
+    //     URL         neighbor URL
+    //     URL         neighbor URL
+    //     ...
+    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();
+
+    // Loads all URLs from the input file and initializes their neighbor lists.
+    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
+      String[] parts = SPACES.split(s);
+      return new Tuple2<>(parts[0], parts[1]);
+    }).distinct().groupByKey().cache();
+
+    // Initializes the rank of every URL that links to other URLs (each source URL in the input file) to 1.0.
+    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);
+
+    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
+    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
+      // Calculates URL contributions to the rank of other URLs.
+      JavaPairRDD<String, Double> contribs = links.join(ranks).values()
+        .flatMapToPair(s -> {
+          int urlCount = Iterables.size(s._1());
+          List<Tuple2<String, Double>> results = new ArrayList<>();
+          for (String n : s._1) {
+            results.add(new Tuple2<>(n, s._2() / urlCount));
+          }
+          return results.iterator();
+        });
+
+      // Re-calculates URL ranks based on neighbor contributions.
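+      // Uses a damping factor of 0.85: new rank = 0.15 + 0.85 * (sum of contributions from inbound links).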
+      ranks = contribs.reduceByKey(new Sum()).mapValues(sum -> 0.15 + sum * 0.85);
+    }
+
+    // Collects all URL ranks and dumps them to the console.
+    List<Tuple2<String, Double>> output = ranks.collect();
+    for (Tuple2<?,?> tuple : output) {
+      System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
+    }
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
new file mode 100644
index 0000000000000000000000000000000000000000..37bd8fffbe45a899d3e0e48dab558af72756feee
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaSparkPi.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SparkSession;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Computes an approximation to pi
+ * Usage: JavaSparkPi [partitions]
+ */
+public final class JavaSparkPi {
+
+  public static void main(String[] args) throws Exception {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaSparkPi")
+      .getOrCreate();
+
+    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
+
+    int slices = (args.length == 1) ? Integer.parseInt(args[0]) : 2;
+    int n = 100000 * slices;
+    List<Integer> l = new ArrayList<>(n);
+    for (int i = 0; i < n; i++) {
+      l.add(i);
+    }
+
+    JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);
+
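+    // Monte Carlo estimate: sample points uniformly in [-1, 1] x [-1, 1]; the fraction that falls
+    // inside the unit circle approximates pi / 4, so pi is roughly 4.0 * count / n.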
+    int count = dataSet.map(integer -> {
+      double x = Math.random() * 2 - 1;
+      double y = Math.random() * 2 - 1;
+      return (x * x + y * y <= 1) ? 1 : 0;
+    }).reduce((integer, integer2) -> integer + integer2);
+
+    System.out.println("Pi is roughly " + 4.0 * count / n);
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaStatusTrackerDemo.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaStatusTrackerDemo.java
new file mode 100644
index 0000000000000000000000000000000000000000..b0ebedfed6a8b72e7e94dd193aef5b45621b6d7b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaStatusTrackerDemo.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import org.apache.spark.SparkJobInfo;
+import org.apache.spark.SparkStageInfo;
+import org.apache.spark.api.java.JavaFutureAction;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.sql.SparkSession;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Example of using Spark's status APIs from Java.
+ */
+public final class JavaStatusTrackerDemo {
+
+  public static final String APP_NAME = "JavaStatusAPIDemo";
+
+  public static final class IdentityWithDelay<T> implements Function<T, T> {
+    @Override
+    public T call(T x) throws Exception {
+      Thread.sleep(2 * 1000);  // 2 seconds
+      return x;
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName(APP_NAME)
+      .getOrCreate();
+
+    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
+
+    // Example of implementing a progress reporter for a simple job.
+    JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5).map(
+        new IdentityWithDelay<>());
+    JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync();
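+    // Poll the asynchronous collect once per second and report task progress for the first stage of its latest job.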
+    while (!jobFuture.isDone()) {
+      Thread.sleep(1000);  // 1 second
+      List<Integer> jobIds = jobFuture.jobIds();
+      if (jobIds.isEmpty()) {
+        continue;
+      }
+      int currentJobId = jobIds.get(jobIds.size() - 1);
+      SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId);
+      SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
+      System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() +
+          " active, " + stageInfo.numCompletedTasks() + " complete");
+    }
+
+    System.out.println("Job results are: " + jobFuture.get());
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaTC.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaTC.java
new file mode 100644
index 0000000000000000000000000000000000000000..c9ca9c9b3a412c1c2edefbe7a3453ae81d5677da
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaTC.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Transitive closure on a graph, implemented in Java.
+ * Usage: JavaTC [partitions]
+ */
+public final class JavaTC {
+
+  private static final int numEdges = 200;
+  private static final int numVertices = 100;
+  private static final Random rand = new Random(42);
+
+  static List<Tuple2<Integer, Integer>> generateGraph() {
+    Set<Tuple2<Integer, Integer>> edges = new HashSet<>(numEdges);
+    while (edges.size() < numEdges) {
+      int from = rand.nextInt(numVertices);
+      int to = rand.nextInt(numVertices);
+      Tuple2<Integer, Integer> e = new Tuple2<>(from, to);
+      if (from != to) {
+        edges.add(e);
+      }
+    }
+    return new ArrayList<>(edges);
+  }
+
+  static class ProjectFn implements PairFunction<Tuple2<Integer, Tuple2<Integer, Integer>>,
+      Integer, Integer> {
+    static final ProjectFn INSTANCE = new ProjectFn();
+
+    @Override
+    public Tuple2<Integer, Integer> call(Tuple2<Integer, Tuple2<Integer, Integer>> triple) {
+      return new Tuple2<>(triple._2()._2(), triple._2()._1());
+    }
+  }
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaTC")
+      .getOrCreate();
+
+    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
+
+    Integer slices = (args.length > 0) ? Integer.parseInt(args[0]): 2;
+    JavaPairRDD<Integer, Integer> tc = jsc.parallelizePairs(generateGraph(), slices).cache();
+
+    // Linear transitive closure: each round grows paths by one edge,
+    // by joining the graph's edges with the already-discovered paths.
+    // e.g. join the path (y, z) from the TC with the edge (x, y) from
+    // the graph to obtain the path (x, z).
+
+    // Because join() joins on keys, the edges are stored in reversed order.
+    JavaPairRDD<Integer, Integer> edges = tc.mapToPair(e -> new Tuple2<>(e._2(), e._1()));
+
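+    // Iterate to a fixed point: stop once a round adds no new (x, z) paths.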
+    long oldCount;
+    long nextCount = tc.count();
+    do {
+      oldCount = nextCount;
+      // Perform the join, obtaining an RDD of (y, (z, x)) pairs,
+      // then project the result to obtain the new (x, z) paths.
+      tc = tc.union(tc.join(edges).mapToPair(ProjectFn.INSTANCE)).distinct().cache();
+      nextCount = tc.count();
+    } while (nextCount != oldCount);
+
+    System.out.println("TC has " + tc.count() + " edges.");
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..f1ce1e958580fc79d06335e1a27d6753ae68f531
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.sql.SparkSession;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+public final class JavaWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+
+    if (args.length < 1) {
+      System.err.println("Usage: JavaWordCount <file>");
+      System.exit(1);
+    }
+
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaWordCount")
+      .getOrCreate();
+
+    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();
+
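+    // Split each line on single spaces, emit a (word, 1) pair per word, and sum the counts per word.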
+    JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
+
+    JavaPairRDD<String, Integer> ones = words.mapToPair(s -> new Tuple2<>(s, 1));
+
+    JavaPairRDD<String, Integer> counts = ones.reduceByKey((i1, i2) -> i1 + i2);
+
+    List<Tuple2<String, Integer>> output = counts.collect();
+    for (Tuple2<?,?> tuple : output) {
+      System.out.println(tuple._1() + ": " + tuple._2());
+    }
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..7c741ff56eaf4d160ca23963c35d07c757e6bcc8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.regression.AFTSurvivalRegression;
+import org.apache.spark.ml.regression.AFTSurvivalRegressionModel;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+/**
+ * An example demonstrating AFTSurvivalRegression.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaAFTSurvivalRegressionExample
+ * </pre>
+ */
+public class JavaAFTSurvivalRegressionExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaAFTSurvivalRegressionExample")
+      .getOrCreate();
+
+    // $example on$
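+    // Each row is (label, censor, features). The censor value is 1.0 if the event
+    // occurred (uncensored) and 0.0 if the observation is censored.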
+    List<Row> data = Arrays.asList(
+      RowFactory.create(1.218, 1.0, Vectors.dense(1.560, -0.605)),
+      RowFactory.create(2.949, 0.0, Vectors.dense(0.346, 2.158)),
+      RowFactory.create(3.627, 0.0, Vectors.dense(1.380, 0.231)),
+      RowFactory.create(0.273, 1.0, Vectors.dense(0.520, 1.151)),
+      RowFactory.create(4.199, 0.0, Vectors.dense(0.795, -0.226))
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("censor", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> training = spark.createDataFrame(data, schema);
+    double[] quantileProbabilities = new double[]{0.3, 0.6};
+    AFTSurvivalRegression aft = new AFTSurvivalRegression()
+      .setQuantileProbabilities(quantileProbabilities)
+      .setQuantilesCol("quantiles");
+
+    AFTSurvivalRegressionModel model = aft.fit(training);
+
+    // Print the coefficients, intercept and scale parameter for AFT survival regression
+    System.out.println("Coefficients: " + model.coefficients());
+    System.out.println("Intercept: " + model.intercept());
+    System.out.println("Scale: " + model.scale());
+    model.transform(training).show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..27052be87b82e3086b3e68055a47fa3d48c42945
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.io.Serializable;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.evaluation.RegressionEvaluator;
+import org.apache.spark.ml.recommendation.ALS;
+import org.apache.spark.ml.recommendation.ALSModel;
+// $example off$
+
+public class JavaALSExample {
+
+  // $example on$
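+  // A JavaBean holding one rating; Spark SQL infers the DataFrame schema from its getters.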
+  public static class Rating implements Serializable {
+    private int userId;
+    private int movieId;
+    private float rating;
+    private long timestamp;
+
+    public Rating() {}
+
+    public Rating(int userId, int movieId, float rating, long timestamp) {
+      this.userId = userId;
+      this.movieId = movieId;
+      this.rating = rating;
+      this.timestamp = timestamp;
+    }
+
+    public int getUserId() {
+      return userId;
+    }
+
+    public int getMovieId() {
+      return movieId;
+    }
+
+    public float getRating() {
+      return rating;
+    }
+
+    public long getTimestamp() {
+      return timestamp;
+    }
+
+    public static Rating parseRating(String str) {
+      String[] fields = str.split("::");
+      if (fields.length != 4) {
+        throw new IllegalArgumentException("Each line must contain 4 fields");
+      }
+      int userId = Integer.parseInt(fields[0]);
+      int movieId = Integer.parseInt(fields[1]);
+      float rating = Float.parseFloat(fields[2]);
+      long timestamp = Long.parseLong(fields[3]);
+      return new Rating(userId, movieId, rating, timestamp);
+    }
+  }
+  // $example off$
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaALSExample")
+      .getOrCreate();
+
+    // $example on$
+    JavaRDD<Rating> ratingsRDD = spark
+      .read().textFile("data/mllib/als/sample_movielens_ratings.txt").javaRDD()
+      .map(Rating::parseRating);
+    Dataset<Row> ratings = spark.createDataFrame(ratingsRDD, Rating.class);
+    Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2});
+    Dataset<Row> training = splits[0];
+    Dataset<Row> test = splits[1];
+
+    // Build the recommendation model using ALS on the training data
+    ALS als = new ALS()
+      .setMaxIter(5)
+      .setRegParam(0.01)
+      .setUserCol("userId")
+      .setItemCol("movieId")
+      .setRatingCol("rating");
+    ALSModel model = als.fit(training);
+
+    // Evaluate the model by computing the RMSE on the test data
+    // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
+    model.setColdStartStrategy("drop");
+    Dataset<Row> predictions = model.transform(test);
+
+    RegressionEvaluator evaluator = new RegressionEvaluator()
+      .setMetricName("rmse")
+      .setLabelCol("rating")
+      .setPredictionCol("prediction");
+    double rmse = evaluator.evaluate(predictions);
+    System.out.println("Root-mean-square error = " + rmse);
+
+    // Generate top 10 movie recommendations for each user
+    Dataset<Row> userRecs = model.recommendForAllUsers(10);
+    // Generate top 10 user recommendations for each movie
+    Dataset<Row> movieRecs = model.recommendForAllItems(10);
+
+    // Generate top 10 movie recommendations for a specified set of users
+    Dataset<Row> users = ratings.select(als.getUserCol()).distinct().limit(3);
+    Dataset<Row> userSubsetRecs = model.recommendForUserSubset(users, 10);
+    // Generate top 10 user recommendations for a specified set of movies
+    Dataset<Row> movies = ratings.select(als.getItemCol()).distinct().limit(3);
+    Dataset<Row> movieSubSetRecs = model.recommendForItemSubset(movies, 10);
+    // $example off$
+    userRecs.show();
+    movieRecs.show();
+    userSubsetRecs.show();
+    movieSubSetRecs.show();
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..3090d8fd14522d9ad1f10615a0c27231eb2c688a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.Binarizer;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaBinarizerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaBinarizerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0, 0.1),
+      RowFactory.create(1, 0.8),
+      RowFactory.create(2, 0.2)
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
+    });
+    Dataset<Row> continuousDataFrame = spark.createDataFrame(data, schema);
+
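+    // Binarize the 'feature' column: values above the 0.5 threshold become 1.0, the rest 0.0.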
+    Binarizer binarizer = new Binarizer()
+      .setInputCol("feature")
+      .setOutputCol("binarized_feature")
+      .setThreshold(0.5);
+
+    Dataset<Row> binarizedDataFrame = binarizer.transform(continuousDataFrame);
+
+    System.out.println("Binarizer output with Threshold = " + binarizer.getThreshold());
+    binarizedDataFrame.show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..8c82aaaacca38ffb8b1aabdaa4d0db198fe767ca
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.clustering.BisectingKMeans;
+import org.apache.spark.ml.clustering.BisectingKMeansModel;
+import org.apache.spark.ml.linalg.Vector;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+
+/**
+ * An example demonstrating bisecting k-means clustering.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaBisectingKMeansExample
+ * </pre>
+ */
+public class JavaBisectingKMeansExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaBisectingKMeansExample")
+      .getOrCreate();
+
+    // $example on$
+    // Loads data.
+    Dataset<Row> dataset = spark.read().format("libsvm").load("data/mllib/sample_kmeans_data.txt");
+
+    // Trains a bisecting k-means model.
+    BisectingKMeans bkm = new BisectingKMeans().setK(2).setSeed(1);
+    BisectingKMeansModel model = bkm.fit(dataset);
+
+    // Evaluate clustering.
+    double cost = model.computeCost(dataset);
+    System.out.println("Within Set Sum of Squared Errors = " + cost);
+
+    // Shows the result.
+    System.out.println("Cluster Centers: ");
+    Vector[] centers = model.clusterCenters();
+    for (Vector center : centers) {
+      System.out.println(center);
+    }
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketedRandomProjectionLSHExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketedRandomProjectionLSHExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..ff917b720c8b6eff645a234c95e5e5dd1ea0745c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketedRandomProjectionLSHExample.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.BucketedRandomProjectionLSH;
+import org.apache.spark.ml.feature.BucketedRandomProjectionLSHModel;
+import org.apache.spark.ml.linalg.Vector;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import static org.apache.spark.sql.functions.col;
+// $example off$
+
+/**
+ * An example demonstrating BucketedRandomProjectionLSH.
+ * Run with:
+ *   bin/run-example ml.JavaBucketedRandomProjectionLSHExample
+ */
+public class JavaBucketedRandomProjectionLSHExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaBucketedRandomProjectionLSHExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> dataA = Arrays.asList(
+      RowFactory.create(0, Vectors.dense(1.0, 1.0)),
+      RowFactory.create(1, Vectors.dense(1.0, -1.0)),
+      RowFactory.create(2, Vectors.dense(-1.0, -1.0)),
+      RowFactory.create(3, Vectors.dense(-1.0, 1.0))
+    );
+
+    List<Row> dataB = Arrays.asList(
+      RowFactory.create(4, Vectors.dense(1.0, 0.0)),
+      RowFactory.create(5, Vectors.dense(-1.0, 0.0)),
+      RowFactory.create(6, Vectors.dense(0.0, 1.0)),
+      RowFactory.create(7, Vectors.dense(0.0, -1.0))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> dfA = spark.createDataFrame(dataA, schema);
+    Dataset<Row> dfB = spark.createDataFrame(dataB, schema);
+
+    Vector key = Vectors.dense(1.0, 0.0);
+
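+    // LSH for Euclidean distance; more hash tables reduce the false negative rate
+    // at the cost of extra computation.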
+    BucketedRandomProjectionLSH mh = new BucketedRandomProjectionLSH()
+      .setBucketLength(2.0)
+      .setNumHashTables(3)
+      .setInputCol("features")
+      .setOutputCol("hashes");
+
+    BucketedRandomProjectionLSHModel model = mh.fit(dfA);
+
+    // Feature Transformation
+    System.out.println("The hashed dataset where hashed values are stored in the column 'hashes':");
+    model.transform(dfA).show();
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate
+    // similarity join.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxSimilarityJoin(transformedA, transformedB, 1.5)`
+    System.out.println("Approximately joining dfA and dfB on distance smaller than 1.5:");
+    model.approxSimilarityJoin(dfA, dfB, 1.5, "EuclideanDistance")
+      .select(col("datasetA.id").alias("idA"),
+        col("datasetB.id").alias("idB"),
+        col("EuclideanDistance")).show();
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate nearest
+    // neighbor search.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxNearestNeighbors(transformedA, key, 2)`
+    System.out.println("Approximately searching dfA for 2 nearest neighbors of the key:");
+    model.approxNearestNeighbors(dfA, key, 2).show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..3e49bf04ac89223aec5d63db272da380ac4b2c52
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizerExample.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.Bucketizer;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+/**
+ * An example for Bucketizer.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaBucketizerExample
+ * </pre>
+ */
+public class JavaBucketizerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaBucketizerExample")
+      .getOrCreate();
+
+    // $example on$
+    double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
+
+    List<Row> data = Arrays.asList(
+      RowFactory.create(-999.9),
+      RowFactory.create(-0.5),
+      RowFactory.create(-0.3),
+      RowFactory.create(0.0),
+      RowFactory.create(0.2),
+      RowFactory.create(999.9)
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
+    });
+    Dataset<Row> dataFrame = spark.createDataFrame(data, schema);
+
+    Bucketizer bucketizer = new Bucketizer()
+      .setInputCol("features")
+      .setOutputCol("bucketedFeatures")
+      .setSplits(splits);
+
+    // Transform original data into its bucket index.
+    Dataset<Row> bucketedData = bucketizer.transform(dataFrame);
+
+    System.out.println("Bucketizer output with " + (bucketizer.getSplits().length-1) + " buckets");
+    bucketedData.show();
+    // $example off$
+
+    // $example on$
+    // Bucketize multiple columns in one pass.
+    double[][] splitsArray = {
+      {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY},
+      {Double.NEGATIVE_INFINITY, -0.3, 0.0, 0.3, Double.POSITIVE_INFINITY}
+    };
+
+    List<Row> data2 = Arrays.asList(
+      RowFactory.create(-999.9, -999.9),
+      RowFactory.create(-0.5, -0.2),
+      RowFactory.create(-0.3, -0.1),
+      RowFactory.create(0.0, 0.0),
+      RowFactory.create(0.2, 0.4),
+      RowFactory.create(999.9, 999.9)
+    );
+    StructType schema2 = new StructType(new StructField[]{
+      new StructField("features1", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("features2", DataTypes.DoubleType, false, Metadata.empty())
+    });
+    Dataset<Row> dataFrame2 = spark.createDataFrame(data2, schema2);
+
+    Bucketizer bucketizer2 = new Bucketizer()
+      .setInputCols(new String[] {"features1", "features2"})
+      .setOutputCols(new String[] {"bucketedFeatures1", "bucketedFeatures2"})
+      .setSplitsArray(splitsArray);
+    // Transform original data into its bucket index.
+    Dataset<Row> bucketedData2 = bucketizer2.transform(dataFrame2);
+
+    System.out.println("Bucketizer output with [" +
+      (bucketizer2.getSplitsArray()[0].length-1) + ", " +
+      (bucketizer2.getSplitsArray()[1].length-1) + "] buckets for each input column");
+    bucketedData2.show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..73738966b118b536af8a19b32d96dc6b5d2c6bb0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSqSelectorExample.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.ChiSqSelector;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaChiSqSelectorExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaChiSqSelectorExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(7, Vectors.dense(0.0, 0.0, 18.0, 1.0), 1.0),
+      RowFactory.create(8, Vectors.dense(0.0, 1.0, 12.0, 0.0), 0.0),
+      RowFactory.create(9, Vectors.dense(1.0, 0.0, 15.0, 0.1), 0.0)
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+      new StructField("clicked", DataTypes.DoubleType, false, Metadata.empty())
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
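+    // Select the single feature most strongly associated with the 'clicked' label,
+    // as measured by a chi-squared test.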
+    ChiSqSelector selector = new ChiSqSelector()
+      .setNumTopFeatures(1)
+      .setFeaturesCol("features")
+      .setLabelCol("clicked")
+      .setOutputCol("selectedFeatures");
+
+    Dataset<Row> result = selector.fit(df).transform(df);
+
+    System.out.println("ChiSqSelector output with top " + selector.getNumTopFeatures()
+        + " features selected");
+    result.show();
+
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSquareTestExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSquareTestExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..4b39350fab9b56d6922f778048c0424df280192a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaChiSquareTestExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.stat.ChiSquareTest;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+/**
+ * An example for Chi-square hypothesis testing.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaChiSquareTestExample
+ * </pre>
+ */
+public class JavaChiSquareTestExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaChiSquareTestExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0.0, Vectors.dense(0.5, 10.0)),
+      RowFactory.create(0.0, Vectors.dense(1.5, 20.0)),
+      RowFactory.create(1.0, Vectors.dense(1.5, 30.0)),
+      RowFactory.create(0.0, Vectors.dense(3.5, 30.0)),
+      RowFactory.create(0.0, Vectors.dense(3.5, 40.0)),
+      RowFactory.create(1.0, Vectors.dense(3.5, 40.0))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
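+    // Run Pearson's chi-squared independence test of every feature against the label.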
+    Row r = ChiSquareTest.test(df, "features", "label").head();
+    System.out.println("pValues: " + r.get(0).toString());
+    System.out.println("degreesOfFreedom: " + r.getList(1).toString());
+    System.out.println("statistics: " + r.get(2).toString());
+
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaCorrelationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaCorrelationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..2a6d62ab3fb73babcc54c86d266cbb12e1536721
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaCorrelationExample.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.stat.Correlation;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+/**
+ * An example for computing a correlation matrix.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaCorrelationExample
+ * </pre>
+ */
+public class JavaCorrelationExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaCorrelationExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Vectors.sparse(4, new int[]{0, 3}, new double[]{1.0, -2.0})),
+      RowFactory.create(Vectors.dense(4.0, 5.0, 0.0, 3.0)),
+      RowFactory.create(Vectors.dense(6.0, 7.0, 0.0, 8.0)),
+      RowFactory.create(Vectors.sparse(4, new int[]{0, 3}, new double[]{9.0, 1.0}))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
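+    // Compute the correlation matrix of the 'features' column (Pearson by default).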
+    Row r1 = Correlation.corr(df, "features").head();
+    System.out.println("Pearson correlation matrix:\n" + r1.get(0).toString());
+
+    Row r2 = Correlation.corr(df, "features", "spearman").head();
+    System.out.println("Spearman correlation matrix:\n" + r2.get(0).toString());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..ac2a86c30b0b2defbc4bfffb52366b903736df72
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.CountVectorizer;
+import org.apache.spark.ml.feature.CountVectorizerModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+public class JavaCountVectorizerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaCountVectorizerExample")
+      .getOrCreate();
+
+    // $example on$
+    // Input data: Each row is a bag of words from a sentence or document.
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Arrays.asList("a", "b", "c")),
+      RowFactory.create(Arrays.asList("a", "b", "b", "c", "a"))
+    );
+    StructType schema = new StructType(new StructField [] {
+      new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
+    });
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
+    // fit a CountVectorizerModel from the corpus
+    CountVectorizerModel cvModel = new CountVectorizer()
+      .setInputCol("text")
+      .setOutputCol("feature")
+      .setVocabSize(3)
+      .setMinDF(2)
+      .fit(df);
+
+    // Alternatively, define a CountVectorizerModel with an a priori vocabulary.
+    CountVectorizerModel cvm = new CountVectorizerModel(new String[]{"a", "b", "c"})
+      .setInputCol("text")
+      .setOutputCol("feature");
+
+    cvModel.transform(df).show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..04546d29fadd1502d873661d9aecf3f579018d14
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDCTExample.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.DCT;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaDCTExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaDCTExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)),
+      RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)),
+      RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0))
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
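+    // Apply the 1-D discrete cosine transform to each feature vector;
+    // setInverse(false) selects the forward transform.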
+    DCT dct = new DCT()
+      .setInputCol("features")
+      .setOutputCol("featuresDCT")
+      .setInverse(false);
+
+    Dataset<Row> dctDf = dct.transform(df);
+
+    dctDf.select("featuresDCT").show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..a9c6e7f0bf6c54e542c08ecbbfbbf91400617f46
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeClassificationExample.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// scalastyle:off println
+package org.apache.spark.examples.ml;
+// $example on$
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineModel;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.classification.DecisionTreeClassifier;
+import org.apache.spark.ml.classification.DecisionTreeClassificationModel;
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
+import org.apache.spark.ml.feature.*;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaDecisionTreeClassificationExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaDecisionTreeClassificationExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load the data stored in LIBSVM format as a DataFrame.
+    Dataset<Row> data = spark
+      .read()
+      .format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt");
+
+    // Index labels, adding metadata to the label column.
+    // Fit on whole dataset to include all labels in index.
+    StringIndexerModel labelIndexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("indexedLabel")
+      .fit(data);
+
+    // Automatically identify categorical features, and index them.
+    VectorIndexerModel featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
+      .fit(data);
+
+    // Split the data into training and test sets (30% held out for testing).
+    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    Dataset<Row> trainingData = splits[0];
+    Dataset<Row> testData = splits[1];
+
+    // Train a DecisionTree model.
+    DecisionTreeClassifier dt = new DecisionTreeClassifier()
+      .setLabelCol("indexedLabel")
+      .setFeaturesCol("indexedFeatures");
+
+    // Convert indexed labels back to original labels.
+    IndexToString labelConverter = new IndexToString()
+      .setInputCol("prediction")
+      .setOutputCol("predictedLabel")
+      .setLabels(labelIndexer.labels());
+
+    // Chain indexers and tree in a Pipeline.
+    Pipeline pipeline = new Pipeline()
+      .setStages(new PipelineStage[]{labelIndexer, featureIndexer, dt, labelConverter});
+
+    // Train model. This also runs the indexers.
+    PipelineModel model = pipeline.fit(trainingData);
+
+    // Make predictions.
+    Dataset<Row> predictions = model.transform(testData);
+
+    // Select example rows to display.
+    predictions.select("predictedLabel", "label", "features").show(5);
+
+    // Select (prediction, true label) and compute test error.
+    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("indexedLabel")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy");
+    double accuracy = evaluator.evaluate(predictions);
+    System.out.println("Test Error = " + (1.0 - accuracy));
+
+    DecisionTreeClassificationModel treeModel =
+      (DecisionTreeClassificationModel) (model.stages()[2]);
+    System.out.println("Learned classification tree model:\n" + treeModel.toDebugString());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..cffb7139edccde44fc3c790f5477ad7a946259ba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDecisionTreeRegressionExample.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// scalastyle:off println
+package org.apache.spark.examples.ml;
+// $example on$
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineModel;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.evaluation.RegressionEvaluator;
+import org.apache.spark.ml.feature.VectorIndexer;
+import org.apache.spark.ml.feature.VectorIndexerModel;
+import org.apache.spark.ml.regression.DecisionTreeRegressionModel;
+import org.apache.spark.ml.regression.DecisionTreeRegressor;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaDecisionTreeRegressionExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaDecisionTreeRegressionExample")
+      .getOrCreate();
+    // $example on$
+    // Load the data stored in LIBSVM format as a DataFrame.
+    Dataset<Row> data = spark.read().format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt");
+
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    VectorIndexerModel featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data);
+
+    // Split the data into training and test sets (30% held out for testing).
+    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    Dataset<Row> trainingData = splits[0];
+    Dataset<Row> testData = splits[1];
+
+    // Train a DecisionTree model.
+    DecisionTreeRegressor dt = new DecisionTreeRegressor()
+      .setFeaturesCol("indexedFeatures");
+
+    // Chain indexer and tree in a Pipeline.
+    Pipeline pipeline = new Pipeline()
+      .setStages(new PipelineStage[]{featureIndexer, dt});
+
+    // Train model. This also runs the indexer.
+    PipelineModel model = pipeline.fit(trainingData);
+
+    // Make predictions.
+    Dataset<Row> predictions = model.transform(testData);
+
+    // Select example rows to display.
+    predictions.select("label", "features").show(5);
+
+    // Select (prediction, true label) and compute test error.
+    RegressionEvaluator evaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("rmse");
+    double rmse = evaluator.evaluate(predictions);
+    System.out.println("Root Mean Squared Error (RMSE) on test data = " + rmse);
+
+    DecisionTreeRegressionModel treeModel =
+      (DecisionTreeRegressionModel) (model.stages()[1]);
+    System.out.println("Learned regression tree model:\n" + treeModel.toDebugString());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDocument.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDocument.java
new file mode 100644
index 0000000000000000000000000000000000000000..6459dabc0698b282103f8806eb0cd2a353bd2969
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaDocument.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.io.Serializable;
+
+/**
+ * Unlabeled instance type; Spark SQL can infer the schema from Java Beans.
+ */
+@SuppressWarnings("serial")
+public class JavaDocument implements Serializable {
+
+  private long id;
+  private String text;
+
+  public JavaDocument(long id, String text) {
+    this.id = id;
+    this.text = text;
+  }
+
+  public long getId() {
+    return this.id;
+  }
+
+  public String getText() {
+    return this.text;
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..d2e70c23babc79b304d83be8219884c2854a65df
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProductExample.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.ElementwiseProduct;
+import org.apache.spark.ml.linalg.Vector;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaElementwiseProductExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaElementwiseProductExample")
+      .getOrCreate();
+
+    // $example on$
+    // Create some vector data; also works for sparse vectors
+    List<Row> data = Arrays.asList(
+      RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
+      RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
+    );
+
+    List<StructField> fields = new ArrayList<>(2);
+    fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
+    fields.add(DataTypes.createStructField("vector", new VectorUDT(), false));
+
+    StructType schema = DataTypes.createStructType(fields);
+
+    Dataset<Row> dataFrame = spark.createDataFrame(data, schema);
+
+    Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
+
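+    // Multiply each input vector element-wise by the scaling vector (Hadamard product).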
+    ElementwiseProduct transformer = new ElementwiseProduct()
+      .setScalingVec(transformingVector)
+      .setInputCol("vector")
+      .setOutputCol("transformedVector");
+
+    // Batch transform the vectors to create a new column:
+    transformer.transform(dataFrame).show();
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..9e07a0c2f899af512b9b9a7053c8bb37374eb6bb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.classification.LogisticRegressionModel;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.param.ParamMap;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Java example for Estimator, Transformer, and Param.
+ */
+public class JavaEstimatorTransformerParamExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaEstimatorTransformerParamExample")
+      .getOrCreate();
+
+    // $example on$
+    // Prepare training data.
+    List<Row> dataTraining = Arrays.asList(
+        RowFactory.create(1.0, Vectors.dense(0.0, 1.1, 0.1)),
+        RowFactory.create(0.0, Vectors.dense(2.0, 1.0, -1.0)),
+        RowFactory.create(0.0, Vectors.dense(2.0, 1.3, 1.0)),
+        RowFactory.create(1.0, Vectors.dense(0.0, 1.2, -0.5))
+    );
+    StructType schema = new StructType(new StructField[]{
+        new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+        new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> training = spark.createDataFrame(dataTraining, schema);
+
+    // Create a LogisticRegression instance. This instance is an Estimator.
+    LogisticRegression lr = new LogisticRegression();
+    // Print out the parameters, documentation, and any default values.
+    System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n");
+
+    // We may set parameters using setter methods.
+    lr.setMaxIter(10).setRegParam(0.01);
+
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
+    LogisticRegressionModel model1 = lr.fit(training);
+    // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
+    // we can view the parameters it used during fit().
+    // This prints the parameter (name: value) pairs, where names are unique IDs for this
+    // LogisticRegression instance.
+    System.out.println("Model 1 was fit using parameters: " + model1.parent().extractParamMap());
+
+    // We may alternatively specify parameters using a ParamMap.
+    ParamMap paramMap = new ParamMap()
+      .put(lr.maxIter().w(20))  // Specify 1 Param.
+      .put(lr.maxIter(), 30)  // This overwrites the original maxIter.
+      .put(lr.regParam().w(0.1), lr.threshold().w(0.55));  // Specify multiple Params.
+
+    // One can also combine ParamMaps.
+    ParamMap paramMap2 = new ParamMap()
+      .put(lr.probabilityCol().w("myProbability"));  // Change output column name
+    ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2);
+
+    // Now learn a new model using the paramMapCombined parameters.
+    // paramMapCombined overrides all parameters set earlier via lr.set* methods.
+    LogisticRegressionModel model2 = lr.fit(training, paramMapCombined);
+    System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());
+
+    // Prepare test documents.
+    List<Row> dataTest = Arrays.asList(
+        RowFactory.create(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
+        RowFactory.create(0.0, Vectors.dense(3.0, 2.0, -0.1)),
+        RowFactory.create(1.0, Vectors.dense(0.0, 2.2, -1.5))
+    );
+    Dataset<Row> test = spark.createDataFrame(dataTest, schema);
+
+    // Make predictions on test documents using the Transformer.transform() method.
+    // LogisticRegression.transform will only use the 'features' column.
+    // Note that model2.transform() outputs a 'myProbability' column instead of the usual
+    // 'probability' column since we renamed the lr.probabilityCol parameter previously.
+    Dataset<Row> results = model2.transform(test);
+    Dataset<Row> rows = results.select("features", "label", "myProbability", "prediction");
+    for (Row r: rows.collectAsList()) {
+      System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2)
+        + ", prediction=" + r.get(3));
+    }
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaFPGrowthExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaFPGrowthExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..717ec21c8b203567f686dc81c9114afcf9010f5b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaFPGrowthExample.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.fpm.FPGrowth;
+import org.apache.spark.ml.fpm.FPGrowthModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+/**
+ * An example demonstrating FPGrowth.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaFPGrowthExample
+ * </pre>
+ */
+public class JavaFPGrowthExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaFPGrowthExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Arrays.asList("1 2 5".split(" "))),
+      RowFactory.create(Arrays.asList("1 2 3 5".split(" "))),
+      RowFactory.create(Arrays.asList("1 2".split(" ")))
+    );
+    StructType schema = new StructType(new StructField[]{ new StructField(
+      "items", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
+    });
+    Dataset<Row> itemsDF = spark.createDataFrame(data, schema);
+
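+    // Fit an FP-growth model: frequent itemsets must appear in at least 50% of the
+    // transactions (minSupport), and generated rules must reach a confidence of 0.6.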
+    FPGrowthModel model = new FPGrowth()
+      .setItemsCol("items")
+      .setMinSupport(0.5)
+      .setMinConfidence(0.6)
+      .fit(itemsDF);
+
+    // Display frequent itemsets.
+    model.freqItemsets().show();
+
+    // Display generated association rules.
+    model.associationRules().show();
+
+    // transform examines the input items against all the association rules and summarizes the
+    // consequents as the prediction.
+    model.transform(itemsDF).show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaFeatureHasherExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaFeatureHasherExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..9730d42e6db8d9d400c8b765ffbbdf1c85ad7b0c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaFeatureHasherExample.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.FeatureHasher;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaFeatureHasherExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaFeatureHasherExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(2.2, true, "1", "foo"),
+      RowFactory.create(3.3, false, "2", "bar"),
+      RowFactory.create(4.4, false, "3", "baz"),
+      RowFactory.create(5.5, false, "4", "foo")
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("real", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("bool", DataTypes.BooleanType, false, Metadata.empty()),
+      new StructField("stringNum", DataTypes.StringType, false, Metadata.empty()),
+      new StructField("string", DataTypes.StringType, false, Metadata.empty())
+    });
+    Dataset<Row> dataset = spark.createDataFrame(data, schema);
+
+    FeatureHasher hasher = new FeatureHasher()
+      .setInputCols(new String[]{"real", "bool", "stringNum", "string"})
+      .setOutputCol("features");
+
+    Dataset<Row> featurized = hasher.transform(dataset);
+
+    featurized.show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGaussianMixtureExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGaussianMixtureExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..72bd5d0395ee4002d558a67d013080d171d2f8b7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGaussianMixtureExample.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.clustering.GaussianMixture;
+import org.apache.spark.ml.clustering.GaussianMixtureModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+
+/**
+ * An example demonstrating Gaussian Mixture Model.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaGaussianMixtureExample
+ * </pre>
+ */
+public class JavaGaussianMixtureExample {
+
+  public static void main(String[] args) {
+
+    // Creates a SparkSession
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaGaussianMixtureExample")
+      .getOrCreate();
+
+    // $example on$
+    // Loads data
+    Dataset<Row> dataset = spark.read().format("libsvm").load("data/mllib/sample_kmeans_data.txt");
+
+    // Trains a GaussianMixture model
+    GaussianMixture gmm = new GaussianMixture()
+      .setK(2);
+    GaussianMixtureModel model = gmm.fit(dataset);
+
+    // Output the parameters of the mixture model
+    for (int i = 0; i < model.getK(); i++) {
+      System.out.printf("Gaussian %d:\nweight=%f\nmu=%s\nsigma=\n%s\n\n",
+              i, model.weights()[i], model.gaussians()[i].mean(), model.gaussians()[i].cov());
+    }
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..3f072d1e50eb56011732f273de4565a4aebd836d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.ml.regression.GeneralizedLinearRegression;
+import org.apache.spark.ml.regression.GeneralizedLinearRegressionModel;
+import org.apache.spark.ml.regression.GeneralizedLinearRegressionTrainingSummary;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * An example demonstrating generalized linear regression.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaGeneralizedLinearRegressionExample
+ * </pre>
+ */
+
+public class JavaGeneralizedLinearRegressionExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaGeneralizedLinearRegressionExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load training data
+    Dataset<Row> dataset = spark.read().format("libsvm")
+      .load("data/mllib/sample_linear_regression_data.txt");
+
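+    // A GLM with a Gaussian family and identity link is equivalent to linear regression;
+    // regParam adds L2 regularization.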
+    GeneralizedLinearRegression glr = new GeneralizedLinearRegression()
+      .setFamily("gaussian")
+      .setLink("identity")
+      .setMaxIter(10)
+      .setRegParam(0.3);
+
+    // Fit the model
+    GeneralizedLinearRegressionModel model = glr.fit(dataset);
+
+    // Print the coefficients and intercept for generalized linear regression model
+    System.out.println("Coefficients: " + model.coefficients());
+    System.out.println("Intercept: " + model.intercept());
+
+    // Summarize the model over the training set and print out some metrics
+    GeneralizedLinearRegressionTrainingSummary summary = model.summary();
+    System.out.println("Coefficient Standard Errors: "
+      + Arrays.toString(summary.coefficientStandardErrors()));
+    System.out.println("T Values: " + Arrays.toString(summary.tValues()));
+    System.out.println("P Values: " + Arrays.toString(summary.pValues()));
+    System.out.println("Dispersion: " + summary.dispersion());
+    System.out.println("Null Deviance: " + summary.nullDeviance());
+    System.out.println("Residual Degree Of Freedom Null: " + summary.residualDegreeOfFreedomNull());
+    System.out.println("Deviance: " + summary.deviance());
+    System.out.println("Residual Degree Of Freedom: " + summary.residualDegreeOfFreedom());
+    System.out.println("AIC: " + summary.aic());
+    System.out.println("Deviance Residuals: ");
+    summary.residuals().show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..3e9eb998c8e1fed875283e945e347a07318f5a7d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeClassifierExample.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineModel;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.classification.GBTClassificationModel;
+import org.apache.spark.ml.classification.GBTClassifier;
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
+import org.apache.spark.ml.feature.*;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaGradientBoostedTreeClassifierExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaGradientBoostedTreeClassifierExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    Dataset<Row> data = spark
+      .read()
+      .format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt");
+
+    // Index labels, adding metadata to the label column.
+    // Fit on whole dataset to include all labels in index.
+    StringIndexerModel labelIndexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("indexedLabel")
+      .fit(data);
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    VectorIndexerModel featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data);
+
+    // Split the data into training and test sets (30% held out for testing)
+    Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
+    Dataset<Row> trainingData = splits[0];
+    Dataset<Row> testData = splits[1];
+
+    // Train a GBT model.
+    GBTClassifier gbt = new GBTClassifier()
+      .setLabelCol("indexedLabel")
+      .setFeaturesCol("indexedFeatures")
+      .setMaxIter(10);
+
+    // Convert indexed labels back to original labels.
+    IndexToString labelConverter = new IndexToString()
+      .setInputCol("prediction")
+      .setOutputCol("predictedLabel")
+      .setLabels(labelIndexer.labels());
+
+    // Chain indexers and GBT in a Pipeline.
+    Pipeline pipeline = new Pipeline()
+      .setStages(new PipelineStage[] {labelIndexer, featureIndexer, gbt, labelConverter});
+
+    // Train model. This also runs the indexers.
+    PipelineModel model = pipeline.fit(trainingData);
+
+    // Make predictions.
+    Dataset<Row> predictions = model.transform(testData);
+
+    // Select example rows to display.
+    predictions.select("predictedLabel", "label", "features").show(5);
+
+    // Select (prediction, true label) and compute test error.
+    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("indexedLabel")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy");
+    double accuracy = evaluator.evaluate(predictions);
+    System.out.println("Test Error = " + (1.0 - accuracy));
+
+    GBTClassificationModel gbtModel = (GBTClassificationModel)(model.stages()[2]);
+    System.out.println("Learned classification GBT model:\n" + gbtModel.toDebugString());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..769b5c3e85258f841f7ba39d775b11f5977a1964
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaGradientBoostedTreeRegressorExample.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineModel;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.evaluation.RegressionEvaluator;
+import org.apache.spark.ml.feature.VectorIndexer;
+import org.apache.spark.ml.feature.VectorIndexerModel;
+import org.apache.spark.ml.regression.GBTRegressionModel;
+import org.apache.spark.ml.regression.GBTRegressor;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaGradientBoostedTreeRegressorExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaGradientBoostedTreeRegressorExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    VectorIndexerModel featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data);
+
+    // Split the data into training and test sets (30% held out for testing).
+    Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
+    Dataset<Row> trainingData = splits[0];
+    Dataset<Row> testData = splits[1];
+
+    // Train a GBT model.
+    GBTRegressor gbt = new GBTRegressor()
+      .setLabelCol("label")
+      .setFeaturesCol("indexedFeatures")
+      .setMaxIter(10);
+
+    // Chain indexer and GBT in a Pipeline.
+    Pipeline pipeline = new Pipeline().setStages(new PipelineStage[] {featureIndexer, gbt});
+
+    // Train model. This also runs the indexer.
+    PipelineModel model = pipeline.fit(trainingData);
+
+    // Make predictions.
+    Dataset<Row> predictions = model.transform(testData);
+
+    // Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5);
+
+    // Select (prediction, true label) and compute test error.
+    RegressionEvaluator evaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("rmse");
+    double rmse = evaluator.evaluate(predictions);
+    System.out.println("Root Mean Squared Error (RMSE) on test data = " + rmse);
+
+    GBTRegressionModel gbtModel = (GBTRegressionModel)(model.stages()[1]);
+    System.out.println("Learned regression GBT model:\n" + gbtModel.toDebugString());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaImputerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaImputerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..ac40ccd9dbd759d90395e0fac7fbd76420182a1d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaImputerExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.Imputer;
+import org.apache.spark.ml.feature.ImputerModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+import static org.apache.spark.sql.types.DataTypes.*;
+
+/**
+ * An example demonstrating Imputer.
+ * Run with:
+ *   bin/run-example ml.JavaImputerExample
+ */
+public class JavaImputerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaImputerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(1.0, Double.NaN),
+      RowFactory.create(2.0, Double.NaN),
+      RowFactory.create(Double.NaN, 3.0),
+      RowFactory.create(4.0, 4.0),
+      RowFactory.create(5.0, 5.0)
+    );
+    StructType schema = new StructType(new StructField[]{
+      createStructField("a", DoubleType, false),
+      createStructField("b", DoubleType, false)
+    });
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
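+    // Fill missing values (NaN by default) in columns "a" and "b" with the column mean;
+    // the default strategy is "mean" and can be switched to "median" via setStrategy().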
+    Imputer imputer = new Imputer()
+      .setInputCols(new String[]{"a", "b"})
+      .setOutputCols(new String[]{"out_a", "out_b"});
+
+    ImputerModel model = imputer.fit(df);
+    model.transform(df).show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..6965512f9372aaf93ac0cf7b9a127a3a73760998
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaIndexToStringExample.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.attribute.Attribute;
+import org.apache.spark.ml.feature.IndexToString;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.ml.feature.StringIndexerModel;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaIndexToStringExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaIndexToStringExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0, "a"),
+      RowFactory.create(1, "b"),
+      RowFactory.create(2, "c"),
+      RowFactory.create(3, "a"),
+      RowFactory.create(4, "a"),
+      RowFactory.create(5, "c")
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("category", DataTypes.StringType, false, Metadata.empty())
+    });
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
+    StringIndexerModel indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df);
+    Dataset<Row> indexed = indexer.transform(df);
+
+    System.out.println("Transformed string column '" + indexer.getInputCol() + "' " +
+        "to indexed column '" + indexer.getOutputCol() + "'");
+    indexed.show();
+
+    StructField outputColSchema = indexed.schema().apply(indexer.getOutputCol());
+    System.out.println("StringIndexer will store labels in output column metadata: " +
+        Attribute.fromStructField(outputColSchema).toString() + "\n");
+
+    IndexToString converter = new IndexToString()
+      .setInputCol("categoryIndex")
+      .setOutputCol("originalCategory");
+    Dataset<Row> converted = converter.transform(indexed);
+
+    System.out.println("Transformed indexed column '" + converter.getInputCol() + "' back to " +
+        "original string column '" + converter.getOutputCol() + "' using labels in metadata");
+    converted.select("id", "categoryIndex", "originalCategory").show();
+
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..3684a87e22e7b43b78232edcc74348da832de115
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.feature.Interaction;
+import org.apache.spark.ml.feature.VectorAssembler;
+import org.apache.spark.sql.*;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import java.util.Arrays;
+import java.util.List;
+// $example off$
+
+public class JavaInteractionExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaInteractionExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(1, 1, 2, 3, 8, 4, 5),
+      RowFactory.create(2, 4, 3, 8, 7, 9, 8),
+      RowFactory.create(3, 6, 1, 9, 2, 3, 6),
+      RowFactory.create(4, 10, 8, 6, 9, 4, 5),
+      RowFactory.create(5, 9, 2, 7, 10, 7, 3),
+      RowFactory.create(6, 1, 1, 4, 2, 8, 4)
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("id3", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("id4", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("id5", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("id6", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("id7", DataTypes.IntegerType, false, Metadata.empty())
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
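+    // Assemble two groups of columns into vectors, then take every product of one value
+    // from each of id1, vec1 and vec2; the result is a single interaction vector column.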
+    VectorAssembler assembler1 = new VectorAssembler()
+      .setInputCols(new String[]{"id2", "id3", "id4"})
+      .setOutputCol("vec1");
+
+    Dataset<Row> assembled1 = assembler1.transform(df);
+
+    VectorAssembler assembler2 = new VectorAssembler()
+      .setInputCols(new String[]{"id5", "id6", "id7"})
+      .setOutputCol("vec2");
+
+    Dataset<Row> assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2");
+
+    Interaction interaction = new Interaction()
+      .setInputCols(new String[]{"id1", "vec1", "vec2"})
+      .setOutputCol("interactedCol");
+
+    Dataset<Row> interacted = interaction.transform(assembled2);
+
+    interacted.show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaIsotonicRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaIsotonicRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..a7de8e699c40eab0a84a6c13fae816db9ceb55a9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaIsotonicRegressionExample.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.ml;
+
+// $example on$
+
+import org.apache.spark.ml.regression.IsotonicRegression;
+import org.apache.spark.ml.regression.IsotonicRegressionModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * An example demonstrating IsotonicRegression.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaIsotonicRegressionExample
+ * </pre>
+ */
+public class JavaIsotonicRegressionExample {
+
+  public static void main(String[] args) {
+    // Create a SparkSession.
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaIsotonicRegressionExample")
+      .getOrCreate();
+
+    // $example on$
+    // Loads data.
+    Dataset<Row> dataset = spark.read().format("libsvm")
+      .load("data/mllib/sample_isotonic_regression_libsvm_data.txt");
+
+    // Trains an isotonic regression model.
+    IsotonicRegression ir = new IsotonicRegression();
+    IsotonicRegressionModel model = ir.fit(dataset);
+
+    System.out.println("Boundaries in increasing order: " + model.boundaries() + "\n");
+    System.out.println("Predictions associated with the boundaries: " + model.predictions() + "\n");
+
+    // Makes predictions.
+    model.transform(dataset).show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..dc4b0bcb5965742a7a85a318a904fc087da79065
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaKMeansExample.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.clustering.KMeansModel;
+import org.apache.spark.ml.clustering.KMeans;
+import org.apache.spark.ml.evaluation.ClusteringEvaluator;
+import org.apache.spark.ml.linalg.Vector;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+
+/**
+ * An example demonstrating k-means clustering.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaKMeansExample
+ * </pre>
+ */
+public class JavaKMeansExample {
+
+  public static void main(String[] args) {
+    // Create a SparkSession.
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaKMeansExample")
+      .getOrCreate();
+
+    // $example on$
+    // Loads data.
+    Dataset<Row> dataset = spark.read().format("libsvm").load("data/mllib/sample_kmeans_data.txt");
+
+    // Trains a k-means model.
+    KMeans kmeans = new KMeans().setK(2).setSeed(1L);
+    KMeansModel model = kmeans.fit(dataset);
+
+    // Make predictions
+    Dataset<Row> predictions = model.transform(dataset);
+
+    // Evaluate clustering by computing Silhouette score
+    ClusteringEvaluator evaluator = new ClusteringEvaluator();
+
+    double silhouette = evaluator.evaluate(predictions);
+    System.out.println("Silhouette with squared euclidean distance = " + silhouette);
+
+    // Shows the result.
+    Vector[] centers = model.clusterCenters();
+    System.out.println("Cluster Centers: ");
+    for (Vector center: centers) {
+      System.out.println(center);
+    }
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..0e5d00565b71af523a8591f42e5f3d32c04d091e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLDAExample.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+// $example on$
+import org.apache.spark.ml.clustering.LDA;
+import org.apache.spark.ml.clustering.LDAModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+/**
+ * An example demonstrating LDA.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaLDAExample
+ * </pre>
+ */
+public class JavaLDAExample {
+
+  public static void main(String[] args) {
+    // Creates a SparkSession
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaLDAExample")
+      .getOrCreate();
+
+    // $example on$
+    // Loads data.
+    Dataset<Row> dataset = spark.read().format("libsvm")
+      .load("data/mllib/sample_lda_libsvm_data.txt");
+
+    // Trains an LDA model.
+    LDA lda = new LDA().setK(10).setMaxIter(10);
+    LDAModel model = lda.fit(dataset);
+
+    double ll = model.logLikelihood(dataset);
+    double lp = model.logPerplexity(dataset);
+    System.out.println("The lower bound on the log likelihood of the entire corpus: " + ll);
+    System.out.println("The upper bound on perplexity: " + lp);
+
+    // Describe topics.
+    Dataset<Row> topics = model.describeTopics(3);
+    System.out.println("The topics described by their top-weighted terms:");
+    topics.show(false);
+
+    // Shows the result.
+    Dataset<Row> transformed = model.transform(dataset);
+    transformed.show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLabeledDocument.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLabeledDocument.java
new file mode 100644
index 0000000000000000000000000000000000000000..68d1caf6ad3f39b6d29c176ee241eaa44ab356d6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLabeledDocument.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.io.Serializable;
+
+/**
+ * Labeled instance type, Spark SQL can infer schema from Java Beans.
+ */
+@SuppressWarnings("serial")
+public class JavaLabeledDocument extends JavaDocument implements Serializable {
+
+  private double label;
+
+  public JavaLabeledDocument(long id, String text, double label) {
+    super(id, text);
+    this.label = label;
+  }
+
+  public double getLabel() {
+    return this.label;
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..a561b6d39ba8372151fb3f6b4445e60f71f17a0a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearRegressionWithElasticNetExample.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.regression.LinearRegression;
+import org.apache.spark.ml.regression.LinearRegressionModel;
+import org.apache.spark.ml.regression.LinearRegressionTrainingSummary;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaLinearRegressionWithElasticNetExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaLinearRegressionWithElasticNetExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load training data.
+    Dataset<Row> training = spark.read().format("libsvm")
+      .load("data/mllib/sample_linear_regression_data.txt");
+
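+    // elasticNetParam mixes L1 and L2 regularization: 0.0 is pure L2 (ridge),
+    // 1.0 is pure L1 (lasso), so 0.8 here is mostly L1.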
+    LinearRegression lr = new LinearRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8);
+
+    // Fit the model.
+    LinearRegressionModel lrModel = lr.fit(training);
+
+    // Print the coefficients and intercept for linear regression.
+    System.out.println("Coefficients: "
+      + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
+
+    // Summarize the model over the training set and print out some metrics.
+    LinearRegressionTrainingSummary trainingSummary = lrModel.summary();
+    System.out.println("numIterations: " + trainingSummary.totalIterations());
+    System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
+    trainingSummary.residuals().show();
+    System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
+    System.out.println("r2: " + trainingSummary.r2());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearSVCExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearSVCExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..a18ed1d0b48fa7d315ad762be8addcaa8da7ef1b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLinearSVCExample.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
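+    // Hash all four columns into a single feature vector using the hashing trick: numeric
+    // columns are passed through as-is, while boolean and string columns are treated as
+    // categorical and one-hot encoded.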
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.classification.LinearSVC;
+import org.apache.spark.ml.classification.LinearSVCModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaLinearSVCExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaLinearSVCExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load training data
+    Dataset<Row> training = spark.read().format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt");
+
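+    // Train a linear support vector classifier; LinearSVC optimizes the hinge loss with
+    // L2 regularization and, unlike LogisticRegression, outputs no probability column.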
+    LinearSVC lsvc = new LinearSVC()
+      .setMaxIter(10)
+      .setRegParam(0.1);
+
+    // Fit the model
+    LinearSVCModel lsvcModel = lsvc.fit(training);
+
+    // Print the coefficients and intercept for LinearSVC
+    System.out.println("Coefficients: "
+      + lsvcModel.coefficients() + " Intercept: " + lsvcModel.intercept());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..1529da16f051f764fa89c2d2050901be0a395b43
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionSummaryExample.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.classification.BinaryLogisticRegressionTrainingSummary;
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.classification.LogisticRegressionModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.functions;
+// $example off$
+
+public class JavaLogisticRegressionSummaryExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaLogisticRegressionSummaryExample")
+      .getOrCreate();
+
+    // Load training data
+    Dataset<Row> training = spark.read().format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt");
+
+    LogisticRegression lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8);
+
+    // Fit the model
+    LogisticRegressionModel lrModel = lr.fit(training);
+
+    // $example on$
+    // Extract the summary from the returned LogisticRegressionModel instance trained in
+    // the earlier example.
+    BinaryLogisticRegressionTrainingSummary trainingSummary = lrModel.binarySummary();
+
+    // Obtain the loss per iteration.
+    double[] objectiveHistory = trainingSummary.objectiveHistory();
+    for (double lossPerIteration : objectiveHistory) {
+      System.out.println(lossPerIteration);
+    }
+
+    // Obtain the receiver-operating characteristic as a dataframe and areaUnderROC.
+    Dataset<Row> roc = trainingSummary.roc();
+    roc.show();
+    roc.select("FPR").show();
+    System.out.println(trainingSummary.areaUnderROC());
+
+    // Get the threshold corresponding to the maximum F-Measure and rerun LogisticRegression with
+    // this selected threshold.
+    Dataset<Row> fMeasure = trainingSummary.fMeasureByThreshold();
+    double maxFMeasure = fMeasure.select(functions.max("F-Measure")).head().getDouble(0);
+    double bestThreshold = fMeasure.where(fMeasure.col("F-Measure").equalTo(maxFMeasure))
+      .select("threshold").head().getDouble(0);
+    lrModel.setThreshold(bestThreshold);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..4cdec21d230239ae63eed6d78800590e1dfb607b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.classification.LogisticRegressionModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaLogisticRegressionWithElasticNetExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaLogisticRegressionWithElasticNetExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load training data
+    Dataset<Row> training = spark.read().format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt");
+
+    LogisticRegression lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8);
+
+    // Fit the model
+    LogisticRegressionModel lrModel = lr.fit(training);
+
+    // Print the coefficients and intercept for logistic regression
+    System.out.println("Coefficients: "
+      + lrModel.coefficients() + " Intercept: " + lrModel.intercept());
+
+    // We can also use the multinomial family for binary classification
+    LogisticRegression mlr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8)
+      .setFamily("multinomial");
+
+    // Fit the model
+    LogisticRegressionModel mlrModel = mlr.fit(training);
+
+    // Print the coefficients and intercepts for logistic regression with multinomial family
+    System.out.println("Multinomial coefficients: " + lrModel.coefficientMatrix()
+      + "\nMultinomial intercepts: " + mlrModel.interceptVector());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..9f1ce463cf309e13d72a5d4df3d017bcef11dbea
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMaxAbsScalerExample.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.MaxAbsScaler;
+import org.apache.spark.ml.feature.MaxAbsScalerModel;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+public class JavaMaxAbsScalerExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaMaxAbsScalerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+        RowFactory.create(0, Vectors.dense(1.0, 0.1, -8.0)),
+        RowFactory.create(1, Vectors.dense(2.0, 1.0, -4.0)),
+        RowFactory.create(2, Vectors.dense(4.0, 10.0, 8.0))
+    );
+    StructType schema = new StructType(new StructField[]{
+        new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+        new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> dataFrame = spark.createDataFrame(data, schema);
+
+    MaxAbsScaler scaler = new MaxAbsScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures");
+
+    // Compute summary statistics and generate MaxAbsScalerModel
+    MaxAbsScalerModel scalerModel = scaler.fit(dataFrame);
+
+    // rescale each feature to range [-1, 1].
+    Dataset<Row> scaledData = scalerModel.transform(dataFrame);
+    scaledData.select("features", "scaledFeatures").show();
+    // $example off$
+
+    spark.stop();
+  }
+
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMinHashLSHExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMinHashLSHExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..e164598e3ef87235bd102740e461989ee2c558c3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMinHashLSHExample.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.MinHashLSH;
+import org.apache.spark.ml.feature.MinHashLSHModel;
+import org.apache.spark.ml.linalg.Vector;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import static org.apache.spark.sql.functions.col;
+// $example off$
+
+/**
+ * An example demonstrating MinHashLSH.
+ * Run with:
+ *   bin/run-example ml.JavaMinHashLSHExample
+ */
+public class JavaMinHashLSHExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaMinHashLSHExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> dataA = Arrays.asList(
+      RowFactory.create(0, Vectors.sparse(6, new int[]{0, 1, 2}, new double[]{1.0, 1.0, 1.0})),
+      RowFactory.create(1, Vectors.sparse(6, new int[]{2, 3, 4}, new double[]{1.0, 1.0, 1.0})),
+      RowFactory.create(2, Vectors.sparse(6, new int[]{0, 2, 4}, new double[]{1.0, 1.0, 1.0}))
+    );
+
+    List<Row> dataB = Arrays.asList(
+      RowFactory.create(0, Vectors.sparse(6, new int[]{1, 3, 5}, new double[]{1.0, 1.0, 1.0})),
+      RowFactory.create(1, Vectors.sparse(6, new int[]{2, 3, 5}, new double[]{1.0, 1.0, 1.0})),
+      RowFactory.create(2, Vectors.sparse(6, new int[]{1, 2, 4}, new double[]{1.0, 1.0, 1.0}))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> dfA = spark.createDataFrame(dataA, schema);
+    Dataset<Row> dfB = spark.createDataFrame(dataB, schema);
+
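+    // A query vector used below as the key for approximate nearest neighbor search.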
+    int[] indices = {1, 3};
+    double[] values = {1.0, 1.0};
+    Vector key = Vectors.sparse(6, indices, values);
+
+    MinHashLSH mh = new MinHashLSH()
+      .setNumHashTables(5)
+      .setInputCol("features")
+      .setOutputCol("hashes");
+
+    MinHashLSHModel model = mh.fit(dfA);
+
+    // Feature Transformation
+    System.out.println("The hashed dataset where hashed values are stored in the column 'hashes':");
+    model.transform(dfA).show();
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate
+    // similarity join.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxSimilarityJoin(transformedA, transformedB, 0.6)`
+    System.out.println("Approximately joining dfA and dfB on Jaccard distance smaller than 0.6:");
+    model.approxSimilarityJoin(dfA, dfB, 0.6, "JaccardDistance")
+      .select(col("datasetA.id").alias("idA"),
+        col("datasetB.id").alias("idB"),
+        col("JaccardDistance")).show();
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate nearest
+    // neighbor search.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxNearestNeighbors(transformedA, key, 2)`
+    // It may return fewer than 2 rows when not enough approximate near-neighbor candidates are
+    // found.
+    System.out.println("Approximately searching dfA for 2 nearest neighbors of the key:");
+    model.approxNearestNeighbors(dfA, key, 2).show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..2757af8d245d218122ddaa19518482848184638f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMinMaxScalerExample.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.MinMaxScaler;
+import org.apache.spark.ml.feature.MinMaxScalerModel;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaMinMaxScalerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaMinMaxScalerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+        RowFactory.create(0, Vectors.dense(1.0, 0.1, -1.0)),
+        RowFactory.create(1, Vectors.dense(2.0, 1.1, 1.0)),
+        RowFactory.create(2, Vectors.dense(3.0, 10.1, 3.0))
+    );
+    StructType schema = new StructType(new StructField[]{
+        new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+        new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> dataFrame = spark.createDataFrame(data, schema);
+
+    MinMaxScaler scaler = new MinMaxScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures");
+
+    // Compute summary statistics and generate MinMaxScalerModel
+    MinMaxScalerModel scalerModel = scaler.fit(dataFrame);
+
+    // rescale each feature to range [min, max].
+    Dataset<Row> scaledData = scalerModel.transform(dataFrame);
+    System.out.println("Features scaled to range: [" + scaler.getMin() + ", "
+        + scaler.getMax() + "]");
+    scaledData.select("features", "scaledFeatures").show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..d97327969ab264bb2282979c3a034b97ad923e77
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+// $example off$
+
+// $example on$
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator;
+import org.apache.spark.ml.feature.HashingTF;
+import org.apache.spark.ml.feature.Tokenizer;
+import org.apache.spark.ml.param.ParamMap;
+import org.apache.spark.ml.tuning.CrossValidator;
+import org.apache.spark.ml.tuning.CrossValidatorModel;
+import org.apache.spark.ml.tuning.ParamGridBuilder;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Java example for Model Selection via Cross Validation.
+ */
+public class JavaModelSelectionViaCrossValidationExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaModelSelectionViaCrossValidationExample")
+      .getOrCreate();
+
+    // $example on$
+    // Prepare training documents, which are labeled.
+    Dataset<Row> training = spark.createDataFrame(Arrays.asList(
+      new JavaLabeledDocument(0L, "a b c d e spark", 1.0),
+      new JavaLabeledDocument(1L, "b d", 0.0),
+      new JavaLabeledDocument(2L,"spark f g h", 1.0),
+      new JavaLabeledDocument(3L, "hadoop mapreduce", 0.0),
+      new JavaLabeledDocument(4L, "b spark who", 1.0),
+      new JavaLabeledDocument(5L, "g d a y", 0.0),
+      new JavaLabeledDocument(6L, "spark fly", 1.0),
+      new JavaLabeledDocument(7L, "was mapreduce", 0.0),
+      new JavaLabeledDocument(8L, "e spark program", 1.0),
+      new JavaLabeledDocument(9L, "a e c l", 0.0),
+      new JavaLabeledDocument(10L, "spark compile", 1.0),
+      new JavaLabeledDocument(11L, "hadoop software", 0.0)
+    ), JavaLabeledDocument.class);
+
+    // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
+    Tokenizer tokenizer = new Tokenizer()
+      .setInputCol("text")
+      .setOutputCol("words");
+    HashingTF hashingTF = new HashingTF()
+      .setNumFeatures(1000)
+      .setInputCol(tokenizer.getOutputCol())
+      .setOutputCol("features");
+    LogisticRegression lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.01);
+    Pipeline pipeline = new Pipeline()
+      .setStages(new PipelineStage[] {tokenizer, hashingTF, lr});
+
+    // We use a ParamGridBuilder to construct a grid of parameters to search over.
+    // With 3 values for hashingTF.numFeatures and 2 values for lr.regParam,
+    // this grid will have 3 x 2 = 6 parameter settings for CrossValidator to choose from.
+    ParamMap[] paramGrid = new ParamGridBuilder()
+      .addGrid(hashingTF.numFeatures(), new int[] {10, 100, 1000})
+      .addGrid(lr.regParam(), new double[] {0.1, 0.01})
+      .build();
+
+    // We now treat the Pipeline as an Estimator, wrapping it in a CrossValidator instance.
+    // This will allow us to jointly choose parameters for all Pipeline stages.
+    // A CrossValidator requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.
+    // Note that the evaluator here is a BinaryClassificationEvaluator and its default metric
+    // is areaUnderROC.
+    CrossValidator cv = new CrossValidator()
+      .setEstimator(pipeline)
+      .setEvaluator(new BinaryClassificationEvaluator())
+      .setEstimatorParamMaps(paramGrid)
+      .setNumFolds(2)  // Use 3+ in practice
+      .setParallelism(2);  // Evaluate up to 2 parameter settings in parallel
+
+    // Run cross-validation, and choose the best set of parameters.
+    CrossValidatorModel cvModel = cv.fit(training);
+
+    // Prepare test documents, which are unlabeled.
+    Dataset<Row> test = spark.createDataFrame(Arrays.asList(
+      new JavaDocument(4L, "spark i j k"),
+      new JavaDocument(5L, "l m n"),
+      new JavaDocument(6L, "mapreduce spark"),
+      new JavaDocument(7L, "apache hadoop")
+    ), JavaDocument.class);
+
+    // Make predictions on test documents. cvModel uses the best model found (lrModel).
+    Dataset<Row> predictions = cvModel.transform(test);
+    for (Row r : predictions.select("id", "text", "probability", "prediction").collectAsList()) {
+      System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> prob=" + r.get(2)
+        + ", prediction=" + r.get(3));
+    }
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..2ef8bea0b2a2b26713cdf6cd7dd201fb218d6722
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.evaluation.RegressionEvaluator;
+import org.apache.spark.ml.param.ParamMap;
+import org.apache.spark.ml.regression.LinearRegression;
+import org.apache.spark.ml.tuning.ParamGridBuilder;
+import org.apache.spark.ml.tuning.TrainValidationSplit;
+import org.apache.spark.ml.tuning.TrainValidationSplitModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Java example demonstrating model selection using TrainValidationSplit.
+ *
+ * Run with
+ * {{{
+ * bin/run-example ml.JavaModelSelectionViaTrainValidationSplitExample
+ * }}}
+ */
+public class JavaModelSelectionViaTrainValidationSplitExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaModelSelectionViaTrainValidationSplitExample")
+      .getOrCreate();
+
+    // $example on$
+    Dataset<Row> data = spark.read().format("libsvm")
+      .load("data/mllib/sample_linear_regression_data.txt");
+
+    // Prepare training and test data.
+    Dataset<Row>[] splits = data.randomSplit(new double[] {0.9, 0.1}, 12345);
+    Dataset<Row> training = splits[0];
+    Dataset<Row> test = splits[1];
+
+    LinearRegression lr = new LinearRegression();
+
+    // We use a ParamGridBuilder to construct a grid of parameters to search over.
+    // TrainValidationSplit will try all combinations of values and determine best model using
+    // the evaluator.
+    ParamMap[] paramGrid = new ParamGridBuilder()
+      .addGrid(lr.regParam(), new double[] {0.1, 0.01})
+      .addGrid(lr.fitIntercept())
+      .addGrid(lr.elasticNetParam(), new double[] {0.0, 0.5, 1.0})
+      .build();
+
+    // In this case the estimator is simply the linear regression.
+    // A TrainValidationSplit requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.
+    TrainValidationSplit trainValidationSplit = new TrainValidationSplit()
+      .setEstimator(lr)
+      .setEvaluator(new RegressionEvaluator())
+      .setEstimatorParamMaps(paramGrid)
+      .setTrainRatio(0.8)  // 80% for training and the remaining 20% for validation
+      .setParallelism(2);  // Evaluate up to 2 parameter settings in parallel
+
+    // Run train validation split, and choose the best set of parameters.
+    TrainValidationSplitModel model = trainValidationSplit.fit(training);
+
+    // Make predictions on test data. model is the model with combination of parameters
+    // that performed best.
+    model.transform(test)
+      .select("features", "label", "prediction")
+      .show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMulticlassLogisticRegressionWithElasticNetExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMulticlassLogisticRegressionWithElasticNetExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..801a82cd2f24f253639b2e941a0e8997db79de6f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMulticlassLogisticRegressionWithElasticNetExample.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.classification.LogisticRegressionModel;
+import org.apache.spark.ml.classification.LogisticRegressionTrainingSummary;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaMulticlassLogisticRegressionWithElasticNetExample {
+    public static void main(String[] args) {
+        SparkSession spark = SparkSession
+                .builder()
+                .appName("JavaMulticlassLogisticRegressionWithElasticNetExample")
+                .getOrCreate();
+
+        // $example on$
+        // Load training data
+        Dataset<Row> training = spark.read().format("libsvm")
+                .load("data/mllib/sample_multiclass_classification_data.txt");
+
+        LogisticRegression lr = new LogisticRegression()
+                .setMaxIter(10)
+                .setRegParam(0.3)
+                .setElasticNetParam(0.8);
+
+        // Fit the model
+        LogisticRegressionModel lrModel = lr.fit(training);
+
+        // Print the coefficients and intercept for multinomial logistic regression
+        System.out.println("Coefficients: \n"
+                + lrModel.coefficientMatrix() + " \nIntercept: " + lrModel.interceptVector());
+        LogisticRegressionTrainingSummary trainingSummary = lrModel.summary();
+
+        // Obtain the loss per iteration.
+        double[] objectiveHistory = trainingSummary.objectiveHistory();
+        for (double lossPerIteration : objectiveHistory) {
+            System.out.println(lossPerIteration);
+        }
+
+        // for multiclass, we can inspect metrics on a per-label basis
+        System.out.println("False positive rate by label:");
+        int i = 0;
+        double[] fprLabel = trainingSummary.falsePositiveRateByLabel();
+        for (double fpr : fprLabel) {
+            System.out.println("label " + i + ": " + fpr);
+            i++;
+        }
+
+        System.out.println("True positive rate by label:");
+        i = 0;
+        double[] tprLabel = trainingSummary.truePositiveRateByLabel();
+        for (double tpr : tprLabel) {
+            System.out.println("label " + i + ": " + tpr);
+            i++;
+        }
+
+        System.out.println("Precision by label:");
+        i = 0;
+        double[] precLabel = trainingSummary.precisionByLabel();
+        for (double prec : precLabel) {
+            System.out.println("label " + i + ": " + prec);
+            i++;
+        }
+
+        System.out.println("Recall by label:");
+        i = 0;
+        double[] recLabel = trainingSummary.recallByLabel();
+        for (double rec : recLabel) {
+            System.out.println("label " + i + ": " + rec);
+            i++;
+        }
+
+        System.out.println("F-measure by label:");
+        i = 0;
+        double[] fLabel = trainingSummary.fMeasureByLabel();
+        for (double f : fLabel) {
+            System.out.println("label " + i + ": " + f);
+            i++;
+        }
+
+        double accuracy = trainingSummary.accuracy();
+        double falsePositiveRate = trainingSummary.weightedFalsePositiveRate();
+        double truePositiveRate = trainingSummary.weightedTruePositiveRate();
+        double fMeasure = trainingSummary.weightedFMeasure();
+        double precision = trainingSummary.weightedPrecision();
+        double recall = trainingSummary.weightedRecall();
+        System.out.println("Accuracy: " + accuracy);
+        System.out.println("FPR: " + falsePositiveRate);
+        System.out.println("TPR: " + truePositiveRate);
+        System.out.println("F-measure: " + fMeasure);
+        System.out.println("Precision: " + precision);
+        System.out.println("Recall: " + recall);
+        // $example off$
+
+        spark.stop();
+    }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..43db41ce17463f7ff94f71a0866d25846973cfeb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaMultilayerPerceptronClassifierExample.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel;
+import org.apache.spark.ml.classification.MultilayerPerceptronClassifier;
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
+// $example off$
+
+/**
+ * An example for Multilayer Perceptron Classification.
+ */
+public class JavaMultilayerPerceptronClassifierExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaMultilayerPerceptronClassifierExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load training data
+    String path = "data/mllib/sample_multiclass_classification_data.txt";
+    Dataset<Row> dataFrame = spark.read().format("libsvm").load(path);
+
+    // Split the data into train and test
+    Dataset<Row>[] splits = dataFrame.randomSplit(new double[]{0.6, 0.4}, 1234L);
+    Dataset<Row> train = splits[0];
+    Dataset<Row> test = splits[1];
+
+    // specify layers for the neural network:
+    // input layer of size 4 (features), two intermediate layers of size 5 and 4,
+    // and output of size 3 (classes)
+    int[] layers = new int[] {4, 5, 4, 3};
+
+    // create the trainer and set its parameters
+    MultilayerPerceptronClassifier trainer = new MultilayerPerceptronClassifier()
+      .setLayers(layers)
+      .setBlockSize(128)
+      .setSeed(1234L)
+      .setMaxIter(100);
+
+    // train the model
+    MultilayerPerceptronClassificationModel model = trainer.fit(train);
+
+    // compute accuracy on the test set
+    Dataset<Row> result = model.transform(test);
+    Dataset<Row> predictionAndLabels = result.select("prediction", "label");
+    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
+      .setMetricName("accuracy");
+
+    System.out.println("Test set accuracy = " + evaluator.evaluate(predictionAndLabels));
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..5427e466656aa197a1914362aae70475e9290f64
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.NGram;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaNGramExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaNGramExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
+      RowFactory.create(1, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
+      RowFactory.create(2, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField(
+        "words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
+    });
+
+    Dataset<Row> wordDataFrame = spark.createDataFrame(data, schema);
+
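+    // Produce bigrams (n = 2) from each row's token sequence.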
+    NGram ngramTransformer = new NGram().setN(2).setInputCol("words").setOutputCol("ngrams");
+
+    Dataset<Row> ngramDataFrame = ngramTransformer.transform(wordDataFrame);
+    ngramDataFrame.select("ngrams").show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..be578dc8110e9bda7444bca6811dcd96a97316da
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNaiveBayesExample.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.classification.NaiveBayes;
+import org.apache.spark.ml.classification.NaiveBayesModel;
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+/**
+ * An example for Naive Bayes Classification.
+ */
+public class JavaNaiveBayesExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaNaiveBayesExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load training data
+    Dataset<Row> dataFrame =
+      spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+    // Split the data into train and test
+    Dataset<Row>[] splits = dataFrame.randomSplit(new double[]{0.6, 0.4}, 1234L);
+    Dataset<Row> train = splits[0];
+    Dataset<Row> test = splits[1];
+
+    // create the trainer and set its parameters
+    NaiveBayes nb = new NaiveBayes();
+
+    // train the model
+    NaiveBayesModel model = nb.fit(train);
+
+    // Select example rows to display.
+    Dataset<Row> predictions = model.transform(test);
+    predictions.show();
+
+    // compute accuracy on the test set
+    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy");
+    double accuracy = evaluator.evaluate(predictions);
+    System.out.println("Test set accuracy = " + accuracy);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..f878c420d82377ce1a4689921c6c4d63437e16e8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaNormalizerExample.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.Normalizer;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaNormalizerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaNormalizerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+        RowFactory.create(0, Vectors.dense(1.0, 0.1, -8.0)),
+        RowFactory.create(1, Vectors.dense(2.0, 1.0, -4.0)),
+        RowFactory.create(2, Vectors.dense(4.0, 10.0, 8.0))
+    );
+    StructType schema = new StructType(new StructField[]{
+        new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+        new StructField("features", new VectorUDT(), false, Metadata.empty())
+    });
+    Dataset<Row> dataFrame = spark.createDataFrame(data, schema);
+
+    // Normalize each Vector using $L^1$ norm.
+    Normalizer normalizer = new Normalizer()
+      .setInputCol("features")
+      .setOutputCol("normFeatures")
+      .setP(1.0);
+
+    Dataset<Row> l1NormData = normalizer.transform(dataFrame);
+    l1NormData.show();
+
+    // Normalize each Vector using $L^\infty$ norm.
+    Dataset<Row> lInfNormData =
+      normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY));
+    lInfNormData.show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderEstimatorExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderEstimatorExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..6f93cff94b725db7e25c0f31e0ca7760338f59f3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoderEstimatorExample.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.OneHotEncoderEstimator;
+import org.apache.spark.ml.feature.OneHotEncoderModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaOneHotEncoderEstimatorExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaOneHotEncoderEstimatorExample")
+      .getOrCreate();
+
+    // Note: categorical features are usually first encoded with StringIndexer
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0.0, 1.0),
+      RowFactory.create(1.0, 0.0),
+      RowFactory.create(2.0, 1.0),
+      RowFactory.create(0.0, 2.0),
+      RowFactory.create(0.0, 1.0),
+      RowFactory.create(2.0, 0.0)
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("categoryIndex1", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("categoryIndex2", DataTypes.DoubleType, false, Metadata.empty())
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
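+    // Encode both category index columns into one-hot vector columns.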
+    OneHotEncoderEstimator encoder = new OneHotEncoderEstimator()
+      .setInputCols(new String[] {"categoryIndex1", "categoryIndex2"})
+      .setOutputCols(new String[] {"categoryVec1", "categoryVec2"});
+
+    OneHotEncoderModel model = encoder.fit(df);
+    Dataset<Row> encoded = model.transform(df);
+    encoded.show();
+    // $example off$
+
+    spark.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..82fb54095019dcd3342ca847f18d90135589034e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.classification.OneVsRest;
+import org.apache.spark.ml.classification.OneVsRestModel;
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+
+/**
+ * An example of Multiclass to Binary Reduction with One Vs Rest,
+ * using Logistic Regression as the base classifier.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaOneVsRestExample
+ * </pre>
+ */
+public class JavaOneVsRestExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaOneVsRestExample")
+      .getOrCreate();
+
+    // $example on$
+    // load data file.
+    Dataset<Row> inputData = spark.read().format("libsvm")
+      .load("data/mllib/sample_multiclass_classification_data.txt");
+
+    // generate the train/test split.
+    Dataset<Row>[] tmp = inputData.randomSplit(new double[]{0.8, 0.2});
+    Dataset<Row> train = tmp[0];
+    Dataset<Row> test = tmp[1];
+
+    // configure the base classifier.
+    LogisticRegression classifier = new LogisticRegression()
+      .setMaxIter(10)
+      .setTol(1E-6)
+      .setFitIntercept(true);
+
+    // instantiate the One Vs Rest Classifier.
+    OneVsRest ovr = new OneVsRest().setClassifier(classifier);
+
+    // train the multiclass model.
+    OneVsRestModel ovrModel = ovr.fit(train);
+
+    // score the model on test data.
+    Dataset<Row> predictions = ovrModel.transform(test)
+      .select("prediction", "label");
+
+    // obtain evaluator.
+    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
+      .setMetricName("accuracy");
+
+    // compute the classification error on test data.
+    double accuracy = evaluator.evaluate(predictions);
+    System.out.println("Test Error = " + (1 - accuracy));
+    // $example off$
+
+    spark.stop();
+  }
+
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..6951a65553e5bdf343cff9b9c1e0da3a56f013ab
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.PCA;
+import org.apache.spark.ml.feature.PCAModel;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaPCAExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaPCAExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})),
+      RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)),
+      RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
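+    // Fit PCA to project the 5-dimensional feature vectors onto the top 3 principal components.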
+    PCAModel pca = new PCA()
+      .setInputCol("features")
+      .setOutputCol("pcaFeatures")
+      .setK(3)
+      .fit(df);
+
+    Dataset<Row> result = pca.transform(df).select("pcaFeatures");
+    result.show(false);
+    // $example off$
+    spark.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..4ccd8f6ce265047f9423305a97582e0c62e644c6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineModel;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.classification.LogisticRegression;
+import org.apache.spark.ml.feature.HashingTF;
+import org.apache.spark.ml.feature.Tokenizer;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+import org.apache.spark.sql.SparkSession;
+
+/**
+ * Java example for simple text document 'Pipeline'.
+ */
+public class JavaPipelineExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaPipelineExample")
+      .getOrCreate();
+
+    // $example on$
+    // Prepare training documents, which are labeled.
+    Dataset<Row> training = spark.createDataFrame(Arrays.asList(
+      new JavaLabeledDocument(0L, "a b c d e spark", 1.0),
+      new JavaLabeledDocument(1L, "b d", 0.0),
+      new JavaLabeledDocument(2L, "spark f g h", 1.0),
+      new JavaLabeledDocument(3L, "hadoop mapreduce", 0.0)
+    ), JavaLabeledDocument.class);
+
+    // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
+    Tokenizer tokenizer = new Tokenizer()
+      .setInputCol("text")
+      .setOutputCol("words");
+    HashingTF hashingTF = new HashingTF()
+      .setNumFeatures(1000)
+      .setInputCol(tokenizer.getOutputCol())
+      .setOutputCol("features");
+    LogisticRegression lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.001);
+    Pipeline pipeline = new Pipeline()
+      .setStages(new PipelineStage[] {tokenizer, hashingTF, lr});
+
+    // Fit the pipeline to training documents.
+    PipelineModel model = pipeline.fit(training);
+
+    // Prepare test documents, which are unlabeled.
+    Dataset<Row> test = spark.createDataFrame(Arrays.asList(
+      new JavaDocument(4L, "spark i j k"),
+      new JavaDocument(5L, "l m n"),
+      new JavaDocument(6L, "spark hadoop spark"),
+      new JavaDocument(7L, "apache hadoop")
+    ), JavaDocument.class);
+
+    // Make predictions on test documents.
+    Dataset<Row> predictions = model.transform(test);
+    for (Row r : predictions.select("id", "text", "probability", "prediction").collectAsList()) {
+      System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> prob=" + r.get(2)
+        + ", prediction=" + r.get(3));
+    }
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..43c636c5340306452c417ded0ce88dd47a2138a0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.PolynomialExpansion;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaPolynomialExpansionExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaPolynomialExpansionExample")
+      .getOrCreate();
+
+    // $example on$
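+    // Expand each 2-dimensional feature vector into a degree-3 polynomial feature space.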
+    PolynomialExpansion polyExpansion = new PolynomialExpansion()
+      .setInputCol("features")
+      .setOutputCol("polyFeatures")
+      .setDegree(3);
+
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Vectors.dense(2.0, 1.0)),
+      RowFactory.create(Vectors.dense(0.0, 0.0)),
+      RowFactory.create(Vectors.dense(3.0, -1.0))
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+    });
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
+    Dataset<Row> polyDF = polyExpansion.transform(df);
+    polyDF.show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..51865637df6f65d92c69ceefc1f112b684511678
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPowerIterationClusteringExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.clustering.PowerIterationClustering;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaPowerIterationClusteringExample {
+  public static void main(String[] args) {
+    // Create a SparkSession.
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaPowerIterationClustering")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0L, 1L, 1.0),
+      RowFactory.create(0L, 2L, 1.0),
+      RowFactory.create(1L, 2L, 1.0),
+      RowFactory.create(3L, 4L, 1.0),
+      RowFactory.create(4L, 0L, 0.1)
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("src", DataTypes.LongType, false, Metadata.empty()),
+      new StructField("dst", DataTypes.LongType, false, Metadata.empty()),
+      new StructField("weight", DataTypes.DoubleType, false, Metadata.empty())
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
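+    // Cluster the graph defined by the (src, dst, weight) edges into two clusters using PIC.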
+    PowerIterationClustering model = new PowerIterationClustering()
+      .setK(2)
+      .setMaxIter(10)
+      .setInitMode("degree")
+      .setWeightCol("weight");
+
+    Dataset<Row> result = model.assignClusters(df);
+    result.show(false);
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..98ffd4faf95753eea26c48ffe176cce6502606f3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaPrefixSpanExample.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+
+import org.apache.spark.ml.fpm.PrefixSpan;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.*;
+
+import java.util.Arrays;
+import java.util.List;
+// $example off$
+
+/**
+ * An example demonstrating PrefixSpan.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaPrefixSpanExample
+ * </pre>
+ */
+public class JavaPrefixSpanExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaPrefixSpanExample")
+      .getOrCreate();
+
+    // $example on$
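+    // Each row is one sequence: a list of itemsets, where each itemset is a list of items.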
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3))),
+      RowFactory.create(Arrays.asList(Arrays.asList(1), Arrays.asList(3, 2), Arrays.asList(1, 2))),
+      RowFactory.create(Arrays.asList(Arrays.asList(1, 2), Arrays.asList(5))),
+      RowFactory.create(Arrays.asList(Arrays.asList(6)))
+    );
+    StructType schema = new StructType(new StructField[]{ new StructField(
+      "sequence", new ArrayType(new ArrayType(DataTypes.IntegerType, true), true),
+      false, Metadata.empty())
+    });
+    Dataset<Row> sequenceDF = spark.createDataFrame(data, schema);
+
+    PrefixSpan prefixSpan = new PrefixSpan().setMinSupport(0.5).setMaxPatternLength(5);
+
+    // Find frequent sequential patterns; the result has "sequence" and "freq" columns.
+    prefixSpan.findFrequentSequentialPatterns(sequenceDF).show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..43cc30c1a899b77b62cb1f36ff2168dddb11e039
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaQuantileDiscretizerExample.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.QuantileDiscretizer;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaQuantileDiscretizerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaQuantileDiscretizerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0, 18.0),
+      RowFactory.create(1, 19.0),
+      RowFactory.create(2, 8.0),
+      RowFactory.create(3, 5.0),
+      RowFactory.create(4, 2.2)
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("hour", DataTypes.DoubleType, false, Metadata.empty())
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+    // $example off$
+    // The output of QuantileDiscretizer for such small datasets can depend on the number of
+    // partitions. Here we force a single partition to ensure consistent results.
+    // Note that this is not necessary for normal use cases.
+    df = df.repartition(1);
+    // $example on$
+    QuantileDiscretizer discretizer = new QuantileDiscretizer()
+      .setInputCol("hour")
+      .setOutputCol("result")
+      .setNumBuckets(3);
+
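+    // fit() computes the bucket split points from approximate quantiles of "hour";
+    // transform() maps each value to its bucket index in the "result" column.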
+    Dataset<Row> result = discretizer.fit(df).transform(df);
+    result.show(false);
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..428067e0f7efee99156e0f237530dbb18515c75c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormulaExample.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.RFormula;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+// $example off$
+
+public class JavaRFormulaExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaRFormulaExample")
+      .getOrCreate();
+
+    // $example on$
+    StructType schema = createStructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("country", StringType, false),
+      createStructField("hour", IntegerType, false),
+      createStructField("clicked", DoubleType, false)
+    });
+
+    List<Row> data = Arrays.asList(
+      RowFactory.create(7, "US", 18, 1.0),
+      RowFactory.create(8, "CA", 12, 0.0),
+      RowFactory.create(9, "NZ", 15, 0.0)
+    );
+
+    Dataset<Row> dataset = spark.createDataFrame(data, schema);
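+    // "clicked ~ country + hour": clicked becomes the label; the string column country is
+    // one-hot encoded and combined with hour into the features vector.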
+    RFormula formula = new RFormula()
+      .setFormula("clicked ~ country + hour")
+      .setFeaturesCol("features")
+      .setLabelCol("label");
+    Dataset<Row> output = formula.fit(dataset).transform(dataset);
+    output.select("features", "label").show();
+    // $example off$
+    spark.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..da2633e8860accb0c89eef5042e4a026b3d8c051
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestClassifierExample.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineModel;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.classification.RandomForestClassificationModel;
+import org.apache.spark.ml.classification.RandomForestClassifier;
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
+import org.apache.spark.ml.feature.*;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaRandomForestClassifierExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaRandomForestClassifierExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    // Index labels, adding metadata to the label column.
+    // Fit on whole dataset to include all labels in index.
+    StringIndexerModel labelIndexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("indexedLabel")
+      .fit(data);
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    VectorIndexerModel featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data);
+
+    // Split the data into training and test sets (30% held out for testing)
+    Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
+    Dataset<Row> trainingData = splits[0];
+    Dataset<Row> testData = splits[1];
+
+    // Train a RandomForest model.
+    RandomForestClassifier rf = new RandomForestClassifier()
+      .setLabelCol("indexedLabel")
+      .setFeaturesCol("indexedFeatures");
+
+    // Convert indexed labels back to original labels.
+    IndexToString labelConverter = new IndexToString()
+      .setInputCol("prediction")
+      .setOutputCol("predictedLabel")
+      .setLabels(labelIndexer.labels());
+
+    // Chain indexers and forest in a Pipeline
+    Pipeline pipeline = new Pipeline()
+      .setStages(new PipelineStage[] {labelIndexer, featureIndexer, rf, labelConverter});
+
+    // Train model. This also runs the indexers.
+    PipelineModel model = pipeline.fit(trainingData);
+
+    // Make predictions.
+    Dataset<Row> predictions = model.transform(testData);
+
+    // Select example rows to display.
+    predictions.select("predictedLabel", "label", "features").show(5);
+
+    // Select (prediction, true label) and compute test error
+    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("indexedLabel")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy");
+    double accuracy = evaluator.evaluate(predictions);
+    System.out.println("Test Error = " + (1.0 - accuracy));
+
+    RandomForestClassificationModel rfModel = (RandomForestClassificationModel)(model.stages()[2]);
+    System.out.println("Learned classification forest model:\n" + rfModel.toDebugString());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestRegressorExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestRegressorExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..a7078453deb8a9632889c78ae68c6a3c0fef063f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaRandomForestRegressorExample.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import org.apache.spark.ml.Pipeline;
+import org.apache.spark.ml.PipelineModel;
+import org.apache.spark.ml.PipelineStage;
+import org.apache.spark.ml.evaluation.RegressionEvaluator;
+import org.apache.spark.ml.feature.VectorIndexer;
+import org.apache.spark.ml.feature.VectorIndexerModel;
+import org.apache.spark.ml.regression.RandomForestRegressionModel;
+import org.apache.spark.ml.regression.RandomForestRegressor;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off$
+
+public class JavaRandomForestRegressorExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaRandomForestRegressorExample")
+      .getOrCreate();
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    VectorIndexerModel featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data);
+
+    // Split the data into training and test sets (30% held out for testing)
+    Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
+    Dataset<Row> trainingData = splits[0];
+    Dataset<Row> testData = splits[1];
+
+    // Train a RandomForest model.
+    RandomForestRegressor rf = new RandomForestRegressor()
+      .setLabelCol("label")
+      .setFeaturesCol("indexedFeatures");
+
+    // Chain indexer and forest in a Pipeline
+    Pipeline pipeline = new Pipeline()
+      .setStages(new PipelineStage[] {featureIndexer, rf});
+
+    // Train model. This also runs the indexer.
+    PipelineModel model = pipeline.fit(trainingData);
+
+    // Make predictions.
+    Dataset<Row> predictions = model.transform(testData);
+
+    // Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5);
+
+    // Select (prediction, true label) and compute test error
+    RegressionEvaluator evaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("rmse");
+    double rmse = evaluator.evaluate(predictions);
+    System.out.println("Root Mean Squared Error (RMSE) on test data = " + rmse);
+
+    RandomForestRegressionModel rfModel = (RandomForestRegressionModel)(model.stages()[1]);
+    System.out.println("Learned regression forest model:\n" + rfModel.toDebugString());
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaSQLTransformerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaSQLTransformerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..2a3d62de41ab7f9eab61ecb529383c34c4fcc158
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaSQLTransformerExample.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.SQLTransformer;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+public class JavaSQLTransformerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaSQLTransformerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0, 1.0, 3.0),
+      RowFactory.create(2, 2.0, 5.0)
+    );
+    StructType schema = new StructType(new StructField[] {
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("v1", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("v2", DataTypes.DoubleType, false, Metadata.empty())
+    });
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
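+    // __THIS__ is a placeholder that refers to the input dataset of the transformer.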
+    SQLTransformer sqlTrans = new SQLTransformer().setStatement(
+      "SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__");
+
+    sqlTrans.transform(df).show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..08ea285a0d53d310c6712af7b10170a637f441f5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStandardScalerExample.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import org.apache.spark.ml.feature.StandardScaler;
+import org.apache.spark.ml.feature.StandardScalerModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+
+public class JavaStandardScalerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaStandardScalerExample")
+      .getOrCreate();
+
+    // $example on$
+    Dataset<Row> dataFrame =
+      spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    StandardScaler scaler = new StandardScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+      .setWithStd(true)
+      .setWithMean(false);
+
+    // Compute summary statistics by fitting the StandardScaler
+    StandardScalerModel scalerModel = scaler.fit(dataFrame);
+
+    // Normalize each feature to have unit standard deviation.
+    Dataset<Row> scaledData = scalerModel.transform(dataFrame);
+    scaledData.show();
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..94ead625b4745232436970c0ed68674f272636d9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemoverExample.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.StopWordsRemover;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaStopWordsRemoverExample {
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaStopWordsRemoverExample")
+      .getOrCreate();
+
+    // $example on$
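+    // The remover uses the built-in English stop word list by default.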
+    StopWordsRemover remover = new StopWordsRemover()
+      .setInputCol("raw")
+      .setOutputCol("filtered");
+
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Arrays.asList("I", "saw", "the", "red", "balloon")),
+      RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField(
+        "raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
+    });
+
+    Dataset<Row> dataset = spark.createDataFrame(data, schema);
+    remover.transform(dataset).show(false);
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..cf9747a994691d434b7125328e23be9f7096f356
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexerExample.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+// $example off$
+
+public class JavaStringIndexerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaStringIndexerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0, "a"),
+      RowFactory.create(1, "b"),
+      RowFactory.create(2, "c"),
+      RowFactory.create(3, "a"),
+      RowFactory.create(4, "a"),
+      RowFactory.create(5, "c")
+    );
+    StructType schema = new StructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("category", StringType, false)
+    });
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
+    StringIndexer indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex");
+
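+    // Labels are ordered by frequency, so the most frequent category ("a") gets index 0.0.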
+    Dataset<Row> indexed = indexer.fit(df).transform(df);
+    indexed.show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaSummarizerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaSummarizerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..e9b84365d86edf914740e1eb12865f75be34b683
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaSummarizerExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.*;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.linalg.Vector;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.stat.Summarizer;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaSummarizerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaSummarizerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Vectors.dense(2.0, 3.0, 5.0), 1.0),
+      RowFactory.create(Vectors.dense(4.0, 6.0, 7.0), 2.0)
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("features", new VectorUDT(), false, Metadata.empty()),
+      new StructField("weight", DataTypes.DoubleType, false, Metadata.empty())
+    });
+
+    Dataset<Row> df = spark.createDataFrame(data, schema);
+
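+    // Compute the mean and variance of "features", weighted by the "weight" column.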
+    Row result1 = df.select(Summarizer.metrics("mean", "variance")
+      .summary(new Column("features"), new Column("weight")).as("summary"))
+      .select("summary.mean", "summary.variance").first();
+    System.out.println("with weight: mean = " + result1.<Vector>getAs(0).toString() +
+      ", variance = " + result1.<Vector>getAs(1).toString());
+
+    Row result2 = df.select(
+      Summarizer.mean(new Column("features")),
+      Summarizer.variance(new Column("features"))
+    ).first();
+    System.out.println("without weight: mean = " + result2.<Vector>getAs(0).toString() +
+      ", variance = " + result2.<Vector>getAs(1).toString());
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..b740cd097a9b5534bf7830c789e9feb669676181
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.HashingTF;
+import org.apache.spark.ml.feature.IDF;
+import org.apache.spark.ml.feature.IDFModel;
+import org.apache.spark.ml.feature.Tokenizer;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off$
+
+public class JavaTfIdfExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaTfIdfExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0.0, "Hi I heard about Spark"),
+      RowFactory.create(0.0, "I wish Java could use case classes"),
+      RowFactory.create(1.0, "Logistic regression models are neat")
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
+    });
+    Dataset<Row> sentenceData = spark.createDataFrame(data, schema);
+
+    Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
+    Dataset<Row> wordsData = tokenizer.transform(sentenceData);
+
+    int numFeatures = 20;
+    HashingTF hashingTF = new HashingTF()
+      .setInputCol("words")
+      .setOutputCol("rawFeatures")
+      .setNumFeatures(numFeatures);
+
+    Dataset<Row> featurizedData = hashingTF.transform(wordsData);
+    // Alternatively, CountVectorizer can be used to get term frequency vectors.
+
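+    // IDF rescales the raw term frequencies, downweighting terms that appear in many documents.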
+    IDF idf = new IDF().setInputCol("rawFeatures").setOutputCol("features");
+    IDFModel idfModel = idf.fit(featurizedData);
+
+    Dataset<Row> rescaledData = idfModel.transform(featurizedData);
+    rescaledData.select("label", "features").show();
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..a0979aa2d24e4f44e1a7750db060c2a27c946aec
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import scala.collection.mutable.WrappedArray;
+
+import org.apache.spark.ml.feature.RegexTokenizer;
+import org.apache.spark.ml.feature.Tokenizer;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+// The statically imported col("...") is preferable to df.col("...")
+import static org.apache.spark.sql.functions.callUDF;
+import static org.apache.spark.sql.functions.col;
+// $example off$
+
+public class JavaTokenizerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaTokenizerExample")
+      .getOrCreate();
+
+    // $example on$
+    List<Row> data = Arrays.asList(
+      RowFactory.create(0, "Hi I heard about Spark"),
+      RowFactory.create(1, "I wish Java could use case classes"),
+      RowFactory.create(2, "Logistic,regression,models,are,neat")
+    );
+
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
+    });
+
+    Dataset<Row> sentenceDataFrame = spark.createDataFrame(data, schema);
+
+    Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
+
+    RegexTokenizer regexTokenizer = new RegexTokenizer()
+        .setInputCol("sentence")
+        .setOutputCol("words")
+        .setPattern("\\W");  // alternatively .setPattern("\\w+").setGaps(false);
+
+    spark.udf().register(
+      "countTokens", (WrappedArray<?> words) -> words.size(), DataTypes.IntegerType);
+
+    Dataset<Row> tokenized = tokenizer.transform(sentenceDataFrame);
+    tokenized.select("sentence", "words")
+        .withColumn("tokens", callUDF("countTokens", col("words")))
+        .show(false);
+
+    Dataset<Row> regexTokenized = regexTokenizer.transform(sentenceDataFrame);
+    regexTokenized.select("sentence", "words")
+        .withColumn("tokens", callUDF("countTokens", col("words")))
+        .show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..384e09c73bed84dca19d13a8852db9cfd9d3e100
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssemblerExample.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.ml.feature.VectorAssembler;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+import static org.apache.spark.sql.types.DataTypes.*;
+// $example off$
+
+public class JavaVectorAssemblerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaVectorAssemblerExample")
+      .getOrCreate();
+
+    // $example on$
+    StructType schema = createStructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("hour", IntegerType, false),
+      createStructField("mobile", DoubleType, false),
+      createStructField("userFeatures", new VectorUDT(), false),
+      createStructField("clicked", DoubleType, false)
+    });
+    Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
+    Dataset<Row> dataset = spark.createDataFrame(Arrays.asList(row), schema);
+
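+    // VectorAssembler concatenates the given input columns into a single vector column.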
+    VectorAssembler assembler = new VectorAssembler()
+      .setInputCols(new String[]{"hour", "mobile", "userFeatures"})
+      .setOutputCol("features");
+
+    Dataset<Row> output = assembler.transform(dataset);
+    System.out.println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column " +
+        "'features'");
+    output.select("features", "clicked").show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..dd9d757dd68315f0024e5c445cb6037a6aa6b21d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorIndexerExample.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Map;
+
+import org.apache.spark.ml.feature.VectorIndexer;
+import org.apache.spark.ml.feature.VectorIndexerModel;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off$
+
+public class JavaVectorIndexerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaVectorIndexerExample")
+      .getOrCreate();
+
+    // $example on$
+    Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
+
+    VectorIndexer indexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexed")
+      .setMaxCategories(10);
+    VectorIndexerModel indexerModel = indexer.fit(data);
+
+    Map<Integer, Map<Double, Integer>> categoryMaps = indexerModel.javaCategoryMaps();
+    System.out.print("Chose " + categoryMaps.size() + " categorical features:");
+
+    for (Integer feature : categoryMaps.keySet()) {
+      System.out.print(" " + feature);
+    }
+    System.out.println();
+
+    // Create new column "indexed" with categorical values transformed to indices
+    Dataset<Row> indexedData = indexerModel.transform(data);
+    indexedData.show();
+    // $example off$
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSizeHintExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSizeHintExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..d649a2ccbaa72828c179c3f01a73e979a914f4d0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSizeHintExample.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.ml.feature.VectorAssembler;
+import org.apache.spark.ml.feature.VectorSizeHint;
+import org.apache.spark.ml.linalg.VectorUDT;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import static org.apache.spark.sql.types.DataTypes.*;
+// $example off$
+
+public class JavaVectorSizeHintExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaVectorSizeHintExample")
+      .getOrCreate();
+
+    // $example on$
+    StructType schema = createStructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("hour", IntegerType, false),
+      createStructField("mobile", DoubleType, false),
+      createStructField("userFeatures", new VectorUDT(), false),
+      createStructField("clicked", DoubleType, false)
+    });
+    Row row0 = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
+    Row row1 = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0), 0.0);
+    Dataset<Row> dataset = spark.createDataFrame(Arrays.asList(row0, row1), schema);
+
+    VectorSizeHint sizeHint = new VectorSizeHint()
+      .setInputCol("userFeatures")
+      .setHandleInvalid("skip")
+      .setSize(3);
+
+    Dataset<Row> datasetWithSize = sizeHint.transform(dataset);
+    System.out.println("Rows where 'userFeatures' is not the right size are filtered out");
+    datasetWithSize.show(false);
+
+    VectorAssembler assembler = new VectorAssembler()
+      .setInputCols(new String[]{"hour", "mobile", "userFeatures"})
+      .setOutputCol("features");
+
+    // This dataframe can be used by downstream transformers as before
+    Dataset<Row> output = assembler.transform(datasetWithSize);
+    System.out.println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column " +
+        "'features'");
+    output.select("features", "clicked").show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..1ae48be2660bca8981c0c11fe9777e5a42181d32
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicerExample.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import org.apache.spark.sql.SparkSession;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.attribute.Attribute;
+import org.apache.spark.ml.attribute.AttributeGroup;
+import org.apache.spark.ml.attribute.NumericAttribute;
+import org.apache.spark.ml.feature.VectorSlicer;
+import org.apache.spark.ml.linalg.Vectors;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+public class JavaVectorSlicerExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaVectorSlicerExample")
+      .getOrCreate();
+
+    // $example on$
+    Attribute[] attrs = {
+      NumericAttribute.defaultAttr().withName("f1"),
+      NumericAttribute.defaultAttr().withName("f2"),
+      NumericAttribute.defaultAttr().withName("f3")
+    };
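+    // The attribute group stores the feature names ("f1", "f2", "f3") in the column metadata,
+    // which lets VectorSlicer select features by name as well as by index.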
+    AttributeGroup group = new AttributeGroup("userFeatures", attrs);
+
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})),
+      RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0))
+    );
+
+    Dataset<Row> dataset =
+      spark.createDataFrame(data, (new StructType()).add(group.toStructField()));
+
+    VectorSlicer vectorSlicer = new VectorSlicer()
+      .setInputCol("userFeatures").setOutputCol("features");
+
+    vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"});
+    // or vectorSlicer.setIndices(new int[]{1, 2}), or vectorSlicer.setNames(new String[]{"f2", "f3"})
+
+    Dataset<Row> output = vectorSlicer.transform(dataset);
+    output.show(false);
+    // $example off$
+
+    spark.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..fc9b45968874ae646e490d442420b96a0db2cc1b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.ml.feature.Word2Vec;
+import org.apache.spark.ml.feature.Word2VecModel;
+import org.apache.spark.ml.linalg.Vector;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.types.*;
+// $example off$
+
+public class JavaWord2VecExample {
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaWord2VecExample")
+      .getOrCreate();
+
+    // $example on$
+    // Input data: Each row is a bag of words from a sentence or document.
+    List<Row> data = Arrays.asList(
+      RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))),
+      RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))),
+      RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))
+    );
+    StructType schema = new StructType(new StructField[]{
+      new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
+    });
+    Dataset<Row> documentDF = spark.createDataFrame(data, schema);
+
+    // Learn a mapping from words to Vectors.
+    Word2Vec word2Vec = new Word2Vec()
+      .setInputCol("text")
+      .setOutputCol("result")
+      .setVectorSize(3)
+      .setMinCount(0);
+
+    Word2VecModel model = word2Vec.fit(documentDF);
+    Dataset<Row> result = model.transform(documentDF);
+
+    for (Row row : result.collectAsList()) {
+      List<String> text = row.getList(0);
+      Vector vector = (Vector) row.get(1);
+      System.out.println("Text: " + text + " => \nVector: " + vector + "\n");
+    }
+    // $example off$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaALS.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaALS.java
new file mode 100644
index 0000000000000000000000000000000000000000..95a430f1da2345e312e71d3e9ccb411e66b8e624
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaALS.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function;
+
+import org.apache.spark.mllib.recommendation.ALS;
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
+import org.apache.spark.mllib.recommendation.Rating;
+
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+/**
+ * Example using MLlib ALS from Java.
+ */
+public final class JavaALS {
+
+  static class ParseRating implements Function<String, Rating> {
+    private static final Pattern COMMA = Pattern.compile(",");
+
+    @Override
+    public Rating call(String line) {
+      String[] tok = COMMA.split(line);
+      int x = Integer.parseInt(tok[0]);
+      int y = Integer.parseInt(tok[1]);
+      double rating = Double.parseDouble(tok[2]);
+      return new Rating(x, y, rating);
+    }
+  }
+
+  static class FeaturesToString implements Function<Tuple2<Object, double[]>, String> {
+    @Override
+    public String call(Tuple2<Object, double[]> element) {
+      return element._1() + "," + Arrays.toString(element._2());
+    }
+  }
+
+  public static void main(String[] args) {
+
+    if (args.length < 4) {
+      System.err.println(
+        "Usage: JavaALS <ratings_file> <rank> <iterations> <output_dir> [<blocks>]");
+      System.exit(1);
+    }
+    SparkConf sparkConf = new SparkConf().setAppName("JavaALS");
+    int rank = Integer.parseInt(args[1]);
+    int iterations = Integer.parseInt(args[2]);
+    String outputDir = args[3];
+    int blocks = -1;
+    if (args.length == 5) {
+      blocks = Integer.parseInt(args[4]);
+    }
+
+    JavaSparkContext sc = new JavaSparkContext(sparkConf);
+    JavaRDD<String> lines = sc.textFile(args[0]);
+
+    JavaRDD<Rating> ratings = lines.map(new ParseRating());
+
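+    // Train ALS with regularization parameter lambda = 0.01; blocks = -1 lets ALS auto-configure
+    // the number of blocks used to parallelize the computation.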
+    MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks);
+
+    model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+        outputDir + "/userFeatures");
+    model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile(
+        outputDir + "/productFeatures");
+    System.out.println("Final user/product features written to " + outputDir);
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaAssociationRulesExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaAssociationRulesExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..5f43603f4ff5c8d042f11bd9b5c43331c2062637
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaAssociationRulesExample.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.fpm.AssociationRules;
+import org.apache.spark.mllib.fpm.FPGrowth;
+import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset;
+// $example off$
+
+import org.apache.spark.SparkConf;
+
+public class JavaAssociationRulesExample {
+
+  public static void main(String[] args) {
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaAssociationRulesExample");
+    JavaSparkContext sc = new JavaSparkContext(sparkConf);
+
+    // $example on$
+    JavaRDD<FPGrowth.FreqItemset<String>> freqItemsets = sc.parallelize(Arrays.asList(
+      new FreqItemset<>(new String[] {"a"}, 15L),
+      new FreqItemset<>(new String[] {"b"}, 35L),
+      new FreqItemset<>(new String[] {"a", "b"}, 12L)
+    ));
+
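+    // Generate rules from the frequent itemsets, keeping only those with confidence >= 0.8.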
+    AssociationRules arules = new AssociationRules()
+      .setMinConfidence(0.8);
+    JavaRDD<AssociationRules.Rule<String>> results = arules.run(freqItemsets);
+
+    for (AssociationRules.Rule<String> rule : results.collect()) {
+      System.out.println(
+        rule.javaAntecedent() + " => " + rule.javaConsequent() + ", " + rule.confidence());
+    }
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaBinaryClassificationMetricsExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaBinaryClassificationMetricsExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..b9d0313c6bb560b1bfcccd0396cf3334f82ac593
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaBinaryClassificationMetricsExample.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.mllib.classification.LogisticRegressionModel;
+import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS;
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+
+public class JavaBinaryClassificationMetricsExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("Java Binary Classification Metrics Example");
+    SparkContext sc = new SparkContext(conf);
+    // $example on$
+    String path = "data/mllib/sample_binary_classification_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();
+
+    // Split initial RDD into two... [60% training data, 40% testing data].
+    JavaRDD<LabeledPoint>[] splits =
+      data.randomSplit(new double[]{0.6, 0.4}, 11L);
+    JavaRDD<LabeledPoint> training = splits[0].cache();
+    JavaRDD<LabeledPoint> test = splits[1];
+
+    // Run training algorithm to build the model.
+    LogisticRegressionModel model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(2)
+      .run(training.rdd());
+
+    // Clear the prediction threshold so the model will return probabilities
+    model.clearThreshold();
+
+    // Compute raw scores on the test set.
+    JavaPairRDD<Object, Object> predictionAndLabels = test.mapToPair(p ->
+      new Tuple2<>(model.predict(p.features()), p.label()));
+
+    // Get evaluation metrics.
+    BinaryClassificationMetrics metrics =
+      new BinaryClassificationMetrics(predictionAndLabels.rdd());
+
+    // Precision by threshold
+    JavaRDD<Tuple2<Object, Object>> precision = metrics.precisionByThreshold().toJavaRDD();
+    System.out.println("Precision by threshold: " + precision.collect());
+
+    // Recall by threshold
+    JavaRDD<?> recall = metrics.recallByThreshold().toJavaRDD();
+    System.out.println("Recall by threshold: " + recall.collect());
+
+    // F Score by threshold
+    JavaRDD<?> f1Score = metrics.fMeasureByThreshold().toJavaRDD();
+    System.out.println("F1 Score by threshold: " + f1Score.collect());
+
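+    // F-measure with beta = 2.0, which weights recall more heavily than precision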
+    JavaRDD<?> f2Score = metrics.fMeasureByThreshold(2.0).toJavaRDD();
+    System.out.println("F2 Score by threshold: " + f2Score.collect());
+
+    // Precision-recall curve
+    JavaRDD<?> prc = metrics.pr().toJavaRDD();
+    System.out.println("Precision-recall curve: " + prc.collect());
+
+    // Thresholds used by the precision/recall curves (extracted from the precision-by-threshold pairs)
+    JavaRDD<Double> thresholds = precision.map(t -> Double.parseDouble(t._1().toString()));
+
+    // ROC Curve
+    JavaRDD<?> roc = metrics.roc().toJavaRDD();
+    System.out.println("ROC curve: " + roc.collect());
+
+    // AUPRC
+    System.out.println("Area under precision-recall curve = " + metrics.areaUnderPR());
+
+    // AUROC
+    System.out.println("Area under ROC = " + metrics.areaUnderROC());
+
+    // Save and load model
+    model.save(sc, "target/tmp/LogisticRegressionModel");
+    LogisticRegressionModel.load(sc, "target/tmp/LogisticRegressionModel");
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaBisectingKMeansExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaBisectingKMeansExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..f878b55a98adf662fc8ba1309a881ef90f085de0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaBisectingKMeansExample.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+// $example off$
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.clustering.BisectingKMeans;
+import org.apache.spark.mllib.clustering.BisectingKMeansModel;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+// $example off$
+
+/**
+ * Java example for bisecting k-means clustering.
+ */
+public class JavaBisectingKMeansExample {
+  public static void main(String[] args) {
+    SparkConf sparkConf = new SparkConf().setAppName("JavaBisectingKMeansExample");
+    JavaSparkContext sc = new JavaSparkContext(sparkConf);
+
+    // $example on$
+    List<Vector> localData = Arrays.asList(
+      Vectors.dense(0.1, 0.1),   Vectors.dense(0.3, 0.3),
+      Vectors.dense(10.1, 10.1), Vectors.dense(10.3, 10.3),
+      Vectors.dense(20.1, 20.1), Vectors.dense(20.3, 20.3),
+      Vectors.dense(30.1, 30.1), Vectors.dense(30.3, 30.3)
+    );
+    JavaRDD<Vector> data = sc.parallelize(localData, 2);
+
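+    // The sample points fall into four well-separated groups, so bisect down to k = 4 clusters.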
+    BisectingKMeans bkm = new BisectingKMeans()
+      .setK(4);
+    BisectingKMeansModel model = bkm.run(data);
+
+    System.out.println("Compute Cost: " + model.computeCost(data));
+
+    Vector[] clusterCenters = model.clusterCenters();
+    for (int i = 0; i < clusterCenters.length; i++) {
+      Vector clusterCenter = clusterCenters[i];
+      System.out.println("Cluster Center " + i + ": " + clusterCenter);
+    }
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaChiSqSelectorExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaChiSqSelectorExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..ce354af2b57935387444bf07bd3ef6e9d716a358
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaChiSqSelectorExample.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.feature.ChiSqSelector;
+import org.apache.spark.mllib.feature.ChiSqSelectorModel;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+public class JavaChiSqSelectorExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaChiSqSelectorExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaRDD<LabeledPoint> points = MLUtils.loadLibSVMFile(jsc.sc(),
+      "data/mllib/sample_libsvm_data.txt").toJavaRDD().cache();
+
+    // Discretize data in 16 equal bins since ChiSqSelector requires categorical features
+    // Although features are doubles, the ChiSqSelector treats each unique value as a category
+    JavaRDD<LabeledPoint> discretizedData = points.map(lp -> {
+      double[] discretizedFeatures = new double[lp.features().size()];
+      for (int i = 0; i < lp.features().size(); ++i) {
+        discretizedFeatures[i] = Math.floor(lp.features().apply(i) / 16);
+      }
+      return new LabeledPoint(lp.label(), Vectors.dense(discretizedFeatures));
+    });
+
+    // Create ChiSqSelector that will select top 50 of 692 features
+    ChiSqSelector selector = new ChiSqSelector(50);
+    // Create ChiSqSelector model (selecting features)
+    ChiSqSelectorModel transformer = selector.fit(discretizedData.rdd());
+    // Filter the top 50 features from each feature vector
+    JavaRDD<LabeledPoint> filteredData = discretizedData.map(lp ->
+      new LabeledPoint(lp.label(), transformer.transform(lp.features())));
+    // $example off$
+
+    System.out.println("filtered data: ");
+    filteredData.foreach(System.out::println);
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..c0fa0b3cac1e971f9c1d2af90514200c444777e5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaCorrelationsExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.stat.Statistics;
+// $example off$
+
+public class JavaCorrelationsExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaDoubleRDD seriesX = jsc.parallelizeDoubles(
+      Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0));  // a series
+
+    // must have the same number of partitions and cardinality as seriesX
+    JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
+      Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0));
+
+    // compute the correlation using Pearson's method. Use "spearman" for Spearman's method.
+    // If a method is not specified, Pearson's method will be used by default.
+    Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
+    System.out.println("Correlation is: " + correlation);
+
+    // note that each Vector is a row and not a column
+    JavaRDD<Vector> data = jsc.parallelize(
+      Arrays.asList(
+        Vectors.dense(1.0, 10.0, 100.0),
+        Vectors.dense(2.0, 20.0, 200.0),
+        Vectors.dense(5.0, 33.0, 366.0)
+      )
+    );
+
+    // calculate the correlation matrix using Pearson's method.
+    // Use "spearman" for Spearman's method.
+    // If a method is not specified, Pearson's method will be used by default.
+    Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
+    System.out.println(correlMatrix.toString());
+    // $example off$
+
+    jsc.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaDecisionTreeClassificationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaDecisionTreeClassificationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..032c168b946d6163b3f9352676f6341771bd825a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaDecisionTreeClassificationExample.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.HashMap;
+import java.util.Map;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.tree.DecisionTree;
+import org.apache.spark.mllib.tree.model.DecisionTreeModel;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+class JavaDecisionTreeClassificationExample {
+
+  public static void main(String[] args) {
+
+    // $example on$
+    SparkConf sparkConf = new SparkConf().setAppName("JavaDecisionTreeClassificationExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    // Load and parse the data file.
+    String datapath = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
+    // Split the data into training and test sets (30% held out for testing)
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    JavaRDD<LabeledPoint> trainingData = splits[0];
+    JavaRDD<LabeledPoint> testData = splits[1];
+
+    // Set parameters.
+    //  Empty categoricalFeaturesInfo indicates all features are continuous.
+    int numClasses = 2;
+    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
+    String impurity = "gini";
+    int maxDepth = 5;
+    int maxBins = 32;
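+    // maxBins bounds the number of candidate split values considered per continuous feature.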
+
+    // Train a DecisionTree model for classification.
+    DecisionTreeModel model = DecisionTree.trainClassifier(trainingData, numClasses,
+      categoricalFeaturesInfo, impurity, maxDepth, maxBins);
+
+    // Evaluate model on test instances and compute test error
+    JavaPairRDD<Double, Double> predictionAndLabel =
+      testData.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
+    double testErr =
+      predictionAndLabel.filter(pl -> !pl._1().equals(pl._2())).count() / (double) testData.count();
+
+    System.out.println("Test Error: " + testErr);
+    System.out.println("Learned classification tree model:\n" + model.toDebugString());
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myDecisionTreeClassificationModel");
+    DecisionTreeModel sameModel = DecisionTreeModel
+      .load(jsc.sc(), "target/tmp/myDecisionTreeClassificationModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaDecisionTreeRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaDecisionTreeRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..f222c38fc82b6e8ed57e9385c28144492db41fc2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaDecisionTreeRegressionExample.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.HashMap;
+import java.util.Map;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.tree.DecisionTree;
+import org.apache.spark.mllib.tree.model.DecisionTreeModel;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+class JavaDecisionTreeRegressionExample {
+
+  public static void main(String[] args) {
+
+    // $example on$
+    SparkConf sparkConf = new SparkConf().setAppName("JavaDecisionTreeRegressionExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    // Load and parse the data file.
+    String datapath = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
+    // Split the data into training and test sets (30% held out for testing)
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    JavaRDD<LabeledPoint> trainingData = splits[0];
+    JavaRDD<LabeledPoint> testData = splits[1];
+
+    // Set parameters.
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
+    String impurity = "variance";
+    int maxDepth = 5;
+    int maxBins = 32;
+
+    // Train a DecisionTree model.
+    DecisionTreeModel model = DecisionTree.trainRegressor(trainingData,
+      categoricalFeaturesInfo, impurity, maxDepth, maxBins);
+
+    // Evaluate model on test instances and compute test error
+    JavaPairRDD<Double, Double> predictionAndLabel =
+      testData.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
+    double testMSE = predictionAndLabel.mapToDouble(pl -> {
+      double diff = pl._1() - pl._2();
+      return diff * diff;
+    }).mean();
+    System.out.println("Test Mean Squared Error: " + testMSE);
+    System.out.println("Learned regression tree model:\n" + model.toDebugString());
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myDecisionTreeRegressionModel");
+    DecisionTreeModel sameModel = DecisionTreeModel
+      .load(jsc.sc(), "target/tmp/myDecisionTreeRegressionModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaElementwiseProductExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaElementwiseProductExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..2d45c6166fee3325168bcf6b9578a28f325b8fe4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaElementwiseProductExample.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+// $example off$
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.feature.ElementwiseProduct;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+// $example off$
+
+public class JavaElementwiseProductExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaElementwiseProductExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // Create some vector data; also works for sparse vectors
+    JavaRDD<Vector> data = jsc.parallelize(Arrays.asList(
+      Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0)));
+    Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
+    ElementwiseProduct transformer = new ElementwiseProduct(transformingVector);
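+    // The transformer multiplies each input vector componentwise by transformingVector (Hadamard product).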
+
+    // Batch transform and per-row transform give the same results:
+    JavaRDD<Vector> transformedData = transformer.transform(data);
+    JavaRDD<Vector> transformedData2 = data.map(transformer::transform);
+    // $example off$
+
+    System.out.println("transformedData: ");
+    transformedData.foreach(System.out::println);
+
+    System.out.println("transformedData2: ");
+    transformedData2.foreach(System.out::println);
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGaussianMixtureExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGaussianMixtureExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..5792e5a71cb0958edac79f662e895fa255680304
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGaussianMixtureExample.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.clustering.GaussianMixture;
+import org.apache.spark.mllib.clustering.GaussianMixtureModel;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+// $example off$
+
+public class JavaGaussianMixtureExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaGaussianMixtureExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // Load and parse data
+    String path = "data/mllib/gmm_data.txt";
+    JavaRDD<String> data = jsc.textFile(path);
+    JavaRDD<Vector> parsedData = data.map(s -> {
+      String[] sarray = s.trim().split(" ");
+      double[] values = new double[sarray.length];
+      for (int i = 0; i < sarray.length; i++) {
+        values[i] = Double.parseDouble(sarray[i]);
+      }
+      return Vectors.dense(values);
+    });
+    parsedData.cache();
+
+    // Cluster the data into two classes using GaussianMixture
+    GaussianMixtureModel gmm = new GaussianMixture().setK(2).run(parsedData.rdd());
+
+    // Save and load GaussianMixtureModel
+    gmm.save(jsc.sc(), "target/org/apache/spark/JavaGaussianMixtureExample/GaussianMixtureModel");
+    GaussianMixtureModel sameModel = GaussianMixtureModel.load(jsc.sc(),
+      "target/org/apache/spark/JavaGaussianMixtureExample/GaussianMixtureModel");
+
+    // Output the parameters of the mixture model
+    for (int j = 0; j < gmm.k(); j++) {
+      System.out.printf("weight=%f\nmu=%s\nsigma=\n%s\n",
+        gmm.weights()[j], gmm.gaussians()[j].mu(), gmm.gaussians()[j].sigma());
+    }
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostingClassificationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostingClassificationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..521ee96fbdf4bf6509dc18fee527ba532d66bfa1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostingClassificationExample.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.HashMap;
+import java.util.Map;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.tree.GradientBoostedTrees;
+import org.apache.spark.mllib.tree.configuration.BoostingStrategy;
+import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+public class JavaGradientBoostingClassificationExample {
+  public static void main(String[] args) {
+    // $example on$
+    SparkConf sparkConf = new SparkConf()
+      .setAppName("JavaGradientBoostedTreesClassificationExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+
+    // Load and parse the data file.
+    String datapath = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
+    // Split the data into training and test sets (30% held out for testing)
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    JavaRDD<LabeledPoint> trainingData = splits[0];
+    JavaRDD<LabeledPoint> testData = splits[1];
+
+    // Train a GradientBoostedTrees model.
+    // The defaultParams for Classification use LogLoss by default.
+    BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Classification");
+    boostingStrategy.setNumIterations(3); // Note: Use more iterations in practice.
+    boostingStrategy.getTreeStrategy().setNumClasses(2);
+    boostingStrategy.getTreeStrategy().setMaxDepth(5);
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
+    boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo);
+
+    GradientBoostedTreesModel model = GradientBoostedTrees.train(trainingData, boostingStrategy);
+
+    // Evaluate model on test instances and compute test error
+    JavaPairRDD<Double, Double> predictionAndLabel =
+      testData.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
+    double testErr =
+      predictionAndLabel.filter(pl -> !pl._1().equals(pl._2())).count() / (double) testData.count();
+    System.out.println("Test Error: " + testErr);
+    System.out.println("Learned classification GBT model:\n" + model.toDebugString());
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myGradientBoostingClassificationModel");
+    GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(jsc.sc(),
+      "target/tmp/myGradientBoostingClassificationModel");
+    // $example off$
+
+    jsc.stop();
+  }
+
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostingRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostingRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..b345d19f59ab605fb625de351beff75faa1859e9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaGradientBoostingRegressionExample.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.HashMap;
+import java.util.Map;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.tree.GradientBoostedTrees;
+import org.apache.spark.mllib.tree.configuration.BoostingStrategy;
+import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+public class JavaGradientBoostingRegressionExample {
+  public static void main(String[] args) {
+    // $example on$
+    SparkConf sparkConf = new SparkConf()
+      .setAppName("JavaGradientBoostedTreesRegressionExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+    // Load and parse the data file.
+    String datapath = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
+    // Split the data into training and test sets (30% held out for testing)
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    JavaRDD<LabeledPoint> trainingData = splits[0];
+    JavaRDD<LabeledPoint> testData = splits[1];
+
+    // Train a GradientBoostedTrees model.
+    // The defaultParams for Regression use SquaredError by default.
+    BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Regression");
+    boostingStrategy.setNumIterations(3); // Note: Use more iterations in practice.
+    boostingStrategy.getTreeStrategy().setMaxDepth(5);
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
+    boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo);
+
+    GradientBoostedTreesModel model = GradientBoostedTrees.train(trainingData, boostingStrategy);
+
+    // Evaluate model on test instances and compute test error
+    JavaPairRDD<Double, Double> predictionAndLabel =
+      testData.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
+    double testMSE = predictionAndLabel.mapToDouble(pl -> {
+      double diff = pl._1() - pl._2();
+      return diff * diff;
+    }).mean();
+    System.out.println("Test Mean Squared Error: " + testMSE);
+    System.out.println("Learned regression GBT model:\n" + model.toDebugString());
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myGradientBoostingRegressionModel");
+    GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(jsc.sc(),
+      "target/tmp/myGradientBoostingRegressionModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..273273652c955f3edc6d721c3b70ec5bb39a2cf2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingExample.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Matrices;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.stat.Statistics;
+import org.apache.spark.mllib.stat.test.ChiSqTestResult;
+// $example off$
+
+public class JavaHypothesisTestingExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaHypothesisTestingExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // a vector composed of the frequencies of events
+    Vector vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25);
+
+    // compute the goodness of fit. If a second vector to test against is not supplied
+    // as a parameter, the test runs against a uniform distribution.
+    ChiSqTestResult goodnessOfFitTestResult = Statistics.chiSqTest(vec);
+    // summary of the test including the p-value, degrees of freedom, test statistic,
+    // the method used, and the null hypothesis.
+    System.out.println(goodnessOfFitTestResult + "\n");
+
+    // Create a contingency matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+    Matrix mat = Matrices.dense(3, 2, new double[]{1.0, 3.0, 5.0, 2.0, 4.0, 6.0});
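+    // (Matrices.dense takes the entries in column-major order.)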
+
+    // conduct Pearson's independence test on the input contingency matrix
+    ChiSqTestResult independenceTestResult = Statistics.chiSqTest(mat);
+    // summary of the test including the p-value, degrees of freedom...
+    System.out.println(independenceTestResult + "\n");
+
+    // an RDD of labeled points
+    JavaRDD<LabeledPoint> obs = jsc.parallelize(
+      Arrays.asList(
+        new LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
+        new LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
+        new LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5))
+      )
+    );
+
+    // The contingency table is constructed from the raw (label, feature) pairs and used to conduct
+    // the independence test. Returns an array containing the ChiSquaredTestResult for every feature
+    // against the label.
+    ChiSqTestResult[] featureTestResults = Statistics.chiSqTest(obs.rdd());
+    int i = 1;
+    for (ChiSqTestResult result : featureTestResults) {
+      System.out.println("Column " + i + ":");
+      System.out.println(result + "\n");  // summary of the test
+      i++;
+    }
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..fe611c9ae67c9a6821bf8d9f1b28ea222be5e447
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaHypothesisTestingKolmogorovSmirnovTestExample.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaDoubleRDD;
+import org.apache.spark.mllib.stat.Statistics;
+import org.apache.spark.mllib.stat.test.KolmogorovSmirnovTestResult;
+// $example off$
+
+public class JavaHypothesisTestingKolmogorovSmirnovTestExample {
+  public static void main(String[] args) {
+
+    SparkConf conf =
+      new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
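+    // Run a one-sample KS test of the data against a standard normal distribution (mean 0.0, stddev 1.0)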
+    KolmogorovSmirnovTestResult testResult =
+      Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
+    // summary of the test including the p-value, test statistic, and null hypothesis
+    // if our p-value indicates significance, we can reject the null hypothesis
+    System.out.println(testResult);
+    // $example off$
+
+    jsc.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaIsotonicRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaIsotonicRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..adebafe4b89d7f247f8c253ee48eec0c49910832
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaIsotonicRegressionExample.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.mllib;
+
+// $example on$
+
+import scala.Tuple2;
+import scala.Tuple3;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.regression.IsotonicRegression;
+import org.apache.spark.mllib.regression.IsotonicRegressionModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+import org.apache.spark.SparkConf;
+
+public class JavaIsotonicRegressionExample {
+  public static void main(String[] args) {
+    SparkConf sparkConf = new SparkConf().setAppName("JavaIsotonicRegressionExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+    // $example on$
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(
+      jsc.sc(), "data/mllib/sample_isotonic_regression_libsvm_data.txt").toJavaRDD();
+
+    // Create label, feature, weight tuples from input data with weight set to default value 1.0.
+    JavaRDD<Tuple3<Double, Double, Double>> parsedData = data.map(point ->
+      new Tuple3<>(point.label(), point.features().apply(0), 1.0));
+
+    // Split data into training (60%) and test (40%) sets.
+    JavaRDD<Tuple3<Double, Double, Double>>[] splits =
+      parsedData.randomSplit(new double[]{0.6, 0.4}, 11L);
+    JavaRDD<Tuple3<Double, Double, Double>> training = splits[0];
+    JavaRDD<Tuple3<Double, Double, Double>> test = splits[1];
+
+    // Create isotonic regression model from training data.
+    // The isotonic parameter defaults to true, so it is set explicitly here only for demonstration.
+    IsotonicRegressionModel model = new IsotonicRegression().setIsotonic(true).run(training);
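+    // (setIsotonic(true) fits a non-decreasing function; setIsotonic(false) would fit a non-increasing one.)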
+
+    // Create tuples of predicted and real labels.
+    JavaPairRDD<Double, Double> predictionAndLabel = test.mapToPair(point ->
+      new Tuple2<>(model.predict(point._2()), point._1()));
+
+    // Calculate mean squared error between predicted and real labels.
+    double meanSquaredError = predictionAndLabel.mapToDouble(pl -> {
+      double diff = pl._1() - pl._2();
+      return diff * diff;
+    }).mean();
+    System.out.println("Mean Squared Error = " + meanSquaredError);
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myIsotonicRegressionModel");
+    IsotonicRegressionModel sameModel =
+      IsotonicRegressionModel.load(jsc.sc(), "target/tmp/myIsotonicRegressionModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaKMeansExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaKMeansExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..f17275617ad5e612fe00e56b0e39dcaf2b4905d6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaKMeansExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.clustering.KMeans;
+import org.apache.spark.mllib.clustering.KMeansModel;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+// $example off$
+
+public class JavaKMeansExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaKMeansExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // Load and parse data
+    String path = "data/mllib/kmeans_data.txt";
+    JavaRDD<String> data = jsc.textFile(path);
+    JavaRDD<Vector> parsedData = data.map(s -> {
+      String[] sarray = s.split(" ");
+      double[] values = new double[sarray.length];
+      for (int i = 0; i < sarray.length; i++) {
+        values[i] = Double.parseDouble(sarray[i]);
+      }
+      return Vectors.dense(values);
+    });
+    parsedData.cache();
+
+    // Cluster the data into two classes using KMeans
+    int numClusters = 2;
+    int numIterations = 20;
+    KMeansModel clusters = KMeans.train(parsedData.rdd(), numClusters, numIterations);
+
+    System.out.println("Cluster centers:");
+    for (Vector center: clusters.clusterCenters()) {
+      System.out.println(" " + center);
+    }
+    double cost = clusters.computeCost(parsedData.rdd());
+    System.out.println("Cost: " + cost);
+
+    // Evaluate clustering by computing Within Set Sum of Squared Errors
+    double WSSSE = clusters.computeCost(parsedData.rdd());
+    System.out.println("Within Set Sum of Squared Errors = " + WSSSE);
+
+    // Save and load model
+    clusters.save(jsc.sc(), "target/org/apache/spark/JavaKMeansExample/KMeansModel");
+    KMeansModel sameModel = KMeansModel.load(jsc.sc(),
+      "target/org/apache/spark/JavaKMeansExample/KMeansModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..41de0d90eccd7de97b20337a1f9107cdc908ebab
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaKernelDensityEstimationExample.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.stat.KernelDensity;
+// $example off$
+
+public class JavaKernelDensityEstimationExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // an RDD of sample data
+    JavaRDD<Double> data = jsc.parallelize(
+      Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0));
+
+    // Construct the density estimator with the sample data
+    // and a standard deviation for the Gaussian kernels
+    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);
+
+    // Find density estimates for the given values
+    double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});
+
+    System.out.println(Arrays.toString(densities));
+    // $example off$
+
+    jsc.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLBFGSExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLBFGSExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..3fdc03a92ad7a7de6db94da4147adb7f351bdbe5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLBFGSExample.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.mllib.classification.LogisticRegressionModel;
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.optimization.*;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+// $example off$
+
+public class JavaLBFGSExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("L-BFGS Example");
+    SparkContext sc = new SparkContext(conf);
+
+    // $example on$
+    String path = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();
+    int numFeatures = data.take(1).get(0).features().size();
+
+    // Split the initial RDD into training (60%) and test (40%) sets.
+    JavaRDD<LabeledPoint> trainingInit = data.sample(false, 0.6, 11L);
+    JavaRDD<LabeledPoint> test = data.subtract(trainingInit);
+
+    // Append a bias term (1.0) to each training vector to act as the intercept.
+    JavaPairRDD<Object, Vector> training = trainingInit.mapToPair(p ->
+      new Tuple2<>(p.label(), MLUtils.appendBias(p.features())));
+    training.cache();
+
+    // Run training algorithm to build the model.
+    int numCorrections = 10;
+    double convergenceTol = 1e-4;
+    int maxNumIterations = 20;
+    double regParam = 0.1;
+    Vector initialWeightsWithIntercept = Vectors.dense(new double[numFeatures + 1]);
+
+    Tuple2<Vector, double[]> result = LBFGS.runLBFGS(
+      training.rdd(),
+      new LogisticGradient(),
+      new SquaredL2Updater(),
+      numCorrections,
+      convergenceTol,
+      maxNumIterations,
+      regParam,
+      initialWeightsWithIntercept);
+    Vector weightsWithIntercept = result._1();
+    double[] loss = result._2();
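+    // weightsWithIntercept holds the learned weights with the intercept (appended bias) as its
+    // last element; loss records the objective value computed at each L-BFGS iteration.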
+
+    LogisticRegressionModel model = new LogisticRegressionModel(
+      Vectors.dense(Arrays.copyOf(weightsWithIntercept.toArray(), weightsWithIntercept.size() - 1)),
+      (weightsWithIntercept.toArray())[weightsWithIntercept.size() - 1]);
+
+    // Clear the default threshold.
+    model.clearThreshold();
+
+    // Compute raw scores on the test set.
+    JavaPairRDD<Object, Object> scoreAndLabels = test.mapToPair(p ->
+      new Tuple2<>(model.predict(p.features()), p.label()));
+
+    // Get evaluation metrics.
+    BinaryClassificationMetrics metrics =
+      new BinaryClassificationMetrics(scoreAndLabels.rdd());
+    double auROC = metrics.areaUnderROC();
+
+    System.out.println("Loss of each step in training process");
+    for (double l : loss) {
+      System.out.println(l);
+    }
+    System.out.println("Area under ROC = " + auROC);
+    // $example off$
+
+    sc.stop();
+  }
+}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLatentDirichletAllocationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLatentDirichletAllocationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..887edf8c212105c4e4085cee34ff5b8203c7615f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLatentDirichletAllocationExample.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.clustering.DistributedLDAModel;
+import org.apache.spark.mllib.clustering.LDA;
+import org.apache.spark.mllib.clustering.LDAModel;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+// $example off$
+
+public class JavaLatentDirichletAllocationExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaKLatentDirichletAllocationExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    // Load and parse the data
+    String path = "data/mllib/sample_lda_data.txt";
+    JavaRDD<String> data = jsc.textFile(path);
+    JavaRDD<Vector> parsedData = data.map(s -> {
+      String[] sarray = s.trim().split(" ");
+      double[] values = new double[sarray.length];
+      for (int i = 0; i < sarray.length; i++) {
+        values[i] = Double.parseDouble(sarray[i]);
+      }
+      return Vectors.dense(values);
+    });
+    // Index documents with unique IDs
+    JavaPairRDD<Long, Vector> corpus =
+      JavaPairRDD.fromJavaRDD(parsedData.zipWithIndex().map(Tuple2::swap));
+    corpus.cache();
+
+    // Cluster the documents into three topics using LDA
+    LDAModel ldaModel = new LDA().setK(3).run(corpus);
+
+    // Output topics. Each is a distribution over words (matching word count vectors)
+    System.out.println("Learned topics (as distributions over vocab of " + ldaModel.vocabSize()
+      + " words):");
+    Matrix topics = ldaModel.topicsMatrix();
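+    // topics is a vocabSize x k matrix; entry (word, topic) is the weight of that word in the topic.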
+    for (int topic = 0; topic < 3; topic++) {
+      System.out.print("Topic " + topic + ":");
+      for (int word = 0; word < ldaModel.vocabSize(); word++) {
+        System.out.print(" " + topics.apply(word, topic));
+      }
+      System.out.println();
+    }
+
+    ldaModel.save(jsc.sc(),
+      "target/org/apache/spark/JavaLatentDirichletAllocationExample/LDAModel");
+    DistributedLDAModel sameModel = DistributedLDAModel.load(jsc.sc(),
+      "target/org/apache/spark/JavaLatentDirichletAllocationExample/LDAModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLinearRegressionWithSGDExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLinearRegressionWithSGDExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..324a781c1a44af47f34eb68d3f226bf0882df3be
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLinearRegressionWithSGDExample.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.regression.LinearRegressionModel;
+import org.apache.spark.mllib.regression.LinearRegressionWithSGD;
+// $example off$
+
+/**
+ * Example for LinearRegressionWithSGD.
+ */
+public class JavaLinearRegressionWithSGDExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaLinearRegressionWithSGDExample");
+    JavaSparkContext sc = new JavaSparkContext(conf);
+
+    // $example on$
+    // Load and parse the data
+    String path = "data/mllib/ridge-data/lpsa.data";
+    JavaRDD<String> data = sc.textFile(path);
+    JavaRDD<LabeledPoint> parsedData = data.map(line -> {
+      String[] parts = line.split(",");
+      String[] features = parts[1].split(" ");
+      double[] v = new double[features.length];
+      for (int i = 0; i < features.length - 1; i++) {
+        v[i] = Double.parseDouble(features[i]);
+      }
+      return new LabeledPoint(Double.parseDouble(parts[0]), Vectors.dense(v));
+    });
+    parsedData.cache();
+
+    // Building the model
+    int numIterations = 100;
+    double stepSize = 0.00000001;
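+    // Note: this tiny step size is only illustrative; in practice it needs tuning for the data at hand.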
+    LinearRegressionModel model =
+      LinearRegressionWithSGD.train(JavaRDD.toRDD(parsedData), numIterations, stepSize);
+
+    // Evaluate model on training examples and compute training error
+    JavaPairRDD<Double, Double> valuesAndPreds = parsedData.mapToPair(point ->
+      new Tuple2<>(model.predict(point.features()), point.label()));
+
+    double MSE = valuesAndPreds.mapToDouble(pair -> {
+      double diff = pair._1() - pair._2();
+      return diff * diff;
+    }).mean();
+    System.out.println("training Mean Squared Error = " + MSE);
+
+    // Save and load model
+    model.save(sc.sc(), "target/tmp/javaLinearRegressionWithSGDModel");
+    LinearRegressionModel sameModel = LinearRegressionModel.load(sc.sc(),
+      "target/tmp/javaLinearRegressionWithSGDModel");
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..26b8a6e9fa3ad6353791ae96c6e60a37b73bd573
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaLogisticRegressionWithLBFGSExample.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.classification.LogisticRegressionModel;
+import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS;
+import org.apache.spark.mllib.evaluation.MulticlassMetrics;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+/**
+ * Example for LogisticRegressionWithLBFGS.
+ */
+public class JavaLogisticRegressionWithLBFGSExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaLogisticRegressionWithLBFGSExample");
+    SparkContext sc = new SparkContext(conf);
+    // $example on$
+    String path = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();
+
+    // Split the initial RDD into training (60%) and test (40%) sets.
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[] {0.6, 0.4}, 11L);
+    JavaRDD<LabeledPoint> training = splits[0].cache();
+    JavaRDD<LabeledPoint> test = splits[1];
+
+    // Run training algorithm to build the model.
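+    // setNumClasses(10) fits a multinomial model, although the sample LIBSVM data contains only labels 0 and 1.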
+    LogisticRegressionModel model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(10)
+      .run(training.rdd());
+
+    // Compute raw scores on the test set.
+    JavaPairRDD<Object, Object> predictionAndLabels = test.mapToPair(p ->
+      new Tuple2<>(model.predict(p.features()), p.label()));
+
+    // Get evaluation metrics.
+    MulticlassMetrics metrics = new MulticlassMetrics(predictionAndLabels.rdd());
+    double accuracy = metrics.accuracy();
+    System.out.println("Accuracy = " + accuracy);
+
+    // Save and load model
+    model.save(sc, "target/tmp/javaLogisticRegressionWithLBFGSModel");
+    LogisticRegressionModel sameModel = LogisticRegressionModel.load(sc,
+      "target/tmp/javaLogisticRegressionWithLBFGSModel");
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaMultiLabelClassificationMetricsExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaMultiLabelClassificationMetricsExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..bc99dc023fa7bb2ab116ba2e612334c5ea5695c5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaMultiLabelClassificationMetricsExample.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.mllib.evaluation.MultilabelMetrics;
+import org.apache.spark.SparkConf;
+// $example off$
+
+public class JavaMultiLabelClassificationMetricsExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("Multilabel Classification Metrics Example");
+    JavaSparkContext sc = new JavaSparkContext(conf);
+    // $example on$
+    List<Tuple2<double[], double[]>> data = Arrays.asList(
+      new Tuple2<>(new double[]{0.0, 1.0}, new double[]{0.0, 2.0}),
+      new Tuple2<>(new double[]{0.0, 2.0}, new double[]{0.0, 1.0}),
+      new Tuple2<>(new double[]{}, new double[]{0.0}),
+      new Tuple2<>(new double[]{2.0}, new double[]{2.0}),
+      new Tuple2<>(new double[]{2.0, 0.0}, new double[]{2.0, 0.0}),
+      new Tuple2<>(new double[]{0.0, 1.0, 2.0}, new double[]{0.0, 1.0}),
+      new Tuple2<>(new double[]{1.0}, new double[]{1.0, 2.0})
+    );
+    JavaRDD<Tuple2<double[], double[]>> scoreAndLabels = sc.parallelize(data);
+
+    // Instantiate metrics object
+    MultilabelMetrics metrics = new MultilabelMetrics(scoreAndLabels.rdd());
+
+    // Summary stats
+    System.out.format("Recall = %f\n", metrics.recall());
+    System.out.format("Precision = %f\n", metrics.precision());
+    System.out.format("F1 measure = %f\n", metrics.f1Measure());
+    System.out.format("Accuracy = %f\n", metrics.accuracy());
+
+    // Stats by labels
+    for (int i = 0; i < metrics.labels().length - 1; i++) {
+      System.out.format("Class %1.1f precision = %f\n", metrics.labels()[i], metrics.precision(
+        metrics.labels()[i]));
+      System.out.format("Class %1.1f recall = %f\n", metrics.labels()[i], metrics.recall(
+        metrics.labels()[i]));
+      System.out.format("Class %1.1f F1 score = %f\n", metrics.labels()[i], metrics.f1Measure(
+        metrics.labels()[i]));
+    }
+
+    // Micro stats
+    System.out.format("Micro recall = %f\n", metrics.microRecall());
+    System.out.format("Micro precision = %f\n", metrics.microPrecision());
+    System.out.format("Micro F1 measure = %f\n", metrics.microF1Measure());
+
+    // Hamming loss
+    System.out.format("Hamming loss = %f\n", metrics.hammingLoss());
+
+    // Subset accuracy
+    System.out.format("Subset accuracy = %f\n", metrics.subsetAccuracy());
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..03670383b794f20a01b00490898c5177250c8143
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaMulticlassClassificationMetricsExample.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.mllib.classification.LogisticRegressionModel;
+import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS;
+import org.apache.spark.mllib.evaluation.MulticlassMetrics;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+import org.apache.spark.mllib.linalg.Matrix;
+// $example off$
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+
+public class JavaMulticlassClassificationMetricsExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("Multi class Classification Metrics Example");
+    SparkContext sc = new SparkContext(conf);
+    // $example on$
+    String path = "data/mllib/sample_multiclass_classification_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();
+
+    // Split the initial RDD into training (60%) and test (40%) sets.
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.6, 0.4}, 11L);
+    JavaRDD<LabeledPoint> training = splits[0].cache();
+    JavaRDD<LabeledPoint> test = splits[1];
+
+    // Run training algorithm to build the model.
+    LogisticRegressionModel model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(3)
+      .run(training.rdd());
+
+    // Compute raw scores on the test set.
+    JavaPairRDD<Object, Object> predictionAndLabels = test.mapToPair(p ->
+      new Tuple2<>(model.predict(p.features()), p.label()));
+
+    // Get evaluation metrics.
+    MulticlassMetrics metrics = new MulticlassMetrics(predictionAndLabels.rdd());
+
+    // Confusion matrix
+    Matrix confusion = metrics.confusionMatrix();
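+    // Rows of the confusion matrix are actual classes, columns are predicted classes (labels in ascending order).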
+    System.out.println("Confusion matrix: \n" + confusion);
+
+    // Overall statistics
+    System.out.println("Accuracy = " + metrics.accuracy());
+
+    // Stats by labels
+    for (int i = 0; i < metrics.labels().length; i++) {
+      System.out.format("Class %f precision = %f\n", metrics.labels()[i],metrics.precision(
+        metrics.labels()[i]));
+      System.out.format("Class %f recall = %f\n", metrics.labels()[i], metrics.recall(
+        metrics.labels()[i]));
+      System.out.format("Class %f F1 score = %f\n", metrics.labels()[i], metrics.fMeasure(
+        metrics.labels()[i]));
+    }
+
+    // Weighted stats
+    System.out.format("Weighted precision = %f\n", metrics.weightedPrecision());
+    System.out.format("Weighted recall = %f\n", metrics.weightedRecall());
+    System.out.format("Weighted F1 score = %f\n", metrics.weightedFMeasure());
+    System.out.format("Weighted false positive rate = %f\n", metrics.weightedFalsePositiveRate());
+
+    // Save and load model
+    model.save(sc, "target/tmp/LogisticRegressionModel");
+    LogisticRegressionModel sameModel = LogisticRegressionModel.load(sc,
+      "target/tmp/LogisticRegressionModel");
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..d80dbe80000b3dfd0d25f864898f8c156c035a2e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaNaiveBayesExample.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import scala.Tuple2;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.classification.NaiveBayes;
+import org.apache.spark.mllib.classification.NaiveBayesModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+import org.apache.spark.SparkConf;
+
+public class JavaNaiveBayesExample {
+  public static void main(String[] args) {
+    SparkConf sparkConf = new SparkConf().setAppName("JavaNaiveBayesExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+    // $example on$
+    String path = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> inputData = MLUtils.loadLibSVMFile(jsc.sc(), path).toJavaRDD();
+    JavaRDD<LabeledPoint>[] tmp = inputData.randomSplit(new double[]{0.6, 0.4});
+    JavaRDD<LabeledPoint> training = tmp[0]; // training set
+    JavaRDD<LabeledPoint> test = tmp[1]; // test set
+    NaiveBayesModel model = NaiveBayes.train(training.rdd(), 1.0);
+    JavaPairRDD<Double, Double> predictionAndLabel =
+      test.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
+    double accuracy =
+      predictionAndLabel.filter(pl -> pl._1().equals(pl._2())).count() / (double) test.count();
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myNaiveBayesModel");
+    NaiveBayesModel sameModel = NaiveBayesModel.load(jsc.sc(), "target/tmp/myNaiveBayesModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPCAExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPCAExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..0a7dc621e1110a99332d39c17151d2251a71497c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPCAExample.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+// $example off$
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+// $example on$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+// $example off$
+
+/**
+ * Example for computing principal components on a 'RowMatrix'.
+ */
+public class JavaPCAExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("PCA Example");
+    SparkContext sc = new SparkContext(conf);
+    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+    // $example on$
+    List<Vector> data = Arrays.asList(
+            Vectors.sparse(5, new int[] {1, 3}, new double[] {1.0, 7.0}),
+            Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+            Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
+    );
+
+    JavaRDD<Vector> rows = jsc.parallelize(data);
+
+    // Create a RowMatrix from JavaRDD<Vector>.
+    RowMatrix mat = new RowMatrix(rows.rdd());
+
+    // Compute the top 4 principal components.
+    // Principal components are stored in a local dense matrix.
+    Matrix pc = mat.computePrincipalComponents(4);
+
+    // Project the rows to the linear space spanned by the top 4 principal components.
+    RowMatrix projected = mat.multiply(pc);
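+    // 'projected' has the same number of rows as 'mat' and one column per principal component (4 here).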
+    // $example off$
+    Vector[] collectPartitions = (Vector[]) projected.rows().collect();
+    System.out.println("Rows projected onto the top principal components:");
+    for (Vector vector : collectPartitions) {
+      System.out.println("\t" + vector);
+    }
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..5155f182ba20e608dcc0670b1121fdf54f1330fb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPowerIterationClusteringExample.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import java.util.Arrays;
+
+import scala.Tuple3;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.mllib.clustering.PowerIterationClustering;
+import org.apache.spark.mllib.clustering.PowerIterationClusteringModel;
+// $example off$
+
+/**
+ * Java example for graph clustering using power iteration clustering (PIC).
+ */
+public class JavaPowerIterationClusteringExample {
+  public static void main(String[] args) {
+    SparkConf sparkConf = new SparkConf().setAppName("JavaPowerIterationClusteringExample");
+    JavaSparkContext sc = new JavaSparkContext(sparkConf);
+
+    @SuppressWarnings("unchecked")
+    // $example on$
+    JavaRDD<Tuple3<Long, Long, Double>> similarities = sc.parallelize(Arrays.asList(
+      new Tuple3<>(0L, 1L, 0.9),
+      new Tuple3<>(1L, 2L, 0.9),
+      new Tuple3<>(2L, 3L, 0.9),
+      new Tuple3<>(3L, 4L, 0.1),
+      new Tuple3<>(4L, 5L, 0.9)));
+
+    PowerIterationClustering pic = new PowerIterationClustering()
+      .setK(2)
+      .setMaxIterations(10);
+    PowerIterationClusteringModel model = pic.run(similarities);
+
+    for (PowerIterationClustering.Assignment a: model.assignments().toJavaRDD().collect()) {
+      System.out.println(a.id() + " -> " + a.cluster());
+    }
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPrefixSpanExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPrefixSpanExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..16340759412916245fe7fcf94ee268fa51ceca7a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaPrefixSpanExample.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+// $example off$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import org.apache.spark.mllib.fpm.PrefixSpan;
+import org.apache.spark.mllib.fpm.PrefixSpanModel;
+// $example off$
+import org.apache.spark.SparkConf;
+
+public class JavaPrefixSpanExample {
+
+  public static void main(String[] args) {
+
+    SparkConf sparkConf = new SparkConf().setAppName("JavaPrefixSpanExample");
+    JavaSparkContext sc = new JavaSparkContext(sparkConf);
+
+    // $example on$
+    JavaRDD<List<List<Integer>>> sequences = sc.parallelize(Arrays.asList(
+      Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3)),
+      Arrays.asList(Arrays.asList(1), Arrays.asList(3, 2), Arrays.asList(1, 2)),
+      Arrays.asList(Arrays.asList(1, 2), Arrays.asList(5)),
+      Arrays.asList(Arrays.asList(6))
+    ), 2);
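+    // Each sequence above is an ordered list of itemsets (lists of items); the 2 passed to
+    // parallelize is the number of partitions.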
+    PrefixSpan prefixSpan = new PrefixSpan()
+      .setMinSupport(0.5)
+      .setMaxPatternLength(5);
+    PrefixSpanModel<Integer> model = prefixSpan.run(sequences);
+    for (PrefixSpan.FreqSequence<Integer> freqSeq: model.freqSequences().toJavaRDD().collect()) {
+      System.out.println(freqSeq.javaSequence() + ", " + freqSeq.freq());
+    }
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestClassificationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestClassificationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..6998ce2156c25d26c7a6f9b5e22439f5846dce1d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestClassificationExample.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.HashMap;
+import java.util.Map;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.tree.RandomForest;
+import org.apache.spark.mllib.tree.model.RandomForestModel;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+public class JavaRandomForestClassificationExample {
+  public static void main(String[] args) {
+    // $example on$
+    SparkConf sparkConf = new SparkConf().setAppName("JavaRandomForestClassificationExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+    // Load and parse the data file.
+    String datapath = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
+    // Split the data into training and test sets (30% held out for testing)
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    JavaRDD<LabeledPoint> trainingData = splits[0];
+    JavaRDD<LabeledPoint> testData = splits[1];
+
+    // Train a RandomForest model.
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    Integer numClasses = 2;
+    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
+    Integer numTrees = 3; // Use more in practice.
+    String featureSubsetStrategy = "auto"; // Let the algorithm choose.
+    String impurity = "gini";
+    Integer maxDepth = 5;
+    Integer maxBins = 32;
+    Integer seed = 12345;
+
+    RandomForestModel model = RandomForest.trainClassifier(trainingData, numClasses,
+      categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins,
+      seed);
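+    // The trained forest predicts by majority vote over its 'numTrees' decision trees.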
+
+    // Evaluate model on test instances and compute test error
+    JavaPairRDD<Double, Double> predictionAndLabel =
+      testData.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
+    double testErr =
+      predictionAndLabel.filter(pl -> !pl._1().equals(pl._2())).count() / (double) testData.count();
+    System.out.println("Test Error: " + testErr);
+    System.out.println("Learned classification forest model:\n" + model.toDebugString());
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myRandomForestClassificationModel");
+    RandomForestModel sameModel = RandomForestModel.load(jsc.sc(),
+      "target/tmp/myRandomForestClassificationModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestRegressionExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestRegressionExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..4a0f55f529801b0f598aa7c37285836bdd7d85a0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRandomForestRegressionExample.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.HashMap;
+import java.util.Map;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.tree.RandomForest;
+import org.apache.spark.mllib.tree.model.RandomForestModel;
+import org.apache.spark.mllib.util.MLUtils;
+import org.apache.spark.SparkConf;
+// $example off$
+
+public class JavaRandomForestRegressionExample {
+  public static void main(String[] args) {
+    // $example on$
+    SparkConf sparkConf = new SparkConf().setAppName("JavaRandomForestRegressionExample");
+    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+    // Load and parse the data file.
+    String datapath = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
+    // Split the data into training and test sets (30% held out for testing)
+    JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
+    JavaRDD<LabeledPoint> trainingData = splits[0];
+    JavaRDD<LabeledPoint> testData = splits[1];
+
+    // Set parameters.
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
+    int numTrees = 3; // Use more in practice.
+    String featureSubsetStrategy = "auto"; // Let the algorithm choose.
+    String impurity = "variance";
+    int maxDepth = 4;
+    int maxBins = 32;
+    int seed = 12345;
+    // Train a RandomForest model.
+    RandomForestModel model = RandomForest.trainRegressor(trainingData,
+      categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins, seed);
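+    // For regression, the forest's prediction is the average of the individual trees' predictions.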
+
+    // Evaluate model on test instances and compute test error
+    JavaPairRDD<Double, Double> predictionAndLabel =
+      testData.mapToPair(p -> new Tuple2<>(model.predict(p.features()), p.label()));
+    double testMSE = predictionAndLabel.mapToDouble(pl -> {
+      double diff = pl._1() - pl._2();
+      return diff * diff;
+    }).mean();
+    System.out.println("Test Mean Squared Error: " + testMSE);
+    System.out.println("Learned regression forest model:\n" + model.toDebugString());
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myRandomForestRegressionModel");
+    RandomForestModel sameModel = RandomForestModel.load(jsc.sc(),
+      "target/tmp/myRandomForestRegressionModel");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRankingMetricsExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRankingMetricsExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..dc9970d8852740cc94069ab8b6fc5dd495f0aa88
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRankingMetricsExample.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.*;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.mllib.evaluation.RegressionMetrics;
+import org.apache.spark.mllib.evaluation.RankingMetrics;
+import org.apache.spark.mllib.recommendation.ALS;
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
+import org.apache.spark.mllib.recommendation.Rating;
+// $example off$
+import org.apache.spark.SparkConf;
+
+public class JavaRankingMetricsExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("Java Ranking Metrics Example");
+    JavaSparkContext sc = new JavaSparkContext(conf);
+    // $example on$
+    String path = "data/mllib/sample_movielens_data.txt";
+    JavaRDD<String> data = sc.textFile(path);
+    JavaRDD<Rating> ratings = data.map(line -> {
+        String[] parts = line.split("::");
+        return new Rating(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]),
+            Double.parseDouble(parts[2]) - 2.5);
+      });
+    ratings.cache();
+
+    // Train an ALS model
+    MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), 10, 10, 0.01);
+
+    // Get the top 10 recommendations for every user and clamp the predicted ratings to [0, 1]
+    JavaRDD<Tuple2<Object, Rating[]>> userRecs = model.recommendProductsForUsers(10).toJavaRDD();
+    JavaRDD<Tuple2<Object, Rating[]>> userRecsScaled = userRecs.map(t -> {
+        Rating[] scaledRatings = new Rating[t._2().length];
+        for (int i = 0; i < scaledRatings.length; i++) {
+          double newRating = Math.max(Math.min(t._2()[i].rating(), 1.0), 0.0);
+          scaledRatings[i] = new Rating(t._2()[i].user(), t._2()[i].product(), newRating);
+        }
+        return new Tuple2<>(t._1(), scaledRatings);
+      });
+    JavaPairRDD<Object, Rating[]> userRecommended = JavaPairRDD.fromJavaRDD(userRecsScaled);
+
+    // Map ratings to 1 or 0, 1 indicating a movie that should be recommended
+    JavaRDD<Rating> binarizedRatings = ratings.map(r -> {
+        double binaryRating;
+        if (r.rating() > 0.0) {
+          binaryRating = 1.0;
+        } else {
+          binaryRating = 0.0;
+        }
+        return new Rating(r.user(), r.product(), binaryRating);
+      });
+
+    // Group ratings by common user
+    JavaPairRDD<Object, Iterable<Rating>> userMovies = binarizedRatings.groupBy(Rating::user);
+
+    // Get true relevant documents from all user ratings
+    JavaPairRDD<Object, List<Integer>> userMoviesList = userMovies.mapValues(docs -> {
+        List<Integer> products = new ArrayList<>();
+        for (Rating r : docs) {
+          if (r.rating() > 0.0) {
+            products.add(r.product());
+          }
+        }
+        return products;
+      });
+
+    // Extract the product id from each recommendation
+    JavaPairRDD<Object, List<Integer>> userRecommendedList = userRecommended.mapValues(docs -> {
+        List<Integer> products = new ArrayList<>();
+        for (Rating r : docs) {
+          products.add(r.product());
+        }
+        return products;
+      });
+    JavaRDD<Tuple2<List<Integer>, List<Integer>>> relevantDocs = userMoviesList.join(
+      userRecommendedList).values();
+
+    // Instantiate the metrics object
+    RankingMetrics<Integer> metrics = RankingMetrics.of(relevantDocs);
+
+    // Precision and NDCG at k
+    Integer[] kVector = {1, 3, 5};
+    for (Integer k : kVector) {
+      System.out.format("Precision at %d = %f\n", k, metrics.precisionAt(k));
+      System.out.format("NDCG at %d = %f\n", k, metrics.ndcgAt(k));
+    }
+
+    // Mean average precision
+    System.out.format("Mean average precision = %f\n", metrics.meanAveragePrecision());
+
+    // Evaluate the model using numerical ratings and regression metrics
+    JavaRDD<Tuple2<Object, Object>> userProducts =
+        ratings.map(r -> new Tuple2<>(r.user(), r.product()));
+
+    JavaPairRDD<Tuple2<Integer, Integer>, Object> predictions = JavaPairRDD.fromJavaRDD(
+      model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map(r ->
+        new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating())));
+    JavaRDD<Tuple2<Object, Object>> ratesAndPreds =
+      JavaPairRDD.fromJavaRDD(ratings.map(r ->
+        new Tuple2<Tuple2<Integer, Integer>, Object>(
+          new Tuple2<>(r.user(), r.product()),
+          r.rating())
+      )).join(predictions).values();
+
+    // Create regression metrics object
+    RegressionMetrics regressionMetrics = new RegressionMetrics(ratesAndPreds.rdd());
+
+    // Root mean squared error
+    System.out.format("RMSE = %f\n", regressionMetrics.rootMeanSquaredError());
+
+    // R-squared
+    System.out.format("R-squared = %f\n", regressionMetrics.r2());
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..1ee68da35e81a3d031ac0757d8307f08753e7100
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRecommendationExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.mllib.recommendation.ALS;
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
+import org.apache.spark.mllib.recommendation.Rating;
+import org.apache.spark.SparkConf;
+// $example off$
+
+public class JavaRecommendationExample {
+  public static void main(String[] args) {
+    // $example on$
+    SparkConf conf = new SparkConf().setAppName("Java Collaborative Filtering Example");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // Load and parse the data
+    String path = "data/mllib/als/test.data";
+    JavaRDD<String> data = jsc.textFile(path);
+    JavaRDD<Rating> ratings = data.map(s -> {
+      String[] sarray = s.split(",");
+      return new Rating(Integer.parseInt(sarray[0]),
+        Integer.parseInt(sarray[1]),
+        Double.parseDouble(sarray[2]));
+    });
+
+    // Build the recommendation model using ALS
+    int rank = 10;
+    int numIterations = 10;
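+    // rank is the number of latent factors; the 0.01 passed below is the regularization parameter (lambda).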
+    MatrixFactorizationModel model = ALS.train(JavaRDD.toRDD(ratings), rank, numIterations, 0.01);
+
+    // Evaluate the model on rating data
+    JavaRDD<Tuple2<Object, Object>> userProducts =
+      ratings.map(r -> new Tuple2<>(r.user(), r.product()));
+    JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(
+      model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD()
+          .map(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating()))
+    );
+    JavaRDD<Tuple2<Double, Double>> ratesAndPreds = JavaPairRDD.fromJavaRDD(
+        ratings.map(r -> new Tuple2<>(new Tuple2<>(r.user(), r.product()), r.rating())))
+      .join(predictions).values();
+    double MSE = ratesAndPreds.mapToDouble(pair -> {
+      double err = pair._1() - pair._2();
+      return err * err;
+    }).mean();
+    System.out.println("Mean Squared Error = " + MSE);
+
+    // Save and load model
+    model.save(jsc.sc(), "target/tmp/myCollaborativeFilter");
+    MatrixFactorizationModel sameModel = MatrixFactorizationModel.load(jsc.sc(),
+      "target/tmp/myCollaborativeFilter");
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..00033b5730a3d6513c4a62d3bd576b5d334faf3e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.*;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.regression.LinearRegressionModel;
+import org.apache.spark.mllib.regression.LinearRegressionWithSGD;
+import org.apache.spark.mllib.evaluation.RegressionMetrics;
+import org.apache.spark.SparkConf;
+// $example off$
+
+public class JavaRegressionMetricsExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("Java Regression Metrics Example");
+    JavaSparkContext sc = new JavaSparkContext(conf);
+    // $example on$
+    // Load and parse the data
+    String path = "data/mllib/sample_linear_regression_data.txt";
+    JavaRDD<String> data = sc.textFile(path);
+    JavaRDD<LabeledPoint> parsedData = data.map(line -> {
+      String[] parts = line.split(" ");
+      double[] v = new double[parts.length - 1];
+      for (int i = 1; i < parts.length; i++) {
+        v[i - 1] = Double.parseDouble(parts[i].split(":")[1]);
+      }
+      return new LabeledPoint(Double.parseDouble(parts[0]), Vectors.dense(v));
+    });
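+    // Each input line is "label index:value index:value ..."; the parser above drops the indices
+    // and assumes dense, ordered features.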
+    parsedData.cache();
+
+    // Building the model
+    int numIterations = 100;
+    LinearRegressionModel model = LinearRegressionWithSGD.train(JavaRDD.toRDD(parsedData),
+      numIterations);
+
+    // Evaluate model on training examples and compute training error
+    JavaPairRDD<Object, Object> valuesAndPreds = parsedData.mapToPair(point ->
+      new Tuple2<>(model.predict(point.features()), point.label()));
+
+    // Instantiate metrics object
+    RegressionMetrics metrics = new RegressionMetrics(valuesAndPreds.rdd());
+
+    // Squared error
+    System.out.format("MSE = %f\n", metrics.meanSquaredError());
+    System.out.format("RMSE = %f\n", metrics.rootMeanSquaredError());
+
+    // R-squared
+    System.out.format("R Squared = %f\n", metrics.r2());
+
+    // Mean absolute error
+    System.out.format("MAE = %f\n", metrics.meanAbsoluteError());
+
+    // Explained variance
+    System.out.format("Explained Variance = %f\n", metrics.explainedVariance());
+
+    // Save and load model
+    model.save(sc.sc(), "target/tmp/javaLinearRegressionModel");
+    LinearRegressionModel sameModel = LinearRegressionModel.load(sc.sc(),
+      "target/tmp/javaLinearRegressionModel");
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSVDExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSVDExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..802be3960a3378a901a0ea68ea34a508a9aefe5d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSVDExample.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+// $example off$
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+// $example on$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.linalg.Matrix;
+import org.apache.spark.mllib.linalg.SingularValueDecomposition;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.linalg.distributed.RowMatrix;
+// $example off$
+
+/**
+ * Example for SingularValueDecomposition.
+ */
+public class JavaSVDExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("SVD Example");
+    SparkContext sc = new SparkContext(conf);
+    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
+
+    // $example on$
+    List<Vector> data = Arrays.asList(
+            Vectors.sparse(5, new int[] {1, 3}, new double[] {1.0, 7.0}),
+            Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+            Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
+    );
+
+    JavaRDD<Vector> rows = jsc.parallelize(data);
+
+    // Create a RowMatrix from JavaRDD<Vector>.
+    RowMatrix mat = new RowMatrix(rows.rdd());
+
+    // Compute the top 5 singular values and corresponding singular vectors.
+    SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(5, true, 1.0E-9d);
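+    // Arguments: k = 5 requests the top 5 singular values/vectors, computeU = true also
+    // computes the U factor, and rCond = 1e-9 is the reciprocal condition number below which
+    // (relative to the largest singular value) smaller singular values are treated as zero.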
+    RowMatrix U = svd.U();  // The U factor is a RowMatrix.
+    Vector s = svd.s();     // The singular values are stored in a local dense vector.
+    Matrix V = svd.V();     // The V factor is a local dense matrix.
+    // $example off$
+    Vector[] collectPartitions = (Vector[]) U.rows().collect();
+    System.out.println("U factor is:");
+    for (Vector vector : collectPartitions) {
+      System.out.println("\t" + vector);
+    }
+    System.out.println("Singular values are: " + s);
+    System.out.println("V factor is:\n" + V);
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSVMWithSGDExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSVMWithSGDExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..866a221fdb592012fbca855d4053c3058fc7f48f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSVMWithSGDExample.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.SparkContext;
+
+// $example on$
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.classification.SVMModel;
+import org.apache.spark.mllib.classification.SVMWithSGD;
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+// $example off$
+
+/**
+ * Example for SVMWithSGD.
+ */
+public class JavaSVMWithSGDExample {
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaSVMWithSGDExample");
+    SparkContext sc = new SparkContext(conf);
+    // $example on$
+    String path = "data/mllib/sample_libsvm_data.txt";
+    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();
+
+    // Split the initial RDD into two: 60% training data and 40% testing data.
+    JavaRDD<LabeledPoint> training = data.sample(false, 0.6, 11L);
+    training.cache();
+    JavaRDD<LabeledPoint> test = data.subtract(training);
+
+    // Run training algorithm to build the model.
+    int numIterations = 100;
+    SVMModel model = SVMWithSGD.train(training.rdd(), numIterations);
+
+    // Clear the default threshold.
+    model.clearThreshold();
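+    // With the threshold cleared, predict() returns raw margin scores instead of 0/1 labels,
+    // which is what BinaryClassificationMetrics needs to build the ROC curve.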
+
+    // Compute raw scores on the test set.
+    JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map(p ->
+      new Tuple2<>(model.predict(p.features()), p.label()));
+
+    // Get evaluation metrics.
+    BinaryClassificationMetrics metrics =
+      new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels));
+    double auROC = metrics.areaUnderROC();
+
+    System.out.println("Area under ROC = " + auROC);
+
+    // Save and load model
+    model.save(sc, "target/tmp/javaSVMWithSGDModel");
+    SVMModel sameModel = SVMModel.load(sc, "target/tmp/javaSVMWithSGDModel");
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSimpleFPGrowth.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSimpleFPGrowth.java
new file mode 100644
index 0000000000000000000000000000000000000000..f9198e75c2ff50c49c889c823f573185da544096
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSimpleFPGrowth.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.mllib.fpm.AssociationRules;
+import org.apache.spark.mllib.fpm.FPGrowth;
+import org.apache.spark.mllib.fpm.FPGrowthModel;
+// $example off$
+
+import org.apache.spark.SparkConf;
+
+public class JavaSimpleFPGrowth {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("FP-growth Example");
+    JavaSparkContext sc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaRDD<String> data = sc.textFile("data/mllib/sample_fpgrowth.txt");
+
+    JavaRDD<List<String>> transactions = data.map(line -> Arrays.asList(line.split(" ")));
+
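+    // minSupport = 0.2 keeps only itemsets that appear in at least 20% of the transactions;
+    // numPartitions controls the parallelism of the FP-growth computation.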
+    FPGrowth fpg = new FPGrowth()
+      .setMinSupport(0.2)
+      .setNumPartitions(10);
+    FPGrowthModel<String> model = fpg.run(transactions);
+
+    for (FPGrowth.FreqItemset<String> itemset: model.freqItemsets().toJavaRDD().collect()) {
+      System.out.println("[" + itemset.javaItems() + "], " + itemset.freq());
+    }
+
+    double minConfidence = 0.8;
+    for (AssociationRules.Rule<String> rule
+      : model.generateAssociationRules(minConfidence).toJavaRDD().collect()) {
+      System.out.println(
+        rule.javaAntecedent() + " => " + rule.javaConsequent() + ", " + rule.confidence());
+    }
+    // $example off$
+
+    sc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..286b95cfbc33dd73d0cc15513c2e9512707c51d0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaStratifiedSamplingExample.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+
+// $example on$
+import java.util.*;
+
+import scala.Tuple2;
+
+import org.apache.spark.api.java.JavaPairRDD;
+// $example off$
+
+public class JavaStratifiedSamplingExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaStratifiedSamplingExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    @SuppressWarnings("unchecked")
+    // $example on$
+    List<Tuple2<Integer, Character>> list = Arrays.asList(
+        new Tuple2<>(1, 'a'),
+        new Tuple2<>(1, 'b'),
+        new Tuple2<>(2, 'c'),
+        new Tuple2<>(2, 'd'),
+        new Tuple2<>(2, 'e'),
+        new Tuple2<>(3, 'f')
+    );
+
+    JavaPairRDD<Integer, Character> data = jsc.parallelizePairs(list);
+
+    // Specify the exact fraction desired from each key as a Map<K, Double>
+    ImmutableMap<Integer, Double> fractions = ImmutableMap.of(1, 0.1, 2, 0.6, 3, 0.3);
+
+    // Get an approximate sample from each stratum
+    JavaPairRDD<Integer, Character> approxSample = data.sampleByKey(false, fractions);
+    // Get an exact sample from each stratum
+    JavaPairRDD<Integer, Character> exactSample = data.sampleByKeyExact(false, fractions);
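+    // sampleByKey uses a single pass and only approximates the requested fractions, while
+    // sampleByKeyExact takes additional passes over the data to return exactly
+    // ceil(fraction * count) items for each key.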
+    // $example off$
+
+    System.out.println("approxSample size is " + approxSample.collect().size());
+    for (Tuple2<Integer, Character> t : approxSample.collect()) {
+      System.out.println(t._1() + " " + t._2());
+    }
+
+    System.out.println("exactSample size is " + exactSample.collect().size());
+    for (Tuple2<Integer, Character> t : exactSample.collect()) {
+      System.out.println(t._1() + " " + t._2());
+    }
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaStreamingTestExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaStreamingTestExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..4be702c2ba6ad202952adda32de7f06f7abd288c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaStreamingTestExample.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+// $example on$
+import org.apache.spark.mllib.stat.test.BinarySample;
+import org.apache.spark.mllib.stat.test.StreamingTest;
+import org.apache.spark.mllib.stat.test.StreamingTestResult;
+// $example off$
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.Seconds;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.util.Utils;
+
+
+/**
+ * Performs a streaming significance test using Welch's 2-sample t-test on a stream of data, where
+ * the data stream arrives as text files in a directory. Stops when the difference between the two
+ * groups becomes statistically significant (p-value < 0.05) or after a user-specified timeout,
+ * measured in number of batches, is exceeded.
+ *
+ * The rows of the text files must be in the form `Boolean, Double`. For example:
+ *   false, -3.92
+ *   true, 99.32
+ *
+ * Usage:
+ *   JavaStreamingTestExample <dataDir> <batchDuration> <numBatchesTimeout>
+ *
+ * To run on your local machine using the directory `dataDir` with 5 seconds between each batch and
+ * a timeout after 100 insignificant batches, call:
+ *    $ bin/run-example mllib.JavaStreamingTestExample dataDir 5 100
+ *
+ * As you add text files to `dataDir`, the significance test will continually update every
+ * `batchDuration` seconds until the test becomes significant (p-value < 0.05) or the number of
+ * batches processed exceeds `numBatchesTimeout`.
+ */
+public class JavaStreamingTestExample {
+
+  private static int timeoutCounter = 0;
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 3) {
+      System.err.println("Usage: JavaStreamingTestExample " +
+        "<dataDir> <batchDuration> <numBatchesTimeout>");
+      System.exit(1);
+    }
+
+    String dataDir = args[0];
+    Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
+    int numBatchesTimeout = Integer.parseInt(args[2]);
+
+    SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
+    JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);
+
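+    // A temporary checkpoint directory is set up because the streaming test keeps running
+    // state across batches.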
+    ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());
+
+    // $example on$
+    JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(line -> {
+      String[] ts = line.split(",");
+      boolean label = Boolean.parseBoolean(ts[0]);
+      double value = Double.parseDouble(ts[1]);
+      return new BinarySample(label, value);
+    });
+
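+    // peacePeriod = 0 means no initial batches are dropped as burn-in, and windowSize = 0
+    // means the test is computed cumulatively over all data seen so far.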
+    StreamingTest streamingTest = new StreamingTest()
+      .setPeacePeriod(0)
+      .setWindowSize(0)
+      .setTestMethod("welch");
+
+    JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
+    out.print();
+    // $example off$
+
+    // Stop processing if test becomes significant or we time out
+    timeoutCounter = numBatchesTimeout;
+
+    out.foreachRDD(rdd -> {
+      timeoutCounter -= 1;
+      boolean anySignificant = !rdd.filter(v -> v.pValue() < 0.05).isEmpty();
+      if (timeoutCounter <= 0 || anySignificant) {
+        rdd.context().stop();
+      }
+    });
+
+    ssc.start();
+    ssc.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..278706bc8f6ed8924c0e6023ce9d72d826cd473e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/mllib/JavaSummaryStatisticsExample.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+// $example on$
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.mllib.stat.MultivariateStatisticalSummary;
+import org.apache.spark.mllib.stat.Statistics;
+// $example off$
+
+public class JavaSummaryStatisticsExample {
+  public static void main(String[] args) {
+
+    SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+
+    // $example on$
+    JavaRDD<Vector> mat = jsc.parallelize(
+      Arrays.asList(
+        Vectors.dense(1.0, 10.0, 100.0),
+        Vectors.dense(2.0, 20.0, 200.0),
+        Vectors.dense(3.0, 30.0, 300.0)
+      )
+    ); // an RDD of Vectors
+
+    // Compute column summary statistics.
+    MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd());
+    System.out.println(summary.mean());  // a dense vector containing the mean value for each column
+    System.out.println(summary.variance());  // column-wise variance
+    System.out.println(summary.numNonzeros());  // number of nonzeros in each column
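+    // The summary also exposes count(), max(), min(), normL1() and normL2() for each column.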
+    // $example off$
+
+    jsc.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..97e9ca311267cea6014395f792a2f0b447d5c767
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql;
+
+// $example on:schema_merging$
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+// $example off:schema_merging$
+import java.util.Properties;
+
+// $example on:basic_parquet_example$
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Encoders;
+// $example on:schema_merging$
+// $example on:json_dataset$
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off:json_dataset$
+// $example off:schema_merging$
+// $example off:basic_parquet_example$
+import org.apache.spark.sql.SparkSession;
+
+public class JavaSQLDataSourceExample {
+
+  // $example on:schema_merging$
+  public static class Square implements Serializable {
+    private int value;
+    private int square;
+
+    // Getters and setters...
+    // $example off:schema_merging$
+    public int getValue() {
+      return value;
+    }
+
+    public void setValue(int value) {
+      this.value = value;
+    }
+
+    public int getSquare() {
+      return square;
+    }
+
+    public void setSquare(int square) {
+      this.square = square;
+    }
+    // $example on:schema_merging$
+  }
+  // $example off:schema_merging$
+
+  // $example on:schema_merging$
+  public static class Cube implements Serializable {
+    private int value;
+    private int cube;
+
+    // Getters and setters...
+    // $example off:schema_merging$
+    public int getValue() {
+      return value;
+    }
+
+    public void setValue(int value) {
+      this.value = value;
+    }
+
+    public int getCube() {
+      return cube;
+    }
+
+    public void setCube(int cube) {
+      this.cube = cube;
+    }
+    // $example on:schema_merging$
+  }
+  // $example off:schema_merging$
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("Java Spark SQL data sources example")
+      .config("spark.some.config.option", "some-value")
+      .getOrCreate();
+
+    runBasicDataSourceExample(spark);
+    runBasicParquetExample(spark);
+    runParquetSchemaMergingExample(spark);
+    runJsonDatasetExample(spark);
+    runJdbcDatasetExample(spark);
+
+    spark.stop();
+  }
+
+  private static void runBasicDataSourceExample(SparkSession spark) {
+    // $example on:generic_load_save_functions$
+    Dataset<Row> usersDF = spark.read().load("examples/src/main/resources/users.parquet");
+    usersDF.select("name", "favorite_color").write().save("namesAndFavColors.parquet");
+    // $example off:generic_load_save_functions$
+    // $example on:manual_load_options$
+    Dataset<Row> peopleDF =
+      spark.read().format("json").load("examples/src/main/resources/people.json");
+    peopleDF.select("name", "age").write().format("parquet").save("namesAndAges.parquet");
+    // $example off:manual_load_options$
+    // $example on:manual_load_options_csv$
+    Dataset<Row> peopleDFCsv = spark.read().format("csv")
+      .option("sep", ";")
+      .option("inferSchema", "true")
+      .option("header", "true")
+      .load("examples/src/main/resources/people.csv");
+    // $example off:manual_load_options_csv$
+    // $example on:manual_save_options_orc$
+    usersDF.write().format("orc")
+      .option("orc.bloom.filter.columns", "favorite_color")
+      .option("orc.dictionary.key.threshold", "1.0")
+      .save("users_with_options.orc");
+    // $example off:manual_save_options_orc$
+    // $example on:direct_sql$
+    Dataset<Row> sqlDF =
+      spark.sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet`");
+    // $example off:direct_sql$
+    // $example on:write_sorting_and_bucketing$
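+    // Bucketing and sorting are applicable only to persistent tables, so the result is written
+    // with saveAsTable() rather than save().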
+    peopleDF.write().bucketBy(42, "name").sortBy("age").saveAsTable("people_bucketed");
+    // $example off:write_sorting_and_bucketing$
+    // $example on:write_partitioning$
+    usersDF
+      .write()
+      .partitionBy("favorite_color")
+      .format("parquet")
+      .save("namesPartByColor.parquet");
+    // $example off:write_partitioning$
+    // $example on:write_partition_and_bucket$
+    peopleDF
+      .write()
+      .partitionBy("favorite_color")
+      .bucketBy(42, "name")
+      .saveAsTable("people_partitioned_bucketed");
+    // $example off:write_partition_and_bucket$
+
+    spark.sql("DROP TABLE IF EXISTS people_bucketed");
+    spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed");
+  }
+
+  private static void runBasicParquetExample(SparkSession spark) {
+    // $example on:basic_parquet_example$
+    Dataset<Row> peopleDF = spark.read().json("examples/src/main/resources/people.json");
+
+    // DataFrames can be saved as Parquet files, maintaining the schema information
+    peopleDF.write().parquet("people.parquet");
+
+    // Read in the Parquet file created above.
+    // Parquet files are self-describing so the schema is preserved
+    // The result of loading a parquet file is also a DataFrame
+    Dataset<Row> parquetFileDF = spark.read().parquet("people.parquet");
+
+    // Parquet files can also be used to create a temporary view and then used in SQL statements
+    parquetFileDF.createOrReplaceTempView("parquetFile");
+    Dataset<Row> namesDF = spark.sql("SELECT name FROM parquetFile WHERE age BETWEEN 13 AND 19");
+    Dataset<String> namesDS = namesDF.map(
+        (MapFunction<Row, String>) row -> "Name: " + row.getString(0),
+        Encoders.STRING());
+    namesDS.show();
+    // +------------+
+    // |       value|
+    // +------------+
+    // |Name: Justin|
+    // +------------+
+    // $example off:basic_parquet_example$
+  }
+
+  private static void runParquetSchemaMergingExample(SparkSession spark) {
+    // $example on:schema_merging$
+    List<Square> squares = new ArrayList<>();
+    for (int value = 1; value <= 5; value++) {
+      Square square = new Square();
+      square.setValue(value);
+      square.setSquare(value * value);
+      squares.add(square);
+    }
+
+    // Create a simple DataFrame, store into a partition directory
+    Dataset<Row> squaresDF = spark.createDataFrame(squares, Square.class);
+    squaresDF.write().parquet("data/test_table/key=1");
+
+    List<Cube> cubes = new ArrayList<>();
+    for (int value = 6; value <= 10; value++) {
+      Cube cube = new Cube();
+      cube.setValue(value);
+      cube.setCube(value * value * value);
+      cubes.add(cube);
+    }
+
+    // Create another DataFrame in a new partition directory,
+    // adding a new column and dropping an existing column
+    Dataset<Row> cubesDF = spark.createDataFrame(cubes, Cube.class);
+    cubesDF.write().parquet("data/test_table/key=2");
+
+    // Read the partitioned table
+    Dataset<Row> mergedDF = spark.read().option("mergeSchema", true).parquet("data/test_table");
+    mergedDF.printSchema();
+
+    // The final schema consists of all 3 columns in the Parquet files together
+    // with the partitioning column that appears in the partition directory paths
+    // root
+    //  |-- value: int (nullable = true)
+    //  |-- square: int (nullable = true)
+    //  |-- cube: int (nullable = true)
+    //  |-- key: int (nullable = true)
+    // $example off:schema_merging$
+  }
+
+  private static void runJsonDatasetExample(SparkSession spark) {
+    // $example on:json_dataset$
+    // A JSON dataset is pointed to by path.
+    // The path can be either a single text file or a directory storing text files
+    Dataset<Row> people = spark.read().json("examples/src/main/resources/people.json");
+
+    // The inferred schema can be visualized using the printSchema() method
+    people.printSchema();
+    // root
+    //  |-- age: long (nullable = true)
+    //  |-- name: string (nullable = true)
+
+    // Creates a temporary view using the DataFrame
+    people.createOrReplaceTempView("people");
+
+    // SQL statements can be run by using the sql methods provided by spark
+    Dataset<Row> namesDF = spark.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19");
+    namesDF.show();
+    // +------+
+    // |  name|
+    // +------+
+    // |Justin|
+    // +------+
+
+    // Alternatively, a DataFrame can be created for a JSON dataset represented by
+    // a Dataset<String> storing one JSON object per string.
+    List<String> jsonData = Arrays.asList(
+            "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}");
+    Dataset<String> anotherPeopleDataset = spark.createDataset(jsonData, Encoders.STRING());
+    Dataset<Row> anotherPeople = spark.read().json(anotherPeopleDataset);
+    anotherPeople.show();
+    // +---------------+----+
+    // |        address|name|
+    // +---------------+----+
+    // |[Columbus,Ohio]| Yin|
+    // +---------------+----+
+    // $example off:json_dataset$
+  }
+
+  private static void runJdbcDatasetExample(SparkSession spark) {
+    // $example on:jdbc_dataset$
+    // Note: JDBC loading and saving can be achieved via either the load/save or jdbc methods
+    // Loading data from a JDBC source
+    Dataset<Row> jdbcDF = spark.read()
+      .format("jdbc")
+      .option("url", "jdbc:postgresql:dbserver")
+      .option("dbtable", "schema.tablename")
+      .option("user", "username")
+      .option("password", "password")
+      .load();
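+    // For parallel reads, the partitionColumn, lowerBound, upperBound and numPartitions options
+    // can additionally be set to split the table across executors.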
+
+    Properties connectionProperties = new Properties();
+    connectionProperties.put("user", "username");
+    connectionProperties.put("password", "password");
+    Dataset<Row> jdbcDF2 = spark.read()
+      .jdbc("jdbc:postgresql:dbserver", "schema.tablename", connectionProperties);
+
+    // Saving data to a JDBC source
+    jdbcDF.write()
+      .format("jdbc")
+      .option("url", "jdbc:postgresql:dbserver")
+      .option("dbtable", "schema.tablename")
+      .option("user", "username")
+      .option("password", "password")
+      .save();
+
+    jdbcDF2.write()
+      .jdbc("jdbc:postgresql:dbserver", "schema.tablename", connectionProperties);
+
+    // Specifying create table column data types on write
+    jdbcDF.write()
+      .option("createTableColumnTypes", "name CHAR(64), comments VARCHAR(1024)")
+      .jdbc("jdbc:postgresql:dbserver", "schema.tablename", connectionProperties);
+    // $example off:jdbc_dataset$
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..8605852d0881ccc798da4f0a5e7b4e2afba0410b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQLExample.java
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql;
+
+// $example on:programmatic_schema$
+import java.util.ArrayList;
+import java.util.List;
+// $example off:programmatic_schema$
+// $example on:create_ds$
+import java.util.Arrays;
+import java.util.Collections;
+import java.io.Serializable;
+// $example off:create_ds$
+
+// $example on:schema_inferring$
+// $example on:programmatic_schema$
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.function.Function;
+// $example off:programmatic_schema$
+// $example on:create_ds$
+import org.apache.spark.api.java.function.MapFunction;
+// $example on:create_df$
+// $example on:run_sql$
+// $example on:programmatic_schema$
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+// $example off:programmatic_schema$
+// $example off:create_df$
+// $example off:run_sql$
+import org.apache.spark.sql.Encoder;
+import org.apache.spark.sql.Encoders;
+// $example off:create_ds$
+// $example off:schema_inferring$
+import org.apache.spark.sql.RowFactory;
+// $example on:init_session$
+import org.apache.spark.sql.SparkSession;
+// $example off:init_session$
+// $example on:programmatic_schema$
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off:programmatic_schema$
+import org.apache.spark.sql.AnalysisException;
+
+// $example on:untyped_ops$
+// col("...") is preferable to df.col("...")
+import static org.apache.spark.sql.functions.col;
+// $example off:untyped_ops$
+
+public class JavaSparkSQLExample {
+  // $example on:create_ds$
+  public static class Person implements Serializable {
+    private String name;
+    private int age;
+
+    public String getName() {
+      return name;
+    }
+
+    public void setName(String name) {
+      this.name = name;
+    }
+
+    public int getAge() {
+      return age;
+    }
+
+    public void setAge(int age) {
+      this.age = age;
+    }
+  }
+  // $example off:create_ds$
+
+  public static void main(String[] args) throws AnalysisException {
+    // $example on:init_session$
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("Java Spark SQL basic example")
+      .config("spark.some.config.option", "some-value")
+      .getOrCreate();
+    // $example off:init_session$
+
+    runBasicDataFrameExample(spark);
+    runDatasetCreationExample(spark);
+    runInferSchemaExample(spark);
+    runProgrammaticSchemaExample(spark);
+
+    spark.stop();
+  }
+
+  private static void runBasicDataFrameExample(SparkSession spark) throws AnalysisException {
+    // $example on:create_df$
+    Dataset<Row> df = spark.read().json("examples/src/main/resources/people.json");
+
+    // Displays the content of the DataFrame to stdout
+    df.show();
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:create_df$
+
+    // $example on:untyped_ops$
+    // Print the schema in a tree format
+    df.printSchema();
+    // root
+    // |-- age: long (nullable = true)
+    // |-- name: string (nullable = true)
+
+    // Select only the "name" column
+    df.select("name").show();
+    // +-------+
+    // |   name|
+    // +-------+
+    // |Michael|
+    // |   Andy|
+    // | Justin|
+    // +-------+
+
+    // Select everybody, but increment the age by 1
+    df.select(col("name"), col("age").plus(1)).show();
+    // +-------+---------+
+    // |   name|(age + 1)|
+    // +-------+---------+
+    // |Michael|     null|
+    // |   Andy|       31|
+    // | Justin|       20|
+    // +-------+---------+
+
+    // Select people older than 21
+    df.filter(col("age").gt(21)).show();
+    // +---+----+
+    // |age|name|
+    // +---+----+
+    // | 30|Andy|
+    // +---+----+
+
+    // Count people by age
+    df.groupBy("age").count().show();
+    // +----+-----+
+    // | age|count|
+    // +----+-----+
+    // |  19|    1|
+    // |null|    1|
+    // |  30|    1|
+    // +----+-----+
+    // $example off:untyped_ops$
+
+    // $example on:run_sql$
+    // Register the DataFrame as a SQL temporary view
+    df.createOrReplaceTempView("people");
+
+    Dataset<Row> sqlDF = spark.sql("SELECT * FROM people");
+    sqlDF.show();
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:run_sql$
+
+    // $example on:global_temp_view$
+    // Register the DataFrame as a global temporary view
+    df.createGlobalTempView("people");
+
+    // Global temporary view is tied to a system preserved database `global_temp`
+    spark.sql("SELECT * FROM global_temp.people").show();
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+
+    // Global temporary view is cross-session
+    spark.newSession().sql("SELECT * FROM global_temp.people").show();
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:global_temp_view$
+  }
+
+  private static void runDatasetCreationExample(SparkSession spark) {
+    // $example on:create_ds$
+    // Create an instance of a Bean class
+    Person person = new Person();
+    person.setName("Andy");
+    person.setAge(32);
+
+    // Encoders are created for Java beans
+    Encoder<Person> personEncoder = Encoders.bean(Person.class);
+    Dataset<Person> javaBeanDS = spark.createDataset(
+      Collections.singletonList(person),
+      personEncoder
+    );
+    javaBeanDS.show();
+    // +---+----+
+    // |age|name|
+    // +---+----+
+    // | 32|Andy|
+    // +---+----+
+
+    // Encoders for most common types are provided in class Encoders
+    Encoder<Integer> integerEncoder = Encoders.INT();
+    Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), integerEncoder);
+    Dataset<Integer> transformedDS = primitiveDS.map(
+        (MapFunction<Integer, Integer>) value -> value + 1,
+        integerEncoder);
+    transformedDS.collect(); // Returns [2, 3, 4]
+
+    // DataFrames can be converted to a Dataset by providing a class. Mapping is done by field name
+    String path = "examples/src/main/resources/people.json";
+    Dataset<Person> peopleDS = spark.read().json(path).as(personEncoder);
+    peopleDS.show();
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:create_ds$
+  }
+
+  private static void runInferSchemaExample(SparkSession spark) {
+    // $example on:schema_inferring$
+    // Create an RDD of Person objects from a text file
+    JavaRDD<Person> peopleRDD = spark.read()
+      .textFile("examples/src/main/resources/people.txt")
+      .javaRDD()
+      .map(line -> {
+        String[] parts = line.split(",");
+        Person person = new Person();
+        person.setName(parts[0]);
+        person.setAge(Integer.parseInt(parts[1].trim()));
+        return person;
+      });
+
+    // Apply a schema to an RDD of JavaBeans to get a DataFrame
+    Dataset<Row> peopleDF = spark.createDataFrame(peopleRDD, Person.class);
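+    // Column names and types are inferred from the Person bean's getters (here: age and name).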
+    // Register the DataFrame as a temporary view
+    peopleDF.createOrReplaceTempView("people");
+
+    // SQL statements can be run by using the sql methods provided by spark
+    Dataset<Row> teenagersDF = spark.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19");
+
+    // The columns of a row in the result can be accessed by field index
+    Encoder<String> stringEncoder = Encoders.STRING();
+    Dataset<String> teenagerNamesByIndexDF = teenagersDF.map(
+        (MapFunction<Row, String>) row -> "Name: " + row.getString(0),
+        stringEncoder);
+    teenagerNamesByIndexDF.show();
+    // +------------+
+    // |       value|
+    // +------------+
+    // |Name: Justin|
+    // +------------+
+
+    // or by field name
+    Dataset<String> teenagerNamesByFieldDF = teenagersDF.map(
+        (MapFunction<Row, String>) row -> "Name: " + row.<String>getAs("name"),
+        stringEncoder);
+    teenagerNamesByFieldDF.show();
+    // +------------+
+    // |       value|
+    // +------------+
+    // |Name: Justin|
+    // +------------+
+    // $example off:schema_inferring$
+  }
+
+  private static void runProgrammaticSchemaExample(SparkSession spark) {
+    // $example on:programmatic_schema$
+    // Create an RDD
+    JavaRDD<String> peopleRDD = spark.sparkContext()
+      .textFile("examples/src/main/resources/people.txt", 1)
+      .toJavaRDD();
+
+    // The schema is encoded in a string
+    String schemaString = "name age";
+
+    // Generate the schema based on the string of schema
+    List<StructField> fields = new ArrayList<>();
+    for (String fieldName : schemaString.split(" ")) {
+      StructField field = DataTypes.createStructField(fieldName, DataTypes.StringType, true);
+      fields.add(field);
+    }
+    StructType schema = DataTypes.createStructType(fields);
+
+    // Convert records of the RDD (people) to Rows
+    JavaRDD<Row> rowRDD = peopleRDD.map((Function<String, Row>) record -> {
+      String[] attributes = record.split(",");
+      return RowFactory.create(attributes[0], attributes[1].trim());
+    });
+
+    // Apply the schema to the RDD
+    Dataset<Row> peopleDataFrame = spark.createDataFrame(rowRDD, schema);
+
+    // Creates a temporary view using the DataFrame
+    peopleDataFrame.createOrReplaceTempView("people");
+
+    // SQL can be run over a temporary view created using DataFrames
+    Dataset<Row> results = spark.sql("SELECT name FROM people");
+
+    // The results of SQL queries are DataFrames and support all the normal RDD operations
+    // The columns of a row in the result can be accessed by field index or by field name
+    Dataset<String> namesDS = results.map(
+        (MapFunction<Row, String>) row -> "Name: " + row.getString(0),
+        Encoders.STRING());
+    namesDS.show();
+    // +-------------+
+    // |        value|
+    // +-------------+
+    // |Name: Michael|
+    // |   Name: Andy|
+    // | Name: Justin|
+    // +-------------+
+    // $example off:programmatic_schema$
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedTypedAggregation.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedTypedAggregation.java
new file mode 100644
index 0000000000000000000000000000000000000000..78e9011be47056346af5fd72b7cf86589c3b81d2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedTypedAggregation.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql;
+
+// $example on:typed_custom_aggregation$
+import java.io.Serializable;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.TypedColumn;
+import org.apache.spark.sql.expressions.Aggregator;
+// $example off:typed_custom_aggregation$
+
+public class JavaUserDefinedTypedAggregation {
+
+  // $example on:typed_custom_aggregation$
+  public static class Employee implements Serializable {
+    private String name;
+    private long salary;
+
+    // Constructors, getters, setters...
+    // $example off:typed_custom_aggregation$
+    public String getName() {
+      return name;
+    }
+
+    public void setName(String name) {
+      this.name = name;
+    }
+
+    public long getSalary() {
+      return salary;
+    }
+
+    public void setSalary(long salary) {
+      this.salary = salary;
+    }
+    // $example on:typed_custom_aggregation$
+  }
+
+  public static class Average implements Serializable  {
+    private long sum;
+    private long count;
+
+    // Constructors, getters, setters...
+    // $example off:typed_custom_aggregation$
+    public Average() {
+    }
+
+    public Average(long sum, long count) {
+      this.sum = sum;
+      this.count = count;
+    }
+
+    public long getSum() {
+      return sum;
+    }
+
+    public void setSum(long sum) {
+      this.sum = sum;
+    }
+
+    public long getCount() {
+      return count;
+    }
+
+    public void setCount(long count) {
+      this.count = count;
+    }
+    // $example on:typed_custom_aggregation$
+  }
+
+  public static class MyAverage extends Aggregator<Employee, Average, Double> {
+    // A zero value for this aggregation. Should satisfy the property that any b + zero = b
+    public Average zero() {
+      return new Average(0L, 0L);
+    }
+    // Combine two values to produce a new value. For performance, the function may modify `buffer`
+    // and return it instead of constructing a new object
+    public Average reduce(Average buffer, Employee employee) {
+      long newSum = buffer.getSum() + employee.getSalary();
+      long newCount = buffer.getCount() + 1;
+      buffer.setSum(newSum);
+      buffer.setCount(newCount);
+      return buffer;
+    }
+    // Merge two intermediate values
+    public Average merge(Average b1, Average b2) {
+      long mergedSum = b1.getSum() + b2.getSum();
+      long mergedCount = b1.getCount() + b2.getCount();
+      b1.setSum(mergedSum);
+      b1.setCount(mergedCount);
+      return b1;
+    }
+    // Transform the output of the reduction
+    public Double finish(Average reduction) {
+      return ((double) reduction.getSum()) / reduction.getCount();
+    }
+    // Specifies the Encoder for the intermediate value type
+    public Encoder<Average> bufferEncoder() {
+      return Encoders.bean(Average.class);
+    }
+    // Specifies the Encoder for the final output value type
+    public Encoder<Double> outputEncoder() {
+      return Encoders.DOUBLE();
+    }
+  }
+  // $example off:typed_custom_aggregation$
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("Java Spark SQL user-defined Datasets aggregation example")
+      .getOrCreate();
+
+    // $example on:typed_custom_aggregation$
+    Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
+    String path = "examples/src/main/resources/employees.json";
+    Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
+    ds.show();
+    // +-------+------+
+    // |   name|salary|
+    // +-------+------+
+    // |Michael|  3000|
+    // |   Andy|  4500|
+    // | Justin|  3500|
+    // |  Berta|  4000|
+    // +-------+------+
+
+    MyAverage myAverage = new MyAverage();
+    // Convert the function to a `TypedColumn` and give it a name
+    TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
+    Dataset<Double> result = ds.select(averageSalary);
+    result.show();
+    // +--------------+
+    // |average_salary|
+    // +--------------+
+    // |        3750.0|
+    // +--------------+
+    // $example off:typed_custom_aggregation$
+    spark.stop();
+  }
+
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedUntypedAggregation.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedUntypedAggregation.java
new file mode 100644
index 0000000000000000000000000000000000000000..6da60a1fc6b88bf079cc48b26285a92ecec3db29
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/JavaUserDefinedUntypedAggregation.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql;
+
+// $example on:untyped_custom_aggregation$
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.expressions.MutableAggregationBuffer;
+import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+// $example off:untyped_custom_aggregation$
+
+public class JavaUserDefinedUntypedAggregation {
+
+  // $example on:untyped_custom_aggregation$
+  public static class MyAverage extends UserDefinedAggregateFunction {
+
+    private StructType inputSchema;
+    private StructType bufferSchema;
+
+    public MyAverage() {
+      List<StructField> inputFields = new ArrayList<>();
+      inputFields.add(DataTypes.createStructField("inputColumn", DataTypes.LongType, true));
+      inputSchema = DataTypes.createStructType(inputFields);
+
+      List<StructField> bufferFields = new ArrayList<>();
+      bufferFields.add(DataTypes.createStructField("sum", DataTypes.LongType, true));
+      bufferFields.add(DataTypes.createStructField("count", DataTypes.LongType, true));
+      bufferSchema = DataTypes.createStructType(bufferFields);
+    }
+    // Data types of input arguments of this aggregate function
+    public StructType inputSchema() {
+      return inputSchema;
+    }
+    // Data types of values in the aggregation buffer
+    public StructType bufferSchema() {
+      return bufferSchema;
+    }
+    // The data type of the returned value
+    public DataType dataType() {
+      return DataTypes.DoubleType;
+    }
+    // Whether this function always returns the same output on the identical input
+    public boolean deterministic() {
+      return true;
+    }
+    // Initializes the given aggregation buffer. The buffer itself is a `Row` that in addition to
+    // standard methods like retrieving a value at an index (e.g., get(), getBoolean()), provides
+    // the opportunity to update its values. Note that arrays and maps inside the buffer are still
+    // immutable.
+    public void initialize(MutableAggregationBuffer buffer) {
+      buffer.update(0, 0L);
+      buffer.update(1, 0L);
+    }
+    // Updates the given aggregation buffer `buffer` with new input data from `input`
+    public void update(MutableAggregationBuffer buffer, Row input) {
+      if (!input.isNullAt(0)) {
+        long updatedSum = buffer.getLong(0) + input.getLong(0);
+        long updatedCount = buffer.getLong(1) + 1;
+        buffer.update(0, updatedSum);
+        buffer.update(1, updatedCount);
+      }
+    }
+    // Merges two aggregation buffers and stores the updated buffer values back to `buffer1`
+    public void merge(MutableAggregationBuffer buffer1, Row buffer2) {
+      long mergedSum = buffer1.getLong(0) + buffer2.getLong(0);
+      long mergedCount = buffer1.getLong(1) + buffer2.getLong(1);
+      buffer1.update(0, mergedSum);
+      buffer1.update(1, mergedCount);
+    }
+    // Calculates the final result
+    public Double evaluate(Row buffer) {
+      return ((double) buffer.getLong(0)) / buffer.getLong(1);
+    }
+  }
+  // $example off:untyped_custom_aggregation$
+
+  public static void main(String[] args) {
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("Java Spark SQL user-defined DataFrames aggregation example")
+      .getOrCreate();
+
+    // $example on:untyped_custom_aggregation$
+    // Register the function to access it
+    spark.udf().register("myAverage", new MyAverage());
+
+    Dataset<Row> df = spark.read().json("examples/src/main/resources/employees.json");
+    df.createOrReplaceTempView("employees");
+    df.show();
+    // +-------+------+
+    // |   name|salary|
+    // +-------+------+
+    // |Michael|  3000|
+    // |   Andy|  4500|
+    // | Justin|  3500|
+    // |  Berta|  4000|
+    // +-------+------+
+
+    Dataset<Row> result = spark.sql("SELECT myAverage(salary) as average_salary FROM employees");
+    result.show();
+    // +--------------+
+    // |average_salary|
+    // +--------------+
+    // |        3750.0|
+    // +--------------+
+    // $example off:untyped_custom_aggregation$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java
new file mode 100644
index 0000000000000000000000000000000000000000..575a463e8725fd1ed2ec28c103f39c512060510b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/hive/JavaSparkHiveExample.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql.hive;
+
+// $example on:spark_hive$
+import java.io.File;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+// $example off:spark_hive$
+
+public class JavaSparkHiveExample {
+
+  // $example on:spark_hive$
+  public static class Record implements Serializable {
+    private int key;
+    private String value;
+
+    public int getKey() {
+      return key;
+    }
+
+    public void setKey(int key) {
+      this.key = key;
+    }
+
+    public String getValue() {
+      return value;
+    }
+
+    public void setValue(String value) {
+      this.value = value;
+    }
+  }
+  // $example off:spark_hive$
+
+  public static void main(String[] args) {
+    // $example on:spark_hive$
+    // warehouseLocation points to the default location for managed databases and tables
+    String warehouseLocation = new File("spark-warehouse").getAbsolutePath();
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("Java Spark Hive Example")
+      .config("spark.sql.warehouse.dir", warehouseLocation)
+      .enableHiveSupport()
+      .getOrCreate();
+
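+    // "USING hive" creates the table with Hive's SerDe (plain text by default), so HiveQL
+    // statements such as the LOAD DATA below can be used against it.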
+    spark.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) USING hive");
+    spark.sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src");
+
+    // Queries are expressed in HiveQL
+    spark.sql("SELECT * FROM src").show();
+    // +---+-------+
+    // |key|  value|
+    // +---+-------+
+    // |238|val_238|
+    // | 86| val_86|
+    // |311|val_311|
+    // ...
+
+    // Aggregation queries are also supported.
+    spark.sql("SELECT COUNT(*) FROM src").show();
+    // +--------+
+    // |count(1)|
+    // +--------+
+    // |     500|
+    // +--------+
+
+    // The results of SQL queries are themselves DataFrames and support all normal functions.
+    Dataset<Row> sqlDF = spark.sql("SELECT key, value FROM src WHERE key < 10 ORDER BY key");
+
+    // The items in DataFrames are of type Row, which lets you access each column by ordinal.
+    Dataset<String> stringsDS = sqlDF.map(
+        (MapFunction<Row, String>) row -> "Key: " + row.get(0) + ", Value: " + row.get(1),
+        Encoders.STRING());
+    stringsDS.show();
+    // +--------------------+
+    // |               value|
+    // +--------------------+
+    // |Key: 0, Value: val_0|
+    // |Key: 0, Value: val_0|
+    // |Key: 0, Value: val_0|
+    // ...
+
+    // You can also use DataFrames to create temporary views within a SparkSession.
+    List<Record> records = new ArrayList<>();
+    for (int key = 1; key < 100; key++) {
+      Record record = new Record();
+      record.setKey(key);
+      record.setValue("val_" + key);
+      records.add(record);
+    }
+    Dataset<Row> recordsDF = spark.createDataFrame(records, Record.class);
+    recordsDF.createOrReplaceTempView("records");
+
+    // Queries can then join DataFrame data with data stored in Hive.
+    spark.sql("SELECT * FROM records r JOIN src s ON r.key = s.key").show();
+    // +---+------+---+------+
+    // |key| value|key| value|
+    // +---+------+---+------+
+    // |  2| val_2|  2| val_2|
+    // |  2| val_2|  2| val_2|
+    // |  4| val_4|  4| val_4|
+    // ...
+    // $example off:spark_hive$
+
+    spark.stop();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredKafkaWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredKafkaWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..4e02719e043adb4af303b43eb2302915050112c5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredKafkaWordCount.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.sql.streaming;
+
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.streaming.StreamingQuery;
+
+import java.util.Arrays;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaStructuredKafkaWordCount <bootstrap-servers> <subscribe-type> <topics>
+ *   <bootstrap-servers> The Kafka "bootstrap.servers" configuration. A
+ *   comma-separated list of host:port.
+ *   <subscribe-type> One of three subscription types: 'assign', 'subscribe',
+ *   'subscribePattern'.
+ *   |- <assign> Specific TopicPartitions to consume. JSON string, e.g.
+ *   |  {"topicA":[0,1],"topicB":[2,4]}.
+ *   |- <subscribe> The topics to subscribe to. A comma-separated list of
+ *   |  topics.
+ *   |- <subscribePattern> The pattern used to subscribe to topic(s).
+ *   |  Java regex string.
+ *   |- Only one of "assign", "subscribe" or "subscribePattern" options can be
+ *   |  specified for the Kafka source.
+ *   <topics> The value format depends on the chosen 'subscribe-type'.
+ *
+ * Example:
+ *    `$ bin/run-example \
+ *      sql.streaming.JavaStructuredKafkaWordCount host1:port1,host2:port2 \
+ *      subscribe topic1,topic2`
+ */
+public final class JavaStructuredKafkaWordCount {
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 3) {
+      System.err.println("Usage: JavaStructuredKafkaWordCount <bootstrap-servers> " +
+        "<subscribe-type> <topics>");
+      System.exit(1);
+    }
+
+    String bootstrapServers = args[0];
+    String subscribeType = args[1];
+    String topics = args[2];
+
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaStructuredKafkaWordCount")
+      .getOrCreate();
+
+    // Create Dataset representing the stream of input lines from Kafka
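+    // (the Kafka source provides the key and value columns as binary, so cast the value to a
+    // string before splitting it into words)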
+    Dataset<String> lines = spark
+      .readStream()
+      .format("kafka")
+      .option("kafka.bootstrap.servers", bootstrapServers)
+      .option(subscribeType, topics)
+      .load()
+      .selectExpr("CAST(value AS STRING)")
+      .as(Encoders.STRING());
+
+    // Generate running word count
+    Dataset<Row> wordCounts = lines.flatMap(
+        (FlatMapFunction<String, String>) x -> Arrays.asList(x.split(" ")).iterator(),
+        Encoders.STRING()).groupBy("value").count();
+
+    // Start running the query that prints the running counts to the console
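+    // ('complete' output mode re-emits the full aggregated counts table on every trigger)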
+    StreamingQuery query = wordCounts.writeStream()
+      .outputMode("complete")
+      .format("console")
+      .start();
+
+    query.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..3af786978b16792cc7436d49d3e567a260ccec60
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCount.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql.streaming;
+
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.sql.*;
+import org.apache.spark.sql.streaming.StreamingQuery;
+
+import java.util.Arrays;
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from the network.
+ *
+ * Usage: JavaStructuredNetworkWordCount <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Structured Streaming
+ * would connect to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example sql.streaming.JavaStructuredNetworkWordCount
+ *    localhost 9999`
+ */
+public final class JavaStructuredNetworkWordCount {
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaStructuredNetworkWordCount <hostname> <port>");
+      System.exit(1);
+    }
+
+    String host = args[0];
+    int port = Integer.parseInt(args[1]);
+
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaStructuredNetworkWordCount")
+      .getOrCreate();
+
+    // Create DataFrame representing the stream of input lines from connection to host:port
+    Dataset<Row> lines = spark
+      .readStream()
+      .format("socket")
+      .option("host", host)
+      .option("port", port)
+      .load();
+
+    // Split the lines into words
+    Dataset<String> words = lines.as(Encoders.STRING()).flatMap(
+        (FlatMapFunction<String, String>) x -> Arrays.asList(x.split(" ")).iterator(),
+        Encoders.STRING());
+
+    // Generate running word count
+    Dataset<Row> wordCounts = words.groupBy("value").count();
+
+    // Start running the query that prints the running counts to the console
+    StreamingQuery query = wordCounts.writeStream()
+      .outputMode("complete")
+      .format("console")
+      .start();
+
+    query.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCountWindowed.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCountWindowed.java
new file mode 100644
index 0000000000000000000000000000000000000000..93ec5e269515734025f36ae443be87a7343cec60
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredNetworkWordCountWindowed.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql.streaming;
+
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.sql.*;
+import org.apache.spark.sql.streaming.StreamingQuery;
+import scala.Tuple2;
+
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from the network over a
+ * sliding window of configurable duration. Each line from the network is tagged
+ * with a timestamp that is used to determine the windows into which it falls.
+ *
+ * Usage: JavaStructuredNetworkWordCountWindowed <hostname> <port> <window duration>
+ *   [<slide duration>]
+ * <hostname> and <port> describe the TCP server that Structured Streaming
+ * would connect to receive data.
+ * <window duration> gives the size of the window, specified as an integer number of seconds
+ * <slide duration> gives the amount of time successive windows are offset from one another,
+ * given in the same units as above. <slide duration> should be less than or equal to
+ * <window duration>. If the two are equal, successive windows have no overlap. If
+ * <slide duration> is not provided, it defaults to <window duration>.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example sql.streaming.JavaStructuredNetworkWordCountWindowed
+ *    localhost 9999 <window duration in seconds> [<slide duration in seconds>]`
+ *
+ * One recommended <window duration>, <slide duration> pair is 10, 5
+ */
+public final class JavaStructuredNetworkWordCountWindowed {
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 3) {
+      System.err.println("Usage: JavaStructuredNetworkWordCountWindowed <hostname> <port>" +
+        " <window duration in seconds> [<slide duration in seconds>]");
+      System.exit(1);
+    }
+
+    String host = args[0];
+    int port = Integer.parseInt(args[1]);
+    int windowSize = Integer.parseInt(args[2]);
+    int slideSize = (args.length == 3) ? windowSize : Integer.parseInt(args[3]);
+    if (slideSize > windowSize) {
+      System.err.println("<slide duration> must be less than or equal to <window duration>");
+      System.exit(1);
+    }
+    String windowDuration = windowSize + " seconds";
+    String slideDuration = slideSize + " seconds";
+
+    SparkSession spark = SparkSession
+      .builder()
+      .appName("JavaStructuredNetworkWordCountWindowed")
+      .getOrCreate();
+
+    // Create DataFrame representing the stream of input lines from connection to host:port
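+    // (with includeTimestamp set, the socket source attaches an arrival timestamp to every line)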
+    Dataset<Row> lines = spark
+      .readStream()
+      .format("socket")
+      .option("host", host)
+      .option("port", port)
+      .option("includeTimestamp", true)
+      .load();
+
+    // Split the lines into words, retaining timestamps
+    Dataset<Row> words = lines
+      .as(Encoders.tuple(Encoders.STRING(), Encoders.TIMESTAMP()))
+      .flatMap((FlatMapFunction<Tuple2<String, Timestamp>, Tuple2<String, Timestamp>>) t -> {
+          List<Tuple2<String, Timestamp>> result = new ArrayList<>();
+          for (String word : t._1.split(" ")) {
+            result.add(new Tuple2<>(word, t._2));
+          }
+          return result.iterator();
+        },
+        Encoders.tuple(Encoders.STRING(), Encoders.TIMESTAMP())
+      ).toDF("word", "timestamp");
+
+    // Group the data by window and word and compute the count of each group
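+    // (functions.window assigns each row, based on its timestamp, to every window of length
+    // <window duration> that starts at <slide duration> intervals and contains it)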
+    Dataset<Row> windowedCounts = words.groupBy(
+      functions.window(words.col("timestamp"), windowDuration, slideDuration),
+      words.col("word")
+    ).count().orderBy("window");
+
+    // Start running the query that prints the windowed word counts to the console
+    StreamingQuery query = windowedCounts.writeStream()
+      .outputMode("complete")
+      .format("console")
+      .option("truncate", "false")
+      .start();
+
+    query.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredSessionization.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredSessionization.java
new file mode 100644
index 0000000000000000000000000000000000000000..943e3d82f30ff87cc39f132ed22f42af14bff290
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredSessionization.java
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql.streaming;
+
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.MapFunction;
+import org.apache.spark.api.java.function.MapGroupsWithStateFunction;
+import org.apache.spark.sql.*;
+import org.apache.spark.sql.streaming.GroupState;
+import org.apache.spark.sql.streaming.GroupStateTimeout;
+import org.apache.spark.sql.streaming.StreamingQuery;
+
+import java.io.Serializable;
+import java.sql.Timestamp;
+import java.util.*;
+
+/**
+ * Sessionizes words in UTF8 encoded, '\n' delimited text received from the network: each word is
+ * treated as a session id, and the number of events and duration of each session are tracked
+ * with mapGroupsWithState.
+ * <p>
+ * Usage: JavaStructuredSessionization <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Structured Streaming
+ * would connect to receive data.
+ * <p>
+ * To run this on your local machine, you need to first run a Netcat server
+ * `$ nc -lk 9999`
+ * and then run the example
+ * `$ bin/run-example sql.streaming.JavaStructuredSessionization
+ * localhost 9999`
+ */
+public final class JavaStructuredSessionization {
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaStructuredSessionization <hostname> <port>");
+      System.exit(1);
+    }
+
+    String host = args[0];
+    int port = Integer.parseInt(args[1]);
+
+    SparkSession spark = SparkSession
+        .builder()
+        .appName("JavaStructuredSessionization")
+        .getOrCreate();
+
+    // Create DataFrame representing the stream of input lines from connection to host:port
+    Dataset<Row> lines = spark
+        .readStream()
+        .format("socket")
+        .option("host", host)
+        .option("port", port)
+        .option("includeTimestamp", true)
+        .load();
+
+    FlatMapFunction<LineWithTimestamp, Event> linesToEvents =
+      new FlatMapFunction<LineWithTimestamp, Event>() {
+        @Override
+        public Iterator<Event> call(LineWithTimestamp lineWithTimestamp) {
+          ArrayList<Event> eventList = new ArrayList<Event>();
+          for (String word : lineWithTimestamp.getLine().split(" ")) {
+            eventList.add(new Event(word, lineWithTimestamp.getTimestamp()));
+          }
+          return eventList.iterator();
+        }
+      };
+
+    // Split the lines into words, treat words as sessionId of events
+    Dataset<Event> events = lines
+        .withColumnRenamed("value", "line")
+        .as(Encoders.bean(LineWithTimestamp.class))
+        .flatMap(linesToEvents, Encoders.bean(Event.class));
+
+    // Sessionize the events. Track the number of events and the start and end timestamps of each
+    // session, and report session updates.
+    //
+    // Step 1: Define the state update function
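+    // (mapGroupsWithState invokes this function for every sessionId that has new events or has
+    // timed out in a trigger, passing the new events together with the session's current state)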
+    MapGroupsWithStateFunction<String, Event, SessionInfo, SessionUpdate> stateUpdateFunc =
+      new MapGroupsWithStateFunction<String, Event, SessionInfo, SessionUpdate>() {
+        @Override public SessionUpdate call(
+            String sessionId, Iterator<Event> events, GroupState<SessionInfo> state) {
+          // If timed out, then remove session and send final update
+          if (state.hasTimedOut()) {
+            SessionUpdate finalUpdate = new SessionUpdate(
+                sessionId, state.get().calculateDuration(), state.get().getNumEvents(), true);
+            state.remove();
+            return finalUpdate;
+
+          } else {
+            // Find max and min timestamps in events
+            long maxTimestampMs = Long.MIN_VALUE;
+            long minTimestampMs = Long.MAX_VALUE;
+            int numNewEvents = 0;
+            while (events.hasNext()) {
+              Event e = events.next();
+              long timestampMs = e.getTimestamp().getTime();
+              maxTimestampMs = Math.max(timestampMs, maxTimestampMs);
+              minTimestampMs = Math.min(timestampMs, minTimestampMs);
+              numNewEvents += 1;
+            }
+            SessionInfo updatedSession = new SessionInfo();
+
+            // Update start and end timestamps in session
+            if (state.exists()) {
+              SessionInfo oldSession = state.get();
+              updatedSession.setNumEvents(oldSession.numEvents + numNewEvents);
+              updatedSession.setStartTimestampMs(oldSession.startTimestampMs);
+              updatedSession.setEndTimestampMs(Math.max(oldSession.endTimestampMs, maxTimestampMs));
+            } else {
+              updatedSession.setNumEvents(numNewEvents);
+              updatedSession.setStartTimestampMs(minTimestampMs);
+              updatedSession.setEndTimestampMs(maxTimestampMs);
+            }
+            state.update(updatedSession);
+            // Set timeout such that the session will be expired if no data received for 10 seconds
+            state.setTimeoutDuration("10 seconds");
+            return new SessionUpdate(
+                sessionId, state.get().calculateDuration(), state.get().getNumEvents(), false);
+          }
+        }
+      };
+
+    // Step 2: Apply the state update function to the events streaming Dataset grouped by sessionId
+    Dataset<SessionUpdate> sessionUpdates = events
+        .groupByKey(
+            new MapFunction<Event, String>() {
+              @Override public String call(Event event) {
+                return event.getSessionId();
+              }
+            }, Encoders.STRING())
+        .mapGroupsWithState(
+            stateUpdateFunc,
+            Encoders.bean(SessionInfo.class),
+            Encoders.bean(SessionUpdate.class),
+            GroupStateTimeout.ProcessingTimeTimeout());
+
+    // Start running the query that prints the session updates to the console
+    StreamingQuery query = sessionUpdates
+        .writeStream()
+        .outputMode("update")
+        .format("console")
+        .start();
+
+    query.awaitTermination();
+  }
+
+  /**
+   * User-defined data type representing the raw lines with timestamps.
+   */
+  public static class LineWithTimestamp implements Serializable {
+    private String line;
+    private Timestamp timestamp;
+
+    public Timestamp getTimestamp() { return timestamp; }
+    public void setTimestamp(Timestamp timestamp) { this.timestamp = timestamp; }
+
+    public String getLine() { return line; }
+    public void setLine(String sessionId) { this.line = sessionId; }
+  }
+
+  /**
+   * User-defined data type representing the input events
+   */
+  public static class Event implements Serializable {
+    private String sessionId;
+    private Timestamp timestamp;
+
+    public Event() { }
+    public Event(String sessionId, Timestamp timestamp) {
+      this.sessionId = sessionId;
+      this.timestamp = timestamp;
+    }
+
+    public Timestamp getTimestamp() { return timestamp; }
+    public void setTimestamp(Timestamp timestamp) { this.timestamp = timestamp; }
+
+    public String getSessionId() { return sessionId; }
+    public void setSessionId(String sessionId) { this.sessionId = sessionId; }
+  }
+
+  /**
+   * User-defined data type for storing a session information as state in mapGroupsWithState.
+   */
+  public static class SessionInfo implements Serializable {
+    private int numEvents = 0;
+    private long startTimestampMs = -1;
+    private long endTimestampMs = -1;
+
+    public int getNumEvents() { return numEvents; }
+    public void setNumEvents(int numEvents) { this.numEvents = numEvents; }
+
+    public long getStartTimestampMs() { return startTimestampMs; }
+    public void setStartTimestampMs(long startTimestampMs) {
+      this.startTimestampMs = startTimestampMs;
+    }
+
+    public long getEndTimestampMs() { return endTimestampMs; }
+    public void setEndTimestampMs(long endTimestampMs) { this.endTimestampMs = endTimestampMs; }
+
+    public long calculateDuration() { return endTimestampMs - startTimestampMs; }
+
+    @Override public String toString() {
+      return "SessionInfo(numEvents = " + numEvents +
+          ", timestamps = " + startTimestampMs + " to " + endTimestampMs + ")";
+    }
+  }
+
+  /**
+   * User-defined data type representing the update information returned by mapGroupsWithState.
+   */
+  public static class SessionUpdate implements Serializable {
+    private String id;
+    private long durationMs;
+    private int numEvents;
+    private boolean expired;
+
+    public SessionUpdate() { }
+
+    public SessionUpdate(String id, long durationMs, int numEvents, boolean expired) {
+      this.id = id;
+      this.durationMs = durationMs;
+      this.numEvents = numEvents;
+      this.expired = expired;
+    }
+
+    public String getId() { return id; }
+    public void setId(String id) { this.id = id; }
+
+    public long getDurationMs() { return durationMs; }
+    public void setDurationMs(long durationMs) { this.durationMs = durationMs; }
+
+    public int getNumEvents() { return numEvents; }
+    public void setNumEvents(int numEvents) { this.numEvents = numEvents; }
+
+    public boolean isExpired() { return expired; }
+    public void setExpired(boolean expired) { this.expired = expired; }
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
new file mode 100644
index 0000000000000000000000000000000000000000..47692ec982890c1e9bb17d22cd9dead9e73e9e25
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import com.google.common.io.Closeables;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.storage.StorageLevel;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.streaming.receiver.Receiver;
+import scala.Tuple2;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.ConnectException;
+import java.net.Socket;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+/**
+ * Custom Receiver that receives data over a socket. Received bytes are interpreted as
+ * text, and \n delimited lines are treated as records. They are then counted and printed.
+ *
+ * Usage: JavaCustomReceiver <hostname> <port>
+ *   <hostname> and <port> of the TCP server that Spark Streaming would connect to receive data.
+ *   When running locally, use a master URL of 'local[n]' with n > 1 so the receiver gets a
+ *   dedicated core.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example org.apache.spark.examples.streaming.JavaCustomReceiver localhost 9999`
+ */
+
+public class JavaCustomReceiver extends Receiver<String> {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaCustomReceiver <hostname> <port>");
+      System.exit(1);
+    }
+
+    StreamingExamples.setStreamingLogLevels();
+
+    // Create the context with a 1 second batch size
+    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
+    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));
+
+    // Create an input stream with the custom receiver on target ip:port and count the
+    // words in the input stream of \n delimited text (e.g. generated by 'nc')
+    JavaReceiverInputDStream<String> lines = ssc.receiverStream(
+      new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
+    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
+    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
+        .reduceByKey((i1, i2) -> i1 + i2);
+
+    wordCounts.print();
+    ssc.start();
+    ssc.awaitTermination();
+  }
+
+  // ============= Receiver code that receives data over a socket ==============
+
+  String host = null;
+  int port = -1;
+
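+  // MEMORY_AND_DISK_2 keeps received blocks in memory (spilling to disk if needed) and
+  // replicates them to a second executor for fault tolerance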
+  public JavaCustomReceiver(String host_ , int port_) {
+    super(StorageLevel.MEMORY_AND_DISK_2());
+    host = host_;
+    port = port_;
+  }
+
+  @Override
+  public void onStart() {
+    // Start the thread that receives data over a connection
+    new Thread(this::receive).start();
+  }
+
+  @Override
+  public void onStop() {
+    // There is not much to do here, as the thread calling receive()
+    // is designed to stop by itself once isStopped() returns true
+  }
+
+  /** Create a socket connection and receive data until receiver is stopped */
+  private void receive() {
+    try {
+      Socket socket = null;
+      BufferedReader reader = null;
+      try {
+        // connect to the server
+        socket = new Socket(host, port);
+        reader = new BufferedReader(
+            new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8));
+        // Until stopped or connection broken continue reading
+        String userInput;
+        while (!isStopped() && (userInput = reader.readLine()) != null) {
+          System.out.println("Received data '" + userInput + "'");
+          store(userInput);
+        }
+      } finally {
+        Closeables.close(reader, /* swallowIOException = */ true);
+        Closeables.close(socket,  /* swallowIOException = */ true);
+      }
+      // Restart in an attempt to connect again when server is active again
+      restart("Trying to connect again");
+    } catch(ConnectException ce) {
+      // restart if could not connect to server
+      restart("Could not connect", ce);
+    } catch(Throwable t) {
+      restart("Error receiving data", t);
+    }
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..748bf58f3035006c932d4540ede7ab2aa1433cfe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKafkaWordCount <brokers> <groupId> <topics>
+ *   <brokers> is a list of one or more Kafka brokers
+ *   <groupId> is a consumer group name to consume from topics
+ *   <topics> is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *    $ bin/run-example streaming.JavaDirectKafkaWordCount broker1-host:port,broker2-host:port \
+ *      consumer-group topic1,topic2
+ */
+
+public final class JavaDirectKafkaWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 3) {
+      System.err.println("Usage: JavaDirectKafkaWordCount <brokers> <groupId> <topics>\n" +
+                         "  <brokers> is a list of one or more Kafka brokers\n" +
+                         "  <groupId> is a consumer group name to consume from topics\n" +
+                         "  <topics> is a list of one or more kafka topics to consume from\n\n");
+      System.exit(1);
+    }
+
+    StreamingExamples.setStreamingLogLevels();
+
+    String brokers = args[0];
+    String groupId = args[1];
+    String topics = args[2];
+
+    // Create context with a 2 seconds batch interval
+    SparkConf sparkConf = new SparkConf().setAppName("JavaDirectKafkaWordCount");
+    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(2));
+
+    Set<String> topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
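+    // Kafka consumer configuration: bootstrap brokers, the consumer group to join, and String
+    // deserializers for message keys and values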
+    Map<String, Object> kafkaParams = new HashMap<>();
+    kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
+    kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+    kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+    kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
+
+    // Create direct kafka stream with brokers and topics
+    JavaInputDStream<ConsumerRecord<String, String>> messages = KafkaUtils.createDirectStream(
+        jssc,
+        LocationStrategies.PreferConsistent(),
+        ConsumerStrategies.Subscribe(topicsSet, kafkaParams));
+
+    // Get the lines, split them into words, count the words and print
+    JavaDStream<String> lines = messages.map(ConsumerRecord::value);
+    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
+    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
+        .reduceByKey((i1, i2) -> i1 + i2);
+    wordCounts.print();
+
+    // Start the computation
+    jssc.start();
+    jssc.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..b217672def88e6aa32df618aa352cc90b85f04b5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.StorageLevels;
+import org.apache.spark.streaming.Durations;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
+ *
+ * Usage: JavaNetworkWordCount <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example org.apache.spark.examples.streaming.JavaNetworkWordCount localhost 9999`
+ */
+public final class JavaNetworkWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
+      System.exit(1);
+    }
+
+    StreamingExamples.setStreamingLogLevels();
+
+    // Create the context with a 1 second batch size
+    SparkConf sparkConf = new SparkConf().setAppName("JavaNetworkWordCount");
+    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
+
+    // Create a JavaReceiverInputDStream on target ip:port and count the
+    // words in the input stream of \n delimited text (e.g. generated by 'nc')
+    // Note that the storage level used here does not replicate the data, which is fine only
+    // when running locally. In a distributed scenario, replication is needed for fault tolerance.
+    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
+            args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER);
+    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
+    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
+        .reduceByKey((i1, i2) -> i1 + i2);
+
+    wordCounts.print();
+    ssc.start();
+    ssc.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaQueueStream.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaQueueStream.java
new file mode 100644
index 0000000000000000000000000000000000000000..e86f8ab38a74ff7b7f761dcaff4b549d9d51613f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaQueueStream.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Queue;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.streaming.Duration;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+
+public final class JavaQueueStream {
+  private JavaQueueStream() {
+  }
+
+  public static void main(String[] args) throws Exception {
+
+    StreamingExamples.setStreamingLogLevels();
+    SparkConf sparkConf = new SparkConf().setAppName("JavaQueueStream");
+
+    // Create the context
+    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));
+
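+    // (queueStream is mainly useful for testing: by default each batch interval consumes one RDD
+    // from the queue and treats it as that batch's input)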
+    // Create the queue through which RDDs can be pushed to
+    // a QueueInputDStream
+
+    // Create and push some RDDs into the queue
+    List<Integer> list = new ArrayList<>();
+    for (int i = 0; i < 1000; i++) {
+      list.add(i);
+    }
+
+    Queue<JavaRDD<Integer>> rddQueue = new LinkedList<>();
+    for (int i = 0; i < 30; i++) {
+      rddQueue.add(ssc.sparkContext().parallelize(list));
+    }
+
+    // Create the QueueInputDStream and use it do some processing
+    JavaDStream<Integer> inputStream = ssc.queueStream(rddQueue);
+    JavaPairDStream<Integer, Integer> mappedStream = inputStream.mapToPair(
+        i -> new Tuple2<>(i % 10, 1));
+    JavaPairDStream<Integer, Integer> reducedStream = mappedStream.reduceByKey(
+        (i1, i2) -> i1 + i2);
+
+    reducedStream.print();
+    ssc.start();
+    ssc.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecord.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecord.java
new file mode 100644
index 0000000000000000000000000000000000000000..e63697a79f23ade738538edde009a7a6a85da68e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecord.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+/** Java Bean class to be used with the example JavaSqlNetworkWordCount. */
+public class JavaRecord implements java.io.Serializable {
+  private String word;
+
+  public String getWord() {
+    return word;
+  }
+
+  public void setWord(String word) {
+    this.word = word;
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..45a876decff8be5aed9b895363f3b245483fa424
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.io.File;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import com.google.common.io.Files;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.*;
+import org.apache.spark.broadcast.Broadcast;
+import org.apache.spark.streaming.Durations;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaPairDStream;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+import org.apache.spark.util.LongAccumulator;
+
+/**
+ * Use this singleton to get or register a Broadcast variable.
+ */
+class JavaWordBlacklist {
+
+  private static volatile Broadcast<List<String>> instance = null;
+
+  public static Broadcast<List<String>> getInstance(JavaSparkContext jsc) {
+    if (instance == null) {
+      synchronized (JavaWordBlacklist.class) {
+        if (instance == null) {
+          List<String> wordBlacklist = Arrays.asList("a", "b", "c");
+          instance = jsc.broadcast(wordBlacklist);
+        }
+      }
+    }
+    return instance;
+  }
+}
+
+/**
+ * Use this singleton to get or register an Accumulator.
+ */
+class JavaDroppedWordsCounter {
+
+  private static volatile LongAccumulator instance = null;
+
+  public static LongAccumulator getInstance(JavaSparkContext jsc) {
+    if (instance == null) {
+      synchronized (JavaDroppedWordsCounter.class) {
+        if (instance == null) {
+          instance = jsc.sc().longAccumulator("WordsInBlacklistCounter");
+        }
+      }
+    }
+    return instance;
+  }
+}
+
+/**
+ * Counts words in text encoded with UTF8 received from the network every second. This example also
+ * shows how to use lazily instantiated singleton instances of Accumulator and Broadcast so that
+ * they can be re-created when the driver recovers from a failure.
+ *
+ * Usage: JavaRecoverableNetworkWordCount <hostname> <port> <checkpoint-directory> <output-file>
+ *   <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive
+ *   data. <checkpoint-directory> is a directory on an HDFS-compatible file system to which
+ *   checkpoint data is written. <output-file> is the file to which the word counts will be appended
+ *
+ * <checkpoint-directory> and <output-file> must be absolute paths
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *
+ *      `$ nc -lk 9999`
+ *
+ * and run the example as
+ *
+ *      `$ ./bin/run-example org.apache.spark.examples.streaming.JavaRecoverableNetworkWordCount \
+ *              localhost 9999 ~/checkpoint/ ~/out`
+ *
+ * If the directory ~/checkpoint/ does not exist (e.g. running for the first time), it will create
+ * a new StreamingContext (will print "Creating new context" to the console). Otherwise, if
+ * checkpoint data exists in ~/checkpoint/, then it will create StreamingContext from
+ * the checkpoint data.
+ *
+ * Refer to the online documentation for more details.
+ */
+public final class JavaRecoverableNetworkWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  private static JavaStreamingContext createContext(String ip,
+                                                    int port,
+                                                    String checkpointDirectory,
+                                                    String outputPath) {
+
+    // If you do not see this printed, it means the StreamingContext has been loaded
+    // from existing checkpoint data rather than created anew
+    System.out.println("Creating new context");
+    File outputFile = new File(outputPath);
+    if (outputFile.exists()) {
+      outputFile.delete();
+    }
+    SparkConf sparkConf = new SparkConf().setAppName("JavaRecoverableNetworkWordCount");
+    // Create the context with a 1 second batch size
+    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
+    ssc.checkpoint(checkpointDirectory);
+
+    // Create a socket stream on target ip:port and count the
+    // words in the input stream of \n delimited text (e.g. generated by 'nc')
+    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(ip, port);
+    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
+    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
+        .reduceByKey((i1, i2) -> i1 + i2);
+
+    wordCounts.foreachRDD((rdd, time) -> {
+      // Get or register the blacklist Broadcast
+      Broadcast<List<String>> blacklist =
+          JavaWordBlacklist.getInstance(new JavaSparkContext(rdd.context()));
+      // Get or register the droppedWordsCounter Accumulator
+      LongAccumulator droppedWordsCounter =
+          JavaDroppedWordsCounter.getInstance(new JavaSparkContext(rdd.context()));
+      // Use blacklist to drop words and use droppedWordsCounter to count them
+      String counts = rdd.filter(wordCount -> {
+        if (blacklist.value().contains(wordCount._1())) {
+          droppedWordsCounter.add(wordCount._2());
+          return false;
+        } else {
+          return true;
+        }
+      }).collect().toString();
+      String output = "Counts at time " + time + " " + counts;
+      System.out.println(output);
+      System.out.println("Dropped " + droppedWordsCounter.value() + " word(s) totally");
+      System.out.println("Appending to " + outputFile.getAbsolutePath());
+      Files.append(output + "\n", outputFile, Charset.defaultCharset());
+    });
+
+    return ssc;
+  }
+
+  public static void main(String[] args) throws Exception {
+    if (args.length != 4) {
+      System.err.println("You arguments were " + Arrays.asList(args));
+      System.err.println(
+          "Usage: JavaRecoverableNetworkWordCount <hostname> <port> <checkpoint-directory>\n" +
+          "     <output-file>. <hostname> and <port> describe the TCP server that Spark\n" +
+          "     Streaming would connect to receive data. <checkpoint-directory> directory to\n" +
+          "     HDFS-compatible file system which checkpoint data <output-file> file to which\n" +
+          "     the word counts will be appended\n" +
+          "\n" +
+          "In local mode, <master> should be 'local[n]' with n > 1\n" +
+          "Both <checkpoint-directory> and <output-file> must be absolute paths");
+      System.exit(1);
+    }
+
+    String ip = args[0];
+    int port = Integer.parseInt(args[1]);
+    String checkpointDirectory = args[2];
+    String outputPath = args[3];
+
+    // Function to create JavaStreamingContext without any output operations
+    // (used to detect the new context)
+    Function0<JavaStreamingContext> createContextFunc =
+        () -> createContext(ip, port, checkpointDirectory, outputPath);
+
+    JavaStreamingContext ssc =
+      JavaStreamingContext.getOrCreate(checkpointDirectory, createContextFunc);
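+    // getOrCreate() rebuilds the StreamingContext from the checkpoint data if the directory
+    // already contains a checkpoint; otherwise it calls createContextFunc to build a fresh one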
+    ssc.start();
+    ssc.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..948d1a21117805386ac3a1606fc312c88e641843
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.api.java.StorageLevels;
+import org.apache.spark.streaming.Durations;
+import org.apache.spark.streaming.api.java.JavaDStream;
+import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
+import org.apache.spark.streaming.api.java.JavaStreamingContext;
+
+/**
+ * Use DataFrames and SQL to count words in UTF8 encoded, '\n' delimited text received from the
+ * network every second.
+ *
+ * Usage: JavaSqlNetworkWordCount <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example org.apache.spark.examples.streaming.JavaSqlNetworkWordCount localhost 9999`
+ */
+public final class JavaSqlNetworkWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaNetworkWordCount <hostname> <port>");
+      System.exit(1);
+    }
+
+    StreamingExamples.setStreamingLogLevels();
+
+    // Create the context with a 1 second batch size
+    SparkConf sparkConf = new SparkConf().setAppName("JavaSqlNetworkWordCount");
+    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
+
+    // Create a JavaReceiverInputDStream on target ip:port and count the
+    // words in the input stream of \n delimited text (e.g. generated by 'nc')
+    // Note that the storage level used here does not replicate the data, which is fine only
+    // when running locally. In a distributed scenario, replication is needed for fault tolerance.
+    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
+        args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER);
+    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
+
+    // Convert RDDs of the words DStream to DataFrame and run SQL query
+    words.foreachRDD((rdd, time) -> {
+      SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
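+      // (getInstance() lazily creates a single SparkSession on the driver and reuses it for
+      // every batch; see JavaSparkSessionSingleton below)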
+
+      // Convert JavaRDD[String] to JavaRDD[bean class] to DataFrame
+      JavaRDD<JavaRecord> rowRDD = rdd.map(word -> {
+        JavaRecord record = new JavaRecord();
+        record.setWord(word);
+        return record;
+      });
+      Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);
+
+      // Creates a temporary view using the DataFrame
+      wordsDataFrame.createOrReplaceTempView("words");
+
+      // Do word count on table using SQL and print it
+      Dataset<Row> wordCountsDataFrame =
+          spark.sql("select word, count(*) as total from words group by word");
+      System.out.println("========= " + time + "=========");
+      wordCountsDataFrame.show();
+    });
+
+    ssc.start();
+    ssc.awaitTermination();
+  }
+}
+
+/** Lazily instantiated singleton instance of SparkSession */
+class JavaSparkSessionSingleton {
+  private static transient SparkSession instance = null;
+  public static SparkSession getInstance(SparkConf sparkConf) {
+    if (instance == null) {
+      instance = SparkSession
+        .builder()
+        .config(sparkConf)
+        .getOrCreate();
+    }
+    return instance;
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java
new file mode 100644
index 0000000000000000000000000000000000000000..9d8bd7fd11ebd7912b3803c7d16613cc1dd083a7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/java/org/apache/spark/examples/streaming/JavaStatefulNetworkWordCount.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.function.*;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.Optional;
+import org.apache.spark.api.java.StorageLevels;
+import org.apache.spark.streaming.Durations;
+import org.apache.spark.streaming.State;
+import org.apache.spark.streaming.StateSpec;
+import org.apache.spark.streaming.api.java.*;
+
+/**
+ * Counts words cumulatively in UTF8 encoded, '\n' delimited text received from the network every
+ * second, starting from an initial set of word counts.
+ * Usage: JavaStatefulNetworkWordCount <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive
+ * data.
+ * <p>
+ * To run this on your local machine, you need to first run a Netcat server
+ * `$ nc -lk 9999`
+ * and then run the example
+ * `$ bin/run-example
+ * org.apache.spark.examples.streaming.JavaStatefulNetworkWordCount localhost 9999`
+ */
+public class JavaStatefulNetworkWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.err.println("Usage: JavaStatefulNetworkWordCount <hostname> <port>");
+      System.exit(1);
+    }
+
+    StreamingExamples.setStreamingLogLevels();
+
+    // Create the context with a 1 second batch size
+    SparkConf sparkConf = new SparkConf().setAppName("JavaStatefulNetworkWordCount");
+    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
+    ssc.checkpoint(".");
+
+    // Initial state RDD input to mapWithState
+    @SuppressWarnings("unchecked")
+    List<Tuple2<String, Integer>> tuples =
+        Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
+    JavaPairRDD<String, Integer> initialRDD = ssc.sparkContext().parallelizePairs(tuples);
+
+    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
+            args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER_2);
+
+    JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
+
+    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(s -> new Tuple2<>(s, 1));
+
+    // Mapping function that updates the cumulative count for each word
+    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
+        (word, one, state) -> {
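+          // 'one' is the value of the current record (1 per word occurrence) and may be
+          // absent when the state times out; 'state' holds the running count for 'word'.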
+          int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
+          Tuple2<String, Integer> output = new Tuple2<>(word, sum);
+          state.update(sum);
+          return output;
+        };
+
+    // DStream of cumulative counts that get updated in every batch
+    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream =
+        wordsDstream.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));
+
+    stateDstream.print();
+    ssc.start();
+    ssc.awaitTermination();
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/als.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/als.py
new file mode 100755
index 0000000000000000000000000000000000000000..6d3241876ad518f16a81e794e2d5bab00dfc4a67
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/als.py
@@ -0,0 +1,108 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+This is an example implementation of ALS for learning how to use Spark. Please refer to
+pyspark.ml.recommendation.ALS for more conventional use.
+
+This example requires numpy (http://www.numpy.org/)
+"""
+from __future__ import print_function
+
+import sys
+
+import numpy as np
+from numpy.random import rand
+from numpy import matrix
+from pyspark.sql import SparkSession
+
+LAMBDA = 0.01   # regularization
+np.random.seed(42)
+
+
+def rmse(R, ms, us):
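+    # Root-mean-square error of the reconstruction R ~= ms * us.T over all M * U entries.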
+    diff = R - ms * us.T
+    return np.sqrt(np.sum(np.power(diff, 2)) / (M * U))
+
+
+def update(i, mat, ratings):
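+    # With the factors in `mat` held fixed, solve the ridge-regularized normal equations
+    # for row i:  (mat.T * mat + LAMBDA * uu * I) * x = mat.T * ratings[i, :].T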
+    uu = mat.shape[0]
+    ff = mat.shape[1]
+
+    XtX = mat.T * mat
+    Xty = mat.T * ratings[i, :].T
+
+    for j in range(ff):
+        XtX[j, j] += LAMBDA * uu
+
+    return np.linalg.solve(XtX, Xty)
+
+
+if __name__ == "__main__":
+
+    """
+    Usage: als [M] [U] [F] [iterations] [partitions]
+    """
+
+    print("""WARN: This is a naive implementation of ALS and is given as an
+      example. Please use pyspark.ml.recommendation.ALS for more
+      conventional use.""", file=sys.stderr)
+
+    spark = SparkSession\
+        .builder\
+        .appName("PythonALS")\
+        .getOrCreate()
+
+    sc = spark.sparkContext
+
+    M = int(sys.argv[1]) if len(sys.argv) > 1 else 100
+    U = int(sys.argv[2]) if len(sys.argv) > 2 else 500
+    F = int(sys.argv[3]) if len(sys.argv) > 3 else 10
+    ITERATIONS = int(sys.argv[4]) if len(sys.argv) > 4 else 5
+    partitions = int(sys.argv[5]) if len(sys.argv) > 5 else 2
+
+    print("Running ALS with M=%d, U=%d, F=%d, iters=%d, partitions=%d\n" %
+          (M, U, F, ITERATIONS, partitions))
+
+    R = matrix(rand(M, F)) * matrix(rand(U, F).T)
+    ms = matrix(rand(M, F))
+    us = matrix(rand(U, F))
+
+    Rb = sc.broadcast(R)
+    msb = sc.broadcast(ms)
+    usb = sc.broadcast(us)
+
+    for i in range(ITERATIONS):
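+        # Alternate between the two factor matrices: solve for ms with us held fixed,
+        # then solve for us with ms held fixed, re-broadcasting each updated matrix.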
+        ms = sc.parallelize(range(M), partitions) \
+               .map(lambda x: update(x, usb.value, Rb.value)) \
+               .collect()
+        # collect() returns a list of (F x 1) matrices, so np.array() produces a
+        # 3-d array; take the first two dims to rebuild the (rows x F) factor matrix
+        ms = matrix(np.array(ms)[:, :, 0])
+        msb = sc.broadcast(ms)
+
+        us = sc.parallelize(range(U), partitions) \
+               .map(lambda x: update(x, msb.value, Rb.value.T)) \
+               .collect()
+        us = matrix(np.array(us)[:, :, 0])
+        usb = sc.broadcast(us)
+
+        error = rmse(R, ms, us)
+        print("Iteration %d:" % i)
+        print("\nRMSE: %5.4f\n" % error)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/avro_inputformat.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/avro_inputformat.py
new file mode 100644
index 0000000000000000000000000000000000000000..a18722c687f8b98a32bc86dadbaeb05b2f34760b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/avro_inputformat.py
@@ -0,0 +1,91 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Read data file users.avro in local Spark distro:
+
+$ cd $SPARK_HOME
+$ ./bin/spark-submit --driver-class-path /path/to/example/jar \
+> ./examples/src/main/python/avro_inputformat.py \
+> examples/src/main/resources/users.avro
+{u'favorite_color': None, u'name': u'Alyssa', u'favorite_numbers': [3, 9, 15, 20]}
+{u'favorite_color': u'red', u'name': u'Ben', u'favorite_numbers': []}
+
+To read name and favorite_color fields only, specify the following reader schema:
+
+$ cat examples/src/main/resources/user.avsc
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+     {"name": "name", "type": "string"},
+     {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
+
+$ ./bin/spark-submit --driver-class-path /path/to/example/jar \
+> ./examples/src/main/python/avro_inputformat.py \
+> examples/src/main/resources/users.avro examples/src/main/resources/user.avsc
+{u'favorite_color': None, u'name': u'Alyssa'}
+{u'favorite_color': u'red', u'name': u'Ben'}
+"""
+from __future__ import print_function
+
+import sys
+
+from functools import reduce
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2 and len(sys.argv) != 3:
+        print("""
+        Usage: avro_inputformat <data_file> [reader_schema_file]
+
+        Run with example jar:
+        ./bin/spark-submit --driver-class-path /path/to/example/jar \
+        /path/to/examples/avro_inputformat.py <data_file> [reader_schema_file]
+        Assumes you have Avro data stored in <data_file>. A reader schema can optionally be
+        specified in [reader_schema_file].
+        """, file=sys.stderr)
+        sys.exit(-1)
+
+    path = sys.argv[1]
+
+    spark = SparkSession\
+        .builder\
+        .appName("AvroKeyInputFormat")\
+        .getOrCreate()
+
+    sc = spark.sparkContext
+
+    conf = None
+    if len(sys.argv) == 3:
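+        # Concatenate the reader schema file into a single string and pass it to the
+        # Avro input format through the Hadoop job configuration.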
+        schema_rdd = sc.textFile(sys.argv[2], 1).collect()
+        conf = {"avro.schema.input.key": reduce(lambda x, y: x + y, schema_rdd)}
+
+    avro_rdd = sc.newAPIHadoopFile(
+        path,
+        "org.apache.avro.mapreduce.AvroKeyInputFormat",
+        "org.apache.avro.mapred.AvroKey",
+        "org.apache.hadoop.io.NullWritable",
+        keyConverter="org.apache.spark.examples.pythonconverters.AvroWrapperToJavaConverter",
+        conf=conf)
+    output = avro_rdd.map(lambda x: x[0]).collect()
+    for k in output:
+        print(k)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/kmeans.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/kmeans.py
new file mode 100755
index 0000000000000000000000000000000000000000..a42d711fc505fac2d1846827cceea569c9c4a078
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/kmeans.py
@@ -0,0 +1,86 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+The K-means algorithm written from scratch against PySpark. In practice,
+one may prefer to use the KMeans algorithm in ML, as shown in
+examples/src/main/python/ml/kmeans_example.py.
+
+This example requires NumPy (http://www.numpy.org/).
+"""
+from __future__ import print_function
+
+import sys
+
+import numpy as np
+from pyspark.sql import SparkSession
+
+
+def parseVector(line):
+    return np.array([float(x) for x in line.split(' ')])
+
+
+def closestPoint(p, centers):
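+    # Index of the center nearest to p, measured by squared Euclidean distance.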
+    bestIndex = 0
+    closest = float("+inf")
+    for i in range(len(centers)):
+        tempDist = np.sum((p - centers[i]) ** 2)
+        if tempDist < closest:
+            closest = tempDist
+            bestIndex = i
+    return bestIndex
+
+
+if __name__ == "__main__":
+
+    if len(sys.argv) != 4:
+        print("Usage: kmeans <file> <k> <convergeDist>", file=sys.stderr)
+        sys.exit(-1)
+
+    print("""WARN: This is a naive implementation of KMeans Clustering and is given
+       as an example! Please refer to examples/src/main/python/ml/kmeans_example.py for an
+       example on how to use ML's KMeans implementation.""", file=sys.stderr)
+
+    spark = SparkSession\
+        .builder\
+        .appName("PythonKMeans")\
+        .getOrCreate()
+
+    lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
+    data = lines.map(parseVector).cache()
+    K = int(sys.argv[2])
+    convergeDist = float(sys.argv[3])
+
+    kPoints = data.takeSample(False, K, 1)
+    tempDist = 1.0
+
+    while tempDist > convergeDist:
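+        # Assign each point to its nearest center, average the assigned points to get new
+        # centers, and measure how far the centers moved (tempDist) to test convergence.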
+        closest = data.map(
+            lambda p: (closestPoint(p, kPoints), (p, 1)))
+        pointStats = closest.reduceByKey(
+            lambda p1_c1, p2_c2: (p1_c1[0] + p2_c2[0], p1_c1[1] + p2_c2[1]))
+        newPoints = pointStats.map(
+            lambda st: (st[0], st[1][0] / st[1][1])).collect()
+
+        tempDist = sum(np.sum((kPoints[iK] - p) ** 2) for (iK, p) in newPoints)
+
+        for (iK, p) in newPoints:
+            kPoints[iK] = p
+
+    print("Final centers: " + str(kPoints))
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/logistic_regression.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/logistic_regression.py
new file mode 100755
index 0000000000000000000000000000000000000000..bcc4e0f4e8eaedbbd533ba82bba2f2aada6c4a91
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/logistic_regression.py
@@ -0,0 +1,88 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A logistic regression implementation that uses NumPy (http://www.numpy.org)
+to act on batches of input data using efficient matrix operations.
+
+In practice, one may prefer to use the LogisticRegression algorithm in
+ML, as shown in examples/src/main/python/ml/logistic_regression_with_elastic_net.py.
+"""
+from __future__ import print_function
+
+import sys
+
+import numpy as np
+from pyspark.sql import SparkSession
+
+
+D = 10  # Number of dimensions
+
+
+# Read a batch of points from the input file into a NumPy matrix object. We operate on batches to
+# make further computations faster.
+# The data file contains lines of the form <label> <x1> <x2> ... <xD>. We load each block of these
+# into a NumPy array of size numLines * (D + 1) and pull out column 0 vs the others in gradient().
+def readPointBatch(iterator):
+    strs = list(iterator)
+    matrix = np.zeros((len(strs), D + 1))
+    for i, s in enumerate(strs):
+        matrix[i] = np.fromstring(s.replace(',', ' '), dtype=np.float32, sep=' ')
+    return [matrix]
+
+if __name__ == "__main__":
+
+    if len(sys.argv) != 3:
+        print("Usage: logistic_regression <file> <iterations>", file=sys.stderr)
+        sys.exit(-1)
+
+    print("""WARN: This is a naive implementation of Logistic Regression and is
+      given as an example!
+      Please refer to examples/src/main/python/ml/logistic_regression_with_elastic_net.py
+      to see how ML's implementation is used.""", file=sys.stderr)
+
+    spark = SparkSession\
+        .builder\
+        .appName("PythonLR")\
+        .getOrCreate()
+
+    points = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])\
+        .mapPartitions(readPointBatch).cache()
+    iterations = int(sys.argv[2])
+
+    # Initialize w to a random value
+    w = 2 * np.random.ranf(size=D) - 1
+    print("Initial w: " + str(w))
+
+    # Compute logistic regression gradient for a matrix of data points
+    def gradient(matrix, w):
+        Y = matrix[:, 0]    # point labels (first column of input file)
+        X = matrix[:, 1:]   # point coordinates
+        # For each point (x, y), compute gradient function, then sum these up
+        return ((1.0 / (1.0 + np.exp(-Y * X.dot(w))) - 1.0) * Y * X.T).sum(1)
+
+    def add(x, y):
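+        # Accumulate in place to avoid allocating a new array on every reduce step.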
+        x += y
+        return x
+
+    for i in range(iterations):
+        print("On iteration %i" % (i + 1))
+        w -= points.map(lambda m: gradient(m, w)).reduce(add)
+
+    print("Final w: " + str(w))
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/aft_survival_regression.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/aft_survival_regression.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a71f76418ea611df9a3964c560778ef995472c2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/aft_survival_regression.py
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating aft survival regression.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/aft_survival_regression.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.regression import AFTSurvivalRegression
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("AFTSurvivalRegressionExample") \
+        .getOrCreate()
+
+    # $example on$
+    training = spark.createDataFrame([
+        (1.218, 1.0, Vectors.dense(1.560, -0.605)),
+        (2.949, 0.0, Vectors.dense(0.346, 2.158)),
+        (3.627, 0.0, Vectors.dense(1.380, 0.231)),
+        (0.273, 1.0, Vectors.dense(0.520, 1.151)),
+        (4.199, 0.0, Vectors.dense(0.795, -0.226))], ["label", "censor", "features"])
+    quantileProbabilities = [0.3, 0.6]
+    aft = AFTSurvivalRegression(quantileProbabilities=quantileProbabilities,
+                                quantilesCol="quantiles")
+
+    model = aft.fit(training)
+
+    # Print the coefficients, intercept and scale parameter for AFT survival regression
+    print("Coefficients: " + str(model.coefficients))
+    print("Intercept: " + str(model.intercept))
+    print("Scale: " + str(model.scale))
+    model.transform(training).show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/als_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/als_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b7ec9c439f9f852464c668a6e8d4f60ecd1c069
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/als_example.py
@@ -0,0 +1,76 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import sys
+if sys.version >= '3':
+    long = int
+
+from pyspark.sql import SparkSession
+
+# $example on$
+from pyspark.ml.evaluation import RegressionEvaluator
+from pyspark.ml.recommendation import ALS
+from pyspark.sql import Row
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("ALSExample")\
+        .getOrCreate()
+
+    # $example on$
+    lines = spark.read.text("data/mllib/als/sample_movielens_ratings.txt").rdd
+    parts = lines.map(lambda row: row.value.split("::"))
+    ratingsRDD = parts.map(lambda p: Row(userId=int(p[0]), movieId=int(p[1]),
+                                         rating=float(p[2]), timestamp=long(p[3])))
+    ratings = spark.createDataFrame(ratingsRDD)
+    (training, test) = ratings.randomSplit([0.8, 0.2])
+
+    # Build the recommendation model using ALS on the training data
+    # Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
+    als = ALS(maxIter=5, regParam=0.01, userCol="userId", itemCol="movieId", ratingCol="rating",
+              coldStartStrategy="drop")
+    model = als.fit(training)
+
+    # Evaluate the model by computing the RMSE on the test data
+    predictions = model.transform(test)
+    evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
+                                    predictionCol="prediction")
+    rmse = evaluator.evaluate(predictions)
+    print("Root-mean-square error = " + str(rmse))
+
+    # Generate top 10 movie recommendations for each user
+    userRecs = model.recommendForAllUsers(10)
+    # Generate top 10 user recommendations for each movie
+    movieRecs = model.recommendForAllItems(10)
+
+    # Generate top 10 movie recommendations for a specified set of users
+    users = ratings.select(als.getUserCol()).distinct().limit(3)
+    userSubsetRecs = model.recommendForUserSubset(users, 10)
+    # Generate top 10 user recommendations for a specified set of movies
+    movies = ratings.select(als.getItemCol()).distinct().limit(3)
+    movieSubSetRecs = model.recommendForItemSubset(movies, 10)
+    # $example off$
+    userRecs.show()
+    movieRecs.show()
+    userSubsetRecs.show()
+    movieSubSetRecs.show()
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/binarizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/binarizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..669bb2aeabecdf2eef95d700d3c2f717de69ebab
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/binarizer_example.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.feature import Binarizer
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("BinarizerExample")\
+        .getOrCreate()
+
+    # $example on$
+    continuousDataFrame = spark.createDataFrame([
+        (0, 0.1),
+        (1, 0.8),
+        (2, 0.2)
+    ], ["id", "feature"])
+
+    binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature")
+
+    binarizedDataFrame = binarizer.transform(continuousDataFrame)
+
+    print("Binarizer output with Threshold = %f" % binarizer.getThreshold())
+    binarizedDataFrame.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bisecting_k_means_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bisecting_k_means_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..7842d2009e238d6cbc084ff1718b3ebd6114b51b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bisecting_k_means_example.py
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating bisecting k-means clustering.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/bisecting_k_means_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.clustering import BisectingKMeans
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("BisectingKMeansExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Loads data.
+    dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
+
+    # Trains a bisecting k-means model.
+    bkm = BisectingKMeans().setK(2).setSeed(1)
+    model = bkm.fit(dataset)
+
+    # Evaluate clustering.
+    cost = model.computeCost(dataset)
+    print("Within Set Sum of Squared Errors = " + str(cost))
+
+    # Shows the result.
+    print("Cluster Centers: ")
+    centers = model.clusterCenters()
+    for center in centers:
+        print(center)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bucketed_random_projection_lsh_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bucketed_random_projection_lsh_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..610176ea596ca9b93aff1b8ac6dfc5fc15e2aa38
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bucketed_random_projection_lsh_example.py
@@ -0,0 +1,79 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating BucketedRandomProjectionLSH.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/bucketed_random_projection_lsh_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import BucketedRandomProjectionLSH
+from pyspark.ml.linalg import Vectors
+from pyspark.sql.functions import col
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("BucketedRandomProjectionLSHExample") \
+        .getOrCreate()
+
+    # $example on$
+    dataA = [(0, Vectors.dense([1.0, 1.0]),),
+             (1, Vectors.dense([1.0, -1.0]),),
+             (2, Vectors.dense([-1.0, -1.0]),),
+             (3, Vectors.dense([-1.0, 1.0]),)]
+    dfA = spark.createDataFrame(dataA, ["id", "features"])
+
+    dataB = [(4, Vectors.dense([1.0, 0.0]),),
+             (5, Vectors.dense([-1.0, 0.0]),),
+             (6, Vectors.dense([0.0, 1.0]),),
+             (7, Vectors.dense([0.0, -1.0]),)]
+    dfB = spark.createDataFrame(dataB, ["id", "features"])
+
+    key = Vectors.dense([1.0, 0.0])
+
+    brp = BucketedRandomProjectionLSH(inputCol="features", outputCol="hashes", bucketLength=2.0,
+                                      numHashTables=3)
+    model = brp.fit(dfA)
+
+    # Feature Transformation
+    print("The hashed dataset where hashed values are stored in the column 'hashes':")
+    model.transform(dfA).show()
+
+    # Compute the locality sensitive hashes for the input rows, then perform approximate
+    # similarity join.
+    # We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    # `model.approxSimilarityJoin(transformedA, transformedB, 1.5)`
+    print("Approximately joining dfA and dfB on Euclidean distance smaller than 1.5:")
+    model.approxSimilarityJoin(dfA, dfB, 1.5, distCol="EuclideanDistance")\
+        .select(col("datasetA.id").alias("idA"),
+                col("datasetB.id").alias("idB"),
+                col("EuclideanDistance")).show()
+
+    # Compute the locality sensitive hashes for the input rows, then perform approximate nearest
+    # neighbor search.
+    # We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    # `model.approxNearestNeighbors(transformedA, key, 2)`
+    print("Approximately searching dfA for 2 nearest neighbors of the key:")
+    model.approxNearestNeighbors(dfA, key, 2).show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bucketizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bucketizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..742f35093b9d20569fa4d2b05cf452eb849b86e9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/bucketizer_example.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.feature import Bucketizer
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("BucketizerExample")\
+        .getOrCreate()
+
+    # $example on$
+    splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
+
+    data = [(-999.9,), (-0.5,), (-0.3,), (0.0,), (0.2,), (999.9,)]
+    dataFrame = spark.createDataFrame(data, ["features"])
+
+    bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures")
+
+    # Transform original data into its bucket index.
+    bucketedData = bucketizer.transform(dataFrame)
+
+    print("Bucketizer output with %d buckets" % (len(bucketizer.getSplits())-1))
+    bucketedData.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/chi_square_test_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/chi_square_test_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..2af7e683cdb7240e5226711512bbddf71dff0368
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/chi_square_test_example.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example for Chi-square hypothesis testing.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/chi_square_test_example.py
+"""
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.linalg import Vectors
+from pyspark.ml.stat import ChiSquareTest
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("ChiSquareTestExample") \
+        .getOrCreate()
+
+    # $example on$
+    data = [(0.0, Vectors.dense(0.5, 10.0)),
+            (0.0, Vectors.dense(1.5, 20.0)),
+            (1.0, Vectors.dense(1.5, 30.0)),
+            (0.0, Vectors.dense(3.5, 30.0)),
+            (0.0, Vectors.dense(3.5, 40.0)),
+            (1.0, Vectors.dense(3.5, 40.0))]
+    df = spark.createDataFrame(data, ["label", "features"])
+
+    r = ChiSquareTest.test(df, "features", "label").head()
+    print("pValues: " + str(r.pValues))
+    print("degreesOfFreedom: " + str(r.degreesOfFreedom))
+    print("statistics: " + str(r.statistics))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/chisq_selector_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/chisq_selector_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..028a9ea9d67b1fb0f889efa812b778308067605d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/chisq_selector_example.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.feature import ChiSqSelector
+from pyspark.ml.linalg import Vectors
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("ChiSqSelectorExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame([
+        (7, Vectors.dense([0.0, 0.0, 18.0, 1.0]), 1.0,),
+        (8, Vectors.dense([0.0, 1.0, 12.0, 0.0]), 0.0,),
+        (9, Vectors.dense([1.0, 0.0, 15.0, 0.1]), 0.0,)], ["id", "features", "clicked"])
+
+    selector = ChiSqSelector(numTopFeatures=1, featuresCol="features",
+                             outputCol="selectedFeatures", labelCol="clicked")
+
+    result = selector.fit(df).transform(df)
+
+    print("ChiSqSelector output with top %d features selected" % selector.getNumTopFeatures())
+    result.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/correlation_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/correlation_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f4e402ac1a51481b8e6b76845575de6aad83d4d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/correlation_example.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example for computing correlation matrix.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/correlation_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.linalg import Vectors
+from pyspark.ml.stat import Correlation
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("CorrelationExample") \
+        .getOrCreate()
+
+    # $example on$
+    data = [(Vectors.sparse(4, [(0, 1.0), (3, -2.0)]),),
+            (Vectors.dense([4.0, 5.0, 0.0, 3.0]),),
+            (Vectors.dense([6.0, 7.0, 0.0, 8.0]),),
+            (Vectors.sparse(4, [(0, 9.0), (3, 1.0)]),)]
+    df = spark.createDataFrame(data, ["features"])
+
+    r1 = Correlation.corr(df, "features").head()
+    print("Pearson correlation matrix:\n" + str(r1[0]))
+
+    r2 = Correlation.corr(df, "features", "spearman").head()
+    print("Spearman correlation matrix:\n" + str(r2[0]))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/count_vectorizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/count_vectorizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2e41db77d8987a6d351014479de30578b166bfb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/count_vectorizer_example.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.feature import CountVectorizer
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("CountVectorizerExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Input data: each row is a bag of words with an ID.
+    df = spark.createDataFrame([
+        (0, "a b c".split(" ")),
+        (1, "a b b c a".split(" "))
+    ], ["id", "words"])
+
+    # fit a CountVectorizerModel from the corpus.
+    cv = CountVectorizer(inputCol="words", outputCol="features", vocabSize=3, minDF=2.0)
+
+    model = cv.fit(df)
+
+    result = model.transform(df)
+    result.show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/cross_validator.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/cross_validator.py
new file mode 100644
index 0000000000000000000000000000000000000000..6256d11504afb876af024f0b21d3e25d8a463ffd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/cross_validator.py
@@ -0,0 +1,99 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A simple example demonstrating model selection using CrossValidator.
+This example also demonstrates how Pipelines are Estimators.
+Run with:
+
+  bin/spark-submit examples/src/main/python/ml/cross_validator.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.classification import LogisticRegression
+from pyspark.ml.evaluation import BinaryClassificationEvaluator
+from pyspark.ml.feature import HashingTF, Tokenizer
+from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("CrossValidatorExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Prepare training documents, which are labeled.
+    training = spark.createDataFrame([
+        (0, "a b c d e spark", 1.0),
+        (1, "b d", 0.0),
+        (2, "spark f g h", 1.0),
+        (3, "hadoop mapreduce", 0.0),
+        (4, "b spark who", 1.0),
+        (5, "g d a y", 0.0),
+        (6, "spark fly", 1.0),
+        (7, "was mapreduce", 0.0),
+        (8, "e spark program", 1.0),
+        (9, "a e c l", 0.0),
+        (10, "spark compile", 1.0),
+        (11, "hadoop software", 0.0)
+    ], ["id", "text", "label"])
+
+    # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
+    tokenizer = Tokenizer(inputCol="text", outputCol="words")
+    hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
+    lr = LogisticRegression(maxIter=10)
+    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
+
+    # We now treat the Pipeline as an Estimator, wrapping it in a CrossValidator instance.
+    # This will allow us to jointly choose parameters for all Pipeline stages.
+    # A CrossValidator requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.
+    # We use a ParamGridBuilder to construct a grid of parameters to search over.
+    # With 3 values for hashingTF.numFeatures and 2 values for lr.regParam,
+    # this grid will have 3 x 2 = 6 parameter settings for CrossValidator to choose from.
+    paramGrid = ParamGridBuilder() \
+        .addGrid(hashingTF.numFeatures, [10, 100, 1000]) \
+        .addGrid(lr.regParam, [0.1, 0.01]) \
+        .build()
+
+    crossval = CrossValidator(estimator=pipeline,
+                              estimatorParamMaps=paramGrid,
+                              evaluator=BinaryClassificationEvaluator(),
+                              numFolds=2)  # use 3+ folds in practice
+
+    # Run cross-validation, and choose the best set of parameters.
+    cvModel = crossval.fit(training)
+
+    # Prepare test documents, which are unlabeled.
+    test = spark.createDataFrame([
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "mapreduce spark"),
+        (7, "apache hadoop")
+    ], ["id", "text"])
+
+    # Make predictions on test documents. cvModel uses the best model found (lrModel).
+    prediction = cvModel.transform(test)
+    selected = prediction.select("id", "text", "probability", "prediction")
+    for row in selected.collect():
+        print(row)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/dataframe_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/dataframe_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..cabc3de68f2f491ab5967a1980e75d55a3edd230
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/dataframe_example.py
@@ -0,0 +1,79 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example of how to use DataFrame for ML. Run with::
+    bin/spark-submit examples/src/main/python/ml/dataframe_example.py <input_path>
+"""
+from __future__ import print_function
+
+import os
+import sys
+import tempfile
+import shutil
+
+from pyspark.sql import SparkSession
+from pyspark.mllib.stat import Statistics
+from pyspark.mllib.util import MLUtils
+
+if __name__ == "__main__":
+    if len(sys.argv) > 2:
+        print("Usage: dataframe_example.py <libsvm file>", file=sys.stderr)
+        sys.exit(-1)
+    elif len(sys.argv) == 2:
+        input_path = sys.argv[1]
+    else:
+        input_path = "data/mllib/sample_libsvm_data.txt"
+
+    spark = SparkSession \
+        .builder \
+        .appName("DataFrameExample") \
+        .getOrCreate()
+
+    # Load an input file
+    print("Loading LIBSVM file with UDT from " + input_path + ".")
+    df = spark.read.format("libsvm").load(input_path).cache()
+    print("Schema from LIBSVM:")
+    df.printSchema()
+    print("Loaded training data as a DataFrame with " +
+          str(df.count()) + " records.")
+
+    # Show statistical summary of labels.
+    labelSummary = df.describe("label")
+    labelSummary.show()
+
+    # Convert features column to an RDD of vectors.
+    features = MLUtils.convertVectorColumnsFromML(df, "features") \
+        .select("features").rdd.map(lambda r: r.features)
+    summary = Statistics.colStats(features)
+    print("Selected features column with average values:\n" +
+          str(summary.mean()))
+
+    # Save the records in a parquet file.
+    tempdir = tempfile.NamedTemporaryFile(delete=False).name
+    os.unlink(tempdir)
+    print("Saving to " + tempdir + " as Parquet file.")
+    df.write.parquet(tempdir)
+
+    # Load the records back.
+    print("Loading Parquet file with UDT from " + tempdir)
+    newDF = spark.read.parquet(tempdir)
+    print("Schema from Parquet:")
+    newDF.printSchema()
+    shutil.rmtree(tempdir)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/dct_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/dct_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0457f8d0f43b0256c1eb940f581946216d6bc82
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/dct_example.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import DCT
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("DCTExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame([
+        (Vectors.dense([0.0, 1.0, -2.0, 3.0]),),
+        (Vectors.dense([-1.0, 2.0, 4.0, -7.0]),),
+        (Vectors.dense([14.0, -2.0, -5.0, 1.0]),)], ["features"])
+
+    dct = DCT(inverse=False, inputCol="features", outputCol="featuresDCT")
+
+    dctDf = dct.transform(df)
+
+    dctDf.select("featuresDCT").show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/decision_tree_classification_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/decision_tree_classification_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6e2977de00823710c8c38c5cdff577bea11af1c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/decision_tree_classification_example.py
@@ -0,0 +1,78 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Decision Tree Classification Example.
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.classification import DecisionTreeClassifier
+from pyspark.ml.feature import StringIndexer, VectorIndexer
+from pyspark.ml.evaluation import MulticlassClassificationEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("DecisionTreeClassificationExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load the data stored in LIBSVM format as a DataFrame.
+    data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    # Index labels, adding metadata to the label column.
+    # Fit on whole dataset to include all labels in index.
+    labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
+    # Automatically identify categorical features, and index them.
+    # We specify maxCategories so features with > 4 distinct values are treated as continuous.
+    featureIndexer =\
+        VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
+
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a DecisionTree model.
+    dt = DecisionTreeClassifier(labelCol="indexedLabel", featuresCol="indexedFeatures")
+
+    # Chain indexers and tree in a Pipeline
+    pipeline = Pipeline(stages=[labelIndexer, featureIndexer, dt])
+
+    # Train model.  This also runs the indexers.
+    model = pipeline.fit(trainingData)
+
+    # Make predictions.
+    predictions = model.transform(testData)
+
+    # Select example rows to display.
+    predictions.select("prediction", "indexedLabel", "features").show(5)
+
+    # Select (prediction, true label) and compute test error
+    evaluator = MulticlassClassificationEvaluator(
+        labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy")
+    accuracy = evaluator.evaluate(predictions)
+    print("Test Error = %g " % (1.0 - accuracy))
+
+    treeModel = model.stages[2]
+    # summary only
+    print(treeModel)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/decision_tree_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/decision_tree_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..58d7ad921d8e06351fa615faaab79a832ef1f565
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/decision_tree_regression_example.py
@@ -0,0 +1,75 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Decision Tree Regression Example.
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.regression import DecisionTreeRegressor
+from pyspark.ml.feature import VectorIndexer
+from pyspark.ml.evaluation import RegressionEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("DecisionTreeRegressionExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load the data stored in LIBSVM format as a DataFrame.
+    data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    # Automatically identify categorical features, and index them.
+    # We specify maxCategories so features with > 4 distinct values are treated as continuous.
+    featureIndexer =\
+        VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
+
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a DecisionTree model.
+    dt = DecisionTreeRegressor(featuresCol="indexedFeatures")
+
+    # Chain indexer and tree in a Pipeline
+    pipeline = Pipeline(stages=[featureIndexer, dt])
+
+    # Train model.  This also runs the indexer.
+    model = pipeline.fit(trainingData)
+
+    # Make predictions.
+    predictions = model.transform(testData)
+
+    # Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5)
+
+    # Select (prediction, true label) and compute test error
+    evaluator = RegressionEvaluator(
+        labelCol="label", predictionCol="prediction", metricName="rmse")
+    rmse = evaluator.evaluate(predictions)
+    print("Root Mean Squared Error (RMSE) on test data = %g" % rmse)
+
+    treeModel = model.stages[1]
+    # summary only
+    print(treeModel)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/elementwise_product_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/elementwise_product_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..590053998bccc172d7cd7db037a09ba400b2716f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/elementwise_product_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import ElementwiseProduct
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("ElementwiseProductExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Create some dense vector data; the same transformer also works for sparse vectors.
+    data = [(Vectors.dense([1.0, 2.0, 3.0]),), (Vectors.dense([4.0, 5.0, 6.0]),)]
+    df = spark.createDataFrame(data, ["vector"])
+    transformer = ElementwiseProduct(scalingVec=Vectors.dense([0.0, 1.0, 2.0]),
+                                     inputCol="vector", outputCol="transformedVector")
+    # Batch transform the vectors to create a new column:
+    transformer.transform(df).show()
+    # $example off$
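+
+    # Not part of the original example: a minimal sketch of the sparse-vector case
+    # mentioned above, using a made-up single-row DataFrame.
+    sparse_df = spark.createDataFrame(
+        [(Vectors.sparse(3, [0, 2], [1.0, 3.0]),)], ["vector"])
+    transformer.transform(sparse_df).show()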
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/estimator_transformer_param_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/estimator_transformer_param_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb210514353934e1004b2d3db4bcc48a8b2c18d1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/estimator_transformer_param_example.py
@@ -0,0 +1,93 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Estimator Transformer Param Example.
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.linalg import Vectors
+from pyspark.ml.classification import LogisticRegression
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("EstimatorTransformerParamExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Prepare training data from a list of (label, features) tuples.
+    training = spark.createDataFrame([
+        (1.0, Vectors.dense([0.0, 1.1, 0.1])),
+        (0.0, Vectors.dense([2.0, 1.0, -1.0])),
+        (0.0, Vectors.dense([2.0, 1.3, 1.0])),
+        (1.0, Vectors.dense([0.0, 1.2, -0.5]))], ["label", "features"])
+
+    # Create a LogisticRegression instance. This instance is an Estimator.
+    lr = LogisticRegression(maxIter=10, regParam=0.01)
+    # Print out the parameters, documentation, and any default values.
+    print("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
+
+    # Learn a LogisticRegression model. This uses the parameters stored in lr.
+    model1 = lr.fit(training)
+
+    # Since model1 is a Model (i.e., a transformer produced by an Estimator),
+    # we can view the parameters it used during fit().
+    # This prints the parameter (name: value) pairs, where names are unique IDs for this
+    # LogisticRegression instance.
+    print("Model 1 was fit using parameters: ")
+    print(model1.extractParamMap())
+
+    # We may alternatively specify parameters using a Python dictionary as a paramMap
+    paramMap = {lr.maxIter: 20}
+    paramMap[lr.maxIter] = 30  # Specify 1 Param, overwriting the original maxIter.
+    paramMap.update({lr.regParam: 0.1, lr.threshold: 0.55})  # Specify multiple Params.
+
+    # You can combine paramMaps, which are Python dictionaries.
+    paramMap2 = {lr.probabilityCol: "myProbability"}  # Change output column name
+    paramMapCombined = paramMap.copy()
+    paramMapCombined.update(paramMap2)
+
+    # Now learn a new model using the paramMapCombined parameters.
+    # paramMapCombined overrides all parameters set earlier via lr.set* methods.
+    model2 = lr.fit(training, paramMapCombined)
+    print("Model 2 was fit using parameters: ")
+    print(model2.extractParamMap())
+
+    # Prepare test data
+    test = spark.createDataFrame([
+        (1.0, Vectors.dense([-1.0, 1.5, 1.3])),
+        (0.0, Vectors.dense([3.0, 2.0, -0.1])),
+        (1.0, Vectors.dense([0.0, 2.2, -1.5]))], ["label", "features"])
+
+    # Make predictions on test data using the Transformer.transform() method.
+    # LogisticRegression.transform will only use the 'features' column.
+    # Note that model2.transform() outputs a "myProbability" column instead of the usual
+    # 'probability' column since we renamed the lr.probabilityCol parameter previously.
+    prediction = model2.transform(test)
+    result = prediction.select("features", "label", "myProbability", "prediction") \
+        .collect()
+
+    for row in result:
+        print("features=%s, label=%s -> prob=%s, prediction=%s"
+              % (row.features, row.label, row.myProbability, row.prediction))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/feature_hasher_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/feature_hasher_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cf9ecc39640019f134ca957594d4f8eee5eb5dc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/feature_hasher_example.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.feature import FeatureHasher
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("FeatureHasherExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataset = spark.createDataFrame([
+        (2.2, True, "1", "foo"),
+        (3.3, False, "2", "bar"),
+        (4.4, False, "3", "baz"),
+        (5.5, False, "4", "foo")
+    ], ["real", "bool", "stringNum", "string"])
+
+    hasher = FeatureHasher(inputCols=["real", "bool", "stringNum", "string"],
+                           outputCol="features")
+
+    featurized = hasher.transform(dataset)
+    featurized.show(truncate=False)
+    # $example off$
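+
+    # Not part of the original example: by default FeatureHasher hashes into 2^18 = 262,144
+    # dimensions; a smaller, easier-to-read space can be requested via numFeatures.
+    small_hasher = FeatureHasher(inputCols=["real", "bool", "stringNum", "string"],
+                                 outputCol="features", numFeatures=256)
+    small_hasher.transform(dataset).show(truncate=False)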
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/fpgrowth_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/fpgrowth_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..39092e616d42990e40a78401e644919acf2f310a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/fpgrowth_example.py
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating FPGrowth.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/fpgrowth_example.py
+"""
+# $example on$
+from pyspark.ml.fpm import FPGrowth
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("FPGrowthExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame([
+        (0, [1, 2, 5]),
+        (1, [1, 2, 3, 5]),
+        (2, [1, 2])
+    ], ["id", "items"])
+
+    fpGrowth = FPGrowth(itemsCol="items", minSupport=0.5, minConfidence=0.6)
+    model = fpGrowth.fit(df)
+
+    # Display frequent itemsets.
+    model.freqItemsets.show()
+
+    # Display generated association rules.
+    model.associationRules.show()
+
+    # transform examines the input items against all the association rules and summarizes the
+    # consequents as the prediction.
+    model.transform(df).show()
+    # $example off$
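+
+    # Not part of the original example: a small sketch applying the fitted model to a new,
+    # made-up basket, showing how consequents of matching rules become the prediction.
+    new_baskets = spark.createDataFrame([(3, [1, 3])], ["id", "items"])
+    model.transform(new_baskets).show()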
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gaussian_mixture_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gaussian_mixture_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..4938a904189f9d842ac952351ec5e0868c2e3428
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gaussian_mixture_example.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A simple example demonstrating Gaussian Mixture Model (GMM).
+Run with:
+  bin/spark-submit examples/src/main/python/ml/gaussian_mixture_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.clustering import GaussianMixture
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("GaussianMixtureExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Loads data.
+    dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
+
+    gmm = GaussianMixture().setK(2).setSeed(538009335)
+    model = gmm.fit(dataset)
+
+    print("Gaussians shown as a DataFrame: ")
+    model.gaussiansDF.show(truncate=False)
+    # $example off$
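+
+    # Not part of the original example: the mixture weights of the fitted components can
+    # also be inspected directly on the model.
+    print("Mixture weights: " + str(model.weights))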
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/generalized_linear_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/generalized_linear_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..a52f4650c1c6f959ff646e9b239f982c85629132
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/generalized_linear_regression_example.py
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating generalized linear regression.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/generalized_linear_regression_example.py
+"""
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.regression import GeneralizedLinearRegression
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("GeneralizedLinearRegressionExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load training data
+    dataset = spark.read.format("libsvm")\
+        .load("data/mllib/sample_linear_regression_data.txt")
+
+    glr = GeneralizedLinearRegression(family="gaussian", link="identity", maxIter=10, regParam=0.3)
+
+    # Fit the model
+    model = glr.fit(dataset)
+
+    # Print the coefficients and intercept for generalized linear regression model
+    print("Coefficients: " + str(model.coefficients))
+    print("Intercept: " + str(model.intercept))
+
+    # Summarize the model over the training set and print out some metrics
+    summary = model.summary
+    print("Coefficient Standard Errors: " + str(summary.coefficientStandardErrors))
+    print("T Values: " + str(summary.tValues))
+    print("P Values: " + str(summary.pValues))
+    print("Dispersion: " + str(summary.dispersion))
+    print("Null Deviance: " + str(summary.nullDeviance))
+    print("Residual Degree Of Freedom Null: " + str(summary.residualDegreeOfFreedomNull))
+    print("Deviance: " + str(summary.deviance))
+    print("Residual Degree Of Freedom: " + str(summary.residualDegreeOfFreedom))
+    print("AIC: " + str(summary.aic))
+    print("Deviance Residuals: ")
+    summary.residuals().show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2042fd7b7b0718decc2b014ad429090ba4f9b14
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gradient_boosted_tree_classifier_example.py
@@ -0,0 +1,77 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Gradient Boosted Tree Classifier Example.
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.classification import GBTClassifier
+from pyspark.ml.feature import StringIndexer, VectorIndexer
+from pyspark.ml.evaluation import MulticlassClassificationEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("GradientBoostedTreeClassifierExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load and parse the data file, converting it to a DataFrame.
+    data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    # Index labels, adding metadata to the label column.
+    # Fit on the whole dataset to include all labels in the index.
+    labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
+    # Automatically identify categorical features, and index them.
+    # Set maxCategories so features with > 4 distinct values are treated as continuous.
+    featureIndexer =\
+        VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
+
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a GBT model.
+    gbt = GBTClassifier(labelCol="indexedLabel", featuresCol="indexedFeatures", maxIter=10)
+
+    # Chain indexers and GBT in a Pipeline
+    pipeline = Pipeline(stages=[labelIndexer, featureIndexer, gbt])
+
+    # Train model.  This also runs the indexers.
+    model = pipeline.fit(trainingData)
+
+    # Make predictions.
+    predictions = model.transform(testData)
+
+    # Select example rows to display.
+    predictions.select("prediction", "indexedLabel", "features").show(5)
+
+    # Select (prediction, true label) and compute test error
+    evaluator = MulticlassClassificationEvaluator(
+        labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy")
+    accuracy = evaluator.evaluate(predictions)
+    print("Test Error = %g" % (1.0 - accuracy))
+
+    gbtModel = model.stages[2]
+    print(gbtModel)  # summary only
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc96c973e4b2376eef629658a889f1b09f551b53
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/gradient_boosted_tree_regressor_example.py
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Gradient Boosted Tree Regressor Example.
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.regression import GBTRegressor
+from pyspark.ml.feature import VectorIndexer
+from pyspark.ml.evaluation import RegressionEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("GradientBoostedTreeRegressorExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load and parse the data file, converting it to a DataFrame.
+    data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    # Automatically identify categorical features, and index them.
+    # Set maxCategories so features with > 4 distinct values are treated as continuous.
+    featureIndexer =\
+        VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
+
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a GBT model.
+    gbt = GBTRegressor(featuresCol="indexedFeatures", maxIter=10)
+
+    # Chain indexer and GBT in a Pipeline
+    pipeline = Pipeline(stages=[featureIndexer, gbt])
+
+    # Train model.  This also runs the indexer.
+    model = pipeline.fit(trainingData)
+
+    # Make predictions.
+    predictions = model.transform(testData)
+
+    # Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5)
+
+    # Select (prediction, true label) and compute test error
+    evaluator = RegressionEvaluator(
+        labelCol="label", predictionCol="prediction", metricName="rmse")
+    rmse = evaluator.evaluate(predictions)
+    print("Root Mean Squared Error (RMSE) on test data = %g" % rmse)
+
+    gbtModel = model.stages[1]
+    print(gbtModel)  # summary only
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/imputer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/imputer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ba01477636181e410b224f253d31d9962736434
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/imputer_example.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating Imputer.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/imputer_example.py
+"""
+# $example on$
+from pyspark.ml.feature import Imputer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("ImputerExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame([
+        (1.0, float("nan")),
+        (2.0, float("nan")),
+        (float("nan"), 3.0),
+        (4.0, 4.0),
+        (5.0, 5.0)
+    ], ["a", "b"])
+
+    imputer = Imputer(inputCols=["a", "b"], outputCols=["out_a", "out_b"])
+    model = imputer.fit(df)
+
+    model.transform(df).show()
+    # $example off$
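+
+    # Not part of the original example: a minimal sketch of the "median" strategy on the
+    # same DataFrame (the default strategy is "mean").
+    median_imputer = Imputer(inputCols=["a", "b"], outputCols=["out_a", "out_b"],
+                             strategy="median")
+    median_imputer.fit(df).transform(df).show()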
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/index_to_string_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/index_to_string_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..33d104e8e3f414f99a296536caf0b3b6a690e646
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/index_to_string_example.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import IndexToString, StringIndexer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("IndexToStringExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame(
+        [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
+        ["id", "category"])
+
+    indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
+    model = indexer.fit(df)
+    indexed = model.transform(df)
+
+    print("Transformed string column '%s' to indexed column '%s'"
+          % (indexer.getInputCol(), indexer.getOutputCol()))
+    indexed.show()
+
+    print("StringIndexer will store labels in output column metadata\n")
+
+    converter = IndexToString(inputCol="categoryIndex", outputCol="originalCategory")
+    converted = converter.transform(indexed)
+
+    print("Transformed indexed column '%s' back to original string column '%s' using "
+          "labels in metadata" % (converter.getInputCol(), converter.getOutputCol()))
+    converted.select("id", "categoryIndex", "originalCategory").show()
+    # $example off$
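+
+    # Not part of the original example: the label metadata mentioned above can be read back
+    # from the schema of the indexed column.
+    print(indexed.schema["categoryIndex"].metadata)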
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/isotonic_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/isotonic_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..89cba9dfc7e8f6463f481ce7ff1f5da96b82bdd1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/isotonic_regression_example.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Isotonic Regression Example.
+
+Run with:
+  bin/spark-submit examples/src/main/python/ml/isotonic_regression_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.regression import IsotonicRegression
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("IsotonicRegressionExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Loads data.
+    dataset = spark.read.format("libsvm")\
+        .load("data/mllib/sample_isotonic_regression_libsvm_data.txt")
+
+    # Trains an isotonic regression model.
+    model = IsotonicRegression().fit(dataset)
+    print("Boundaries in increasing order: %s\n" % str(model.boundaries))
+    print("Predictions associated with the boundaries: %s\n" % str(model.predictions))
+
+    # Makes predictions.
+    model.transform(dataset).show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/kmeans_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/kmeans_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..80a878af679f4f17ff98bf4fadeeda6245b9f28c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/kmeans_example.py
@@ -0,0 +1,64 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating k-means clustering.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/kmeans_example.py
+
+This example requires NumPy (http://www.numpy.org/).
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.clustering import KMeans
+from pyspark.ml.evaluation import ClusteringEvaluator
+# $example off$
+
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("KMeansExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Loads data.
+    dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
+
+    # Trains a k-means model.
+    kmeans = KMeans().setK(2).setSeed(1)
+    model = kmeans.fit(dataset)
+
+    # Make predictions
+    predictions = model.transform(dataset)
+
+    # Evaluate clustering by computing Silhouette score
+    evaluator = ClusteringEvaluator()
+
+    silhouette = evaluator.evaluate(predictions)
+    print("Silhouette with squared euclidean distance = " + str(silhouette))
+
+    # Shows the result.
+    centers = model.clusterCenters()
+    print("Cluster Centers: ")
+    for center in centers:
+        print(center)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/lda_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/lda_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..97d1a042d1479786ad6a107fe88d84a65b40b7cd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/lda_example.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating LDA.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/lda_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.clustering import LDA
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("LDAExample") \
+        .getOrCreate()
+
+    # $example on$
+    # Loads data.
+    dataset = spark.read.format("libsvm").load("data/mllib/sample_lda_libsvm_data.txt")
+
+    # Trains an LDA model.
+    lda = LDA(k=10, maxIter=10)
+    model = lda.fit(dataset)
+
+    ll = model.logLikelihood(dataset)
+    lp = model.logPerplexity(dataset)
+    print("The lower bound on the log likelihood of the entire corpus: " + str(ll))
+    print("The upper bound on perplexity: " + str(lp))
+
+    # Describe topics.
+    topics = model.describeTopics(3)
+    print("The topics described by their top-weighted terms:")
+    topics.show(truncate=False)
+
+    # Shows the result
+    transformed = model.transform(dataset)
+    transformed.show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/linear_regression_with_elastic_net.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/linear_regression_with_elastic_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..6639e9160ab7183b27dbe13267967d6531cdf048
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/linear_regression_with_elastic_net.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.regression import LinearRegression
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("LinearRegressionWithElasticNet")\
+        .getOrCreate()
+
+    # $example on$
+    # Load training data
+    training = spark.read.format("libsvm")\
+        .load("data/mllib/sample_linear_regression_data.txt")
+
+    lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
+
+    # Fit the model
+    lrModel = lr.fit(training)
+
+    # Print the coefficients and intercept for linear regression
+    print("Coefficients: %s" % str(lrModel.coefficients))
+    print("Intercept: %s" % str(lrModel.intercept))
+
+    # Summarize the model over the training set and print out some metrics
+    trainingSummary = lrModel.summary
+    print("numIterations: %d" % trainingSummary.totalIterations)
+    print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
+    trainingSummary.residuals.show()
+    print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
+    print("r2: %f" % trainingSummary.r2)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/linearsvc.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/linearsvc.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b79abbf96f887895f4fb6702beb072389239fc1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/linearsvc.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import LinearSVC
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("linearSVC Example")\
+        .getOrCreate()
+
+    # $example on$
+    # Load training data
+    training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    lsvc = LinearSVC(maxIter=10, regParam=0.1)
+
+    # Fit the model
+    lsvcModel = lsvc.fit(training)
+
+    # Print the coefficients and intercept for linear SVC
+    print("Coefficients: " + str(lsvcModel.coefficients))
+    print("Intercept: " + str(lsvcModel.intercept))
+
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/logistic_regression_summary_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/logistic_regression_summary_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..2274ff707b2a341363bf7dbdf84a4c50435d5ef7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/logistic_regression_summary_example.py
@@ -0,0 +1,67 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating Logistic Regression Summary.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/logistic_regression_summary_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import LogisticRegression
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("LogisticRegressionSummary") \
+        .getOrCreate()
+
+    # Load training data
+    training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
+
+    # Fit the model
+    lrModel = lr.fit(training)
+
+    # $example on$
+    # Extract the summary from the returned LogisticRegressionModel instance trained
+    # in the earlier example
+    trainingSummary = lrModel.summary
+
+    # Obtain the objective per iteration
+    objectiveHistory = trainingSummary.objectiveHistory
+    print("objectiveHistory:")
+    for objective in objectiveHistory:
+        print(objective)
+
+    # Obtain the receiver-operating characteristic as a DataFrame, and the areaUnderROC.
+    trainingSummary.roc.show()
+    print("areaUnderROC: " + str(trainingSummary.areaUnderROC))
+
+    # Set the model threshold to maximize F-Measure
+    fMeasure = trainingSummary.fMeasureByThreshold
+    maxFMeasure = fMeasure.groupBy().max('F-Measure').select('max(F-Measure)').head()
+    bestThreshold = fMeasure.where(fMeasure['F-Measure'] == maxFMeasure['max(F-Measure)']) \
+        .select('threshold').head()['threshold']
+    lr.setThreshold(bestThreshold)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/logistic_regression_with_elastic_net.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/logistic_regression_with_elastic_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..d095fbd3734080ab9e400dbea125c24c5a687719
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/logistic_regression_with_elastic_net.py
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import LogisticRegression
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("LogisticRegressionWithElasticNet")\
+        .getOrCreate()
+
+    # $example on$
+    # Load training data
+    training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
+
+    # Fit the model
+    lrModel = lr.fit(training)
+
+    # Print the coefficients and intercept for logistic regression
+    print("Coefficients: " + str(lrModel.coefficients))
+    print("Intercept: " + str(lrModel.intercept))
+
+    # We can also use the multinomial family for binary classification
+    mlr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8, family="multinomial")
+
+    # Fit the model
+    mlrModel = mlr.fit(training)
+
+    # Print the coefficients and intercepts for logistic regression with multinomial family
+    print("Multinomial coefficients: " + str(mlrModel.coefficientMatrix))
+    print("Multinomial intercepts: " + str(mlrModel.interceptVector))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/max_abs_scaler_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/max_abs_scaler_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..45eda3cdadde3552d9d87ed0ca1a2c3086acf0ac
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/max_abs_scaler_example.py
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import MaxAbsScaler
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("MaxAbsScalerExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataFrame = spark.createDataFrame([
+        (0, Vectors.dense([1.0, 0.1, -8.0]),),
+        (1, Vectors.dense([2.0, 1.0, -4.0]),),
+        (2, Vectors.dense([4.0, 10.0, 8.0]),)
+    ], ["id", "features"])
+
+    scaler = MaxAbsScaler(inputCol="features", outputCol="scaledFeatures")
+
+    # Compute summary statistics and generate MaxAbsScalerModel
+    scalerModel = scaler.fit(dataFrame)
+
+    # Rescale each feature to the range [-1, 1].
+    scaledData = scalerModel.transform(dataFrame)
+
+    scaledData.select("features", "scaledFeatures").show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/min_hash_lsh_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/min_hash_lsh_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..93136e6ae3caeb407f1a0e16ceb1593d8aca8fe3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/min_hash_lsh_example.py
@@ -0,0 +1,79 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating MinHashLSH.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/min_hash_lsh_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import MinHashLSH
+from pyspark.ml.linalg import Vectors
+from pyspark.sql.functions import col
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("MinHashLSHExample") \
+        .getOrCreate()
+
+    # $example on$
+    dataA = [(0, Vectors.sparse(6, [0, 1, 2], [1.0, 1.0, 1.0]),),
+             (1, Vectors.sparse(6, [2, 3, 4], [1.0, 1.0, 1.0]),),
+             (2, Vectors.sparse(6, [0, 2, 4], [1.0, 1.0, 1.0]),)]
+    dfA = spark.createDataFrame(dataA, ["id", "features"])
+
+    dataB = [(3, Vectors.sparse(6, [1, 3, 5], [1.0, 1.0, 1.0]),),
+             (4, Vectors.sparse(6, [2, 3, 5], [1.0, 1.0, 1.0]),),
+             (5, Vectors.sparse(6, [1, 2, 4], [1.0, 1.0, 1.0]),)]
+    dfB = spark.createDataFrame(dataB, ["id", "features"])
+
+    key = Vectors.sparse(6, [1, 3], [1.0, 1.0])
+
+    mh = MinHashLSH(inputCol="features", outputCol="hashes", numHashTables=5)
+    model = mh.fit(dfA)
+
+    # Feature Transformation
+    print("The hashed dataset where hashed values are stored in the column 'hashes':")
+    model.transform(dfA).show()
+
+    # Compute the locality sensitive hashes for the input rows, then perform approximate
+    # similarity join.
+    # We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    # `model.approxSimilarityJoin(transformedA, transformedB, 0.6)`
+    print("Approximately joining dfA and dfB on distance smaller than 0.6:")
+    model.approxSimilarityJoin(dfA, dfB, 0.6, distCol="JaccardDistance")\
+        .select(col("datasetA.id").alias("idA"),
+                col("datasetB.id").alias("idB"),
+                col("JaccardDistance")).show()
+
+    # Compute the locality sensitive hashes for the input rows, then perform approximate nearest
+    # neighbor search.
+    # We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    # `model.approxNearestNeighbors(transformedA, key, 2)`
+    # It may return fewer than 2 rows when not enough approximate near-neighbor candidates
+    # are found.
+    print("Approximately searching dfA for 2 nearest neighbors of the key:")
+    model.approxNearestNeighbors(dfA, key, 2).show()
+
+    # $example off$
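+
+    # Not part of the original example: a minimal sketch of the reuse suggested in the
+    # comments above. The names transformedA/transformedB are ours; the rows are hashed
+    # once and the already-transformed DataFrames are passed to the similarity join.
+    transformedA = model.transform(dfA)
+    transformedB = model.transform(dfB)
+    model.approxSimilarityJoin(transformedA, transformedB, 0.6, distCol="JaccardDistance")\
+        .select(col("datasetA.id").alias("idA"),
+                col("datasetB.id").alias("idB"),
+                col("JaccardDistance")).show()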
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/min_max_scaler_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/min_max_scaler_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5f272e59bc3069b33ef7d7f2de4d67b6b167378
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/min_max_scaler_example.py
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import MinMaxScaler
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("MinMaxScalerExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataFrame = spark.createDataFrame([
+        (0, Vectors.dense([1.0, 0.1, -1.0]),),
+        (1, Vectors.dense([2.0, 1.1, 1.0]),),
+        (2, Vectors.dense([3.0, 10.1, 3.0]),)
+    ], ["id", "features"])
+
+    scaler = MinMaxScaler(inputCol="features", outputCol="scaledFeatures")
+
+    # Compute summary statistics and generate MinMaxScalerModel
+    scalerModel = scaler.fit(dataFrame)
+
+    # Rescale each feature to the range [min, max].
+    scaledData = scalerModel.transform(dataFrame)
+    print("Features scaled to range: [%f, %f]" % (scaler.getMin(), scaler.getMax()))
+    scaledData.select("features", "scaledFeatures").show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/multiclass_logistic_regression_with_elastic_net.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/multiclass_logistic_regression_with_elastic_net.py
new file mode 100644
index 0000000000000000000000000000000000000000..bec9860c79a2d867bc7de8b545bfc3e1ff58636d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/multiclass_logistic_regression_with_elastic_net.py
@@ -0,0 +1,86 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import LogisticRegression
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("MulticlassLogisticRegressionWithElasticNet") \
+        .getOrCreate()
+
+    # $example on$
+    # Load training data
+    training = spark \
+        .read \
+        .format("libsvm") \
+        .load("data/mllib/sample_multiclass_classification_data.txt")
+
+    lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
+
+    # Fit the model
+    lrModel = lr.fit(training)
+
+    # Print the coefficients and intercept for multinomial logistic regression
+    print("Coefficients: \n" + str(lrModel.coefficientMatrix))
+    print("Intercept: " + str(lrModel.interceptVector))
+
+    trainingSummary = lrModel.summary
+
+    # Obtain the objective per iteration
+    objectiveHistory = trainingSummary.objectiveHistory
+    print("objectiveHistory:")
+    for objective in objectiveHistory:
+        print(objective)
+
+    # For multiclass classification, we can inspect metrics on a per-label basis.
+    print("False positive rate by label:")
+    for i, rate in enumerate(trainingSummary.falsePositiveRateByLabel):
+        print("label %d: %s" % (i, rate))
+
+    print("True positive rate by label:")
+    for i, rate in enumerate(trainingSummary.truePositiveRateByLabel):
+        print("label %d: %s" % (i, rate))
+
+    print("Precision by label:")
+    for i, prec in enumerate(trainingSummary.precisionByLabel):
+        print("label %d: %s" % (i, prec))
+
+    print("Recall by label:")
+    for i, rec in enumerate(trainingSummary.recallByLabel):
+        print("label %d: %s" % (i, rec))
+
+    print("F-measure by label:")
+    for i, f in enumerate(trainingSummary.fMeasureByLabel()):
+        print("label %d: %s" % (i, f))
+
+    accuracy = trainingSummary.accuracy
+    falsePositiveRate = trainingSummary.weightedFalsePositiveRate
+    truePositiveRate = trainingSummary.weightedTruePositiveRate
+    fMeasure = trainingSummary.weightedFMeasure()
+    precision = trainingSummary.weightedPrecision
+    recall = trainingSummary.weightedRecall
+    print("Accuracy: %s\nFPR: %s\nTPR: %s\nF-measure: %s\nPrecision: %s\nRecall: %s"
+          % (accuracy, falsePositiveRate, truePositiveRate, fMeasure, precision, recall))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/multilayer_perceptron_classification.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/multilayer_perceptron_classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..88fc69f753953721ce092c6ef7a23663cf3084f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/multilayer_perceptron_classification.py
@@ -0,0 +1,58 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import MultilayerPerceptronClassifier
+from pyspark.ml.evaluation import MulticlassClassificationEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder.appName("multilayer_perceptron_classification_example").getOrCreate()
+
+    # $example on$
+    # Load training data
+    data = spark.read.format("libsvm")\
+        .load("data/mllib/sample_multiclass_classification_data.txt")
+
+    # Split the data into train and test
+    splits = data.randomSplit([0.6, 0.4], 1234)
+    train = splits[0]
+    test = splits[1]
+
+    # Specify the layers for the neural network:
+    # an input layer of size 4 (features), two intermediate layers of size 5 and 4,
+    # and an output layer of size 3 (classes).
+    layers = [4, 5, 4, 3]
+
+    # create the trainer and set its parameters
+    trainer = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=1234)
+
+    # train the model
+    model = trainer.fit(train)
+
+    # compute accuracy on the test set
+    result = model.transform(test)
+    predictionAndLabels = result.select("prediction", "label")
+    evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
+    print("Test set accuracy = " + str(evaluator.evaluate(predictionAndLabels)))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/n_gram_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/n_gram_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..31676e076a11b7ee5beef484673965f47988787b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/n_gram_example.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import NGram
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("NGramExample")\
+        .getOrCreate()
+
+    # $example on$
+    wordDataFrame = spark.createDataFrame([
+        (0, ["Hi", "I", "heard", "about", "Spark"]),
+        (1, ["I", "wish", "Java", "could", "use", "case", "classes"]),
+        (2, ["Logistic", "regression", "models", "are", "neat"])
+    ], ["id", "words"])
+
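+    # Generate bigrams (n=2) from each array of tokens.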
+    ngram = NGram(n=2, inputCol="words", outputCol="ngrams")
+
+    ngramDataFrame = ngram.transform(wordDataFrame)
+    ngramDataFrame.select("ngrams").show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/naive_bayes_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/naive_bayes_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..7290ab81cd0ec9fc1bb3c953e61b58c60c9851ee
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/naive_bayes_example.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import NaiveBayes
+from pyspark.ml.evaluation import MulticlassClassificationEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("NaiveBayesExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load training data
+    data = spark.read.format("libsvm") \
+        .load("data/mllib/sample_libsvm_data.txt")
+
+    # Split the data into train and test
+    splits = data.randomSplit([0.6, 0.4], 1234)
+    train = splits[0]
+    test = splits[1]
+
+    # create the trainer and set its parameters
+    nb = NaiveBayes(smoothing=1.0, modelType="multinomial")
+
+    # train the model
+    model = nb.fit(train)
+
+    # select example rows to display.
+    predictions = model.transform(test)
+    predictions.show()
+
+    # compute accuracy on the test set
+    evaluator = MulticlassClassificationEvaluator(labelCol="label", predictionCol="prediction",
+                                                  metricName="accuracy")
+    accuracy = evaluator.evaluate(predictions)
+    print("Test set accuracy = " + str(accuracy))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/normalizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/normalizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..510bd825fd286d17b12e9c5c9dc0a7ef965383dc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/normalizer_example.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import Normalizer
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("NormalizerExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataFrame = spark.createDataFrame([
+        (0, Vectors.dense([1.0, 0.5, -1.0]),),
+        (1, Vectors.dense([2.0, 1.0, 1.0]),),
+        (2, Vectors.dense([4.0, 10.0, 2.0]),)
+    ], ["id", "features"])
+
+    # Normalize each Vector using $L^1$ norm.
+    normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
+    l1NormData = normalizer.transform(dataFrame)
+    print("Normalized using L^1 norm")
+    l1NormData.show()
+
+    # Normalize each Vector using $L^\infty$ norm.
+    lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")})
+    print("Normalized using L^inf norm")
+    lInfNormData.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/one_vs_rest_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/one_vs_rest_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..956e94ae4ab62e238d91cb245b165685e7625567
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/one_vs_rest_example.py
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example of Multiclass to Binary Reduction with One Vs Rest,
+using Logistic Regression as the base classifier.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/one_vs_rest_example.py
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.classification import LogisticRegression, OneVsRest
+from pyspark.ml.evaluation import MulticlassClassificationEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("OneVsRestExample") \
+        .getOrCreate()
+
+    # $example on$
+    # load data file.
+    inputData = spark.read.format("libsvm") \
+        .load("data/mllib/sample_multiclass_classification_data.txt")
+
+    # generate the train/test split.
+    (train, test) = inputData.randomSplit([0.8, 0.2])
+
+    # instantiate the base classifier.
+    lr = LogisticRegression(maxIter=10, tol=1E-6, fitIntercept=True)
+
+    # instantiate the One Vs Rest Classifier.
+    ovr = OneVsRest(classifier=lr)
+
+    # train the multiclass model.
+    ovrModel = ovr.fit(train)
+
+    # score the model on test data.
+    predictions = ovrModel.transform(test)
+
+    # obtain evaluator.
+    evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
+
+    # compute the classification error on test data.
+    accuracy = evaluator.evaluate(predictions)
+    print("Test Error = %g" % (1.0 - accuracy))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/onehot_encoder_estimator_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/onehot_encoder_estimator_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..2723e681cea7c175a970669e4bbc5354fcf06d6a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/onehot_encoder_estimator_example.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import OneHotEncoderEstimator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("OneHotEncoderEstimatorExample")\
+        .getOrCreate()
+
+    # Note: categorical features are usually first encoded with StringIndexer
+    # $example on$
+    df = spark.createDataFrame([
+        (0.0, 1.0),
+        (1.0, 0.0),
+        (2.0, 1.0),
+        (0.0, 2.0),
+        (0.0, 1.0),
+        (2.0, 0.0)
+    ], ["categoryIndex1", "categoryIndex2"])
+
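+    # Map each category index to a sparse binary vector; by default the last category
+    # is dropped, so it is encoded as an all-zero vector.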
+    encoder = OneHotEncoderEstimator(inputCols=["categoryIndex1", "categoryIndex2"],
+                                     outputCols=["categoryVec1", "categoryVec2"])
+    model = encoder.fit(df)
+    encoded = model.transform(df)
+    encoded.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/pca_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/pca_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..38746aced096af18e7c814f1510225003425831d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/pca_example.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import PCA
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("PCAExample")\
+        .getOrCreate()
+
+    # $example on$
+    data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
+            (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),
+            (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]
+    df = spark.createDataFrame(data, ["features"])
+
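+    # Project the 5-dimensional feature vectors onto their top 3 principal components.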
+    pca = PCA(k=3, inputCol="features", outputCol="pcaFeatures")
+    model = pca.fit(df)
+
+    result = model.transform(df).select("pcaFeatures")
+    result.show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/pipeline_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/pipeline_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1fab7cbe6d801dc02901557b9b3d44b731f2faa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/pipeline_example.py
@@ -0,0 +1,69 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Pipeline Example.
+"""
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.classification import LogisticRegression
+from pyspark.ml.feature import HashingTF, Tokenizer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("PipelineExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Prepare training documents from a list of (id, text, label) tuples.
+    training = spark.createDataFrame([
+        (0, "a b c d e spark", 1.0),
+        (1, "b d", 0.0),
+        (2, "spark f g h", 1.0),
+        (3, "hadoop mapreduce", 0.0)
+    ], ["id", "text", "label"])
+
+    # Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
+    tokenizer = Tokenizer(inputCol="text", outputCol="words")
+    hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
+    lr = LogisticRegression(maxIter=10, regParam=0.001)
+    pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
+
+    # Fit the pipeline to training documents.
+    model = pipeline.fit(training)
+
+    # Prepare test documents, which are unlabeled (id, text) tuples.
+    test = spark.createDataFrame([
+        (4, "spark i j k"),
+        (5, "l m n"),
+        (6, "spark hadoop spark"),
+        (7, "apache hadoop")
+    ], ["id", "text"])
+
+    # Make predictions on test documents and print columns of interest.
+    prediction = model.transform(test)
+    selected = prediction.select("id", "text", "probability", "prediction")
+    for row in selected.collect():
+        rid, text, prob, prediction = row
+        print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob), prediction))
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/polynomial_expansion_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/polynomial_expansion_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..40bcb7b13a3de109f324756bb81bb1f7ed2d90fd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/polynomial_expansion_example.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import PolynomialExpansion
+from pyspark.ml.linalg import Vectors
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("PolynomialExpansionExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame([
+        (Vectors.dense([2.0, 1.0]),),
+        (Vectors.dense([0.0, 0.0]),),
+        (Vectors.dense([3.0, -1.0]),)
+    ], ["features"])
+
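+    # Expand each 2-dimensional vector into polynomial features up to degree 3,
+    # including interaction terms.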
+    polyExpansion = PolynomialExpansion(degree=3, inputCol="features", outputCol="polyFeatures")
+    polyDF = polyExpansion.transform(df)
+
+    polyDF.show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/prefixspan_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/prefixspan_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..88d1d4197341b0a71c40525f06f1de3ee754d54b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/prefixspan_example.py
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example demonstrating PrefixSpan.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/prefixspan_example.py
+"""
+# $example on$
+from pyspark.ml.fpm import PrefixSpan
+# $example off$
+from pyspark.sql import Row, SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("PrefixSpanExample")\
+        .getOrCreate()
+    sc = spark.sparkContext
+
+    # $example on$
+    df = sc.parallelize([Row(sequence=[[1, 2], [3]]),
+                         Row(sequence=[[1], [3, 2], [1, 2]]),
+                         Row(sequence=[[1, 2], [5]]),
+                         Row(sequence=[[6]])]).toDF()
+
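+    # minSupport=0.5 means a pattern must occur in at least half of the sequences.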
+    prefixSpan = PrefixSpan(minSupport=0.5, maxPatternLength=5,
+                            maxLocalProjDBSize=32000000)
+
+    # Find frequent sequential patterns.
+    prefixSpan.findFrequentSequentialPatterns(df).show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/quantile_discretizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/quantile_discretizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fc1d1949a77db3a872edf894c6264adf0e79b6b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/quantile_discretizer_example.py
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import QuantileDiscretizer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("QuantileDiscretizerExample")\
+        .getOrCreate()
+
+    # $example on$
+    data = [(0, 18.0), (1, 19.0), (2, 8.0), (3, 5.0), (4, 2.2)]
+    df = spark.createDataFrame(data, ["id", "hour"])
+    # $example off$
+
+    # Output of QuantileDiscretizer for such small datasets can depend on the number of
+    # partitions. Here we force a single partition to ensure consistent results.
+    # Note that this is not necessary for normal use cases.
+    df = df.repartition(1)
+
+    # $example on$
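+    # Bin the continuous "hour" column into 3 buckets with roughly equal record counts.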
+    discretizer = QuantileDiscretizer(numBuckets=3, inputCol="hour", outputCol="result")
+
+    result = discretizer.fit(df).transform(df)
+    result.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/random_forest_classifier_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/random_forest_classifier_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..4eaa94dd7f489ae721814805beab203ed048a34b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/random_forest_classifier_example.py
@@ -0,0 +1,82 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Random Forest Classifier Example.
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.classification import RandomForestClassifier
+from pyspark.ml.feature import IndexToString, StringIndexer, VectorIndexer
+from pyspark.ml.evaluation import MulticlassClassificationEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("RandomForestClassifierExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load and parse the data file, converting it to a DataFrame.
+    data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    # Index labels, adding metadata to the label column.
+    # Fit on whole dataset to include all labels in index.
+    labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
+
+    # Automatically identify categorical features, and index them.
+    # Set maxCategories so features with > 4 distinct values are treated as continuous.
+    featureIndexer =\
+        VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
+
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a RandomForest model.
+    rf = RandomForestClassifier(labelCol="indexedLabel", featuresCol="indexedFeatures", numTrees=10)
+
+    # Convert indexed labels back to original labels.
+    labelConverter = IndexToString(inputCol="prediction", outputCol="predictedLabel",
+                                   labels=labelIndexer.labels)
+
+    # Chain indexers and forest in a Pipeline
+    pipeline = Pipeline(stages=[labelIndexer, featureIndexer, rf, labelConverter])
+
+    # Train model.  This also runs the indexers.
+    model = pipeline.fit(trainingData)
+
+    # Make predictions.
+    predictions = model.transform(testData)
+
+    # Select example rows to display.
+    predictions.select("predictedLabel", "label", "features").show(5)
+
+    # Select (prediction, true label) and compute test error
+    evaluator = MulticlassClassificationEvaluator(
+        labelCol="indexedLabel", predictionCol="prediction", metricName="accuracy")
+    accuracy = evaluator.evaluate(predictions)
+    print("Test Error = %g" % (1.0 - accuracy))
+
+    rfModel = model.stages[2]
+    print(rfModel)  # summary only
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/random_forest_regressor_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/random_forest_regressor_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..a34edff2ecaa223730e9c2ec864d385f38f55463
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/random_forest_regressor_example.py
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Random Forest Regressor Example.
+"""
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml import Pipeline
+from pyspark.ml.regression import RandomForestRegressor
+from pyspark.ml.feature import VectorIndexer
+from pyspark.ml.evaluation import RegressionEvaluator
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("RandomForestRegressorExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Load and parse the data file, converting it to a DataFrame.
+    data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    # Automatically identify categorical features, and index them.
+    # Set maxCategories so features with > 4 distinct values are treated as continuous.
+    featureIndexer =\
+        VectorIndexer(inputCol="features", outputCol="indexedFeatures", maxCategories=4).fit(data)
+
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a RandomForest model.
+    rf = RandomForestRegressor(featuresCol="indexedFeatures")
+
+    # Chain indexer and forest in a Pipeline
+    pipeline = Pipeline(stages=[featureIndexer, rf])
+
+    # Train model.  This also runs the indexer.
+    model = pipeline.fit(trainingData)
+
+    # Make predictions.
+    predictions = model.transform(testData)
+
+    # Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5)
+
+    # Select (prediction, true label) and compute test error
+    evaluator = RegressionEvaluator(
+        labelCol="label", predictionCol="prediction", metricName="rmse")
+    rmse = evaluator.evaluate(predictions)
+    print("Root Mean Squared Error (RMSE) on test data = %g" % rmse)
+
+    rfModel = model.stages[1]
+    print(rfModel)  # summary only
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/rformula_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/rformula_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..6629239db29ece9a6d2ef175e9583fa3c3c586fe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/rformula_example.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import RFormula
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("RFormulaExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataset = spark.createDataFrame(
+        [(7, "US", 18, 1.0),
+         (8, "CA", 12, 0.0),
+         (9, "NZ", 15, 0.0)],
+        ["id", "country", "hour", "clicked"])
+
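+    # "clicked ~ country + hour" produces a features vector from the one-hot encoded
+    # country column and the numeric hour column, with "clicked" as the label.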
+    formula = RFormula(
+        formula="clicked ~ country + hour",
+        featuresCol="features",
+        labelCol="label")
+
+    output = formula.fit(dataset).transform(dataset)
+    output.select("features", "label").show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/sql_transformer.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/sql_transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0bf8f35720c95cdd3558425313c691c1a5e14880
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/sql_transformer.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import SQLTransformer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("SQLTransformerExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame([
+        (0, 1.0, 3.0),
+        (2, 2.0, 5.0)
+    ], ["id", "v1", "v2"])
+    sqlTrans = SQLTransformer(
+        statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
+    sqlTrans.transform(df).show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/standard_scaler_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/standard_scaler_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0027480e69b3028525543505e329229e2a1302b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/standard_scaler_example.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import StandardScaler
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("StandardScalerExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+    scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures",
+                            withStd=True, withMean=False)
+
+    # Compute summary statistics by fitting the StandardScaler
+    scalerModel = scaler.fit(dataFrame)
+
+    # Normalize each feature to have unit standard deviation.
+    scaledData = scalerModel.transform(dataFrame)
+    scaledData.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/stopwords_remover_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/stopwords_remover_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b8e7855e3e79092d4b471f1c1250c6803a67c1c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/stopwords_remover_example.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import StopWordsRemover
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("StopWordsRemoverExample")\
+        .getOrCreate()
+
+    # $example on$
+    sentenceData = spark.createDataFrame([
+        (0, ["I", "saw", "the", "red", "balloon"]),
+        (1, ["Mary", "had", "a", "little", "lamb"])
+    ], ["id", "raw"])
+
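+    # Remove the default English stop words from the "raw" token column.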
+    remover = StopWordsRemover(inputCol="raw", outputCol="filtered")
+    remover.transform(sentenceData).show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/string_indexer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/string_indexer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..2255bfb9c1a60866bee494106d0b77ee7907dffb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/string_indexer_example.py
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import StringIndexer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("StringIndexerExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame(
+        [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
+        ["id", "category"])
+
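+    # Encode the string column "category" as label indices; the most frequent label gets index 0.0.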
+    indexer = StringIndexer(inputCol="category", outputCol="categoryIndex")
+    indexed = indexer.fit(df).transform(df)
+    indexed.show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/summarizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/summarizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..8835f189a1ad4e9fde4a9ad58f0995fbf483e57f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/summarizer_example.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An example for Summarizer.
+Run with:
+  bin/spark-submit examples/src/main/python/ml/summarizer_example.py
+"""
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on$
+from pyspark.ml.stat import Summarizer
+from pyspark.sql import Row
+from pyspark.ml.linalg import Vectors
+# $example off$
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("SummarizerExample") \
+        .getOrCreate()
+    sc = spark.sparkContext
+
+    # $example on$
+    df = sc.parallelize([Row(weight=1.0, features=Vectors.dense(1.0, 1.0, 1.0)),
+                         Row(weight=0.0, features=Vectors.dense(1.0, 2.0, 3.0))]).toDF()
+
+    # create summarizer for multiple metrics "mean" and "count"
+    summarizer = Summarizer.metrics("mean", "count")
+
+    # compute statistics for multiple metrics with weight
+    df.select(summarizer.summary(df.features, df.weight)).show(truncate=False)
+
+    # compute statistics for multiple metrics without weight
+    df.select(summarizer.summary(df.features)).show(truncate=False)
+
+    # compute statistics for single metric "mean" with weight
+    df.select(Summarizer.mean(df.features, df.weight)).show(truncate=False)
+
+    # compute statistics for single metric "mean" without weight
+    df.select(Summarizer.mean(df.features)).show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/tf_idf_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/tf_idf_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d43244fa68e9746258944c06f64bbf010e9e8347
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/tf_idf_example.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import HashingTF, IDF, Tokenizer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("TfIdfExample")\
+        .getOrCreate()
+
+    # $example on$
+    sentenceData = spark.createDataFrame([
+        (0.0, "Hi I heard about Spark"),
+        (0.0, "I wish Java could use case classes"),
+        (1.0, "Logistic regression models are neat")
+    ], ["label", "sentence"])
+
+    tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
+    wordsData = tokenizer.transform(sentenceData)
+
+    hashingTF = HashingTF(inputCol="words", outputCol="rawFeatures", numFeatures=20)
+    featurizedData = hashingTF.transform(wordsData)
+    # alternatively, CountVectorizer can also be used to get term frequency vectors
+
+    idf = IDF(inputCol="rawFeatures", outputCol="features")
+    idfModel = idf.fit(featurizedData)
+    rescaledData = idfModel.transform(featurizedData)
+
+    rescaledData.select("label", "features").show()
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/tokenizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/tokenizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c65c5c9f82608b3b7811185a637c8b504f4ea16
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/tokenizer_example.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import Tokenizer, RegexTokenizer
+from pyspark.sql.functions import col, udf
+from pyspark.sql.types import IntegerType
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("TokenizerExample")\
+        .getOrCreate()
+
+    # $example on$
+    sentenceDataFrame = spark.createDataFrame([
+        (0, "Hi I heard about Spark"),
+        (1, "I wish Java could use case classes"),
+        (2, "Logistic,regression,models,are,neat")
+    ], ["id", "sentence"])
+
+    tokenizer = Tokenizer(inputCol="sentence", outputCol="words")
+
+    regexTokenizer = RegexTokenizer(inputCol="sentence", outputCol="words", pattern="\\W")
+    # alternatively, pattern="\\w+", gaps=False
+
+    countTokens = udf(lambda words: len(words), IntegerType())
+
+    tokenized = tokenizer.transform(sentenceDataFrame)
+    tokenized.select("sentence", "words")\
+        .withColumn("tokens", countTokens(col("words"))).show(truncate=False)
+
+    regexTokenized = regexTokenizer.transform(sentenceDataFrame)
+    regexTokenized.select("sentence", "words") \
+        .withColumn("tokens", countTokens(col("words"))).show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/train_validation_split.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/train_validation_split.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4f9184bf576e9b8d387f9800fb29149f640d391
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/train_validation_split.py
@@ -0,0 +1,73 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+This example demonstrates applying TrainValidationSplit to split data
+and perform model selection.
+Run with:
+
+  bin/spark-submit examples/src/main/python/ml/train_validation_split.py
+"""
+# $example on$
+from pyspark.ml.evaluation import RegressionEvaluator
+from pyspark.ml.regression import LinearRegression
+from pyspark.ml.tuning import ParamGridBuilder, TrainValidationSplit
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("TrainValidationSplit")\
+        .getOrCreate()
+
+    # $example on$
+    # Prepare training and test data.
+    data = spark.read.format("libsvm")\
+        .load("data/mllib/sample_linear_regression_data.txt")
+    train, test = data.randomSplit([0.9, 0.1], seed=12345)
+
+    lr = LinearRegression(maxIter=10)
+
+    # We use a ParamGridBuilder to construct a grid of parameters to search over.
+    # TrainValidationSplit will try all combinations of values and determine the best model
+    # using the evaluator.
+    paramGrid = ParamGridBuilder()\
+        .addGrid(lr.regParam, [0.1, 0.01]) \
+        .addGrid(lr.fitIntercept, [False, True])\
+        .addGrid(lr.elasticNetParam, [0.0, 0.5, 1.0])\
+        .build()
+
+    # In this case the estimator is simply the linear regression.
+    # A TrainValidationSplit requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.
+    tvs = TrainValidationSplit(estimator=lr,
+                               estimatorParamMaps=paramGrid,
+                               evaluator=RegressionEvaluator(),
+                               # 80% of the data will be used for training, 20% for validation.
+                               trainRatio=0.8)
+
+    # Run TrainValidationSplit, and choose the best set of parameters.
+    model = tvs.fit(train)
+
+    # Make predictions on the test data. "model" is the fitted model with the combination of
+    # parameters that performed best.
+    model.transform(test)\
+        .select("features", "label", "prediction")\
+        .show()
+
+    # $example off$
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_assembler_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_assembler_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..98de1d5ea7dacbf7d33d8983d21ec09cc60df07a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_assembler_example.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.linalg import Vectors
+from pyspark.ml.feature import VectorAssembler
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("VectorAssemblerExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataset = spark.createDataFrame(
+        [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)],
+        ["id", "hour", "mobile", "userFeatures", "clicked"])
+
+    assembler = VectorAssembler(
+        inputCols=["hour", "mobile", "userFeatures"],
+        outputCol="features")
+
+    output = assembler.transform(dataset)
+    print("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'")
+    output.select("features", "clicked").show(truncate=False)
+    # $example off$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_indexer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_indexer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c2956077d6ce167cb272c720b4ba73a8e3676bf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_indexer_example.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import VectorIndexer
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("VectorIndexerExample")\
+        .getOrCreate()
+
+    # $example on$
+    data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10)
+    indexerModel = indexer.fit(data)
+
+    categoricalFeatures = indexerModel.categoryMaps
+    print("Chose %d categorical features: %s" %
+          (len(categoricalFeatures), ", ".join(str(k) for k in categoricalFeatures.keys())))
+
+    # Create new column "indexed" with categorical values transformed to indices
+    indexedData = indexerModel.transform(data)
+    indexedData.show()
+    # $example off$
+
+    spark.stop()
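
categoryMaps is a plain Python dict keyed by feature index; each value maps an original feature value to the category index it was assigned. A small sketch (assuming indexerModel from the example above is still in scope) that prints those mappings explicitly:

# Sketch only: assumes indexerModel from the example above.
for featureIndex, categoryMap in indexerModel.categoryMaps.items():
    print("Feature %d has %d categories: %s" % (featureIndex, len(categoryMap), categoryMap))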
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_size_hint_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_size_hint_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb77dacec629d039bdaec711aff22e37217fdfc2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_size_hint_example.py
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.linalg import Vectors
+from pyspark.ml.feature import (VectorSizeHint, VectorAssembler)
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("VectorSizeHintExample")\
+        .getOrCreate()
+
+    # $example on$
+    dataset = spark.createDataFrame(
+        [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0),
+         (0, 18, 1.0, Vectors.dense([0.0, 10.0]), 0.0)],
+        ["id", "hour", "mobile", "userFeatures", "clicked"])
+
+    sizeHint = VectorSizeHint(
+        inputCol="userFeatures",
+        handleInvalid="skip",
+        size=3)
+
+    datasetWithSize = sizeHint.transform(dataset)
+    print("Rows where 'userFeatures' is not the right size are filtered out")
+    datasetWithSize.show(truncate=False)
+
+    assembler = VectorAssembler(
+        inputCols=["hour", "mobile", "userFeatures"],
+        outputCol="features")
+
+    # This dataframe can be used by downstream transformers as before
+    output = assembler.transform(datasetWithSize)
+    print("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'")
+    output.select("features", "clicked").show(truncate=False)
+    # $example off$
+
+    spark.stop()
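
handleInvalid decides what happens to rows whose vector does not match the declared size: "error" (the default) fails fast, "skip" drops the row as above, and "optimistic" passes the row through unchecked. A minimal sketch of the fail-fast variant, reusing dataset from the example above (the failing call is left commented out):

# Sketch only: assumes dataset from the example above.
strictHint = VectorSizeHint(inputCol="userFeatures", handleInvalid="error", size=3)
# strictHint.transform(dataset).count()  # would raise on the 2-element userFeatures row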
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_slicer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_slicer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..68c8cfe27e375a9d08cf1469f25b7af0867e4921
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/vector_slicer_example.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import VectorSlicer
+from pyspark.ml.linalg import Vectors
+from pyspark.sql.types import Row
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("VectorSlicerExample")\
+        .getOrCreate()
+
+    # $example on$
+    df = spark.createDataFrame([
+        Row(userFeatures=Vectors.sparse(3, {0: -2.0, 1: 2.3})),
+        Row(userFeatures=Vectors.dense([-2.0, 2.3, 0.0]))])
+
+    slicer = VectorSlicer(inputCol="userFeatures", outputCol="features", indices=[1])
+
+    output = slicer.transform(df)
+
+    output.select("userFeatures", "features").show()
+    # $example off$
+
+    spark.stop()
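
VectorSlicer also accepts several indices at once, and a names parameter for selecting by column name when the input vector carries ML attribute metadata (for example a vector produced by VectorAssembler). A minimal integer-index sketch, reusing df from the example above:

# Sketch only: assumes df from the example above.
firstTwo = VectorSlicer(inputCol="userFeatures", outputCol="firstTwo", indices=[0, 1])
firstTwo.transform(df).select("userFeatures", "firstTwo").show()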
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/word2vec_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/word2vec_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..77f8951df0883e9c4515bfcb17d98968c949ed97
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/ml/word2vec_example.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from pyspark.ml.feature import Word2Vec
+# $example off$
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    spark = SparkSession\
+        .builder\
+        .appName("Word2VecExample")\
+        .getOrCreate()
+
+    # $example on$
+    # Input data: Each row is a bag of words from a sentence or document.
+    documentDF = spark.createDataFrame([
+        ("Hi I heard about Spark".split(" "), ),
+        ("I wish Java could use case classes".split(" "), ),
+        ("Logistic regression models are neat".split(" "), )
+    ], ["text"])
+
+    # Learn a mapping from words to Vectors.
+    word2Vec = Word2Vec(vectorSize=3, minCount=0, inputCol="text", outputCol="result")
+    model = word2Vec.fit(documentDF)
+
+    result = model.transform(documentDF)
+    for row in result.collect():
+        text, vector = row
+        print("Text: [%s] => \nVector: %s\n" % (", ".join(text), str(vector)))
+    # $example off$
+
+    spark.stop()
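
The fitted Word2VecModel also exposes the learned vocabulary and nearest-neighbour lookups. A small sketch, assuming model from the example above (the query word "Spark" is just one of the tokens in the toy corpus):

# Sketch only: assumes model from the example above.
model.getVectors().show(truncate=False)   # one row per vocabulary word
model.findSynonyms("Spark", 2).show()     # two nearest words by cosine similarity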
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/binary_classification_metrics_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/binary_classification_metrics_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d14ce7982e24fff1d5b4aab193139cc3aba21400
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/binary_classification_metrics_example.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Binary Classification Metrics Example.
+"""
+from __future__ import print_function
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.classification import LogisticRegressionWithLBFGS
+from pyspark.mllib.evaluation import BinaryClassificationMetrics
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="BinaryClassificationMetricsExample")
+
+    # $example on$
+    # Note: several of the metrics available in the Scala API are currently missing from PySpark
+    # Load training data in LIBSVM format
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_binary_classification_data.txt")
+
+    # Split data into training (60%) and test (40%)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
+    training.cache()
+
+    # Run training algorithm to build the model
+    model = LogisticRegressionWithLBFGS.train(training)
+
+    # Compute raw scores on the test set
+    predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp.label))
+
+    # Instantiate metrics object
+    metrics = BinaryClassificationMetrics(predictionAndLabels)
+
+    # Area under precision-recall curve
+    print("Area under PR = %s" % metrics.areaUnderPR)
+
+    # Area under ROC curve
+    print("Area under ROC = %s" % metrics.areaUnderROC)
+    # $example off$
+
+    sc.stop()
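
The same (prediction, label) RDD can be wrapped in MulticlassMetrics to get accuracy and per-label precision, recall, and F-measure. A minimal sketch, assuming predictionAndLabels from the example above is still in scope:

# Sketch only: assumes predictionAndLabels from the example above.
from pyspark.mllib.evaluation import MulticlassMetrics

mcMetrics = MulticlassMetrics(predictionAndLabels)
print("Accuracy = %s" % mcMetrics.accuracy)
print("Weighted F-measure = %s" % mcMetrics.weightedFMeasure())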
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/bisecting_k_means_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/bisecting_k_means_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..31f3e72d7ff1f0c0b37181d3d4ea59dbc7f8562e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/bisecting_k_means_example.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from numpy import array
+# $example off$
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.clustering import BisectingKMeans, BisectingKMeansModel
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonBisectingKMeansExample")  # SparkContext
+
+    # $example on$
+    # Load and parse the data
+    data = sc.textFile("data/mllib/kmeans_data.txt")
+    parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')]))
+
+    # Build the model (cluster the data)
+    model = BisectingKMeans.train(parsedData, 2, maxIterations=5)
+
+    # Evaluate clustering
+    cost = model.computeCost(parsedData)
+    print("Bisecting K-means Cost = " + str(cost))
+    # $example off$
+
+    sc.stop()
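
Besides the cost, the trained model can report its cluster centers and assign individual points to clusters. A small sketch, assuming model and parsedData from the example above:

# Sketch only: assumes model and parsedData from the example above.
print("Cluster centers: " + str(model.clusterCenters))
print("Cluster of the first point: " + str(model.predict(parsedData.first())))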
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/correlations.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/correlations.py
new file mode 100755
index 0000000000000000000000000000000000000000..089504fa7064b99bd5eb5af4ca8fd4819512c96b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/correlations.py
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Correlations using MLlib.
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.stat import Statistics
+from pyspark.mllib.util import MLUtils
+
+
+if __name__ == "__main__":
+    if len(sys.argv) not in [1, 2]:
+        print("Usage: correlations (<file>)", file=sys.stderr)
+        sys.exit(-1)
+    sc = SparkContext(appName="PythonCorrelations")
+    if len(sys.argv) == 2:
+        filepath = sys.argv[1]
+    else:
+        filepath = 'data/mllib/sample_linear_regression_data.txt'
+    corrType = 'pearson'
+
+    points = MLUtils.loadLibSVMFile(sc, filepath)\
+        .map(lambda lp: LabeledPoint(lp.label, lp.features.toArray()))
+
+    print()
+    print('Summary of data file: ' + filepath)
+    print('%d data points' % points.count())
+
+    # Statistics (correlations)
+    print()
+    print('Correlation (%s) between label and each feature' % corrType)
+    print('Feature\tCorrelation')
+    numFeatures = points.take(1)[0].features.size
+    labelRDD = points.map(lambda lp: lp.label)
+    for i in range(numFeatures):
+        featureRDD = points.map(lambda lp: lp.features[i])
+        corr = Statistics.corr(labelRDD, featureRDD, corrType)
+        print('%d\t%g' % (i, corr))
+    print()
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/correlations_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/correlations_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..66d18f6e5df1738e3afe4ce0ea5473b259dd86d6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/correlations_example.py
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import numpy as np
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="CorrelationsExample")  # SparkContext
+
+    # $example on$
+    seriesX = sc.parallelize([1.0, 2.0, 3.0, 3.0, 5.0])  # a series
+    # seriesY must have the same number of partitions and cardinality as seriesX
+    seriesY = sc.parallelize([11.0, 22.0, 33.0, 33.0, 555.0])
+
+    # Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
+    # If a method is not specified, Pearson's method will be used by default.
+    print("Correlation is: " + str(Statistics.corr(seriesX, seriesY, method="pearson")))
+
+    data = sc.parallelize(
+        [np.array([1.0, 10.0, 100.0]), np.array([2.0, 20.0, 200.0]), np.array([5.0, 33.0, 366.0])]
+    )  # an RDD of Vectors
+
+    # Calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
+    # If a method is not specified, Pearson's method will be used by default.
+    print(Statistics.corr(data, method="pearson"))
+    # $example off$
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/decision_tree_classification_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/decision_tree_classification_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..7eecf500584ad81655852604e2784ad9df5f1be7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/decision_tree_classification_example.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Decision Tree Classification Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.tree import DecisionTree, DecisionTreeModel
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonDecisionTreeClassificationExample")
+
+    # $example on$
+    # Load and parse the data file into an RDD of LabeledPoint.
+    data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt')
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a DecisionTree model.
+    #  Empty categoricalFeaturesInfo indicates all features are continuous.
+    model = DecisionTree.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
+                                         impurity='gini', maxDepth=5, maxBins=32)
+
+    # Evaluate model on test instances and compute test error
+    predictions = model.predict(testData.map(lambda x: x.features))
+    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
+    testErr = labelsAndPredictions.filter(
+        lambda lp: lp[0] != lp[1]).count() / float(testData.count())
+    print('Test Error = ' + str(testErr))
+    print('Learned classification tree model:')
+    print(model.toDebugString())
+
+    # Save and load model
+    model.save(sc, "target/tmp/myDecisionTreeClassificationModel")
+    sameModel = DecisionTreeModel.load(sc, "target/tmp/myDecisionTreeClassificationModel")
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/decision_tree_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/decision_tree_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..acf9e25fdf31c03c3d1315b7fb1373506536276b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/decision_tree_regression_example.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Decision Tree Regression Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.tree import DecisionTree, DecisionTreeModel
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonDecisionTreeRegressionExample")
+
+    # $example on$
+    # Load and parse the data file into an RDD of LabeledPoint.
+    data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt')
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a DecisionTree model.
+    #  Empty categoricalFeaturesInfo indicates all features are continuous.
+    model = DecisionTree.trainRegressor(trainingData, categoricalFeaturesInfo={},
+                                        impurity='variance', maxDepth=5, maxBins=32)
+
+    # Evaluate model on test instances and compute test error
+    predictions = model.predict(testData.map(lambda x: x.features))
+    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
+    testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
+        float(testData.count())
+    print('Test Mean Squared Error = ' + str(testMSE))
+    print('Learned regression tree model:')
+    print(model.toDebugString())
+
+    # Save and load model
+    model.save(sc, "target/tmp/myDecisionTreeRegressionModel")
+    sameModel = DecisionTreeModel.load(sc, "target/tmp/myDecisionTreeRegressionModel")
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/elementwise_product_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/elementwise_product_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ae9afb1dc47784743bdc98775ece55dd3f0de06
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/elementwise_product_example.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.feature import ElementwiseProduct
+from pyspark.mllib.linalg import Vectors
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="ElementwiseProductExample")  # SparkContext
+
+    # $example on$
+    data = sc.textFile("data/mllib/kmeans_data.txt")
+    parsedData = data.map(lambda x: [float(t) for t in x.split(" ")])
+
+    # Create weight vector.
+    transformingVector = Vectors.dense([0.0, 1.0, 2.0])
+    transformer = ElementwiseProduct(transformingVector)
+
+    # Batch transform
+    transformedData = transformer.transform(parsedData)
+    # Single-row transform
+    transformedData2 = transformer.transform(parsedData.first())
+    # $example off$
+
+    print("transformedData:")
+    for each in transformedData.collect():
+        print(each)
+
+    print("transformedData2:")
+    for each in transformedData2:
+        print(each)
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/fpgrowth_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/fpgrowth_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..715f5268206cbdd1ba278a12dd5161a602ee4b0d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/fpgrowth_example.py
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# $example on$
+from pyspark.mllib.fpm import FPGrowth
+# $example off$
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="FPGrowth")
+
+    # $example on$
+    data = sc.textFile("data/mllib/sample_fpgrowth.txt")
+    transactions = data.map(lambda line: line.strip().split(' '))
+    model = FPGrowth.train(transactions, minSupport=0.2, numPartitions=10)
+    result = model.freqItemsets().collect()
+    for fi in result:
+        print(fi)
+    # $example off$
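
Each element returned by freqItemsets() is a FreqItemset with an items list and a freq count, so the result can be filtered and sorted like any RDD. A small sketch, assuming model from the example above:

# Sketch only: assumes model from the example above.
# Keep only itemsets with at least two items, most frequent first.
multiItem = (model.freqItemsets()
             .filter(lambda fi: len(fi.items) >= 2)
             .sortBy(lambda fi: fi.freq, ascending=False))
for fi in multiItem.collect():
    print("%s: %d" % (fi.items, fi.freq))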
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gaussian_mixture_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gaussian_mixture_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..a60e799d62eb16f841b1cee954575f5d86ef05c7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gaussian_mixture_example.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from numpy import array
+# $example off$
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.clustering import GaussianMixture, GaussianMixtureModel
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="GaussianMixtureExample")  # SparkContext
+
+    # $example on$
+    # Load and parse the data
+    data = sc.textFile("data/mllib/gmm_data.txt")
+    parsedData = data.map(lambda line: array([float(x) for x in line.strip().split(' ')]))
+
+    # Build the model (cluster the data)
+    gmm = GaussianMixture.train(parsedData, 2)
+
+    # Save and load model
+    gmm.save(sc, "target/org/apache/spark/PythonGaussianMixtureExample/GaussianMixtureModel")
+    sameModel = GaussianMixtureModel\
+        .load(sc, "target/org/apache/spark/PythonGaussianMixtureExample/GaussianMixtureModel")
+
+    # output parameters of model
+    for i in range(2):
+        print("weight = ", gmm.weights[i], "mu = ", gmm.gaussians[i].mu,
+              "sigma = ", gmm.gaussians[i].sigma.toArray())
+    # $example off$
+
+    sc.stop()
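
Beyond the component parameters, the model can assign hard cluster labels and soft membership probabilities to the data. A minimal sketch, assuming gmm and parsedData from the example above:

# Sketch only: assumes gmm and parsedData from the example above.
labels = gmm.predict(parsedData)            # hard assignment per point
memberships = gmm.predictSoft(parsedData)   # per-component membership probabilities
print(labels.take(5))
print(memberships.take(5))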
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gaussian_mixture_model.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gaussian_mixture_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b46e27ddaaa8cfaf3d421e888f0b052de5f135f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gaussian_mixture_model.py
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A Gaussian Mixture Model clustering program using MLlib.
+"""
+from __future__ import print_function
+
+import sys
+if sys.version >= '3':
+    long = int
+
+import random
+import argparse
+import numpy as np
+
+from pyspark import SparkConf, SparkContext
+from pyspark.mllib.clustering import GaussianMixture
+
+
+def parseVector(line):
+    return np.array([float(x) for x in line.split(' ')])
+
+
+if __name__ == "__main__":
+    """
+    Parameters
+    ----------
+    inputFile        Input file path containing the data points
+    k                Number of mixture components
+    convergenceTol   Convergence threshold (default 1e-3)
+    maxIterations    Number of EM iterations to perform (default 100)
+    seed             Random seed
+    """
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('inputFile', help='Input File')
+    parser.add_argument('k', type=int, help='Number of clusters')
+    parser.add_argument('--convergenceTol', default=1e-3, type=float, help='convergence threshold')
+    parser.add_argument('--maxIterations', default=100, type=int, help='Number of iterations')
+    parser.add_argument('--seed', default=random.getrandbits(19),
+                        type=long, help='Random seed')
+    args = parser.parse_args()
+
+    conf = SparkConf().setAppName("GMM")
+    sc = SparkContext(conf=conf)
+
+    lines = sc.textFile(args.inputFile)
+    data = lines.map(parseVector)
+    model = GaussianMixture.train(data, args.k, args.convergenceTol,
+                                  args.maxIterations, args.seed)
+    for i in range(args.k):
+        print(("weight = ", model.weights[i], "mu = ", model.gaussians[i].mu,
+               "sigma = ", model.gaussians[i].sigma.toArray()))
+    print("\n")
+    print(("The membership value of each vector to all mixture components (first 100): ",
+           model.predictSoft(data).take(100)))
+    print("\n")
+    print(("Cluster labels (first 100): ", model.predict(data).take(100)))
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gradient_boosting_classification_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gradient_boosting_classification_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..65a03572be9b512169f9b89eb767d160f7a0144e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gradient_boosting_classification_example.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Gradient Boosted Trees Classification Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.tree import GradientBoostedTrees, GradientBoostedTreesModel
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonGradientBoostedTreesClassificationExample")
+    # $example on$
+    # Load and parse the data file.
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a GradientBoostedTrees model.
+    #  Notes: (a) Empty categoricalFeaturesInfo indicates all features are continuous.
+    #         (b) Use more iterations in practice.
+    model = GradientBoostedTrees.trainClassifier(trainingData,
+                                                 categoricalFeaturesInfo={}, numIterations=3)
+
+    # Evaluate model on test instances and compute test error
+    predictions = model.predict(testData.map(lambda x: x.features))
+    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
+    testErr = labelsAndPredictions.filter(
+        lambda lp: lp[0] != lp[1]).count() / float(testData.count())
+    print('Test Error = ' + str(testErr))
+    print('Learned classification GBT model:')
+    print(model.toDebugString())
+
+    # Save and load model
+    model.save(sc, "target/tmp/myGradientBoostingClassificationModel")
+    sameModel = GradientBoostedTreesModel.load(sc,
+                                               "target/tmp/myGradientBoostingClassificationModel")
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gradient_boosting_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gradient_boosting_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..877f8ab461ccdf8fa718243d089d80e8fad6a3aa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/gradient_boosting_regression_example.py
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Gradient Boosted Trees Regression Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.tree import GradientBoostedTrees, GradientBoostedTreesModel
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonGradientBoostedTreesRegressionExample")
+    # $example on$
+    # Load and parse the data file.
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a GradientBoostedTrees model.
+    #  Notes: (a) Empty categoricalFeaturesInfo indicates all features are continuous.
+    #         (b) Use more iterations in practice.
+    model = GradientBoostedTrees.trainRegressor(trainingData,
+                                                categoricalFeaturesInfo={}, numIterations=3)
+
+    # Evaluate model on test instances and compute test error
+    predictions = model.predict(testData.map(lambda x: x.features))
+    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
+    testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
+        float(testData.count())
+    print('Test Mean Squared Error = ' + str(testMSE))
+    print('Learned regression GBT model:')
+    print(model.toDebugString())
+
+    # Save and load model
+    model.save(sc, "target/tmp/myGradientBoostingRegressionModel")
+    sameModel = GradientBoostedTreesModel.load(sc, "target/tmp/myGradientBoostingRegressionModel")
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/hypothesis_testing_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/hypothesis_testing_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..21a5584fd6e06309587c40b0c915d59f211eb5e9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/hypothesis_testing_example.py
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.linalg import Matrices, Vectors
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="HypothesisTestingExample")
+
+    # $example on$
+    vec = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)  # a vector composed of the frequencies of events
+
+    # compute the goodness of fit. If a second vector to test against
+    # is not supplied as a parameter, the test runs against a uniform distribution.
+    goodnessOfFitTestResult = Statistics.chiSqTest(vec)
+
+    # summary of the test including the p-value, degrees of freedom,
+    # test statistic, the method used, and the null hypothesis.
+    print("%s\n" % goodnessOfFitTestResult)
+
+    mat = Matrices.dense(3, 2, [1.0, 3.0, 5.0, 2.0, 4.0, 6.0])  # a contingency matrix
+
+    # conduct Pearson's independence test on the input contingency matrix
+    independenceTestResult = Statistics.chiSqTest(mat)
+
+    # summary of the test including the p-value, degrees of freedom,
+    # test statistic, the method used, and the null hypothesis.
+    print("%s\n" % independenceTestResult)
+
+    obs = sc.parallelize(
+        [LabeledPoint(1.0, [1.0, 0.0, 3.0]),
+         LabeledPoint(1.0, [1.0, 2.0, 0.0]),
+         LabeledPoint(1.0, [-1.0, 0.0, -0.5])]
+    )  # LabeledPoint(label, feature)
+
+    # The contingency table is constructed from an RDD of LabeledPoint and used to conduct
+    # the independence test. Returns an array containing the ChiSquaredTestResult for every feature
+    # against the label.
+    featureTestResults = Statistics.chiSqTest(obs)
+
+    for i, result in enumerate(featureTestResults):
+        print("Column %d:\n%s" % (i + 1, result))
+    # $example off$
+
+    sc.stop()
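
Each result object also exposes its fields individually (pValue, statistic, degreesOfFreedom, nullHypothesis), which is convenient when the decision has to be made in code rather than by reading the printed summary. A small sketch, assuming goodnessOfFitTestResult from the example above and an illustrative 0.05 significance level:

# Sketch only: assumes goodnessOfFitTestResult from the example above.
if goodnessOfFitTestResult.pValue < 0.05:
    print("Reject the null hypothesis (statistic = %g)" % goodnessOfFitTestResult.statistic)
else:
    print("No significant departure from the uniform distribution")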
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef380dee79d3d3aa3a375fe2c4f53b920543a24f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/hypothesis_testing_kolmogorov_smirnov_test_example.py
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="HypothesisTestingKolmogorovSmirnovTestExample")
+
+    # $example on$
+    parallelData = sc.parallelize([0.1, 0.15, 0.2, 0.3, 0.25])
+
+    # run a KS test for the sample versus a standard normal distribution
+    testResult = Statistics.kolmogorovSmirnovTest(parallelData, "norm", 0, 1)
+    # summary of the test including the p-value, test statistic, and null hypothesis
+    # if our p-value indicates significance, we can reject the null hypothesis
+    # Note that the Scala functionality of calling Statistics.kolmogorovSmirnovTest with
+    # a lambda to calculate the CDF is not made available in the Python API
+    print(testResult)
+    # $example off$
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/isotonic_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/isotonic_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..33d618ab48ea9b21dc4c518c00c28177927eb8be
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/isotonic_regression_example.py
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Isotonic Regression Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+import math
+from pyspark.mllib.regression import LabeledPoint, IsotonicRegression, IsotonicRegressionModel
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonIsotonicRegressionExample")
+
+    # $example on$
+    # Load and parse the data
+    def parsePoint(labeledData):
+        return (labeledData.label, labeledData.features[0], 1.0)
+
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_isotonic_regression_libsvm_data.txt")
+
+    # Create label, feature, weight tuples from input data with weight set to default value 1.0.
+    parsedData = data.map(parsePoint)
+
+    # Split data into training (60%) and test (40%) sets.
+    training, test = parsedData.randomSplit([0.6, 0.4], 11)
+
+    # Create isotonic regression model from training data.
+    # The isotonic parameter defaults to True (an increasing fit), so it is not passed explicitly here
+    model = IsotonicRegression.train(training)
+
+    # Create tuples of predicted and real labels.
+    predictionAndLabel = test.map(lambda p: (model.predict(p[1]), p[0]))
+
+    # Calculate mean squared error between predicted and real labels.
+    meanSquaredError = predictionAndLabel.map(lambda pl: math.pow((pl[0] - pl[1]), 2)).mean()
+    print("Mean Squared Error = " + str(meanSquaredError))
+
+    # Save and load model
+    model.save(sc, "target/tmp/myIsotonicRegressionModel")
+    sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel")
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/k_means_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/k_means_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6058f45020c4f4f4a909f4994b7267b963cafa3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/k_means_example.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+# $example on$
+from numpy import array
+from math import sqrt
+# $example off$
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.clustering import KMeans, KMeansModel
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="KMeansExample")  # SparkContext
+
+    # $example on$
+    # Load and parse the data
+    data = sc.textFile("data/mllib/kmeans_data.txt")
+    parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')]))
+
+    # Build the model (cluster the data)
+    clusters = KMeans.train(parsedData, 2, maxIterations=10, initializationMode="random")
+
+    # Evaluate clustering by computing Within Set Sum of Squared Errors
+    def error(point):
+        center = clusters.centers[clusters.predict(point)]
+        return sqrt(sum([x**2 for x in (point - center)]))
+
+    WSSSE = parsedData.map(lambda point: error(point)).reduce(lambda x, y: x + y)
+    print("Within Set Sum of Squared Error = " + str(WSSSE))
+
+    # Save and load model
+    clusters.save(sc, "target/org/apache/spark/PythonKMeansExample/KMeansModel")
+    sameModel = KMeansModel.load(sc, "target/org/apache/spark/PythonKMeansExample/KMeansModel")
+    # $example off$
+
+    sc.stop()
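
The same model can also score individual points: predict() returns the index of the nearest center. A small sketch, assuming clusters from the example above (the query point is illustrative):

# Sketch only: assumes clusters from the example above.
print("Centers: " + str(clusters.centers))
print("Cluster for [0.2, 0.2, 0.2]: " + str(clusters.predict(array([0.2, 0.2, 0.2]))))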
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/kernel_density_estimation_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/kernel_density_estimation_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e8f7241a4a1e76823fb450508c6d16da3bb2a96
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/kernel_density_estimation_example.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.stat import KernelDensity
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="KernelDensityEstimationExample")  # SparkContext
+
+    # $example on$
+    # an RDD of sample data
+    data = sc.parallelize([1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0])
+
+    # Construct the density estimator with the sample data and a standard deviation for the Gaussian
+    # kernels
+    kd = KernelDensity()
+    kd.setSample(data)
+    kd.setBandwidth(3.0)
+
+    # Find density estimates for the given values
+    densities = kd.estimate([-1.0, 2.0, 5.0])
+    # $example off$
+
+    print(densities)
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/kmeans.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/kmeans.py
new file mode 100755
index 0000000000000000000000000000000000000000..1bdb3e9b4a2af206f0c52d814c10b7ae5affbf3e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/kmeans.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A K-means clustering program using MLlib.
+
+This example requires NumPy (http://www.numpy.org/).
+"""
+from __future__ import print_function
+
+import sys
+
+import numpy as np
+from pyspark import SparkContext
+from pyspark.mllib.clustering import KMeans
+
+
+def parseVector(line):
+    return np.array([float(x) for x in line.split(' ')])
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: kmeans <file> <k>", file=sys.stderr)
+        sys.exit(-1)
+    sc = SparkContext(appName="KMeans")
+    lines = sc.textFile(sys.argv[1])
+    data = lines.map(parseVector)
+    k = int(sys.argv[2])
+    model = KMeans.train(data, k)
+    print("Final centers: " + str(model.clusterCenters))
+    print("Total Cost: " + str(model.computeCost(data)))
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/latent_dirichlet_allocation_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/latent_dirichlet_allocation_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a1bef5f207b742465e15f0eea13df83cc5349bb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/latent_dirichlet_allocation_example.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.clustering import LDA, LDAModel
+from pyspark.mllib.linalg import Vectors
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="LatentDirichletAllocationExample")  # SparkContext
+
+    # $example on$
+    # Load and parse the data
+    data = sc.textFile("data/mllib/sample_lda_data.txt")
+    parsedData = data.map(lambda line: Vectors.dense([float(x) for x in line.strip().split(' ')]))
+    # Index documents with unique IDs
+    corpus = parsedData.zipWithIndex().map(lambda x: [x[1], x[0]]).cache()
+
+    # Cluster the documents into three topics using LDA
+    ldaModel = LDA.train(corpus, k=3)
+
+    # Output topics. Each is a distribution over words (matching word count vectors)
+    print("Learned topics (as distributions over vocab of " + str(ldaModel.vocabSize())
+          + " words):")
+    topics = ldaModel.topicsMatrix()
+    for topic in range(3):
+        print("Topic " + str(topic) + ":")
+        for word in range(0, ldaModel.vocabSize()):
+            print(" " + str(topics[word][topic]))
+
+    # Save and load model
+    ldaModel.save(sc, "target/org/apache/spark/PythonLatentDirichletAllocationExample/LDAModel")
+    sameModel = LDAModel\
+        .load(sc, "target/org/apache/spark/PythonLatentDirichletAllocationExample/LDAModel")
+    # $example off$
+
+    sc.stop()
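
topicsMatrix() returns the full vocabSize x k matrix; for a quick look at only the strongest terms per topic, describeTopics is usually more convenient. A minimal sketch, assuming ldaModel from the example above:

# Sketch only: assumes ldaModel from the example above.
for topic, (termIndices, termWeights) in enumerate(ldaModel.describeTopics(maxTermsPerTopic=3)):
    print("Topic %d: term indices %s, weights %s" % (topic, termIndices, termWeights))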
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/linear_regression_with_sgd_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..6744463d40ef19bed04ead56b36a99a58d77e551
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Linear Regression With SGD Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD, LinearRegressionModel
+# $example off$
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonLinearRegressionWithSGDExample")
+
+    # $example on$
+    # Load and parse the data
+    def parsePoint(line):
+        values = [float(x) for x in line.replace(',', ' ').split(' ')]
+        return LabeledPoint(values[0], values[1:])
+
+    data = sc.textFile("data/mllib/ridge-data/lpsa.data")
+    parsedData = data.map(parsePoint)
+
+    # Build the model
+    model = LinearRegressionWithSGD.train(parsedData, iterations=100, step=0.00000001)
+
+    # Evaluate the model on training data
+    valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
+    MSE = valuesAndPreds \
+        .map(lambda vp: (vp[0] - vp[1])**2) \
+        .reduce(lambda x, y: x + y) / valuesAndPreds.count()
+    print("Mean Squared Error = " + str(MSE))
+
+    # Save and load model
+    model.save(sc, "target/tmp/pythonLinearRegressionWithSGDModel")
+    sameModel = LinearRegressionModel.load(sc, "target/tmp/pythonLinearRegressionWithSGDModel")
+    # $example off$
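+
+    # Optional extra, not in the upstream example: RDD.mean() computes the same MSE
+    # with less code than the reduce/count pair above; shown here as a quick check.
+    print("Mean Squared Error (via mean()) = "
+          + str(valuesAndPreds.map(lambda vp: (vp[0] - vp[1])**2).mean()))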
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/logistic_regression.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/logistic_regression.py
new file mode 100755
index 0000000000000000000000000000000000000000..87efe17375226446603cdc84c9d585098f451966
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/logistic_regression.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Logistic regression using MLlib.
+
+This example requires NumPy (http://www.numpy.org/).
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.classification import LogisticRegressionWithSGD
+
+
+def parsePoint(line):
+    """
+    Parse a line of text into an MLlib LabeledPoint object.
+    """
+    values = [float(s) for s in line.split(' ')]
+    if values[0] == -1:   # Convert -1 labels to 0 for MLlib
+        values[0] = 0
+    return LabeledPoint(values[0], values[1:])
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: logistic_regression <file> <iterations>", file=sys.stderr)
+        sys.exit(-1)
+    sc = SparkContext(appName="PythonLR")
+    points = sc.textFile(sys.argv[1]).map(parsePoint)
+    iterations = int(sys.argv[2])
+    model = LogisticRegressionWithSGD.train(points, iterations)
+    print("Final weights: " + str(model.weights))
+    print("Final intercept: " + str(model.intercept))
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9b768b3147d25e866f9cf458add5c86e2663c40
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Logistic Regression With LBFGS Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionModel
+from pyspark.mllib.regression import LabeledPoint
+# $example off$
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonLogisticRegressionWithLBFGSExample")
+
+    # $example on$
+    # Load and parse the data
+    def parsePoint(line):
+        values = [float(x) for x in line.split(' ')]
+        return LabeledPoint(values[0], values[1:])
+
+    data = sc.textFile("data/mllib/sample_svm_data.txt")
+    parsedData = data.map(parsePoint)
+
+    # Build the model
+    model = LogisticRegressionWithLBFGS.train(parsedData)
+
+    # Evaluate the model on training data
+    labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
+    trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
+    print("Training Error = " + str(trainErr))
+
+    # Save and load model
+    model.save(sc, "target/tmp/pythonLogisticRegressionWithLBFGSModel")
+    sameModel = LogisticRegressionModel.load(sc,
+                                             "target/tmp/pythonLogisticRegressionWithLBFGSModel")
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/multi_class_metrics_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/multi_class_metrics_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dc5fb4f9127f89d5dc2dfeded92d1b13f26028b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/multi_class_metrics_example.py
@@ -0,0 +1,69 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# $example on$
+from pyspark.mllib.classification import LogisticRegressionWithLBFGS
+from pyspark.mllib.util import MLUtils
+from pyspark.mllib.evaluation import MulticlassMetrics
+# $example off$
+
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="MultiClassMetricsExample")
+
+    # Several of the methods available in Scala are currently missing from PySpark
+    # $example on$
+    # Load training data in LIBSVM format
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt")
+
+    # Split data into training (60%) and test (40%)
+    training, test = data.randomSplit([0.6, 0.4], seed=11)
+    training.cache()
+
+    # Run training algorithm to build the model
+    model = LogisticRegressionWithLBFGS.train(training, numClasses=3)
+
+    # Compute raw scores on the test set
+    predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp.label))
+
+    # Instantiate metrics object
+    metrics = MulticlassMetrics(predictionAndLabels)
+
+    # Overall statistics
+    precision = metrics.precision()
+    recall = metrics.recall()
+    f1Score = metrics.fMeasure()
+    print("Summary Stats")
+    print("Precision = %s" % precision)
+    print("Recall = %s" % recall)
+    print("F1 Score = %s" % f1Score)
+
+    # Statistics by class
+    labels = data.map(lambda lp: lp.label).distinct().collect()
+    for label in sorted(labels):
+        print("Class %s precision = %s" % (label, metrics.precision(label)))
+        print("Class %s recall = %s" % (label, metrics.recall(label)))
+        print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))
+
+    # Weighted stats
+    print("Weighted recall = %s" % metrics.weightedRecall)
+    print("Weighted precision = %s" % metrics.weightedPrecision)
+    print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
+    print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
+    print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
+    # $example off$
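+
+    # Optional extra, not in the upstream example: in Spark 2.x the no-argument
+    # precision()/recall()/fMeasure() calls above are deprecated in favor of accuracy,
+    # and the confusion matrix is usually worth printing as well (sketch).
+    print("Accuracy = %s" % metrics.accuracy)
+    print("Confusion matrix:\n%s" % metrics.confusionMatrix().toArray())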
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/multi_label_metrics_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/multi_label_metrics_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..960ade6597379a59cce82fa1b64b3e8ba5b68378
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/multi_label_metrics_example.py
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# $example on$
+from pyspark.mllib.evaluation import MultilabelMetrics
+# $example off$
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="MultiLabelMetricsExample")
+    # $example on$
+    scoreAndLabels = sc.parallelize([
+        ([0.0, 1.0], [0.0, 2.0]),
+        ([0.0, 2.0], [0.0, 1.0]),
+        ([], [0.0]),
+        ([2.0], [2.0]),
+        ([2.0, 0.0], [2.0, 0.0]),
+        ([0.0, 1.0, 2.0], [0.0, 1.0]),
+        ([1.0], [1.0, 2.0])])
+
+    # Instantiate metrics object
+    metrics = MultilabelMetrics(scoreAndLabels)
+
+    # Summary stats
+    print("Recall = %s" % metrics.recall())
+    print("Precision = %s" % metrics.precision())
+    print("F1 measure = %s" % metrics.f1Measure())
+    print("Accuracy = %s" % metrics.accuracy)
+
+    # Individual label stats
+    labels = scoreAndLabels.flatMap(lambda x: x[1]).distinct().collect()
+    for label in labels:
+        print("Class %s precision = %s" % (label, metrics.precision(label)))
+        print("Class %s recall = %s" % (label, metrics.recall(label)))
+        print("Class %s F1 Measure = %s" % (label, metrics.f1Measure(label)))
+
+    # Micro stats
+    print("Micro precision = %s" % metrics.microPrecision)
+    print("Micro recall = %s" % metrics.microRecall)
+    print("Micro F1 measure = %s" % metrics.microF1Measure)
+
+    # Hamming loss
+    print("Hamming loss = %s" % metrics.hammingLoss)
+
+    # Subset accuracy
+    print("Subset accuracy = %s" % metrics.subsetAccuracy)
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/naive_bayes_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/naive_bayes_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..a29fcccac5bfc3cbf643ab566feb15dff680975b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/naive_bayes_example.py
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+NaiveBayes Example.
+
+Usage:
+  `spark-submit --master local[4] examples/src/main/python/mllib/naive_bayes_example.py`
+"""
+
+from __future__ import print_function
+
+import shutil
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.classification import NaiveBayes, NaiveBayesModel
+from pyspark.mllib.util import MLUtils
+
+
+# $example off$
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonNaiveBayesExample")
+
+    # $example on$
+    # Load and parse the data file.
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    # Split data approximately into training (60%) and test (40%)
+    training, test = data.randomSplit([0.6, 0.4])
+
+    # Train a naive Bayes model.
+    model = NaiveBayes.train(training, 1.0)
+
+    # Make predictions and compute test accuracy.
+    predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
+    accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
+    print('model accuracy {}'.format(accuracy))
+
+    # Save and load model
+    output_dir = 'target/tmp/myNaiveBayesModel'
+    shutil.rmtree(output_dir, ignore_errors=True)
+    model.save(sc, output_dir)
+    sameModel = NaiveBayesModel.load(sc, output_dir)
+    predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
+    accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
+    print('sameModel accuracy {}'.format(accuracy))
+
+    # $example off$
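+
+    # Optional extra, not in the upstream example: the positional 1.0 above is the
+    # additive (Laplace) smoothing parameter; it can also be passed by name.
+    smoothedModel = NaiveBayes.train(training, lambda_=1.0)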
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/normalizer_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/normalizer_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4e028ca9af8bfc8b06ea432631ea304e529c159
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/normalizer_example.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.feature import Normalizer
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="NormalizerExample")  # SparkContext
+
+    # $example on$
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    labels = data.map(lambda x: x.label)
+    features = data.map(lambda x: x.features)
+
+    normalizer1 = Normalizer()
+    normalizer2 = Normalizer(p=float("inf"))
+
+    # Each sample in data1 will be normalized using $L^2$ norm.
+    data1 = labels.zip(normalizer1.transform(features))
+
+    # Each sample in data2 will be normalized using $L^\infty$ norm.
+    data2 = labels.zip(normalizer2.transform(features))
+    # $example off$
+
+    print("data1:")
+    for each in data1.collect():
+        print(each)
+
+    print("data2:")
+    for each in data2.collect():
+        print(each)
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/pca_rowmatrix_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/pca_rowmatrix_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..49b9b1bbe08e980f80178c0bd151a9018b16151e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/pca_rowmatrix_example.py
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.linalg import Vectors
+from pyspark.mllib.linalg.distributed import RowMatrix
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonPCAOnRowMatrixExample")
+
+    # $example on$
+    rows = sc.parallelize([
+        Vectors.sparse(5, {1: 1.0, 3: 7.0}),
+        Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+        Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
+    ])
+
+    mat = RowMatrix(rows)
+    # Compute the top 4 principal components.
+    # Principal components are stored in a local dense matrix.
+    pc = mat.computePrincipalComponents(4)
+
+    # Project the rows to the linear space spanned by the top 4 principal components.
+    projected = mat.multiply(pc)
+    # $example off$
+    collected = projected.rows.collect()
+    print("Projected Row Matrix of principal component:")
+    for vector in collected:
+        print(vector)
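+
+    # Optional extra, not in the upstream example: the principal components come back
+    # as a local DenseMatrix (one component per column); converting to a NumPy array
+    # makes the loadings easier to read (sketch).
+    print("Principal component loadings:")
+    print(pc.toArray())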
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/power_iteration_clustering_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/power_iteration_clustering_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca19c0ccb60c8872a27d16257d93d359960d6ab2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/power_iteration_clustering_example.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.clustering import PowerIterationClustering, PowerIterationClusteringModel
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PowerIterationClusteringExample")  # SparkContext
+
+    # $example on$
+    # Load and parse the data
+    data = sc.textFile("data/mllib/pic_data.txt")
+    similarities = data.map(lambda line: tuple([float(x) for x in line.split(' ')]))
+
+    # Cluster the data into two classes using PowerIterationClustering
+    model = PowerIterationClustering.train(similarities, 2, 10)
+
+    model.assignments().foreach(lambda x: print(str(x.id) + " -> " + str(x.cluster)))
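+
+    # Optional extra, not in the upstream example: print() inside foreach() runs on the
+    # executors, so on a real cluster the lines above land in executor logs rather than
+    # the driver console. Collecting first is a driver-side alternative (sketch):
+    for a in model.assignments().collect():
+        print(str(a.id) + " -> " + str(a.cluster))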
+
+    # Save and load model
+    model.save(sc, "target/org/apache/spark/PythonPowerIterationClusteringExample/PICModel")
+    sameModel = PowerIterationClusteringModel\
+        .load(sc, "target/org/apache/spark/PythonPowerIterationClusteringExample/PICModel")
+    # $example off$
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_forest_classification_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_forest_classification_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ac67520daee03929f11c3f55bbcc6839a784db1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_forest_classification_example.py
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Random Forest Classification Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.tree import RandomForest, RandomForestModel
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonRandomForestClassificationExample")
+    # $example on$
+    # Load and parse the data file into an RDD of LabeledPoint.
+    data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt')
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a RandomForest model.
+    #  Empty categoricalFeaturesInfo indicates all features are continuous.
+    #  Note: Use larger numTrees in practice.
+    #  Setting featureSubsetStrategy="auto" lets the algorithm choose.
+    model = RandomForest.trainClassifier(trainingData, numClasses=2, categoricalFeaturesInfo={},
+                                         numTrees=3, featureSubsetStrategy="auto",
+                                         impurity='gini', maxDepth=4, maxBins=32)
+
+    # Evaluate model on test instances and compute test error
+    predictions = model.predict(testData.map(lambda x: x.features))
+    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
+    testErr = labelsAndPredictions.filter(
+        lambda lp: lp[0] != lp[1]).count() / float(testData.count())
+    print('Test Error = ' + str(testErr))
+    print('Learned classification forest model:')
+    print(model.toDebugString())
+
+    # Save and load model
+    model.save(sc, "target/tmp/myRandomForestClassificationModel")
+    sameModel = RandomForestModel.load(sc, "target/tmp/myRandomForestClassificationModel")
+    # $example off$
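+
+    # Optional extra, not in the upstream example: tree-ensemble models in the RDD-based
+    # Python API wrap a JVM model, so predict() cannot be used inside worker-side lambdas;
+    # that is why the example predicts on a whole RDD and zips with the labels. Single
+    # vectors can still be scored on the driver (sketch):
+    print("Prediction for the first test point: %s" % model.predict(testData.first().features))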
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_forest_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_forest_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e986a0d307f04248844cbc018e3c9785a4a4498
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_forest_regression_example.py
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Random Forest Regression Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.tree import RandomForest, RandomForestModel
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonRandomForestRegressionExample")
+    # $example on$
+    # Load and parse the data file into an RDD of LabeledPoint.
+    data = MLUtils.loadLibSVMFile(sc, 'data/mllib/sample_libsvm_data.txt')
+    # Split the data into training and test sets (30% held out for testing)
+    (trainingData, testData) = data.randomSplit([0.7, 0.3])
+
+    # Train a RandomForest model.
+    #  Empty categoricalFeaturesInfo indicates all features are continuous.
+    #  Note: Use larger numTrees in practice.
+    #  Setting featureSubsetStrategy="auto" lets the algorithm choose.
+    model = RandomForest.trainRegressor(trainingData, categoricalFeaturesInfo={},
+                                        numTrees=3, featureSubsetStrategy="auto",
+                                        impurity='variance', maxDepth=4, maxBins=32)
+
+    # Evaluate model on test instances and compute test error
+    predictions = model.predict(testData.map(lambda x: x.features))
+    labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
+    testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
+        float(testData.count())
+    print('Test Mean Squared Error = ' + str(testMSE))
+    print('Learned regression forest model:')
+    print(model.toDebugString())
+
+    # Save and load model
+    model.save(sc, "target/tmp/myRandomForestRegressionModel")
+    sameModel = RandomForestModel.load(sc, "target/tmp/myRandomForestRegressionModel")
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_rdd_generation.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_rdd_generation.py
new file mode 100755
index 0000000000000000000000000000000000000000..9a429b5f8abdf815321bf3c3f201a6041ab1fc37
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/random_rdd_generation.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Randomly generated RDDs.
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.mllib.random import RandomRDDs
+
+
+if __name__ == "__main__":
+    if len(sys.argv) not in [1, 2]:
+        print("Usage: random_rdd_generation", file=sys.stderr)
+        sys.exit(-1)
+
+    sc = SparkContext(appName="PythonRandomRDDGeneration")
+
+    numExamples = 10000  # number of examples to generate
+    fraction = 0.1  # fraction of data to sample
+
+    # Example: RandomRDDs.normalRDD
+    normalRDD = RandomRDDs.normalRDD(sc, numExamples)
+    print('Generated RDD of %d examples sampled from the standard normal distribution'
+          % normalRDD.count())
+    print('  First 5 samples:')
+    for sample in normalRDD.take(5):
+        print('    ' + str(sample))
+    print()
+
+    # Example: RandomRDDs.normalVectorRDD
+    normalVectorRDD = RandomRDDs.normalVectorRDD(sc, numRows=numExamples, numCols=2)
+    print('Generated RDD of %d examples of length-2 vectors.' % normalVectorRDD.count())
+    print('  First 5 samples:')
+    for sample in normalVectorRDD.take(5):
+        print('    ' + str(sample))
+    print()
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/ranking_metrics_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/ranking_metrics_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..21333deded35dfb2424055b0bcce761574547ecb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/ranking_metrics_example.py
@@ -0,0 +1,55 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# $example on$
+from pyspark.mllib.recommendation import ALS, Rating
+from pyspark.mllib.evaluation import RegressionMetrics, RankingMetrics
+# $example off$
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="Ranking Metrics Example")
+
+    # Several of the methods available in Scala are currently missing from PySpark
+    # $example on$
+    # Read in the ratings data
+    lines = sc.textFile("data/mllib/sample_movielens_data.txt")
+
+    def parseLine(line):
+        fields = line.split("::")
+        return Rating(int(fields[0]), int(fields[1]), float(fields[2]) - 2.5)
+    ratings = lines.map(parseLine)
+
+    # Train a model to predict user-product ratings
+    model = ALS.train(ratings, 10, 10, 0.01)
+
+    # Get predicted ratings on all existing user-product pairs
+    testData = ratings.map(lambda p: (p.user, p.product))
+    predictions = model.predictAll(testData).map(lambda r: ((r.user, r.product), r.rating))
+
+    ratingsTuple = ratings.map(lambda r: ((r.user, r.product), r.rating))
+    scoreAndLabels = predictions.join(ratingsTuple).map(lambda tup: tup[1])
+
+    # Instantiate regression metrics to compare predicted and actual ratings
+    metrics = RegressionMetrics(scoreAndLabels)
+
+    # Root mean squared error
+    print("RMSE = %s" % metrics.rootMeanSquaredError)
+
+    # R-squared
+    print("R-squared = %s" % metrics.r2)
+    # $example off$
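+
+    # Optional extra, not in the upstream example: RankingMetrics is imported above but
+    # unused in the Python version. A minimal, self-contained sketch on hand-written
+    # (recommended items, relevant items) pairs:
+    rankingData = sc.parallelize([
+        ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]),
+        ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]),
+        ([1, 2, 3, 4, 5], [])])
+    rankingMetrics = RankingMetrics(rankingData)
+    print("Precision at 5 = %s" % rankingMetrics.precisionAt(5))
+    print("Mean average precision = %s" % rankingMetrics.meanAveragePrecision)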
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/recommendation_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/recommendation_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..00e683c3ae938e51b11b9d12e3579920aa7721f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/recommendation_example.py
@@ -0,0 +1,52 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Collaborative Filtering Classification Example.
+"""
+from __future__ import print_function
+
+from pyspark import SparkContext
+
+# $example on$
+from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonCollaborativeFilteringExample")
+    # $example on$
+    # Load and parse the data
+    data = sc.textFile("data/mllib/als/test.data")
+    ratings = data.map(lambda l: l.split(','))\
+        .map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))
+
+    # Build the recommendation model using Alternating Least Squares
+    rank = 10
+    numIterations = 10
+    model = ALS.train(ratings, rank, numIterations)
+
+    # Evaluate the model on training data
+    testdata = ratings.map(lambda p: (p[0], p[1]))
+    predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
+    ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
+    MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
+    print("Mean Squared Error = " + str(MSE))
+
+    # Save and load model
+    model.save(sc, "target/tmp/myCollaborativeFilter")
+    sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
+    # $example off$
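+
+    # Optional extra, not in the upstream example: top-N recommendations for one user,
+    # assuming user id 1 appears in the small ALS test data set (it does in the bundled
+    # data/mllib/als/test.data).
+    for rec in model.recommendProducts(1, 3):
+        print(rec)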
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/regression_metrics_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/regression_metrics_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3a83aafd7a1f3745079110402363a020294eaf5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/regression_metrics_example.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# $example on$
+from pyspark.mllib.regression import LabeledPoint, LinearRegressionWithSGD
+from pyspark.mllib.evaluation import RegressionMetrics
+from pyspark.mllib.linalg import DenseVector
+# $example off$
+
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="Regression Metrics Example")
+
+    # $example on$
+    # Load and parse the data
+    def parsePoint(line):
+        values = line.split()
+        return LabeledPoint(float(values[0]),
+                            DenseVector([float(x.split(':')[1]) for x in values[1:]]))
+
+    data = sc.textFile("data/mllib/sample_linear_regression_data.txt")
+    parsedData = data.map(parsePoint)
+
+    # Build the model
+    model = LinearRegressionWithSGD.train(parsedData)
+
+    # Get predictions
+    valuesAndPreds = parsedData.map(lambda p: (float(model.predict(p.features)), p.label))
+
+    # Instantiate metrics object
+    metrics = RegressionMetrics(valuesAndPreds)
+
+    # Squared Error
+    print("MSE = %s" % metrics.meanSquaredError)
+    print("RMSE = %s" % metrics.rootMeanSquaredError)
+
+    # R-squared
+    print("R-squared = %s" % metrics.r2)
+
+    # Mean absolute error
+    print("MAE = %s" % metrics.meanAbsoluteError)
+
+    # Explained variance
+    print("Explained variance = %s" % metrics.explainedVariance)
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/sampled_rdds.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/sampled_rdds.py
new file mode 100755
index 0000000000000000000000000000000000000000..00e7cf4bbcdbfada376eb2744833eb136a0aee23
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/sampled_rdds.py
@@ -0,0 +1,87 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Randomly sampled RDDs.
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.mllib.util import MLUtils
+
+
+if __name__ == "__main__":
+    if len(sys.argv) not in [1, 2]:
+        print("Usage: sampled_rdds <libsvm data file>", file=sys.stderr)
+        sys.exit(-1)
+    if len(sys.argv) == 2:
+        datapath = sys.argv[1]
+    else:
+        datapath = 'data/mllib/sample_binary_classification_data.txt'
+
+    sc = SparkContext(appName="PythonSampledRDDs")
+
+    fraction = 0.1  # fraction of data to sample
+
+    examples = MLUtils.loadLibSVMFile(sc, datapath)
+    numExamples = examples.count()
+    if numExamples == 0:
+        print("Error: Data file had no samples to load.", file=sys.stderr)
+        sys.exit(1)
+    print('Loaded data with %d examples from file: %s' % (numExamples, datapath))
+
+    # Example: RDD.sample() and RDD.takeSample()
+    expectedSampleSize = int(numExamples * fraction)
+    print('Sampling RDD using fraction %g.  Expected sample size = %d.'
+          % (fraction, expectedSampleSize))
+    sampledRDD = examples.sample(withReplacement=True, fraction=fraction)
+    print('  RDD.sample(): sample has %d examples' % sampledRDD.count())
+    sampledArray = examples.takeSample(withReplacement=True, num=expectedSampleSize)
+    print('  RDD.takeSample(): sample has %d examples' % len(sampledArray))
+
+    print()
+
+    # Example: RDD.sampleByKey()
+    keyedRDD = examples.map(lambda lp: (int(lp.label), lp.features))
+    print('  Keyed data using label (Int) as key ==> Orig')
+    #  Count examples per label in original data.
+    keyCountsA = keyedRDD.countByKey()
+
+    #  Subsample, and count examples per label in sampled data.
+    fractions = {}
+    for k in keyCountsA.keys():
+        fractions[k] = fraction
+    sampledByKeyRDD = keyedRDD.sampleByKey(withReplacement=True, fractions=fractions)
+    keyCountsB = sampledByKeyRDD.countByKey()
+    sizeB = sum(keyCountsB.values())
+    print('  Sampled %d examples using approximate stratified sampling (by label). ==> Sample'
+          % sizeB)
+
+    #  Compare samples
+    print('   \tFractions of examples with key')
+    print('Key\tOrig\tSample')
+    for k in sorted(keyCountsA.keys()):
+        fracA = keyCountsA[k] / float(numExamples)
+        if sizeB != 0:
+            fracB = keyCountsB.get(k, 0) / float(sizeB)
+        else:
+            fracB = 0
+        print('%d\t%g\t%g' % (k, fracA, fracB))
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/standard_scaler_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/standard_scaler_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..442094e1bf3669d66715962e5636605800902c0b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/standard_scaler_example.py
@@ -0,0 +1,53 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.feature import StandardScaler, StandardScalerModel
+from pyspark.mllib.linalg import Vectors
+from pyspark.mllib.util import MLUtils
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="StandardScalerExample")  # SparkContext
+
+    # $example on$
+    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    label = data.map(lambda x: x.label)
+    features = data.map(lambda x: x.features)
+
+    scaler1 = StandardScaler().fit(features)
+    scaler2 = StandardScaler(withMean=True, withStd=True).fit(features)
+
+    # data1 will have unit variance.
+    data1 = label.zip(scaler1.transform(features))
+
+    # data2 will have unit variance and zero mean.
+    data2 = label.zip(scaler2.transform(features.map(lambda x: Vectors.dense(x.toArray()))))
+    # $example off$
+
+    print("data1:")
+    for each in data1.collect():
+        print(each)
+
+    print("data2:")
+    for each in data2.collect():
+        print(each)
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/stratified_sampling_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/stratified_sampling_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..a13f8f08dd68b88929367b1bd94b19961676e151
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/stratified_sampling_example.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="StratifiedSamplingExample")  # SparkContext
+
+    # $example on$
+    # an RDD of any key value pairs
+    data = sc.parallelize([(1, 'a'), (1, 'b'), (2, 'c'), (2, 'd'), (2, 'e'), (3, 'f')])
+
+    # specify the exact fraction desired from each key as a dictionary
+    fractions = {1: 0.1, 2: 0.6, 3: 0.3}
+
+    approxSample = data.sampleByKey(False, fractions)
+    # $example off$
+
+    for each in approxSample.collect():
+        print(each)
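+
+    # Optional extra, not in the upstream example: sampleByKey only approximates the
+    # requested fractions, which is easy to see by comparing per-key counts (sketch).
+    print("Original counts per key: %s" % dict(data.countByKey()))
+    print("Sampled counts per key:  %s" % dict(approxSample.countByKey()))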
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/streaming_k_means_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/streaming_k_means_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..e82509ad3ffb63ca2355cf71756197e0a67f939d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/streaming_k_means_example.py
@@ -0,0 +1,66 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+# $example on$
+from pyspark.mllib.linalg import Vectors
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.clustering import StreamingKMeans
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="StreamingKMeansExample")  # SparkContext
+    ssc = StreamingContext(sc, 1)
+
+    # $example on$
+    # we make an input stream of vectors for training,
+    # as well as a stream of vectors for testing
+    def parse(lp):
+        label = float(lp[lp.find('(') + 1: lp.find(')')])
+        vec = Vectors.dense(lp[lp.find('[') + 1: lp.find(']')].split(','))
+
+        return LabeledPoint(label, vec)
+
+    trainingData = sc.textFile("data/mllib/kmeans_data.txt")\
+        .map(lambda line: Vectors.dense([float(x) for x in line.strip().split(' ')]))
+
+    testingData = sc.textFile("data/mllib/streaming_kmeans_data_test.txt").map(parse)
+
+    trainingQueue = [trainingData]
+    testingQueue = [testingData]
+
+    trainingStream = ssc.queueStream(trainingQueue)
+    testingStream = ssc.queueStream(testingQueue)
+
+    # We create a model with random clusters and specify the number of clusters to find
+    model = StreamingKMeans(k=2, decayFactor=1.0).setRandomCenters(3, 1.0, 0)
+
+    # Now register the streams for training and testing and start the job,
+    # printing the predicted cluster assignments on new data points as they arrive.
+    model.trainOn(trainingStream)
+
+    result = model.predictOnValues(testingStream.map(lambda lp: (lp.label, lp.features)))
+    result.pprint()
+
+    ssc.start()
+    ssc.stop(stopSparkContext=True, stopGraceFully=True)
+    # $example off$
+
+    print("Final centers: " + str(model.latestModel().centers))
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/streaming_linear_regression_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/streaming_linear_regression_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..714c9a0de7217a2337a84993a037d50ac8de856b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/streaming_linear_regression_example.py
@@ -0,0 +1,62 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Streaming Linear Regression Example.
+"""
+from __future__ import print_function
+
+# $example on$
+import sys
+# $example off$
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+# $example on$
+from pyspark.mllib.linalg import Vectors
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.regression import StreamingLinearRegressionWithSGD
+# $example off$
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: streaming_linear_regression_example.py <trainingDir> <testDir>",
+              file=sys.stderr)
+        sys.exit(-1)
+
+    sc = SparkContext(appName="PythonLogisticRegressionWithLBFGSExample")
+    ssc = StreamingContext(sc, 1)
+
+    # $example on$
+    def parse(lp):
+        label = float(lp[lp.find('(') + 1: lp.find(',')])
+        vec = Vectors.dense(lp[lp.find('[') + 1: lp.find(']')].split(','))
+        return LabeledPoint(label, vec)
+
+    trainingData = ssc.textFileStream(sys.argv[1]).map(parse).cache()
+    testData = ssc.textFileStream(sys.argv[2]).map(parse)
+
+    numFeatures = 3
+    model = StreamingLinearRegressionWithSGD()
+    model.setInitialWeights([0.0, 0.0, 0.0])
+
+    model.trainOn(trainingData)
+    # Print a sample of predicted (label, prediction) pairs from each batch.
+    model.predictOnValues(testData.map(lambda lp: (lp.label, lp.features))).pprint()
+
+    ssc.start()
+    ssc.awaitTermination()
+    # $example off$
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/summary_statistics_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/summary_statistics_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..d55d1a2c2d0e11e842b51934ab33ed8ec47df26f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/summary_statistics_example.py
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+import numpy as np
+
+from pyspark.mllib.stat import Statistics
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="SummaryStatisticsExample")  # SparkContext
+
+    # $example on$
+    mat = sc.parallelize(
+        [np.array([1.0, 10.0, 100.0]), np.array([2.0, 20.0, 200.0]), np.array([3.0, 30.0, 300.0])]
+    )  # an RDD of Vectors
+
+    # Compute column summary statistics.
+    summary = Statistics.colStats(mat)
+    print(summary.mean())  # a dense vector containing the mean value for each column
+    print(summary.variance())  # column-wise variance
+    print(summary.numNonzeros())  # number of nonzeros in each column
+    # $example off$
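+
+    # Optional extra, not in the upstream example: the same summary object also exposes
+    # count, max, and min, which make handy sanity checks (sketch).
+    print(summary.count())  # number of rows
+    print(summary.max())    # column-wise maxima
+    print(summary.min())    # column-wise minima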
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/svd_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/svd_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b220fdb3fd6741ada3a40aeac6c4929e901c4a4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/svd_example.py
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.linalg import Vectors
+from pyspark.mllib.linalg.distributed import RowMatrix
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonSVDExample")
+
+    # $example on$
+    rows = sc.parallelize([
+        Vectors.sparse(5, {1: 1.0, 3: 7.0}),
+        Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+        Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
+    ])
+
+    mat = RowMatrix(rows)
+
+    # Compute the top 5 singular values and corresponding singular vectors.
+    svd = mat.computeSVD(5, computeU=True)
+    U = svd.U       # The U factor is a RowMatrix.
+    s = svd.s       # The singular values are stored in a local dense vector.
+    V = svd.V       # The V factor is a local dense matrix.
+    # $example off$
+    collected = U.rows.collect()
+    print("U factor is:")
+    for vector in collected:
+        print(vector)
+    print("Singular values are: %s" % s)
+    print("V factor is:\n%s" % V)
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/svm_with_sgd_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/svm_with_sgd_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..24b8f431e059e7cfd69718f600ca1f0ef591838c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/svm_with_sgd_example.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.classification import SVMWithSGD, SVMModel
+from pyspark.mllib.regression import LabeledPoint
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonSVMWithSGDExample")
+
+    # $example on$
+    # Load and parse the data
+    def parsePoint(line):
+        values = [float(x) for x in line.split(' ')]
+        return LabeledPoint(values[0], values[1:])
+
+    data = sc.textFile("data/mllib/sample_svm_data.txt")
+    parsedData = data.map(parsePoint)
+
+    # Build the model
+    model = SVMWithSGD.train(parsedData, iterations=100)
+
+    # Evaluating the model on training data
+    labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
+    trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
+    print("Training Error = " + str(trainErr))
+
+    # Save and load model
+    model.save(sc, "target/tmp/pythonSVMWithSGDModel")
+    sameModel = SVMModel.load(sc, "target/tmp/pythonSVMWithSGDModel")
+    # $example off$
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/tf_idf_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/tf_idf_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..b66412b2334e7c91734b9728acb9865132e61c4b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/tf_idf_example.py
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.feature import HashingTF, IDF
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="TFIDFExample")  # SparkContext
+
+    # $example on$
+    # Load documents (one per line).
+    documents = sc.textFile("data/mllib/kmeans_data.txt").map(lambda line: line.split(" "))
+
+    hashingTF = HashingTF()
+    tf = hashingTF.transform(documents)
+
+    # While applying HashingTF only needs a single pass over the data, applying IDF needs two
+    # passes: first to compute the IDF vector and second to scale the term frequencies by IDF.
+    tf.cache()
+    idf = IDF().fit(tf)
+    tfidf = idf.transform(tf)
+
+    # spark.mllib's IDF implementation provides an option for ignoring terms
+    # that occur in fewer than a minimum number of documents.
+    # In such cases, the IDF for these terms is set to 0.
+    # This feature can be used by passing the minDocFreq value to the IDF constructor.
+    idfIgnore = IDF(minDocFreq=2).fit(tf)
+    tfidfIgnore = idfIgnore.transform(tf)
+    # $example off$
+
+    print("tfidf:")
+    for each in tfidf.collect():
+        print(each)
+
+    print("tfidfIgnore:")
+    for each in tfidfIgnore.collect():
+        print(each)
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/word2vec.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/word2vec.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e7d4f7610c242fe8935b4163c3eb1d4fbdc7298
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/word2vec.py
@@ -0,0 +1,51 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This example uses the text8 file from http://mattmahoney.net/dc/text8.zip
+# The file was downloaded, unzipped and split into multiple lines using
+#
+# wget http://mattmahoney.net/dc/text8.zip
+# unzip text8.zip
+# grep -o -E '\w+(\W+\w+){0,15}' text8 > text8_lines
+# This was done so that the example can be run in local mode
+
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.mllib.feature import Word2Vec
+
+USAGE = ("bin/spark-submit --driver-memory 4g "
+         "examples/src/main/python/mllib/word2vec.py text8_lines")
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print(USAGE)
+        sys.exit("Argument for file not provided")
+    file_path = sys.argv[1]
+    sc = SparkContext(appName='Word2Vec')
+    inp = sc.textFile(file_path).map(lambda row: row.split(" "))
+
+    word2vec = Word2Vec()
+    model = word2vec.fit(inp)
+
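+    # Look up the 40 words closest to 'china' in the learned vector space.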
+    synonyms = model.findSynonyms('china', 40)
+
+    for word, cosine_distance in synonyms:
+        print("{}: {}".format(word, cosine_distance))
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/word2vec_example.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/word2vec_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad1090c77ee11c83990c46e56e5bc9b8de735d40
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/mllib/word2vec_example.py
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+from pyspark import SparkContext
+# $example on$
+from pyspark.mllib.feature import Word2Vec
+# $example off$
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="Word2VecExample")  # SparkContext
+
+    # $example on$
+    inp = sc.textFile("data/mllib/sample_lda_data.txt").map(lambda row: row.split(" "))
+
+    word2vec = Word2Vec()
+    model = word2vec.fit(inp)
+
+    synonyms = model.findSynonyms('1', 5)
+
+    for word, cosine_distance in synonyms:
+        print("{}: {}".format(word, cosine_distance))
+    # $example off$
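+
+    # Hedged addition, not part of the upstream example: the fitted Word2VecModel also exposes
+    # the learned vector for a single token via transform(); guarded because the token must be
+    # in the fitted vocabulary.
+    try:
+        print("Vector for '1': %s" % str(model.transform('1')))
+    except ValueError:
+        print("'1' is not in the fitted vocabulary")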
+
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/pagerank.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/pagerank.py
new file mode 100755
index 0000000000000000000000000000000000000000..2c19e8700ab16d7a3fe184ffb5234eef0596cb5f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/pagerank.py
@@ -0,0 +1,88 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+This is an example implementation of PageRank. For more conventional use,
+please refer to the PageRank implementation provided by GraphX.
+
+Example Usage:
+bin/spark-submit examples/src/main/python/pagerank.py data/mllib/pagerank_data.txt 10
+"""
+from __future__ import print_function
+
+import re
+import sys
+from operator import add
+
+from pyspark.sql import SparkSession
+
+
+def computeContribs(urls, rank):
+    """Calculates URL contributions to the rank of other URLs."""
+    num_urls = len(urls)
+    for url in urls:
+        yield (url, rank / num_urls)
+
+
+def parseNeighbors(urls):
+    """Parses a urls pair string into urls pair."""
+    parts = re.split(r'\s+', urls)
+    return parts[0], parts[1]
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: pagerank <file> <iterations>", file=sys.stderr)
+        sys.exit(-1)
+
+    print("WARN: This is a naive implementation of PageRank and is given as an example!\n" +
+          "Please refer to PageRank implementation provided by graphx",
+          file=sys.stderr)
+
+    # Initialize the Spark session.
+    spark = SparkSession\
+        .builder\
+        .appName("PythonPageRank")\
+        .getOrCreate()
+
+    # Loads the input file. It should be in the format:
+    #     URL         neighbor URL
+    #     URL         neighbor URL
+    #     URL         neighbor URL
+    #     ...
+    lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
+
+    # Loads all URLs from the input file and initializes their neighbors.
+    links = lines.map(lambda urls: parseNeighbors(urls)).distinct().groupByKey().cache()
+
+    # Initializes the rank of each URL to one.
+    ranks = links.map(lambda url_neighbors: (url_neighbors[0], 1.0))
+
+    # Iteratively calculates and updates URL ranks using the PageRank algorithm.
+    for iteration in range(int(sys.argv[2])):
+        # Calculates URL contributions to the rank of other URLs.
+        contribs = links.join(ranks).flatMap(
+            lambda url_urls_rank: computeContribs(url_urls_rank[1][0], url_urls_rank[1][1]))
+
+        # Re-calculates URL ranks based on neighbor contributions.
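+        # 0.85 is the damping factor; the constant 0.15 (= 1 - 0.85) is the teleport term of
+        # this simplified formulation.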
+        ranks = contribs.reduceByKey(add).mapValues(lambda rank: rank * 0.85 + 0.15)
+
+    # Collects all URL ranks and dumps them to the console.
+    for (link, rank) in ranks.collect():
+        print("%s has rank: %s." % (link, rank))
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/parquet_inputformat.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/parquet_inputformat.py
new file mode 100644
index 0000000000000000000000000000000000000000..83041f0040a0c4811da54f6b6556985af521dc85
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/parquet_inputformat.py
@@ -0,0 +1,69 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Read the data file users.parquet in a local Spark distribution:
+
+$ cd $SPARK_HOME
+$ export AVRO_PARQUET_JARS=/path/to/parquet-avro-1.5.0.jar
+$ ./bin/spark-submit --driver-class-path /path/to/example/jar \\
+        --jars $AVRO_PARQUET_JARS \\
+        ./examples/src/main/python/parquet_inputformat.py \\
+        examples/src/main/resources/users.parquet
+<...lots of log output...>
+{u'favorite_color': None, u'name': u'Alyssa', u'favorite_numbers': [3, 9, 15, 20]}
+{u'favorite_color': u'red', u'name': u'Ben', u'favorite_numbers': []}
+<...more log output...>
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark.sql import SparkSession
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("""
+        Usage: parquet_inputformat.py <data_file>
+
+        Run with example jar:
+        ./bin/spark-submit --driver-class-path /path/to/example/jar \\
+                /path/to/examples/parquet_inputformat.py <data_file>
+        Assumes you have Parquet data stored in <data_file>.
+        """, file=sys.stderr)
+        sys.exit(-1)
+
+    path = sys.argv[1]
+
+    spark = SparkSession\
+        .builder\
+        .appName("ParquetInputFormat")\
+        .getOrCreate()
+
+    sc = spark.sparkContext
+
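+    # Read Parquet through the Avro-based InputFormat: keys are Void and values are Avro
+    # IndexedRecords, converted to Python dicts by the example's converter class.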
+    parquet_rdd = sc.newAPIHadoopFile(
+        path,
+        'org.apache.parquet.avro.AvroParquetInputFormat',
+        'java.lang.Void',
+        'org.apache.avro.generic.IndexedRecord',
+        valueConverter='org.apache.spark.examples.pythonconverters.IndexedRecordToJavaConverter')
+    output = parquet_rdd.map(lambda x: x[1]).collect()
+    for k in output:
+        print(k)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/pi.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/pi.py
new file mode 100755
index 0000000000000000000000000000000000000000..5839cc28749565186349b9ae505583c5a88d21f9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/pi.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import sys
+from random import random
+from operator import add
+
+from pyspark.sql import SparkSession
+
+
+if __name__ == "__main__":
+    """
+        Usage: pi [partitions]
+    """
+    spark = SparkSession\
+        .builder\
+        .appName("PythonPi")\
+        .getOrCreate()
+
+    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
+    n = 100000 * partitions
+
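+    # Monte Carlo estimate: sample points uniformly in the 2x2 square centered at the origin
+    # and count those inside the unit circle; the hit ratio approximates pi / 4.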
+    def f(_):
+        x = random() * 2 - 1
+        y = random() * 2 - 1
+        return 1 if x ** 2 + y ** 2 <= 1 else 0
+
+    count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
+    print("Pi is roughly %f" % (4.0 * count / n))
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sort.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sort.py
new file mode 100755
index 0000000000000000000000000000000000000000..d3cd985d197e350a916a878ea35c88eded94c024
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sort.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import sys
+
+from pyspark.sql import SparkSession
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: sort <file>", file=sys.stderr)
+        sys.exit(-1)
+
+    spark = SparkSession\
+        .builder\
+        .appName("PythonSort")\
+        .getOrCreate()
+
+    lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
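+    # Each whitespace-separated token is parsed as an integer key paired with a dummy count,
+    # so the keys can be ordered with sortByKey().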
+    sortedCount = lines.flatMap(lambda x: x.split(' ')) \
+        .map(lambda x: (int(x), 1)) \
+        .sortByKey()
+    # This is just a demo on how to bring all the sorted data back to a single node.
+    # In reality, we wouldn't want to collect all the data to the driver node.
+    output = sortedCount.collect()
+    for (num, unitcount) in output:
+        print(num)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/arrow.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/arrow.py
new file mode 100644
index 0000000000000000000000000000000000000000..5eb164b20ad042c9f44afefb8071595df63732d6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/arrow.py
@@ -0,0 +1,166 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A simple example demonstrating Arrow in Spark.
+Run with:
+  ./bin/spark-submit examples/src/main/python/sql/arrow.py
+"""
+
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+from pyspark.sql.utils import require_minimum_pandas_version, require_minimum_pyarrow_version
+
+require_minimum_pandas_version()
+require_minimum_pyarrow_version()
+
+
+def dataframe_with_arrow_example(spark):
+    # $example on:dataframe_with_arrow$
+    import numpy as np
+    import pandas as pd
+
+    # Enable Arrow-based columnar data transfers
+    spark.conf.set("spark.sql.execution.arrow.enabled", "true")
+
+    # Generate a Pandas DataFrame
+    pdf = pd.DataFrame(np.random.rand(100, 3))
+
+    # Create a Spark DataFrame from a Pandas DataFrame using Arrow
+    df = spark.createDataFrame(pdf)
+
+    # Convert the Spark DataFrame back to a Pandas DataFrame using Arrow
+    result_pdf = df.select("*").toPandas()
+    # $example off:dataframe_with_arrow$
+    print("Pandas DataFrame result statistics:\n%s\n" % str(result_pdf.describe()))
+
+
+def scalar_pandas_udf_example(spark):
+    # $example on:scalar_pandas_udf$
+    import pandas as pd
+
+    from pyspark.sql.functions import col, pandas_udf
+    from pyspark.sql.types import LongType
+
+    # Declare the function and create the UDF
+    def multiply_func(a, b):
+        return a * b
+
+    multiply = pandas_udf(multiply_func, returnType=LongType())
+
+    # The function for a pandas_udf should be able to execute with local Pandas data
+    x = pd.Series([1, 2, 3])
+    print(multiply_func(x, x))
+    # 0    1
+    # 1    4
+    # 2    9
+    # dtype: int64
+
+    # Create a Spark DataFrame, 'spark' is an existing SparkSession
+    df = spark.createDataFrame(pd.DataFrame(x, columns=["x"]))
+
+    # Execute function as a Spark vectorized UDF
+    df.select(multiply(col("x"), col("x"))).show()
+    # +-------------------+
+    # |multiply_func(x, x)|
+    # +-------------------+
+    # |                  1|
+    # |                  4|
+    # |                  9|
+    # +-------------------+
+    # $example off:scalar_pandas_udf$
+
+
+def grouped_map_pandas_udf_example(spark):
+    # $example on:grouped_map_pandas_udf$
+    from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+    df = spark.createDataFrame(
+        [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)],
+        ("id", "v"))
+
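+    # The schema string declares the columns of the pandas.DataFrame returned for each group.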
+    @pandas_udf("id long, v double", PandasUDFType.GROUPED_MAP)
+    def subtract_mean(pdf):
+        # pdf is a pandas.DataFrame
+        v = pdf.v
+        return pdf.assign(v=v - v.mean())
+
+    df.groupby("id").apply(subtract_mean).show()
+    # +---+----+
+    # | id|   v|
+    # +---+----+
+    # |  1|-0.5|
+    # |  1| 0.5|
+    # |  2|-3.0|
+    # |  2|-1.0|
+    # |  2| 4.0|
+    # +---+----+
+    # $example off:grouped_map_pandas_udf$
+
+
+def grouped_agg_pandas_udf_example(spark):
+    # $example on:grouped_agg_pandas_udf$
+    from pyspark.sql.functions import pandas_udf, PandasUDFType
+    from pyspark.sql import Window
+
+    df = spark.createDataFrame(
+        [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)],
+        ("id", "v"))
+
+    @pandas_udf("double", PandasUDFType.GROUPED_AGG)
+    def mean_udf(v):
+        return v.mean()
+
+    df.groupby("id").agg(mean_udf(df['v'])).show()
+    # +---+-----------+
+    # | id|mean_udf(v)|
+    # +---+-----------+
+    # |  1|        1.5|
+    # |  2|        6.0|
+    # +---+-----------+
+
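+    # The same grouped aggregate UDF can also be used as a window function over each id partition.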
+    w = Window \
+        .partitionBy('id') \
+        .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)
+    df.withColumn('mean_v', mean_udf(df['v']).over(w)).show()
+    # +---+----+------+
+    # | id|   v|mean_v|
+    # +---+----+------+
+    # |  1| 1.0|   1.5|
+    # |  1| 2.0|   1.5|
+    # |  2| 3.0|   6.0|
+    # |  2| 5.0|   6.0|
+    # |  2|10.0|   6.0|
+    # +---+----+------+
+    # $example off:grouped_agg_pandas_udf$
+
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("Python Arrow-in-Spark example") \
+        .getOrCreate()
+
+    print("Running Pandas to/from conversion example")
+    dataframe_with_arrow_example(spark)
+    print("Running pandas_udf scalar example")
+    scalar_pandas_udf_example(spark)
+    print("Running pandas_udf grouped map example")
+    grouped_map_pandas_udf_example(spark)
+    print("Running pandas_udf grouped agg example")
+    grouped_agg_pandas_udf_example(spark)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/basic.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/basic.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8fb25d0533b58b1540181193589a6038fb83b83
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/basic.py
@@ -0,0 +1,215 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A simple example demonstrating basic Spark SQL features.
+Run with:
+  ./bin/spark-submit examples/src/main/python/sql/basic.py
+"""
+from __future__ import print_function
+
+# $example on:init_session$
+from pyspark.sql import SparkSession
+# $example off:init_session$
+
+# $example on:schema_inferring$
+from pyspark.sql import Row
+# $example off:schema_inferring$
+
+# $example on:programmatic_schema$
+# Import data types
+from pyspark.sql.types import *
+# $example off:programmatic_schema$
+
+
+def basic_df_example(spark):
+    # $example on:create_df$
+    # spark is an existing SparkSession
+    df = spark.read.json("examples/src/main/resources/people.json")
+    # Displays the content of the DataFrame to stdout
+    df.show()
+    # +----+-------+
+    # | age|   name|
+    # +----+-------+
+    # |null|Michael|
+    # |  30|   Andy|
+    # |  19| Justin|
+    # +----+-------+
+    # $example off:create_df$
+
+    # $example on:untyped_ops$
+    # spark, df are from the previous example
+    # Print the schema in a tree format
+    df.printSchema()
+    # root
+    # |-- age: long (nullable = true)
+    # |-- name: string (nullable = true)
+
+    # Select only the "name" column
+    df.select("name").show()
+    # +-------+
+    # |   name|
+    # +-------+
+    # |Michael|
+    # |   Andy|
+    # | Justin|
+    # +-------+
+
+    # Select everybody, but increment the age by 1
+    df.select(df['name'], df['age'] + 1).show()
+    # +-------+---------+
+    # |   name|(age + 1)|
+    # +-------+---------+
+    # |Michael|     null|
+    # |   Andy|       31|
+    # | Justin|       20|
+    # +-------+---------+
+
+    # Select people older than 21
+    df.filter(df['age'] > 21).show()
+    # +---+----+
+    # |age|name|
+    # +---+----+
+    # | 30|Andy|
+    # +---+----+
+
+    # Count people by age
+    df.groupBy("age").count().show()
+    # +----+-----+
+    # | age|count|
+    # +----+-----+
+    # |  19|    1|
+    # |null|    1|
+    # |  30|    1|
+    # +----+-----+
+    # $example off:untyped_ops$
+
+    # $example on:run_sql$
+    # Register the DataFrame as a SQL temporary view
+    df.createOrReplaceTempView("people")
+
+    sqlDF = spark.sql("SELECT * FROM people")
+    sqlDF.show()
+    # +----+-------+
+    # | age|   name|
+    # +----+-------+
+    # |null|Michael|
+    # |  30|   Andy|
+    # |  19| Justin|
+    # +----+-------+
+    # $example off:run_sql$
+
+    # $example on:global_temp_view$
+    # Register the DataFrame as a global temporary view
+    df.createGlobalTempView("people")
+
+    # Global temporary view is tied to a system preserved database `global_temp`
+    spark.sql("SELECT * FROM global_temp.people").show()
+    # +----+-------+
+    # | age|   name|
+    # +----+-------+
+    # |null|Michael|
+    # |  30|   Andy|
+    # |  19| Justin|
+    # +----+-------+
+
+    # Global temporary view is cross-session
+    spark.newSession().sql("SELECT * FROM global_temp.people").show()
+    # +----+-------+
+    # | age|   name|
+    # +----+-------+
+    # |null|Michael|
+    # |  30|   Andy|
+    # |  19| Justin|
+    # +----+-------+
+    # $example off:global_temp_view$
+
+
+def schema_inference_example(spark):
+    # $example on:schema_inferring$
+    sc = spark.sparkContext
+
+    # Load a text file and convert each line to a Row.
+    lines = sc.textFile("examples/src/main/resources/people.txt")
+    parts = lines.map(lambda l: l.split(","))
+    people = parts.map(lambda p: Row(name=p[0], age=int(p[1])))
+
+    # Infer the schema, and register the DataFrame as a table.
+    schemaPeople = spark.createDataFrame(people)
+    schemaPeople.createOrReplaceTempView("people")
+
+    # SQL can be run over DataFrames that have been registered as a table.
+    teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+
+    # The results of SQL queries are DataFrame objects.
+    # rdd returns the content as an :class:`pyspark.RDD` of :class:`Row`.
+    teenNames = teenagers.rdd.map(lambda p: "Name: " + p.name).collect()
+    for name in teenNames:
+        print(name)
+    # Name: Justin
+    # $example off:schema_inferring$
+
+
+def programmatic_schema_example(spark):
+    # $example on:programmatic_schema$
+    sc = spark.sparkContext
+
+    # Load a text file and convert each line to a Row.
+    lines = sc.textFile("examples/src/main/resources/people.txt")
+    parts = lines.map(lambda l: l.split(","))
+    # Each line is converted to a tuple.
+    people = parts.map(lambda p: (p[0], p[1].strip()))
+
+    # The schema is encoded in a string.
+    schemaString = "name age"
+
+    fields = [StructField(field_name, StringType(), True) for field_name in schemaString.split()]
+    schema = StructType(fields)
+
+    # Apply the schema to the RDD.
+    schemaPeople = spark.createDataFrame(people, schema)
+
+    # Creates a temporary view using the DataFrame
+    schemaPeople.createOrReplaceTempView("people")
+
+    # SQL can be run over DataFrames that have been registered as a table.
+    results = spark.sql("SELECT name FROM people")
+
+    results.show()
+    # +-------+
+    # |   name|
+    # +-------+
+    # |Michael|
+    # |   Andy|
+    # | Justin|
+    # +-------+
+    # $example off:programmatic_schema$
+
+if __name__ == "__main__":
+    # $example on:init_session$
+    spark = SparkSession \
+        .builder \
+        .appName("Python Spark SQL basic example") \
+        .config("spark.some.config.option", "some-value") \
+        .getOrCreate()
+    # $example off:init_session$
+
+    basic_df_example(spark)
+    schema_inference_example(spark)
+    programmatic_schema_example(spark)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/datasource.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/datasource.py
new file mode 100644
index 0000000000000000000000000000000000000000..888e92a6200706942a1efc24c3642968dc402b41
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/datasource.py
@@ -0,0 +1,235 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A simple example demonstrating Spark SQL data sources.
+Run with:
+  ./bin/spark-submit examples/src/main/python/sql/datasource.py
+"""
+from __future__ import print_function
+
+from pyspark.sql import SparkSession
+# $example on:schema_merging$
+from pyspark.sql import Row
+# $example off:schema_merging$
+
+
+def basic_datasource_example(spark):
+    # $example on:generic_load_save_functions$
+    df = spark.read.load("examples/src/main/resources/users.parquet")
+    df.select("name", "favorite_color").write.save("namesAndFavColors.parquet")
+    # $example off:generic_load_save_functions$
+
+    # $example on:write_partitioning$
+    df.write.partitionBy("favorite_color").format("parquet").save("namesPartByColor.parquet")
+    # $example off:write_partitioning$
+
+    # $example on:write_partition_and_bucket$
+    df = spark.read.parquet("examples/src/main/resources/users.parquet")
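+    # Partition the output by favorite_color and bucket rows by name into 42 buckets;
+    # saveAsTable is used so the bucketing metadata is recorded in the metastore.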
+    (df
+        .write
+        .partitionBy("favorite_color")
+        .bucketBy(42, "name")
+        .saveAsTable("people_partitioned_bucketed"))
+    # $example off:write_partition_and_bucket$
+
+    # $example on:manual_load_options$
+    df = spark.read.load("examples/src/main/resources/people.json", format="json")
+    df.select("name", "age").write.save("namesAndAges.parquet", format="parquet")
+    # $example off:manual_load_options$
+
+    # $example on:manual_load_options_csv$
+    df = spark.read.load("examples/src/main/resources/people.csv",
+                         format="csv", sep=":", inferSchema="true", header="true")
+    # $example off:manual_load_options_csv$
+
+    # $example on:manual_save_options_orc$
+    df = spark.read.orc("examples/src/main/resources/users.orc")
+    (df.write.format("orc")
+        .option("orc.bloom.filter.columns", "favorite_color")
+        .option("orc.dictionary.key.threshold", "1.0")
+        .save("users_with_options.orc"))
+    # $example off:manual_save_options_orc$
+
+    # $example on:write_sorting_and_bucketing$
+    df.write.bucketBy(42, "name").sortBy("age").saveAsTable("people_bucketed")
+    # $example off:write_sorting_and_bucketing$
+
+    # $example on:direct_sql$
+    df = spark.sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet`")
+    # $example off:direct_sql$
+
+    spark.sql("DROP TABLE IF EXISTS people_bucketed")
+    spark.sql("DROP TABLE IF EXISTS people_partitioned_bucketed")
+
+
+def parquet_example(spark):
+    # $example on:basic_parquet_example$
+    peopleDF = spark.read.json("examples/src/main/resources/people.json")
+
+    # DataFrames can be saved as Parquet files, maintaining the schema information.
+    peopleDF.write.parquet("people.parquet")
+
+    # Read in the Parquet file created above.
+    # Parquet files are self-describing so the schema is preserved.
+    # The result of loading a parquet file is also a DataFrame.
+    parquetFile = spark.read.parquet("people.parquet")
+
+    # Parquet files can also be used to create a temporary view and then used in SQL statements.
+    parquetFile.createOrReplaceTempView("parquetFile")
+    teenagers = spark.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
+    teenagers.show()
+    # +------+
+    # |  name|
+    # +------+
+    # |Justin|
+    # +------+
+    # $example off:basic_parquet_example$
+
+
+def parquet_schema_merging_example(spark):
+    # $example on:schema_merging$
+    # spark is from the previous example.
+    # Create a simple DataFrame, stored into a partition directory
+    sc = spark.sparkContext
+
+    squaresDF = spark.createDataFrame(sc.parallelize(range(1, 6))
+                                      .map(lambda i: Row(single=i, double=i ** 2)))
+    squaresDF.write.parquet("data/test_table/key=1")
+
+    # Create another DataFrame in a new partition directory,
+    # adding a new column and dropping an existing column
+    cubesDF = spark.createDataFrame(sc.parallelize(range(6, 11))
+                                    .map(lambda i: Row(single=i, triple=i ** 3)))
+    cubesDF.write.parquet("data/test_table/key=2")
+
+    # Read the partitioned table
+    mergedDF = spark.read.option("mergeSchema", "true").parquet("data/test_table")
+    mergedDF.printSchema()
+
+    # The final schema consists of all 3 columns in the Parquet files together
+    # with the partitioning column that appears in the partition directory paths.
+    # root
+    #  |-- double: long (nullable = true)
+    #  |-- single: long (nullable = true)
+    #  |-- triple: long (nullable = true)
+    #  |-- key: integer (nullable = true)
+    # $example off:schema_merging$
+
+
+def json_dataset_example(spark):
+    # $example on:json_dataset$
+    # spark is from the previous example.
+    sc = spark.sparkContext
+
+    # A JSON dataset is pointed to by path.
+    # The path can be either a single text file or a directory storing text files
+    path = "examples/src/main/resources/people.json"
+    peopleDF = spark.read.json(path)
+
+    # The inferred schema can be visualized using the printSchema() method
+    peopleDF.printSchema()
+    # root
+    #  |-- age: long (nullable = true)
+    #  |-- name: string (nullable = true)
+
+    # Creates a temporary view using the DataFrame
+    peopleDF.createOrReplaceTempView("people")
+
+    # SQL statements can be run by using the sql methods provided by spark
+    teenagerNamesDF = spark.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19")
+    teenagerNamesDF.show()
+    # +------+
+    # |  name|
+    # +------+
+    # |Justin|
+    # +------+
+
+    # Alternatively, a DataFrame can be created for a JSON dataset represented by
+    # an RDD[String] storing one JSON object per string
+    jsonStrings = ['{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}']
+    otherPeopleRDD = sc.parallelize(jsonStrings)
+    otherPeople = spark.read.json(otherPeopleRDD)
+    otherPeople.show()
+    # +---------------+----+
+    # |        address|name|
+    # +---------------+----+
+    # |[Columbus,Ohio]| Yin|
+    # +---------------+----+
+    # $example off:json_dataset$
+
+
+def jdbc_dataset_example(spark):
+    # $example on:jdbc_dataset$
+    # Note: JDBC loading and saving can be achieved via either the load/save or jdbc methods
+    # Loading data from a JDBC source
+    jdbcDF = spark.read \
+        .format("jdbc") \
+        .option("url", "jdbc:postgresql:dbserver") \
+        .option("dbtable", "schema.tablename") \
+        .option("user", "username") \
+        .option("password", "password") \
+        .load()
+
+    jdbcDF2 = spark.read \
+        .jdbc("jdbc:postgresql:dbserver", "schema.tablename",
+              properties={"user": "username", "password": "password"})
+
+    # Specifying dataframe column data types on read
+    jdbcDF3 = spark.read \
+        .format("jdbc") \
+        .option("url", "jdbc:postgresql:dbserver") \
+        .option("dbtable", "schema.tablename") \
+        .option("user", "username") \
+        .option("password", "password") \
+        .option("customSchema", "id DECIMAL(38, 0), name STRING") \
+        .load()
+
+    # Saving data to a JDBC source
+    jdbcDF.write \
+        .format("jdbc") \
+        .option("url", "jdbc:postgresql:dbserver") \
+        .option("dbtable", "schema.tablename") \
+        .option("user", "username") \
+        .option("password", "password") \
+        .save()
+
+    jdbcDF2.write \
+        .jdbc("jdbc:postgresql:dbserver", "schema.tablename",
+              properties={"user": "username", "password": "password"})
+
+    # Specifying create table column data types on write
+    jdbcDF.write \
+        .option("createTableColumnTypes", "name CHAR(64), comments VARCHAR(1024)") \
+        .jdbc("jdbc:postgresql:dbserver", "schema.tablename",
+              properties={"user": "username", "password": "password"})
+    # $example off:jdbc_dataset$
+
+
+if __name__ == "__main__":
+    spark = SparkSession \
+        .builder \
+        .appName("Python Spark SQL data source example") \
+        .getOrCreate()
+
+    basic_datasource_example(spark)
+    parquet_example(spark)
+    parquet_schema_merging_example(spark)
+    json_dataset_example(spark)
+    jdbc_dataset_example(spark)
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/hive.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/hive.py
new file mode 100644
index 0000000000000000000000000000000000000000..33fc2dfbeefa22e597333717ee8e245e3dde1ce0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/hive.py
@@ -0,0 +1,95 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A simple example demonstrating Spark SQL Hive integration.
+Run with:
+  ./bin/spark-submit examples/src/main/python/sql/hive.py
+"""
+from __future__ import print_function
+
+# $example on:spark_hive$
+from os.path import expanduser, join, abspath
+
+from pyspark.sql import SparkSession
+from pyspark.sql import Row
+# $example off:spark_hive$
+
+
+if __name__ == "__main__":
+    # $example on:spark_hive$
+    # warehouse_location points to the default location for managed databases and tables
+    warehouse_location = abspath('spark-warehouse')
+
+    spark = SparkSession \
+        .builder \
+        .appName("Python Spark SQL Hive integration example") \
+        .config("spark.sql.warehouse.dir", warehouse_location) \
+        .enableHiveSupport() \
+        .getOrCreate()
+
+    # spark is an existing SparkSession
+    spark.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) USING hive")
+    spark.sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
+
+    # Queries are expressed in HiveQL
+    spark.sql("SELECT * FROM src").show()
+    # +---+-------+
+    # |key|  value|
+    # +---+-------+
+    # |238|val_238|
+    # | 86| val_86|
+    # |311|val_311|
+    # ...
+
+    # Aggregation queries are also supported.
+    spark.sql("SELECT COUNT(*) FROM src").show()
+    # +--------+
+    # |count(1)|
+    # +--------+
+    # |    500 |
+    # +--------+
+
+    # The results of SQL queries are themselves DataFrames and support all normal functions.
+    sqlDF = spark.sql("SELECT key, value FROM src WHERE key < 10 ORDER BY key")
+
+    # The items in DataFrames are of type Row, which allows you to access each column by ordinal.
+    stringsDS = sqlDF.rdd.map(lambda row: "Key: %d, Value: %s" % (row.key, row.value))
+    for record in stringsDS.collect():
+        print(record)
+    # Key: 0, Value: val_0
+    # Key: 0, Value: val_0
+    # Key: 0, Value: val_0
+    # ...
+
+    # You can also use DataFrames to create temporary views within a SparkSession.
+    Record = Row("key", "value")
+    recordsDF = spark.createDataFrame([Record(i, "val_" + str(i)) for i in range(1, 101)])
+    recordsDF.createOrReplaceTempView("records")
+
+    # Queries can then join DataFrame data with data stored in Hive.
+    spark.sql("SELECT * FROM records r JOIN src s ON r.key = s.key").show()
+    # +---+------+---+------+
+    # |key| value|key| value|
+    # +---+------+---+------+
+    # |  2| val_2|  2| val_2|
+    # |  4| val_4|  4| val_4|
+    # |  5| val_5|  5| val_5|
+    # ...
+    # $example off:spark_hive$
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_kafka_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_kafka_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..921067891352a716576931fe4b2b8df4fd77a937
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_kafka_wordcount.py
@@ -0,0 +1,90 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+ Consumes messages from one or more topics in Kafka and does wordcount.
+ Usage: structured_kafka_wordcount.py <bootstrap-servers> <subscribe-type> <topics>
+   <bootstrap-servers> The Kafka "bootstrap.servers" configuration. A
+   comma-separated list of host:port.
+   <subscribe-type> The subscription type, one of 'assign', 'subscribe' or
+   'subscribePattern'.
+   |- <assign> Specific TopicPartitions to consume. JSON string, e.g.
+   |  {"topicA":[0,1],"topicB":[2,4]}.
+   |- <subscribe> The topic list to subscribe to. A comma-separated list of
+   |  topics.
+   |- <subscribePattern> The pattern used to subscribe to topic(s).
+   |  Java regex string.
+   |- Only one of "assign", "subscribe" or "subscribePattern" options can be
+   |  specified for the Kafka source.
+   <topics> The value format depends on the value of 'subscribe-type'.
+
+ Run the example
+    `$ bin/spark-submit examples/src/main/python/sql/streaming/structured_kafka_wordcount.py \
+    host1:port1,host2:port2 subscribe topic1,topic2`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark.sql import SparkSession
+from pyspark.sql.functions import explode
+from pyspark.sql.functions import split
+
+if __name__ == "__main__":
+    if len(sys.argv) != 4:
+        print("""
+        Usage: structured_kafka_wordcount.py <bootstrap-servers> <subscribe-type> <topics>
+        """, file=sys.stderr)
+        sys.exit(-1)
+
+    bootstrapServers = sys.argv[1]
+    subscribeType = sys.argv[2]
+    topics = sys.argv[3]
+
+    spark = SparkSession\
+        .builder\
+        .appName("StructuredKafkaWordCount")\
+        .getOrCreate()
+
+    # Create a DataFrame representing the stream of input lines from Kafka
+    lines = spark\
+        .readStream\
+        .format("kafka")\
+        .option("kafka.bootstrap.servers", bootstrapServers)\
+        .option(subscribeType, topics)\
+        .load()\
+        .selectExpr("CAST(value AS STRING)")
+
+    # Split the lines into words
+    words = lines.select(
+        # explode turns each item in an array into a separate row
+        explode(
+            split(lines.value, ' ')
+        ).alias('word')
+    )
+
+    # Generate running word count
+    wordCounts = words.groupBy('word').count()
+
+    # Start running the query that prints the running counts to the console
+    query = wordCounts\
+        .writeStream\
+        .outputMode('complete')\
+        .format('console')\
+        .start()
+
+    query.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_network_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_network_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ac392164735b3265d4099f1b49c4da4fffa4240
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_network_wordcount.py
@@ -0,0 +1,77 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Counts words in UTF8 encoded, '\n' delimited text received from the network.
+ Usage: structured_network_wordcount.py <hostname> <port>
+   <hostname> and <port> describe the TCP server that Structured Streaming
+   connects to in order to receive data.
+
+ To run this on your local machine, you need to first run a Netcat server
+    `$ nc -lk 9999`
+ and then run the example
+    `$ bin/spark-submit examples/src/main/python/sql/streaming/structured_network_wordcount.py
+    localhost 9999`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark.sql import SparkSession
+from pyspark.sql.functions import explode
+from pyspark.sql.functions import split
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: structured_network_wordcount.py <hostname> <port>", file=sys.stderr)
+        sys.exit(-1)
+
+    host = sys.argv[1]
+    port = int(sys.argv[2])
+
+    spark = SparkSession\
+        .builder\
+        .appName("StructuredNetworkWordCount")\
+        .getOrCreate()
+
+    # Create DataFrame representing the stream of input lines from connection to host:port
+    lines = spark\
+        .readStream\
+        .format('socket')\
+        .option('host', host)\
+        .option('port', port)\
+        .load()
+
+    # Split the lines into words
+    words = lines.select(
+        # explode turns each item in an array into a separate row
+        explode(
+            split(lines.value, ' ')
+        ).alias('word')
+    )
+
+    # Generate running word count
+    wordCounts = words.groupBy('word').count()
+
+    # Start running the query that prints the running counts to the console
+    query = wordCounts\
+        .writeStream\
+        .outputMode('complete')\
+        .format('console')\
+        .start()
+
+    query.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4e3bbf44cd5aad5c942b59bbecfff3fbb86bf20
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py
@@ -0,0 +1,102 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Counts words in UTF8 encoded, '\n' delimited text received from the network over a
+ sliding window of configurable duration. Each line from the network is tagged
+ with a timestamp that is used to determine the windows into which it falls.
+
+ Usage: structured_network_wordcount_windowed.py <hostname> <port> <window duration>
+   [<slide duration>]
+ <hostname> and <port> describe the TCP server that Structured Streaming
+ connects to in order to receive data.
+ <window duration> gives the size of the window, specified as an integer number of seconds
+ <slide duration> gives the amount of time successive windows are offset from one another,
+ given in the same units as above. <slide duration> should be less than or equal to
+ <window duration>. If the two are equal, successive windows have no overlap. If
+ <slide duration> is not provided, it defaults to <window duration>.
+
+ To run this on your local machine, you need to first run a Netcat server
+    `$ nc -lk 9999`
+ and then run the example
+    `$ bin/spark-submit
+    examples/src/main/python/sql/streaming/structured_network_wordcount_windowed.py
+    localhost 9999 <window duration> [<slide duration>]`
+
+ One recommended <window duration>, <slide duration> pair is 10, 5
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark.sql import SparkSession
+from pyspark.sql.functions import explode
+from pyspark.sql.functions import split
+from pyspark.sql.functions import window
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5 and len(sys.argv) != 4:
+        msg = ("Usage: structured_network_wordcount_windowed.py <hostname> <port> "
+               "<window duration in seconds> [<slide duration in seconds>]")
+        print(msg, file=sys.stderr)
+        sys.exit(-1)
+
+    host = sys.argv[1]
+    port = int(sys.argv[2])
+    windowSize = int(sys.argv[3])
+    slideSize = int(sys.argv[4]) if (len(sys.argv) == 5) else windowSize
+    if slideSize > windowSize:
+        print("<slide duration> must be less than or equal to <window duration>", file=sys.stderr)
+        sys.exit(-1)
+    windowDuration = '{} seconds'.format(windowSize)
+    slideDuration = '{} seconds'.format(slideSize)
+
+    spark = SparkSession\
+        .builder\
+        .appName("StructuredNetworkWordCountWindowed")\
+        .getOrCreate()
+
+    # Create DataFrame representing the stream of input lines from connection to host:port
+    lines = spark\
+        .readStream\
+        .format('socket')\
+        .option('host', host)\
+        .option('port', port)\
+        .option('includeTimestamp', 'true')\
+        .load()
+
+    # Split the lines into words, retaining timestamps
+    # split() splits each line into an array, and explode() turns the array into multiple rows
+    words = lines.select(
+        explode(split(lines.value, ' ')).alias('word'),
+        lines.timestamp
+    )
+
+    # Group the data by window and word and compute the count of each group
+    windowedCounts = words.groupBy(
+        window(words.timestamp, windowDuration, slideDuration),
+        words.word
+    ).count().orderBy('window')
+
+    # Start running the query that prints the windowed word counts to the console
+    query = windowedCounts\
+        .writeStream\
+        .outputMode('complete')\
+        .format('console')\
+        .option('truncate', 'false')\
+        .start()
+
+    query.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/status_api_demo.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/status_api_demo.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cc8cc820cfceef3b6e8c3a3fa707229cb945c6e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/status_api_demo.py
@@ -0,0 +1,73 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import time
+import threading
+import sys
+if sys.version >= '3':
+    import queue as Queue
+else:
+    import Queue
+
+from pyspark import SparkConf, SparkContext
+
+
+def delayed(seconds):
+    def f(x):
+        time.sleep(seconds)
+        return x
+    return f
+
+
+def call_in_background(f, *args):
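+    # Run f(*args) on a daemon thread and expose its return value through a one-slot queue,
+    # so the caller can poll Spark's status tracker while the job runs.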
+    result = Queue.Queue(1)
+    t = threading.Thread(target=lambda: result.put(f(*args)))
+    t.daemon = True
+    t.start()
+    return result
+
+
+def main():
+    conf = SparkConf().set("spark.ui.showConsoleProgress", "false")
+    sc = SparkContext(appName="PythonStatusAPIDemo", conf=conf)
+
+    def run():
+        rdd = sc.parallelize(range(10), 10).map(delayed(2))
+        reduced = rdd.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
+        return reduced.map(delayed(2)).collect()
+
+    result = call_in_background(run)
+    status = sc.statusTracker()
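+    # Poll the status tracker while the background job is still running,
+    # printing job and stage progress roughly once per second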
+    while result.empty():
+        ids = status.getJobIdsForGroup()
+        for job_id in ids:
+            job = status.getJobInfo(job_id)
+            print("Job", job_id, "status:", job.status)
+            for sid in job.stageIds:
+                info = status.getStageInfo(sid)
+                if info:
+                    print("Stage %d: %d tasks total (%d active, %d complete)" %
+                          (sid, info.numTasks, info.numActiveTasks, info.numCompletedTasks))
+        time.sleep(1)
+
+    print("Job results are:", result.get())
+    sc.stop()
+
+if __name__ == "__main__":
+    main()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/direct_kafka_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/direct_kafka_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5c186c11f79a966175dbbdd3791603c3ab8ba15
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/direct_kafka_wordcount.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Counts words in UTF8 encoded, '\n' delimited text directly received from Kafka every 2 seconds.
+ Usage: direct_kafka_wordcount.py <broker_list> <topic>
+
+ To run this on your local machine, you need to setup Kafka and create a producer first, see
+ http://kafka.apache.org/documentation.html#quickstart
+
+ and then run the example
+    `$ bin/spark-submit --jars \
+      external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \
+      examples/src/main/python/streaming/direct_kafka_wordcount.py \
+      localhost:9092 test`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.streaming.kafka import KafkaUtils
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: direct_kafka_wordcount.py <broker_list> <topic>", file=sys.stderr)
+        sys.exit(-1)
+
+    sc = SparkContext(appName="PythonStreamingDirectKafkaWordCount")
+    ssc = StreamingContext(sc, 2)
+
+    brokers, topic = sys.argv[1:]
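+    # Create a direct (receiver-less) Kafka stream; offsets are tracked by the
+    # stream itself rather than committed to ZooKeeper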
+    kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
+    lines = kvs.map(lambda x: x[1])
+    counts = lines.flatMap(lambda line: line.split(" ")) \
+        .map(lambda word: (word, 1)) \
+        .reduceByKey(lambda a, b: a+b)
+    counts.pprint()
+
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/flume_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/flume_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8ea92b61ca6e1228973847433cd4feb71f5008a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/flume_wordcount.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Counts words in UTF8 encoded, '\n' delimited text received from Flume every second.
+ Usage: flume_wordcount.py <hostname> <port>
+
+ To run this on your local machine, you need to setup Flume first, see
+ https://flume.apache.org/documentation.html
+
+ and then run the example
+    `$ bin/spark-submit --jars \
+      external/flume-assembly/target/scala-*/spark-streaming-flume-assembly-*.jar \
+      examples/src/main/python/streaming/flume_wordcount.py \
+      localhost 12345`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.streaming.flume import FlumeUtils
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: flume_wordcount.py <hostname> <port>", file=sys.stderr)
+        sys.exit(-1)
+
+    sc = SparkContext(appName="PythonStreamingFlumeWordCount")
+    ssc = StreamingContext(sc, 1)
+
+    hostname, port = sys.argv[1:]
+    kvs = FlumeUtils.createStream(ssc, hostname, int(port))
+    lines = kvs.map(lambda x: x[1])
+    counts = lines.flatMap(lambda line: line.split(" ")) \
+        .map(lambda word: (word, 1)) \
+        .reduceByKey(lambda a, b: a+b)
+    counts.pprint()
+
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/hdfs_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/hdfs_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9a5c43a8eaa9db4f1a9d56ea0ce126b30d20782
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/hdfs_wordcount.py
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+ Counts words in new text files created in the given directory
+ Usage: hdfs_wordcount.py <directory>
+   <directory> is the directory that Spark Streaming will use to find and read new text files.
+
+ To run this on your local machine on directory `localdir`, run this example
+    $ bin/spark-submit examples/src/main/python/streaming/hdfs_wordcount.py localdir
+
+ Then create a text file in `localdir` and the words in the file will get counted.
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: hdfs_wordcount.py <directory>", file=sys.stderr)
+        sys.exit(-1)
+
+    sc = SparkContext(appName="PythonStreamingHDFSWordCount")
+    ssc = StreamingContext(sc, 1)
+
+    lines = ssc.textFileStream(sys.argv[1])
+    counts = lines.flatMap(lambda line: line.split(" "))\
+                  .map(lambda x: (x, 1))\
+                  .reduceByKey(lambda a, b: a+b)
+    counts.pprint()
+
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/kafka_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/kafka_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ee08b9fd228166085ffca791b3b9917ee1cb48
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/kafka_wordcount.py
@@ -0,0 +1,56 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Counts words in UTF8 encoded, '\n' delimited text received from Kafka every second.
+ Usage: kafka_wordcount.py <zk> <topic>
+
+ To run this on your local machine, you need to setup Kafka and create a producer first, see
+ http://kafka.apache.org/documentation.html#quickstart
+
+ and then run the example
+    `$ bin/spark-submit --jars \
+      external/kafka-assembly/target/scala-*/spark-streaming-kafka-assembly-*.jar \
+      examples/src/main/python/streaming/kafka_wordcount.py \
+      localhost:2181 test`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.streaming.kafka import KafkaUtils
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: kafka_wordcount.py <zk> <topic>", file=sys.stderr)
+        sys.exit(-1)
+
+    sc = SparkContext(appName="PythonStreamingKafkaWordCount")
+    ssc = StreamingContext(sc, 1)
+
+    zkQuorum, topic = sys.argv[1:]
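+    # Create a receiver-based Kafka stream: connect via the given ZooKeeper quorum
+    # and consume the topic with a single receiver thread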
+    kvs = KafkaUtils.createStream(ssc, zkQuorum, "spark-streaming-consumer", {topic: 1})
+    lines = kvs.map(lambda x: x[1])
+    counts = lines.flatMap(lambda line: line.split(" ")) \
+        .map(lambda word: (word, 1)) \
+        .reduceByKey(lambda a, b: a+b)
+    counts.pprint()
+
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/network_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/network_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3099d2517cd5136f7bbde87e67718f2c4b6bf62
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/network_wordcount.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
+ Usage: network_wordcount.py <hostname> <port>
+   <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
+
+ To run this on your local machine, you need to first run a Netcat server
+    `$ nc -lk 9999`
+ and then run the example
+    `$ bin/spark-submit examples/src/main/python/streaming/network_wordcount.py localhost 9999`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: network_wordcount.py <hostname> <port>", file=sys.stderr)
+        sys.exit(-1)
+    sc = SparkContext(appName="PythonStreamingNetworkWordCount")
+    ssc = StreamingContext(sc, 1)
+
+    lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))
+    counts = lines.flatMap(lambda line: line.split(" "))\
+                  .map(lambda word: (word, 1))\
+                  .reduceByKey(lambda a, b: a+b)
+    counts.pprint()
+
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/network_wordjoinsentiments.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/network_wordjoinsentiments.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b5434c0c845a9d6c97f40c8ea5d236ce13f9058
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/network_wordjoinsentiments.py
@@ -0,0 +1,77 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Shows the most positive words in UTF8 encoded, '\n' delimited text directly received from the network
+ every 5 seconds. The streaming data is joined with a static RDD of the AFINN word list
+ (http://neuro.imm.dtu.dk/wiki/AFINN)
+
+ Usage: network_wordjoinsentiments.py <hostname> <port>
+   <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
+
+ To run this on your local machine, you need to first run a Netcat server
+    `$ nc -lk 9999`
+ and then run the example
+    `$ bin/spark-submit examples/src/main/python/streaming/network_wordjoinsentiments.py \
+    localhost 9999`
+"""
+
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+
+
+def print_happiest_words(rdd):
+    top_list = rdd.take(5)
+    print("Happiest topics in the last 5 seconds (%d total):" % rdd.count())
+    for tuple in top_list:
+        print("%s (%d happiness)" % (tuple[1], tuple[0]))
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: network_wordjoinsentiments.py <hostname> <port>", file=sys.stderr)
+        sys.exit(-1)
+
+    sc = SparkContext(appName="PythonStreamingNetworkWordJoinSentiments")
+    ssc = StreamingContext(sc, 5)
+
+    # Read in the word-sentiment list and create a static RDD from it
+    word_sentiments_file_path = "data/streaming/AFINN-111.txt"
+    word_sentiments = ssc.sparkContext.textFile(word_sentiments_file_path) \
+        .map(lambda line: tuple(line.split("\t")))
+
+    lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))
+
+    word_counts = lines.flatMap(lambda line: line.split(" ")) \
+        .map(lambda word: (word, 1)) \
+        .reduceByKey(lambda a, b: a + b)
+
+    # Determine the words with the highest sentiment values by joining the streaming RDD
+    # with the static RDD inside the transform() method and then multiplying
+    # the frequency of the words by its sentiment value
+    happiest_words = word_counts.transform(lambda rdd: word_sentiments.join(rdd)) \
+        .map(lambda word_tuples: (word_tuples[0], float(word_tuples[1][0]) * word_tuples[1][1])) \
+        .map(lambda word_happiness: (word_happiness[1], word_happiness[0])) \
+        .transform(lambda rdd: rdd.sortByKey(False))
+
+    happiest_words.foreachRDD(print_happiest_words)
+
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/queue_stream.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/queue_stream.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdd2d48519494dc65c90388f3eae2819e2dd5e90
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/queue_stream.py
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+ Create a queue of RDDs that will be mapped/reduced one at a time in
+ 1 second intervals.
+
+ To run this example use
+    `$ bin/spark-submit examples/src/main/python/streaming/queue_stream.py`
+"""
+import time
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+
+if __name__ == "__main__":
+
+    sc = SparkContext(appName="PythonStreamingQueueStream")
+    ssc = StreamingContext(sc, 1)
+
+    # Create the queue through which RDDs can be pushed to
+    # a QueueInputDStream
+    rddQueue = []
+    for i in range(5):
+        rddQueue += [ssc.sparkContext.parallelize([j for j in range(1, 1001)], 10)]
+
+    # Create the QueueInputDStream and use it to do some processing
+    inputStream = ssc.queueStream(rddQueue)
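+    # Count how many numbers in each RDD fall into each residue class mod 10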
+    mappedStream = inputStream.map(lambda x: (x % 10, 1))
+    reducedStream = mappedStream.reduceByKey(lambda a, b: a + b)
+    reducedStream.pprint()
+
+    ssc.start()
+    time.sleep(6)
+    ssc.stop(stopSparkContext=True, stopGraceFully=True)
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/recoverable_network_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/recoverable_network_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..60167dc77254463f955247880ccae3948f889068
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/recoverable_network_wordcount.py
@@ -0,0 +1,109 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+ Counts words in text encoded with UTF8 received from the network every second.
+
+ Usage: recoverable_network_wordcount.py <hostname> <port> <checkpoint-directory> <output-file>
+   <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive
+   data. <checkpoint-directory> directory in an HDFS-compatible file system to store checkpoint data
+   <output-file> file to which the word counts will be appended
+
+ To run this on your local machine, you need to first run a Netcat server
+    `$ nc -lk 9999`
+
+ and then run the example
+    `$ bin/spark-submit examples/src/main/python/streaming/recoverable_network_wordcount.py \
+        localhost 9999 ~/checkpoint/ ~/out`
+
+ If the directory ~/checkpoint/ does not exist (e.g. running for the first time), it will create
+ a new StreamingContext (and will print "Creating new context" to the console). Otherwise, if
+ checkpoint data exists in ~/checkpoint/, it will recreate the StreamingContext from
+ the checkpoint data.
+"""
+from __future__ import print_function
+
+import os
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+
+
+# Get or register a Broadcast variable
+def getWordBlacklist(sparkContext):
+    if ('wordBlacklist' not in globals()):
+        globals()['wordBlacklist'] = sparkContext.broadcast(["a", "b", "c"])
+    return globals()['wordBlacklist']
+
+
+# Get or register an Accumulator
+def getDroppedWordsCounter(sparkContext):
+    if ('droppedWordsCounter' not in globals()):
+        globals()['droppedWordsCounter'] = sparkContext.accumulator(0)
+    return globals()['droppedWordsCounter']
+
+
+def createContext(host, port, outputPath):
+    # If you do not see this printed, the StreamingContext has been loaded
+    # from the existing checkpoint
+    print("Creating new context")
+    if os.path.exists(outputPath):
+        os.remove(outputPath)
+    sc = SparkContext(appName="PythonStreamingRecoverableNetworkWordCount")
+    ssc = StreamingContext(sc, 1)
+
+    # Create a socket stream on target ip:port and count the
+    # words in input stream of \n delimited text (eg. generated by 'nc')
+    lines = ssc.socketTextStream(host, port)
+    words = lines.flatMap(lambda line: line.split(" "))
+    wordCounts = words.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
+
+    def echo(time, rdd):
+        # Get or register the blacklist Broadcast
+        blacklist = getWordBlacklist(rdd.context)
+        # Get or register the droppedWordsCounter Accumulator
+        droppedWordsCounter = getDroppedWordsCounter(rdd.context)
+
+        # Use blacklist to drop words and use droppedWordsCounter to count them
+        def filterFunc(wordCount):
+            if wordCount[0] in blacklist.value:
+                droppedWordsCounter.add(wordCount[1])
+                return False
+            else:
+                return True
+
+        counts = "Counts at time %s %s" % (time, rdd.filter(filterFunc).collect())
+        print(counts)
+        print("Dropped %d word(s) totally" % droppedWordsCounter.value)
+        print("Appending to " + os.path.abspath(outputPath))
+        with open(outputPath, 'a') as f:
+            f.write(counts + "\n")
+
+    wordCounts.foreachRDD(echo)
+    return ssc
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print("Usage: recoverable_network_wordcount.py <hostname> <port> "
+              "<checkpoint-directory> <output-file>", file=sys.stderr)
+        sys.exit(-1)
+    host, port, checkpoint, output = sys.argv[1:]
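+    # Recover the StreamingContext from existing checkpoint data, or build a new
+    # one with createContext if no checkpoint is found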
+    ssc = StreamingContext.getOrCreate(checkpoint,
+                                       lambda: createContext(host, int(port), output))
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/sql_network_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/sql_network_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab3cfc067994da2f792bd11d8b5583d1a33495c9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/sql_network_wordcount.py
@@ -0,0 +1,85 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Use DataFrames and SQL to count words in UTF8 encoded, '\n' delimited text received from the
+ network every second.
+
+ Usage: sql_network_wordcount.py <hostname> <port>
+   <hostname> and <port> describe the TCP server that Spark Streaming would connect to receive data.
+
+ To run this on your local machine, you need to first run a Netcat server
+    `$ nc -lk 9999`
+ and then run the example
+    `$ bin/spark-submit examples/src/main/python/streaming/sql_network_wordcount.py localhost 9999`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+from pyspark.sql import Row, SparkSession
+
+
+def getSparkSessionInstance(sparkConf):
+    if ('sparkSessionSingletonInstance' not in globals()):
+        globals()['sparkSessionSingletonInstance'] = SparkSession\
+            .builder\
+            .config(conf=sparkConf)\
+            .getOrCreate()
+    return globals()['sparkSessionSingletonInstance']
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: sql_network_wordcount.py <hostname> <port> ", file=sys.stderr)
+        sys.exit(-1)
+    host, port = sys.argv[1:]
+    sc = SparkContext(appName="PythonSqlNetworkWordCount")
+    ssc = StreamingContext(sc, 1)
+
+    # Create a socket stream on target ip:port and count the
+    # words in input stream of \n delimited text (eg. generated by 'nc')
+    lines = ssc.socketTextStream(host, int(port))
+    words = lines.flatMap(lambda line: line.split(" "))
+
+    # Convert RDDs of the words DStream to DataFrame and run SQL query
+    def process(time, rdd):
+        print("========= %s =========" % str(time))
+
+        try:
+            # Get the singleton instance of SparkSession
+            spark = getSparkSessionInstance(rdd.context.getConf())
+
+            # Convert RDD[String] to RDD[Row] to DataFrame
+            rowRdd = rdd.map(lambda w: Row(word=w))
+            wordsDataFrame = spark.createDataFrame(rowRdd)
+
+            # Creates a temporary view using the DataFrame.
+            wordsDataFrame.createOrReplaceTempView("words")
+
+            # Do word count on table using SQL and print it
+            wordCountsDataFrame = \
+                spark.sql("select word, count(*) as total from words group by word")
+            wordCountsDataFrame.show()
+        except Exception:
+            # Ignore failed batches (e.g. empty RDDs before any data has arrived)
+            pass
+
+    words.foreachRDD(process)
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/stateful_network_wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/stateful_network_wordcount.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5d1eba6c5969e054f2a594597f3c01a23249b4c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/streaming/stateful_network_wordcount.py
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+r"""
+ Counts words in UTF8 encoded, '\n' delimited text received from the
+ network every second.
+
+ Usage: stateful_network_wordcount.py <hostname> <port>
+   <hostname> and <port> describe the TCP server that Spark Streaming
+    would connect to receive data.
+
+ To run this on your local machine, you need to first run a Netcat server
+    `$ nc -lk 9999`
+ and then run the example
+    `$ bin/spark-submit examples/src/main/python/streaming/stateful_network_wordcount.py \
+        localhost 9999`
+"""
+from __future__ import print_function
+
+import sys
+
+from pyspark import SparkContext
+from pyspark.streaming import StreamingContext
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print("Usage: stateful_network_wordcount.py <hostname> <port>", file=sys.stderr)
+        sys.exit(-1)
+    sc = SparkContext(appName="PythonStreamingStatefulNetworkWordCount")
+    ssc = StreamingContext(sc, 1)
+    ssc.checkpoint("checkpoint")
+
+    # RDD with initial state (key, value) pairs
+    initialStateRDD = sc.parallelize([(u'hello', 1), (u'world', 1)])
+
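+    # new_values holds this batch's counts for a key; last_sum is the previous
+    # running total (None the first time a key is seen)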
+    def updateFunc(new_values, last_sum):
+        return sum(new_values) + (last_sum or 0)
+
+    lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))
+    running_counts = lines.flatMap(lambda line: line.split(" "))\
+                          .map(lambda word: (word, 1))\
+                          .updateStateByKey(updateFunc, initialRDD=initialStateRDD)
+
+    running_counts.pprint()
+
+    ssc.start()
+    ssc.awaitTermination()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/transitive_closure.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/transitive_closure.py
new file mode 100755
index 0000000000000000000000000000000000000000..49551d40851cc9c42909537fcf0e9502fd24d232
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/transitive_closure.py
@@ -0,0 +1,74 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import sys
+from random import Random
+
+from pyspark.sql import SparkSession
+
+numEdges = 200
+numVertices = 100
+rand = Random(42)
+
+
+def generateGraph():
+    edges = set()
+    while len(edges) < numEdges:
+        src = rand.randrange(0, numVertices)
+        dst = rand.randrange(0, numVertices)
+        if src != dst:
+            edges.add((src, dst))
+    return edges
+
+
+if __name__ == "__main__":
+    """
+    Usage: transitive_closure [partitions]
+    """
+    spark = SparkSession\
+        .builder\
+        .appName("PythonTransitiveClosure")\
+        .getOrCreate()
+
+    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
+    tc = spark.sparkContext.parallelize(generateGraph(), partitions).cache()
+
+    # Linear transitive closure: each round grows paths by one edge,
+    # by joining the graph's edges with the already-discovered paths.
+    # e.g. join the path (y, z) from the TC with the edge (x, y) from
+    # the graph to obtain the path (x, z).
+
+    # Because join() joins on keys, the edges are stored in reversed order.
+    edges = tc.map(lambda x_y: (x_y[1], x_y[0]))
+
+    oldCount = 0
+    nextCount = tc.count()
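+    # Iterate until no new paths are discovered (the closure stops growing)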
+    while True:
+        oldCount = nextCount
+        # Perform the join, obtaining an RDD of (y, (z, x)) pairs,
+        # then project the result to obtain the new (x, z) paths.
+        new_edges = tc.join(edges).map(lambda __a_b: (__a_b[1][1], __a_b[1][0]))
+        tc = tc.union(new_edges).distinct().cache()
+        nextCount = tc.count()
+        if nextCount == oldCount:
+            break
+
+    print("TC has %i edges" % tc.count())
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/wordcount.py b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/wordcount.py
new file mode 100755
index 0000000000000000000000000000000000000000..a05e24ff3ff952561cd900d4d391778e6d4d1d49
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/python/wordcount.py
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import sys
+from operator import add
+
+from pyspark.sql import SparkSession
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: wordcount <file>", file=sys.stderr)
+        sys.exit(-1)
+
+    spark = SparkSession\
+        .builder\
+        .appName("PythonWordCount")\
+        .getOrCreate()
+
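+    # Read the input file as a DataFrame of lines and convert it to an RDD of strings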
+    lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
+    counts = lines.flatMap(lambda x: x.split(' ')) \
+                  .map(lambda x: (x, 1)) \
+                  .reduceByKey(add)
+    output = counts.collect()
+    for (word, count) in output:
+        print("%s: %i" % (word, count))
+
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/RSparkSQLExample.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/RSparkSQLExample.R
new file mode 100644
index 0000000000000000000000000000000000000000..f3d2ab8cbc8440a12efbbaad37efcd89e1bc3b39
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/RSparkSQLExample.R
@@ -0,0 +1,228 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/RSparkSQLExample.R
+
+library(SparkR)
+
+# $example on:init_session$
+sparkR.session(appName = "R Spark SQL basic example", sparkConfig = list(spark.some.config.option = "some-value"))
+# $example off:init_session$
+
+
+# $example on:create_df$
+df <- read.json("examples/src/main/resources/people.json")
+
+# Displays the content of the DataFrame
+head(df)
+##   age    name
+## 1  NA Michael
+## 2  30    Andy
+## 3  19  Justin
+
+# Another method to print the first few rows and optionally truncate the printing of long values
+showDF(df)
+## +----+-------+
+## | age|   name|
+## +----+-------+
+## |null|Michael|
+## |  30|   Andy|
+## |  19| Justin|
+## +----+-------+
+# $example off:create_df$
+
+
+# $example on:untyped_ops$
+# Create the DataFrame
+df <- read.json("examples/src/main/resources/people.json")
+
+# Show the content of the DataFrame
+head(df)
+##   age    name
+## 1  NA Michael
+## 2  30    Andy
+## 3  19  Justin
+
+
+# Print the schema in a tree format
+printSchema(df)
+## root
+## |-- age: long (nullable = true)
+## |-- name: string (nullable = true)
+
+# Select only the "name" column
+head(select(df, "name"))
+##      name
+## 1 Michael
+## 2    Andy
+## 3  Justin
+
+# Select everybody, but increment the age by 1
+head(select(df, df$name, df$age + 1))
+##      name (age + 1.0)
+## 1 Michael          NA
+## 2    Andy          31
+## 3  Justin          20
+
+# Select people older than 21
+head(where(df, df$age > 21))
+##   age name
+## 1  30 Andy
+
+# Count people by age
+head(count(groupBy(df, "age")))
+##   age count
+## 1  19     1
+## 2  NA     1
+## 3  30     1
+# $example off:untyped_ops$
+
+
+# Register this DataFrame as a table.
+createOrReplaceTempView(df, "table")
+# $example on:run_sql$
+df <- sql("SELECT * FROM table")
+# $example off:run_sql$
+
+
+# $example on:generic_load_save_functions$
+df <- read.df("examples/src/main/resources/users.parquet")
+write.df(select(df, "name", "favorite_color"), "namesAndFavColors.parquet")
+# $example off:generic_load_save_functions$
+
+
+# $example on:manual_load_options$
+df <- read.df("examples/src/main/resources/people.json", "json")
+namesAndAges <- select(df, "name", "age")
+write.df(namesAndAges, "namesAndAges.parquet", "parquet")
+# $example off:manual_load_options$
+
+
+# $example on:manual_load_options_csv$
+df <- read.df("examples/src/main/resources/people.csv", "csv", sep = ";", inferSchema = TRUE, header = TRUE)
+namesAndAges <- select(df, "name", "age")
+# $example off:manual_load_options_csv$
+
+# $example on:manual_save_options_orc$
+df <- read.df("examples/src/main/resources/users.orc", "orc")
+write.orc(df, "users_with_options.orc", orc.bloom.filter.columns = "favorite_color", orc.dictionary.key.threshold = 1.0)
+# $example off:manual_save_options_orc$
+
+# $example on:direct_sql$
+df <- sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet`")
+# $example off:direct_sql$
+
+
+# $example on:basic_parquet_example$
+df <- read.df("examples/src/main/resources/people.json", "json")
+
+# SparkDataFrame can be saved as Parquet files, maintaining the schema information.
+write.parquet(df, "people.parquet")
+
+# Read in the Parquet file created above. Parquet files are self-describing so the schema is preserved.
+# The result of loading a parquet file is also a DataFrame.
+parquetFile <- read.parquet("people.parquet")
+
+# Parquet files can also be used to create a temporary view and then used in SQL statements.
+createOrReplaceTempView(parquetFile, "parquetFile")
+teenagers <- sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
+head(teenagers)
+##     name
+## 1 Justin
+
+# We can also run custom R-UDFs on Spark DataFrames. Here we prefix all the names with "Name:"
+schema <- structType(structField("name", "string"))
+teenNames <- dapply(df, function(p) { cbind(paste("Name:", p$name)) }, schema)
+for (teenName in collect(teenNames)$name) {
+  cat(teenName, "\n")
+}
+## Name: Michael
+## Name: Andy
+## Name: Justin
+# $example off:basic_parquet_example$
+
+
+# $example on:schema_merging$
+df1 <- createDataFrame(data.frame(single=c(12, 29), double=c(19, 23)))
+df2 <- createDataFrame(data.frame(double=c(19, 23), triple=c(23, 18)))
+
+# Create a simple DataFrame, stored into a partition directory
+write.df(df1, "data/test_table/key=1", "parquet", "overwrite")
+
+# Create another DataFrame in a new partition directory,
+# adding a new column and dropping an existing column
+write.df(df2, "data/test_table/key=2", "parquet", "overwrite")
+
+# Read the partitioned table
+df3 <- read.df("data/test_table", "parquet", mergeSchema = "true")
+printSchema(df3)
+# The final schema consists of all 3 columns in the Parquet files together
+# with the partitioning column that appears in the partition directory paths
+## root
+##  |-- single: double (nullable = true)
+##  |-- double: double (nullable = true)
+##  |-- triple: double (nullable = true)
+##  |-- key: integer (nullable = true)
+# $example off:schema_merging$
+
+
+# $example on:json_dataset$
+# A JSON dataset is pointed to by path.
+# The path can be either a single text file or a directory storing text files.
+path <- "examples/src/main/resources/people.json"
+# Create a DataFrame from the file(s) pointed to by path
+people <- read.json(path)
+
+# The inferred schema can be visualized using the printSchema() method.
+printSchema(people)
+## root
+##  |-- age: long (nullable = true)
+##  |-- name: string (nullable = true)
+
+# Register this DataFrame as a table.
+createOrReplaceTempView(people, "people")
+
+# SQL statements can be run by using the sql methods.
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+head(teenagers)
+##     name
+## 1 Justin
+# $example off:json_dataset$
+
+
+# $example on:spark_hive$
+# enableHiveSupport defaults to TRUE
+sparkR.session(enableHiveSupport = TRUE)
+sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) USING hive")
+sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
+
+# Queries can be expressed in HiveQL.
+results <- collect(sql("FROM src SELECT key, value"))
+# $example off:spark_hive$
+
+
+# $example on:jdbc_dataset$
+# Loading data from a JDBC source
+df <- read.jdbc("jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password")
+
+# Saving data to a JDBC source
+write.jdbc(df, "jdbc:postgresql:dbserver", "schema.tablename", user = "username", password = "password")
+# $example off:jdbc_dataset$
+
+# Stop the SparkSession now
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/data-manipulation.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/data-manipulation.R
new file mode 100644
index 0000000000000000000000000000000000000000..371335a62e92dcdda3ac2bd85506709e945582a9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/data-manipulation.R
@@ -0,0 +1,103 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# For this example, we shall use the "flights" dataset
+# The dataset consists of every flight departing Houston in 2011.
+# The dataset is made up of 227,496 rows x 14 columns.
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/data-manipulation.R <path_to_csv>
+
+# Load SparkR library into your R session
+library(SparkR)
+
+args <- commandArgs(trailing = TRUE)
+
+if (length(args) != 1) {
+  print("Usage: data-manipulation.R <path-to-flights.csv>")
+  print("The data can be downloaded from: http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv")
+  q("no")
+}
+
+## Initialize SparkSession
+sparkR.session(appName = "SparkR-data-manipulation-example")
+
+flightsCsvPath <- args[[1]]
+
+# Create a local R dataframe
+flights_df <- read.csv(flightsCsvPath, header = TRUE)
+flights_df$date <- as.Date(flights_df$date)
+
+## Filter flights whose destination is San Francisco and write to a local data frame
+SFO_df <- flights_df[flights_df$dest == "SFO", ]
+
+# Convert the local data frame into a SparkDataFrame
+SFO_DF <- createDataFrame(SFO_df)
+
+#  Directly create a SparkDataFrame from the source data
+flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true")
+
+# Print the schema of this SparkDataFrame
+printSchema(flightsDF)
+
+# Cache the SparkDataFrame
+cache(flightsDF)
+
+# Print the first 6 rows of the SparkDataFrame
+showDF(flightsDF, numRows = 6) ## Or
+head(flightsDF)
+
+# Show the column names in the SparkDataFrame
+columns(flightsDF)
+
+# Show the number of rows in the SparkDataFrame
+count(flightsDF)
+
+# Select specific columns
+destDF <- select(flightsDF, "dest", "cancelled")
+
+# Using SQL to select columns of data
+# First, register the flights SparkDataFrame as a table
+createOrReplaceTempView(flightsDF, "flightsTable")
+destDF <- sql("SELECT dest, cancelled FROM flightsTable")
+
+# Use collect to create a local R data frame
+local_df <- collect(destDF)
+
+# Print the newly created local data frame
+head(local_df)
+
+# Filter flights whose destination is JFK
+jfkDF <- filter(flightsDF, "dest = \"JFK\"") ## OR
+jfkDF <- filter(flightsDF, flightsDF$dest == "JFK")
+
+# If the magrittr library is available, we can use it to
+# chain data frame operations
+if ("magrittr" %in% rownames(installed.packages())) {
+  library(magrittr)
+
+  # Group the flights by date and then find the average daily delay
+  # Write the result into a SparkDataFrame
+  groupBy(flightsDF, flightsDF$date) %>%
+    summarize(avg(flightsDF$dep_delay), avg(flightsDF$arr_delay)) -> dailyDelayDF
+
+  # Print the computed SparkDataFrame
+  head(dailyDelayDF)
+}
+
+# Stop the SparkSession now
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/dataframe.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/dataframe.R
new file mode 100644
index 0000000000000000000000000000000000000000..311350497f87343c587d5b43e6b3f693fb6c598f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/dataframe.R
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/dataframe.R
+
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-DataFrame-example")
+
+# Create a simple local data.frame
+localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
+
+# Convert local data frame to a SparkDataFrame
+df <- createDataFrame(localDF)
+
+# Print its schema
+printSchema(df)
+# root
+#  |-- name: string (nullable = true)
+#  |-- age: double (nullable = true)
+
+# Create a DataFrame from a JSON file
+path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json")
+peopleDF <- read.json(path)
+printSchema(peopleDF)
+# root
+#  |-- age: long (nullable = true)
+#  |-- name: string (nullable = true)
+
+# Register this DataFrame as a table.
+createOrReplaceTempView(peopleDF, "people")
+
+# SQL statements can be run by using the sql methods
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
+
+# Call collect to get a local data.frame
+teenagersLocalDF <- collect(teenagers)
+
+# Print the teenagers in our dataset
+print(teenagersLocalDF)
+
+# Stop the SparkSession now
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/als.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/als.R
new file mode 100644
index 0000000000000000000000000000000000000000..4d1c91add54e1805d5f005e4293b5d0f08ce74c6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/als.R
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/als.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-als-example")
+
+# $example on$
+# Load training data
+data <- list(list(0, 0, 4.0), list(0, 1, 2.0), list(1, 1, 3.0),
+             list(1, 2, 4.0), list(2, 1, 1.0), list(2, 2, 5.0))
+df <- createDataFrame(data, c("userId", "movieId", "rating"))
+training <- df
+test <- df
+
+# Fit a recommendation model using ALS with spark.als
+model <- spark.als(training, maxIter = 5, regParam = 0.01, userCol = "userId",
+                   itemCol = "movieId", ratingCol = "rating")
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/bisectingKmeans.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/bisectingKmeans.R
new file mode 100644
index 0000000000000000000000000000000000000000..b3eaa6dd86d7dca221cc9fd504b24d706bc39f5a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/bisectingKmeans.R
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/bisectingKmeans.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-bisectingKmeans-example")
+
+# $example on$
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+
+# Fit bisecting k-means model with four centers
+model <- spark.bisectingKmeans(training, Class ~ Survived, k = 4)
+
+# get fitted result from a bisecting k-means model
+fitted.model <- fitted(model, "centers")
+
+# Model summary
+head(summary(fitted.model))
+
+# fitted values on training data
+fitted <- predict(model, training)
+head(select(fitted, "Class", "prediction"))
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/decisionTree.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/decisionTree.R
new file mode 100644
index 0000000000000000000000000000000000000000..9e10ae5519cd374366aedc666f731278f40654d7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/decisionTree.R
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/decisionTree.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-decisionTree-example")
+
+# DecisionTree classification model
+
+# $example on:classification$
+# Load training data
+df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a DecisionTree classification model with spark.decisionTree
+model <- spark.decisionTree(training, label ~ features, "classification")
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:classification$
+
+# DecisionTree regression model
+
+# $example on:regression$
+# Load training data
+df <- read.df("data/mllib/sample_linear_regression_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a DecisionTree regression model with spark.decisionTree
+model <- spark.decisionTree(training, label ~ features, "regression")
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:regression$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/fpm.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/fpm.R
new file mode 100644
index 0000000000000000000000000000000000000000..89c4564457d9e6736370090e80b8baa35f13fc71
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/fpm.R
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/fpm.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-fpm-example")
+
+# $example on$
+# Load training data
+
+df <- selectExpr(createDataFrame(data.frame(rawItems = c(
+  "1,2,5", "1,2,3,5", "1,2"
+))), "split(rawItems, ',') AS items")
+
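+# Each row now holds an array<string> column named "items"; fit an FP-growth model on it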
+fpm <- spark.fpGrowth(df, itemsCol = "items", minSupport = 0.5, minConfidence = 0.6)
+
+# Extracting frequent itemsets
+
+spark.freqItemsets(fpm)
+
+# Extracting association rules
+
+spark.associationRules(fpm)
+
+# Predict uses the association rules and combines the possible consequents
+
+predict(fpm, df)
+
+# $example off$
+
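+# Editorial note: the calls above return SparkDataFrames; to print their
+# contents when running the script through spark-submit, wrap them in showDF
+# (or head), for example:
+showDF(spark.freqItemsets(fpm))
+showDF(spark.associationRules(fpm))
+showDF(predict(fpm, df))
+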
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/gaussianMixture.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/gaussianMixture.R
new file mode 100644
index 0000000000000000000000000000000000000000..558e44cc112ec5cd47b00f4b6dca6a395773e0ad
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/gaussianMixture.R
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/gaussianMixture.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-gaussianMixture-example")
+
+# $example on$
+# Load training data
+df <- read.df("data/mllib/sample_kmeans_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a gaussian mixture clustering model with spark.gaussianMixture
+model <- spark.gaussianMixture(training, ~ features, k = 2)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/gbt.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/gbt.R
new file mode 100644
index 0000000000000000000000000000000000000000..bc654f1df7ab6d3d7135724896422c92e562fd00
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/gbt.R
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/gbt.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-gbt-example")
+
+# GBT classification model
+
+# $example on:classification$
+# Load training data
+df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a GBT classification model with spark.gbt
+model <- spark.gbt(training, label ~ features, "classification", maxIter = 10)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:classification$
+
+# GBT regression model
+
+# $example on:regression$
+# Load training data
+df <- read.df("data/mllib/sample_linear_regression_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a GBT regression model with spark.gbt
+model <- spark.gbt(training, label ~ features, "regression", maxIter = 10)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:regression$
+
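+# Editorial sketch (assumes the default "label" and "prediction" output columns):
+# compare predicted and actual values side by side.
+head(select(predictions, "label", "prediction"))
+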
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/glm.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/glm.R
new file mode 100644
index 0000000000000000000000000000000000000000..68787f9aa9dca181114514f3305f53bbe6563d4e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/glm.R
@@ -0,0 +1,71 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/glm.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-glm-example")
+
+# $example on$
+training <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+# Fit a generalized linear model of family "gaussian" with spark.glm
+df_list <- randomSplit(training, c(7, 3), 2)
+gaussianDF <- df_list[[1]]
+gaussianTestDF <- df_list[[2]]
+gaussianGLM <- spark.glm(gaussianDF, label ~ features, family = "gaussian")
+
+# Model summary
+summary(gaussianGLM)
+
+# Prediction
+gaussianPredictions <- predict(gaussianGLM, gaussianTestDF)
+head(gaussianPredictions)
+
+# Fit a generalized linear model with glm (R-compliant)
+gaussianGLM2 <- glm(label ~ features, gaussianDF, family = "gaussian")
+summary(gaussianGLM2)
+
+# Fit a generalized linear model of family "binomial" with spark.glm
+training2 <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+training2 <- transform(training2, label = cast(training2$label > 1, "integer"))
+df_list2 <- randomSplit(training2, c(7, 3), 2)
+binomialDF <- df_list2[[1]]
+binomialTestDF <- df_list2[[2]]
+binomialGLM <- spark.glm(binomialDF, label ~ features, family = "binomial")
+
+# Model summary
+summary(binomialGLM)
+
+# Prediction
+binomialPredictions <- predict(binomialGLM, binomialTestDF)
+head(binomialPredictions)
+
+# Fit a generalized linear model of family "tweedie" with spark.glm
+training3 <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+tweedieDF <- transform(training3, label = training3$label * exp(randn(10)))
+tweedieGLM <- spark.glm(tweedieDF, label ~ features, family = "tweedie",
+                        var.power = 1.2, link.power = 0)
+
+# Model summary
+summary(tweedieGLM)
+# $example off$
+
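+# Editorial sketch: the R-compliant glm() fit above returns the same kind of
+# Spark model object, so it can also be used to predict on the held-out split.
+head(predict(gaussianGLM2, gaussianTestDF))
+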
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/isoreg.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/isoreg.R
new file mode 100644
index 0000000000000000000000000000000000000000..a53c83eac430756afdec35dec53a651f0f384f80
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/isoreg.R
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/isoreg.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-isoreg-example")
+
+# $example on$
+# Load training data
+df <- read.df("data/mllib/sample_isotonic_regression_libsvm_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit an isotonic regression model with spark.isoreg
+# (isotonic = FALSE constrains the fitted sequence to be decreasing, i.e. antitonic)
+model <- spark.isoreg(training, label ~ features, isotonic = FALSE)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/kmeans.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/kmeans.R
new file mode 100644
index 0000000000000000000000000000000000000000..824df20644fa1da622e4dca8530e57d7f28916dd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/kmeans.R
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/kmeans.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-kmeans-example")
+
+# $example on$
+# Fit a k-means model with spark.kmeans
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+df_list <- randomSplit(training, c(7,3), 2)
+kmeansDF <- df_list[[1]]
+kmeansTestDF <- df_list[[2]]
+kmeansModel <- spark.kmeans(kmeansDF, ~ Class + Sex + Age + Freq,
+                            k = 3)
+
+# Model summary
+summary(kmeansModel)
+
+# Get fitted result from the k-means model
+head(fitted(kmeansModel))
+
+# Prediction
+kmeansPredictions <- predict(kmeansModel, kmeansTestDF)
+head(kmeansPredictions)
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/kstest.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/kstest.R
new file mode 100644
index 0000000000000000000000000000000000000000..e2b07702b6f3aef97dc9dccc2316c0fda8488a4f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/kstest.R
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/kstest.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-kstest-example")
+
+# $example on$
+# Load training data
+data <- data.frame(test = c(0.1, 0.15, 0.2, 0.3, 0.25, -1, -0.5))
+df <- createDataFrame(data)
+training <- df
+test <- df
+
+# Conduct the two-sided Kolmogorov-Smirnov (KS) test with spark.kstest
+model <- spark.kstest(df, "test", "norm")
+
+# Model summary
+summary(model)
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/lda.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/lda.R
new file mode 100644
index 0000000000000000000000000000000000000000..769be0a78dfb6842e6b2ad2e6fb25c97857b586f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/lda.R
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/lda.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-lda-example")
+
+# $example on$
+# Load training data
+df <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a latent dirichlet allocation model with spark.lda
+model <- spark.lda(training, k = 10, maxIter = 10)
+
+# Model summary
+summary(model)
+
+# Posterior probabilities
+posterior <- spark.posterior(model, test)
+head(posterior)
+
+# The log perplexity of the LDA model
+logPerplexity <- spark.perplexity(model, test)
+print(paste0("The upper bound bound on perplexity: ", logPerplexity))
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/logit.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/logit.R
new file mode 100644
index 0000000000000000000000000000000000000000..4c8fd428d3852f4c052458f7dd9b88ae73a86a64
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/logit.R
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/logit.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-logit-example")
+
+# Binomial logistic regression
+
+# $example on:binomial$
+# Load training data
+df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a binomial logistic regression model with spark.logit
+model <- spark.logit(training, label ~ features, maxIter = 10, regParam = 0.3, elasticNetParam = 0.8)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:binomial$
+
+# Multinomial logistic regression
+
+# $example on:multinomial$
+# Load training data
+df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a multinomial logistic regression model with spark.logit
+model <- spark.logit(training, label ~ features, maxIter = 10, regParam = 0.3, elasticNetParam = 0.8)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:multinomial$
+
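+# Editorial sketch (assumes the default "prediction" output column): tally how
+# many test rows fall into each predicted class.
+head(count(groupBy(predictions, "prediction")))
+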
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/ml.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/ml.R
new file mode 100644
index 0000000000000000000000000000000000000000..41b7867f64e368f98f98f98d1830b070021e82e5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/ml.R
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/ml.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-example")
+
+############################ model read/write ##############################################
+# $example on:read_write$
+training <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+# Fit a generalized linear model of family "gaussian" with spark.glm
+df_list <- randomSplit(training, c(7,3), 2)
+gaussianDF <- df_list[[1]]
+gaussianTestDF <- df_list[[2]]
+gaussianGLM <- spark.glm(gaussianDF, label ~ features, family = "gaussian")
+
+# Save and then load a fitted MLlib model
+modelPath <- tempfile(pattern = "ml", fileext = ".tmp")
+write.ml(gaussianGLM, modelPath)
+gaussianGLM2 <- read.ml(modelPath)
+
+# Check model summary
+summary(gaussianGLM2)
+
+# Check model prediction
+gaussianPredictions <- predict(gaussianGLM2, gaussianTestDF)
+head(gaussianPredictions)
+
+unlink(modelPath)
+# $example off:read_write$
+
+############################ fit models with spark.lapply #####################################
+# Perform distributed training of multiple models with spark.lapply
+algorithms <- c("Hartigan-Wong", "Lloyd", "MacQueen")
+train <- function(algorithm) {
+  model <- kmeans(x = iris[1:4], centers = 3, algorithm = algorithm)
+  model$withinss
+}
+
+model.withinss <- spark.lapply(algorithms, train)
+
+# Print the within-cluster sum of squares for each model
+print(model.withinss)
+
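+# Editorial sketch (plain base R; the helper variable below is hypothetical):
+# pick the algorithm with the smallest total within-cluster sum of squares.
+totalWithinss <- sapply(model.withinss, sum)
+print(algorithms[which.min(totalWithinss)])
+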
+# Stop the SparkSession now
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/mlp.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/mlp.R
new file mode 100644
index 0000000000000000000000000000000000000000..b69ac845f2dbd3357ef5f569da96c5fd27b5a2df
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/mlp.R
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/mlp.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-mlp-example")
+
+# $example on$
+# Load training data
+df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Specify the layers for the neural network:
+# an input layer of size 4 (features), two intermediate layers of sizes 5 and 4,
+# and an output layer of size 3 (classes)
+layers <- c(4, 5, 4, 3)
+
+# Fit a multi-layer perceptron neural network model with spark.mlp
+model <- spark.mlp(training, label ~ features, maxIter = 100,
+                   layers = layers, blockSize = 128, seed = 1234)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/naiveBayes.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/naiveBayes.R
new file mode 100644
index 0000000000000000000000000000000000000000..da69e93ef294e5e0162b61609d6fe194ac048859
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/naiveBayes.R
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/naiveBayes.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-naiveBayes-example")
+
+# $example on$
+# Fit a Bernoulli naive Bayes model with spark.naiveBayes
+titanic <- as.data.frame(Titanic)
+titanicDF <- createDataFrame(titanic[titanic$Freq > 0, -5])
+nbDF <- titanicDF
+nbTestDF <- titanicDF
+nbModel <- spark.naiveBayes(nbDF, Survived ~ Class + Sex + Age)
+
+# Model summary
+summary(nbModel)
+
+# Prediction
+nbPredictions <- predict(nbModel, nbTestDF)
+head(nbPredictions)
+# $example off$
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/randomForest.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/randomForest.R
new file mode 100644
index 0000000000000000000000000000000000000000..5d99502cd971de519a1d29a9f59babb40b9ebf97
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/randomForest.R
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/randomForest.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-randomForest-example")
+
+# Random forest classification model
+
+# $example on:classification$
+# Load training data
+df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a random forest classification model with spark.randomForest
+model <- spark.randomForest(training, label ~ features, "classification", numTrees = 10)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:classification$
+
+# Random forest regression model
+
+# $example on:regression$
+# Load training data
+df <- read.df("data/mllib/sample_linear_regression_data.txt", source = "libsvm")
+training <- df
+test <- df
+
+# Fit a random forest regression model with spark.randomForest
+model <- spark.randomForest(training, label ~ features, "regression", numTrees = 10)
+
+# Model summary
+summary(model)
+
+# Prediction
+predictions <- predict(model, test)
+head(predictions)
+# $example off:regression$
+
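+# Editorial note: printSchema can be used to inspect the structure of the
+# prediction output before collecting any rows.
+printSchema(predictions)
+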
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/survreg.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/survreg.R
new file mode 100644
index 0000000000000000000000000000000000000000..e4eadfca86f6cf30da0b9b7dab511d81e3014583
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/survreg.R
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/survreg.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-survreg-example")
+
+# $example on$
+# Use the ovarian dataset available in the R survival package
+library(survival)
+
+# Fit an accelerated failure time (AFT) survival regression model with spark.survreg
+ovarianDF <- suppressWarnings(createDataFrame(ovarian))
+aftDF <- ovarianDF
+aftTestDF <- ovarianDF
+aftModel <- spark.survreg(aftDF, Surv(futime, fustat) ~ ecog_ps + rx)
+
+# Model summary
+summary(aftModel)
+
+# Prediction
+aftPredictions <- predict(aftModel, aftTestDF)
+head(aftPredictions)
+# $example off$
+
+sparkR.session.stop()
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/svmLinear.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/svmLinear.R
new file mode 100644
index 0000000000000000000000000000000000000000..c632f1282ea764ab1dc1c815013e904a0b9c8de0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/ml/svmLinear.R
@@ -0,0 +1,42 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# To run this example use
+# ./bin/spark-submit examples/src/main/r/ml/svmLinear.R
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-ML-svmLinear-example")
+
+# $example on$
+# Load training data
+t <- as.data.frame(Titanic)
+training <- createDataFrame(t)
+
+# Fit a linear SVM model with spark.svmLinear
+model <- spark.svmLinear(training, Survived ~ ., regParam = 0.01, maxIter = 10)
+
+# Model summary
+summary(model)
+
+# Prediction
+prediction <- predict(model, training)
+showDF(prediction)
+# $example off$
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/streaming/structured_network_wordcount.R b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/streaming/structured_network_wordcount.R
new file mode 100644
index 0000000000000000000000000000000000000000..cda18ebc072eead68973866c5e6732eda4a0fe7d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/r/streaming/structured_network_wordcount.R
@@ -0,0 +1,57 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Counts words in UTF8 encoded, '\n' delimited text received from the network.
+
+# To run this on your local machine, you need to first run a Netcat server
+# $ nc -lk 9999
+# and then run the example
+# ./bin/spark-submit examples/src/main/r/streaming/structured_network_wordcount.R localhost 9999
+
+# Load SparkR library into your R session
+library(SparkR)
+
+# Initialize SparkSession
+sparkR.session(appName = "SparkR-Streaming-structured-network-wordcount-example")
+
+args <- commandArgs(trailing = TRUE)
+
+if (length(args) != 2) {
+  print("Usage: structured_network_wordcount.R <hostname> <port>")
+  print("<hostname> and <port> describe the TCP server that Structured Streaming")
+  print("would connect to receive data.")
+  q("no")
+}
+
+hostname <- args[[1]]
+port <- as.integer(args[[2]])
+
+# Create DataFrame representing the stream of input lines from the connection to hostname:port
+lines <- read.stream("socket", host = hostname, port = port)
+
+# Split the lines into words
+words <- selectExpr(lines, "explode(split(value, ' ')) as word")
+
+# Generate running word count
+wordCounts <- count(groupBy(words, "word"))
+
+# Start running the query that prints the running counts to the console
+query <- write.stream(wordCounts, "console", outputMode = "complete")
+
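+# Editorial note: while the query runs, status(query) and lastProgress(query)
+# can be used to inspect its progress; awaitTermination below blocks until the
+# query is stopped.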
+awaitTermination(query)
+
+sparkR.session.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/employees.json b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/employees.json
new file mode 100644
index 0000000000000000000000000000000000000000..6b2e6329a1cb27dbf237e3561797957220dd4d51
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/employees.json
@@ -0,0 +1,4 @@
+{"name":"Michael", "salary":3000}
+{"name":"Andy", "salary":4500}
+{"name":"Justin", "salary":3500}
+{"name":"Berta", "salary":4000}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/full_user.avsc b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/full_user.avsc
new file mode 100644
index 0000000000000000000000000000000000000000..04e7ba2dca4f6c4dd459c2c36e736a89d86ee2f3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/full_user.avsc
@@ -0,0 +1 @@
+{"type": "record", "namespace": "example.avro", "name": "User", "fields": [{"type": "string", "name": "name"}, {"type": ["string", "null"], "name": "favorite_color"}, {"type": {"items": "int", "type": "array"}, "name": "favorite_numbers"}]}
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/kv1.txt b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/kv1.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9825414ecf8f21a36c9639999b22a8ad45169ab1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/kv1.txt
@@ -0,0 +1,500 @@
+238val_238
+86val_86
+311val_311
+27val_27
+165val_165
+409val_409
+255val_255
+278val_278
+98val_98
+484val_484
+265val_265
+193val_193
+401val_401
+150val_150
+273val_273
+224val_224
+369val_369
+66val_66
+128val_128
+213val_213
+146val_146
+406val_406
+429val_429
+374val_374
+152val_152
+469val_469
+145val_145
+495val_495
+37val_37
+327val_327
+281val_281
+277val_277
+209val_209
+15val_15
+82val_82
+403val_403
+166val_166
+417val_417
+430val_430
+252val_252
+292val_292
+219val_219
+287val_287
+153val_153
+193val_193
+338val_338
+446val_446
+459val_459
+394val_394
+237val_237
+482val_482
+174val_174
+413val_413
+494val_494
+207val_207
+199val_199
+466val_466
+208val_208
+174val_174
+399val_399
+396val_396
+247val_247
+417val_417
+489val_489
+162val_162
+377val_377
+397val_397
+309val_309
+365val_365
+266val_266
+439val_439
+342val_342
+367val_367
+325val_325
+167val_167
+195val_195
+475val_475
+17val_17
+113val_113
+155val_155
+203val_203
+339val_339
+0val_0
+455val_455
+128val_128
+311val_311
+316val_316
+57val_57
+302val_302
+205val_205
+149val_149
+438val_438
+345val_345
+129val_129
+170val_170
+20val_20
+489val_489
+157val_157
+378val_378
+221val_221
+92val_92
+111val_111
+47val_47
+72val_72
+4val_4
+280val_280
+35val_35
+427val_427
+277val_277
+208val_208
+356val_356
+399val_399
+169val_169
+382val_382
+498val_498
+125val_125
+386val_386
+437val_437
+469val_469
+192val_192
+286val_286
+187val_187
+176val_176
+54val_54
+459val_459
+51val_51
+138val_138
+103val_103
+239val_239
+213val_213
+216val_216
+430val_430
+278val_278
+176val_176
+289val_289
+221val_221
+65val_65
+318val_318
+332val_332
+311val_311
+275val_275
+137val_137
+241val_241
+83val_83
+333val_333
+180val_180
+284val_284
+12val_12
+230val_230
+181val_181
+67val_67
+260val_260
+404val_404
+384val_384
+489val_489
+353val_353
+373val_373
+272val_272
+138val_138
+217val_217
+84val_84
+348val_348
+466val_466
+58val_58
+8val_8
+411val_411
+230val_230
+208val_208
+348val_348
+24val_24
+463val_463
+431val_431
+179val_179
+172val_172
+42val_42
+129val_129
+158val_158
+119val_119
+496val_496
+0val_0
+322val_322
+197val_197
+468val_468
+393val_393
+454val_454
+100val_100
+298val_298
+199val_199
+191val_191
+418val_418
+96val_96
+26val_26
+165val_165
+327val_327
+230val_230
+205val_205
+120val_120
+131val_131
+51val_51
+404val_404
+43val_43
+436val_436
+156val_156
+469val_469
+468val_468
+308val_308
+95val_95
+196val_196
+288val_288
+481val_481
+457val_457
+98val_98
+282val_282
+197val_197
+187val_187
+318val_318
+318val_318
+409val_409
+470val_470
+137val_137
+369val_369
+316val_316
+169val_169
+413val_413
+85val_85
+77val_77
+0val_0
+490val_490
+87val_87
+364val_364
+179val_179
+118val_118
+134val_134
+395val_395
+282val_282
+138val_138
+238val_238
+419val_419
+15val_15
+118val_118
+72val_72
+90val_90
+307val_307
+19val_19
+435val_435
+10val_10
+277val_277
+273val_273
+306val_306
+224val_224
+309val_309
+389val_389
+327val_327
+242val_242
+369val_369
+392val_392
+272val_272
+331val_331
+401val_401
+242val_242
+452val_452
+177val_177
+226val_226
+5val_5
+497val_497
+402val_402
+396val_396
+317val_317
+395val_395
+58val_58
+35val_35
+336val_336
+95val_95
+11val_11
+168val_168
+34val_34
+229val_229
+233val_233
+143val_143
+472val_472
+322val_322
+498val_498
+160val_160
+195val_195
+42val_42
+321val_321
+430val_430
+119val_119
+489val_489
+458val_458
+78val_78
+76val_76
+41val_41
+223val_223
+492val_492
+149val_149
+449val_449
+218val_218
+228val_228
+138val_138
+453val_453
+30val_30
+209val_209
+64val_64
+468val_468
+76val_76
+74val_74
+342val_342
+69val_69
+230val_230
+33val_33
+368val_368
+103val_103
+296val_296
+113val_113
+216val_216
+367val_367
+344val_344
+167val_167
+274val_274
+219val_219
+239val_239
+485val_485
+116val_116
+223val_223
+256val_256
+263val_263
+70val_70
+487val_487
+480val_480
+401val_401
+288val_288
+191val_191
+5val_5
+244val_244
+438val_438
+128val_128
+467val_467
+432val_432
+202val_202
+316val_316
+229val_229
+469val_469
+463val_463
+280val_280
+2val_2
+35val_35
+283val_283
+331val_331
+235val_235
+80val_80
+44val_44
+193val_193
+321val_321
+335val_335
+104val_104
+466val_466
+366val_366
+175val_175
+403val_403
+483val_483
+53val_53
+105val_105
+257val_257
+406val_406
+409val_409
+190val_190
+406val_406
+401val_401
+114val_114
+258val_258
+90val_90
+203val_203
+262val_262
+348val_348
+424val_424
+12val_12
+396val_396
+201val_201
+217val_217
+164val_164
+431val_431
+454val_454
+478val_478
+298val_298
+125val_125
+431val_431
+164val_164
+424val_424
+187val_187
+382val_382
+5val_5
+70val_70
+397val_397
+480val_480
+291val_291
+24val_24
+351val_351
+255val_255
+104val_104
+70val_70
+163val_163
+438val_438
+119val_119
+414val_414
+200val_200
+491val_491
+237val_237
+439val_439
+360val_360
+248val_248
+479val_479
+305val_305
+417val_417
+199val_199
+444val_444
+120val_120
+429val_429
+169val_169
+443val_443
+323val_323
+325val_325
+277val_277
+230val_230
+478val_478
+178val_178
+468val_468
+310val_310
+317val_317
+333val_333
+493val_493
+460val_460
+207val_207
+249val_249
+265val_265
+480val_480
+83val_83
+136val_136
+353val_353
+172val_172
+214val_214
+462val_462
+233val_233
+406val_406
+133val_133
+175val_175
+189val_189
+454val_454
+375val_375
+401val_401
+421val_421
+407val_407
+384val_384
+256val_256
+26val_26
+134val_134
+67val_67
+384val_384
+379val_379
+18val_18
+462val_462
+492val_492
+100val_100
+298val_298
+9val_9
+341val_341
+498val_498
+146val_146
+458val_458
+362val_362
+186val_186
+285val_285
+348val_348
+167val_167
+18val_18
+273val_273
+183val_183
+281val_281
+344val_344
+97val_97
+469val_469
+315val_315
+84val_84
+28val_28
+37val_37
+448val_448
+152val_152
+348val_348
+307val_307
+194val_194
+414val_414
+477val_477
+222val_222
+126val_126
+90val_90
+169val_169
+403val_403
+400val_400
+200val_200
+97val_97
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.csv b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7fe5adba93d77f80299472e9ff13cf3a4615c5b3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.csv
@@ -0,0 +1,3 @@
+name;age;job
+Jorge;30;Developer
+Bob;32;Developer
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.json b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.json
new file mode 100644
index 0000000000000000000000000000000000000000..50a859cbd7ee826a613802a499772cad0907fe67
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.json
@@ -0,0 +1,3 @@
+{"name":"Michael"}
+{"name":"Andy", "age":30}
+{"name":"Justin", "age":19}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.txt b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3bcace4a44c235bb9a79d35587d5dc06b21e13b9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/people.txt
@@ -0,0 +1,3 @@
+Michael, 29
+Andy, 30
+Justin, 19
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/user.avsc b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/user.avsc
new file mode 100644
index 0000000000000000000000000000000000000000..4995357ab3736e8392f9d6a10775dfd42c91594b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/user.avsc
@@ -0,0 +1,8 @@
+{"namespace": "example.avro",
+ "type": "record",
+ "name": "User",
+ "fields": [
+     {"name": "name", "type": "string"},
+     {"name": "favorite_color", "type": ["string", "null"]}
+ ]
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.avro b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.avro
new file mode 100644
index 0000000000000000000000000000000000000000..27c526ab114b2f42f6d4e13325c373706ba0f880
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.avro differ
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.orc b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.orc
new file mode 100644
index 0000000000000000000000000000000000000000..12478a5d03c26cb30b35af232a5764e076eaab1f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.orc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.parquet b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..aa527338c43a8400fd56e549cb28aa1e6a9ccccf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/examples/src/main/resources/users.parquet differ
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3311de12dbd9769697f692e41457d6180eb283f2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Usage: BroadcastTest [partitions] [numElem] [blockSize]
+ */
+object BroadcastTest {
+  def main(args: Array[String]) {
+
+    val blockSize = if (args.length > 2) args(2) else "4096"
+
+    val spark = SparkSession
+      .builder()
+      .appName("Broadcast Test")
+      .config("spark.broadcast.blockSize", blockSize)
+      .getOrCreate()
+
+    val sc = spark.sparkContext
+
+    val slices = if (args.length > 0) args(0).toInt else 2
+    val num = if (args.length > 1) args(1).toInt else 1000000
+
+    val arr1 = (0 until num).toArray
+
+    for (i <- 0 until 3) {
+      println(s"Iteration $i")
+      println("===========")
+      val startTime = System.nanoTime
+      val barr1 = sc.broadcast(arr1)
+      val observedSizes = sc.parallelize(1 to 10, slices).map(_ => barr1.value.length)
+      // Collect the small RDD so we can print the observed sizes locally.
+      observedSizes.collect().foreach(i => println(i))
+      println("Iteration %d took %.0f milliseconds".format(i, (System.nanoTime - startTime) / 1E6))
+    }
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1a779716ec4c0043170ce2799279b3b78e06e579
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.io.File
+
+import scala.io.Source._
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Simple test for reading and writing to a distributed
+ * file system.  This example does the following:
+ *
+ *   1. Reads local file
+ *   2. Computes word count on local file
+ *   3. Writes local file to a DFS
+ *   4. Reads the file back from the DFS
+ *   5. Computes word count on the file using Spark
+ *   6. Compares the word count results
+ */
+object DFSReadWriteTest {
+
+  private var localFilePath: File = new File(".")
+  private var dfsDirPath: String = ""
+
+  private val NPARAMS = 2
+
+  private def readFile(filename: String): List[String] = {
+    val lineIter: Iterator[String] = fromFile(filename).getLines()
+    val lineList: List[String] = lineIter.toList
+    lineList
+  }
+
+  private def printUsage(): Unit = {
+    val usage = """DFS Read-Write Test
+    |Usage: localFile dfsDir
+    |localFile - (string) local file to use in test
+    |dfsDir - (string) DFS directory for read/write tests""".stripMargin
+
+    println(usage)
+  }
+
+  private def parseArgs(args: Array[String]): Unit = {
+    if (args.length != NPARAMS) {
+      printUsage()
+      System.exit(1)
+    }
+
+    var i = 0
+
+    localFilePath = new File(args(i))
+    if (!localFilePath.exists) {
+      System.err.println(s"Given path (${args(i)}) does not exist")
+      printUsage()
+      System.exit(1)
+    }
+
+    if (!localFilePath.isFile) {
+      System.err.println(s"Given path (${args(i)}) is not a file")
+      printUsage()
+      System.exit(1)
+    }
+
+    i += 1
+    dfsDirPath = args(i)
+  }
+
+  def runLocalWordCount(fileContents: List[String]): Int = {
+    fileContents.flatMap(_.split(" "))
+      .flatMap(_.split("\t"))
+      .filter(_.nonEmpty)
+      .groupBy(w => w)
+      .mapValues(_.size)
+      .values
+      .sum
+  }
+
+  def main(args: Array[String]): Unit = {
+    parseArgs(args)
+
+    println("Performing local word count")
+    val fileContents = readFile(localFilePath.toString())
+    val localWordCount = runLocalWordCount(fileContents)
+
+    println("Creating SparkSession")
+    val spark = SparkSession
+      .builder
+      .appName("DFS Read Write Test")
+      .getOrCreate()
+
+    println("Writing local file to DFS")
+    val dfsFilename = s"$dfsDirPath/dfs_read_write_test"
+    val fileRDD = spark.sparkContext.parallelize(fileContents)
+    fileRDD.saveAsTextFile(dfsFilename)
+
+    println("Reading file from DFS and running Word Count")
+    val readFileRDD = spark.sparkContext.textFile(dfsFilename)
+
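+    // As in runLocalWordCount, count occurrences per word (countByKey) and sum
+    // the per-word counts to obtain the total number of words read back.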
+    val dfsWordCount = readFileRDD
+      .flatMap(_.split(" "))
+      .flatMap(_.split("\t"))
+      .filter(_.nonEmpty)
+      .map(w => (w, 1))
+      .countByKey()
+      .values
+      .sum
+
+    spark.stop()
+
+    if (localWordCount == dfsWordCount) {
+      println(s"Success! Local Word Count $localWordCount and " +
+        s"DFS Word Count $dfsWordCount agree.")
+    } else {
+      println(s"Failure! Local Word Count $localWordCount " +
+        s"and DFS Word Count $dfsWordCount disagree.")
+    }
+
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d12ef642bd2cd80d616c6502fcfd0523e60c77f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.util.Utils
+
+/**
+ * Prints out environmental information, sleeps, and then exits. Made to
+ * test driver submission in the standalone scheduler.
+ */
+object DriverSubmissionTest {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      println("Usage: DriverSubmissionTest <seconds-to-sleep>")
+      System.exit(0)
+    }
+    val numSecondsToSleep = args(0).toInt
+
+    val env = System.getenv()
+    val properties = Utils.getSystemProperties
+
+    println("Environment variables containing SPARK_TEST:")
+    env.asScala.filter { case (k, _) => k.contains("SPARK_TEST")}.foreach(println)
+
+    println("System properties containing spark.test:")
+    properties.filter { case (k, _) => k.toString.contains("spark.test") }.foreach(println)
+
+    for (i <- 1 until numSecondsToSleep) {
+      println(s"Alive for $i out of $numSecondsToSleep seconds")
+      Thread.sleep(1000)
+    }
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..45c4953a84be26ac6ad4cb1f82fefa5ffd615f73
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples
+
+import org.apache.spark.sql.SparkSession
+
+object ExceptionHandlingTest {
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("ExceptionHandlingTest")
+      .getOrCreate()
+
+    spark.sparkContext.parallelize(0 until spark.sparkContext.defaultParallelism).foreach { i =>
+      if (math.random > 0.75) {
+        throw new Exception("Testing exception handling")
+      }
+    }
+
+    spark.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2f2bbb12754386b5df0f17aa9b750c14b2ce4d75
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Usage: GroupByTest [numMappers] [numKVPairs] [KeySize] [numReducers]
+ */
+object GroupByTest {
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("GroupBy Test")
+      .getOrCreate()
+
+    val numMappers = if (args.length > 0) args(0).toInt else 2
+    val numKVPairs = if (args.length > 1) args(1).toInt else 1000
+    val valSize = if (args.length > 2) args(2).toInt else 1000
+    val numReducers = if (args.length > 3) args(3).toInt else numMappers
+
+    val pairs1 = spark.sparkContext.parallelize(0 until numMappers, numMappers).flatMap { p =>
+      val ranGen = new Random
+      val arr1 = new Array[(Int, Array[Byte])](numKVPairs)
+      for (i <- 0 until numKVPairs) {
+        val byteArr = new Array[Byte](valSize)
+        ranGen.nextBytes(byteArr)
+        arr1(i) = (ranGen.nextInt(Int.MaxValue), byteArr)
+      }
+      arr1
+    }.cache()
+    // Enforce that everything has been calculated and in cache
+    pairs1.count()
+
+    println(pairs1.groupByKey(numReducers).count())
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..e1f985ece8c067f4962826730822983fd32486d8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import org.apache.spark.sql.SparkSession
+
+
+object HdfsTest {
+
+  /** Usage: HdfsTest [file] */
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      System.err.println("Usage: HdfsTest <file>")
+      System.exit(1)
+    }
+    val spark = SparkSession
+      .builder
+      .appName("HdfsTest")
+      .getOrCreate()
+    val file = spark.read.text(args(0)).rdd
+    val mapped = file.map(s => s.length).cache()
+    for (iter <- 1 to 10) {
+      val start = System.currentTimeMillis()
+      for (x <- mapped) { x + 2 }  // no-op pass that forces a full scan of the cached data
+      val end = System.currentTimeMillis()
+      println(s"Iteration $iter took ${end-start} ms")
+    }
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3f9cea35d6503ee7901a0f3e40ddc41c24344634
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import org.apache.commons.math3.linear._
+
+/**
+ * Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.recommendation.ALS.
+ */
+object LocalALS {
+
+  // Parameters set through command line arguments
+  var M = 0 // Number of movies
+  var U = 0 // Number of users
+  var F = 0 // Number of features
+  var ITERATIONS = 0
+  val LAMBDA = 0.01 // Regularization coefficient
+
+  def generateR(): RealMatrix = {
+    val mh = randomMatrix(M, F)
+    val uh = randomMatrix(U, F)
+    mh.multiply(uh.transpose())
+  }
+
+  def rmse(targetR: RealMatrix, ms: Array[RealVector], us: Array[RealVector]): Double = {
+    val r = new Array2DRowRealMatrix(M, U)
+    for (i <- 0 until M; j <- 0 until U) {
+      r.setEntry(i, j, ms(i).dotProduct(us(j)))
+    }
+    val diffs = r.subtract(targetR)
+    var sumSqs = 0.0
+    for (i <- 0 until M; j <- 0 until U) {
+      val diff = diffs.getEntry(i, j)
+      sumSqs += diff * diff
+    }
+    math.sqrt(sumSqs / (M.toDouble * U.toDouble))
+  }
+
+  def updateMovie(i: Int, m: RealVector, us: Array[RealVector], R: RealMatrix) : RealVector = {
+    var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
+    var Xty: RealVector = new ArrayRealVector(F)
+    // For each user that rated the movie
+    for (j <- 0 until U) {
+      val u = us(j)
+      // Add u * u^t to XtX
+      XtX = XtX.add(u.outerProduct(u))
+      // Add u * rating to Xty
+      Xty = Xty.add(u.mapMultiply(R.getEntry(i, j)))
+    }
+    // Add regularization coefficients to diagonal terms
+    for (d <- 0 until F) {
+      XtX.addToEntry(d, d, LAMBDA * U)
+    }
+    // Solve it with Cholesky
+    new CholeskyDecomposition(XtX).getSolver.solve(Xty)
+  }
+
+  def updateUser(j: Int, u: RealVector, ms: Array[RealVector], R: RealMatrix) : RealVector = {
+    var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
+    var Xty: RealVector = new ArrayRealVector(F)
+    // For each movie that the user rated
+    for (i <- 0 until M) {
+      val m = ms(i)
+      // Add m * m^t to XtX
+      XtX = XtX.add(m.outerProduct(m))
+      // Add m * rating to Xty
+      Xty = Xty.add(m.mapMultiply(R.getEntry(i, j)))
+    }
+    // Add regularization coefficients to diagonal terms
+    for (d <- 0 until F) {
+      XtX.addToEntry(d, d, LAMBDA * M)
+    }
+    // Solve it with Cholesky
+    new CholeskyDecomposition(XtX).getSolver.solve(Xty)
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of ALS and is given as an example!
+        |Please use org.apache.spark.ml.recommendation.ALS
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    args match {
+      case Array(m, u, f, iters) =>
+        M = m.toInt
+        U = u.toInt
+        F = f.toInt
+        ITERATIONS = iters.toInt
+      case _ =>
+        System.err.println("Usage: LocalALS <M> <U> <F> <iters>")
+        System.exit(1)
+    }
+
+    showWarning()
+
+    println(s"Running with M=$M, U=$U, F=$F, iters=$ITERATIONS")
+
+    val R = generateR()
+
+    // Initialize m and u randomly
+    var ms = Array.fill(M)(randomVector(F))
+    var us = Array.fill(U)(randomVector(F))
+
+    // Iteratively update movies then users
+    for (iter <- 1 to ITERATIONS) {
+      println(s"Iteration $iter:")
+      ms = (0 until M).map(i => updateMovie(i, ms(i), us, R)).toArray
+      us = (0 until U).map(j => updateUser(j, us(j), ms, R)).toArray
+      println(s"RMSE = ${rmse(R, ms, us)}")
+    }
+  }
+
+  private def randomVector(n: Int): RealVector =
+    new ArrayRealVector(Array.fill(n)(math.random))
+
+  private def randomMatrix(rows: Int, cols: Int): RealMatrix =
+    new Array2DRowRealMatrix(Array.fill(rows, cols)(math.random))
+
+}
+// scalastyle:on println
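The updateMovie/updateUser steps above solve the regularized normal equations (X^T X + lambda * n * I) v = X^T y with a Cholesky factorization. A self-contained toy illustration of that solve with commons-math3, using made-up ratings (two users, two latent features, one movie):
{{{
import org.apache.commons.math3.linear._

object TinyCholeskySolve {
  def main(args: Array[String]): Unit = {
    // Two users with two latent features each, rating one movie 4.0 and 2.0.
    val users = Seq(
      (new ArrayRealVector(Array(1.0, 0.5)), 4.0),
      (new ArrayRealVector(Array(0.2, 1.0)), 2.0))
    val lambda = 0.01
    var xtx: RealMatrix = new Array2DRowRealMatrix(2, 2)
    var xty: RealVector = new ArrayRealVector(2)
    for ((u, rating) <- users) {
      xtx = xtx.add(u.outerProduct(u))      // accumulate u * u^T
      xty = xty.add(u.mapMultiply(rating))  // accumulate u * rating
    }
    for (d <- 0 until 2) xtx.addToEntry(d, d, lambda * users.size)  // regularization
    val movieFactors = new CholeskyDecomposition(xtx).getSolver.solve(xty)
    println(movieFactors)
  }
}
}}}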
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5512e33e41ac30180afa0411693341eadc63906c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import breeze.linalg.{DenseVector, Vector}
+
+/**
+ * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.classification.LogisticRegression.
+ */
+object LocalFileLR {
+  val D = 10   // Number of dimensions
+  val rand = new Random(42)
+
+  case class DataPoint(x: Vector[Double], y: Double)
+
+  def parsePoint(line: String): DataPoint = {
+    val nums = line.split(' ').map(_.toDouble)
+    DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0))
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use org.apache.spark.ml.classification.LogisticRegression
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    showWarning()
+
+    val fileSrc = scala.io.Source.fromFile(args(0))
+    val lines = fileSrc.getLines().toArray
+    val points = lines.map(parsePoint)
+    val ITERATIONS = args(1).toInt
+
+    // Initialize w to a random value
+    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
+    println(s"Initial w: $w")
+
+    for (i <- 1 to ITERATIONS) {
+      println(s"On iteration $i")
+      val gradient = DenseVector.zeros[Double](D)
+      for (p <- points) {
+        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
+        gradient += p.x * scale
+      }
+      w -= gradient
+    }
+
+    fileSrc.close()
+    println(s"Final w: $w")
+  }
+}
+// scalastyle:on println
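The inner loop above accumulates, for every point, the term (1 / (1 + e^(-y * w.x)) - 1) * y * x. A tiny standalone sketch of that single-point contribution with breeze, using made-up numbers:
{{{
import breeze.linalg.DenseVector

object GradientTermDemo {
  def main(args: Array[String]): Unit = {
    val w = DenseVector(0.1, -0.2, 0.3)
    val x = DenseVector(1.0, 2.0, 3.0)
    val y = 1.0
    // Same expression as in the training loop above.
    val scale = (1 / (1 + math.exp(-y * (w.dot(x)))) - 1) * y
    println(x * scale)  // this point's contribution to the gradient
  }
}
}}}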
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f5162a59522f0f8a0a001be69d583f7ca3a5fc60
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
+
+import breeze.linalg.{squaredDistance, DenseVector, Vector}
+
+/**
+ * K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.clustering.KMeans.
+ */
+object LocalKMeans {
+  val N = 1000
+  val R = 1000    // Scaling factor
+  val D = 10
+  val K = 10
+  val convergeDist = 0.001
+  val rand = new Random(42)
+
+  def generateData: Array[DenseVector[Double]] = {
+    def generatePoint(i: Int): DenseVector[Double] = {
+      DenseVector.fill(D) {rand.nextDouble * R}
+    }
+    Array.tabulate(N)(generatePoint)
+  }
+
+  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
+    var bestIndex = 0
+    var closest = Double.PositiveInfinity
+
+    for (i <- 1 to centers.size) {
+      val vCurr = centers(i)
+      val tempDist = squaredDistance(p, vCurr)
+      if (tempDist < closest) {
+        closest = tempDist
+        bestIndex = i
+      }
+    }
+
+    bestIndex
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+        |Please use org.apache.spark.ml.clustering.KMeans
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    showWarning()
+
+    val data = generateData
+    val points = new HashSet[Vector[Double]]
+    val kPoints = new HashMap[Int, Vector[Double]]
+    var tempDist = 1.0
+
+    while (points.size < K) {
+      points.add(data(rand.nextInt(N)))
+    }
+
+    val iter = points.iterator
+    for (i <- 1 to points.size) {
+      kPoints.put(i, iter.next())
+    }
+
+    println(s"Initial centers: $kPoints")
+
+    while(tempDist > convergeDist) {
+      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))
+
+      val mappings = closest.groupBy[Int] (x => x._1)
+
+      val pointStats = mappings.map { pair =>
+        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
+          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
+        }
+      }
+
+      val newPoints = pointStats.map { mapping =>
+        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))
+      }
+
+      tempDist = 0.0
+      for (mapping <- newPoints) {
+        tempDist += squaredDistance(kPoints(mapping._1), mapping._2)
+      }
+
+      for (newP <- newPoints) {
+        kPoints.put(newP._1, newP._2)
+      }
+    }
+
+    println(s"Final centers: $kPoints")
+  }
+}
+// scalastyle:on println
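closestPoint above is a plain nearest-centroid lookup over the 1-to-K keyed map of centers; the same idea in a few standalone lines with hypothetical centers:
{{{
import scala.collection.mutable.HashMap

import breeze.linalg.{squaredDistance, DenseVector, Vector}

object ClosestPointDemo {
  def main(args: Array[String]): Unit = {
    val centers = HashMap[Int, Vector[Double]](
      1 -> DenseVector(0.0, 0.0),
      2 -> DenseVector(10.0, 10.0))
    val p: Vector[Double] = DenseVector(9.0, 8.0)
    val bestIndex = centers.minBy { case (_, c) => squaredDistance(p, c) }._1
    println(s"closest center: $bestIndex")  // prints 2
  }
}
}}}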
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
new file mode 100644
index 0000000000000000000000000000000000000000..bde8ccd305960b70647d1cdd300709ecbedcded7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import breeze.linalg.{DenseVector, Vector}
+
+/**
+ * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.classification.LogisticRegression.
+ */
+object LocalLR {
+  val N = 10000  // Number of data points
+  val D = 10   // Number of dimensions
+  val R = 0.7  // Scaling factor
+  val ITERATIONS = 5
+  val rand = new Random(42)
+
+  case class DataPoint(x: Vector[Double], y: Double)
+
+  def generateData: Array[DataPoint] = {
+    def generatePoint(i: Int): DataPoint = {
+      val y = if (i % 2 == 0) -1 else 1
+      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
+      DataPoint(x, y)
+    }
+    Array.tabulate(N)(generatePoint)
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use org.apache.spark.ml.classification.LogisticRegression
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    showWarning()
+
+    val data = generateData
+    // Initialize w to a random value
+    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
+    println(s"Initial w: $w")
+
+    for (i <- 1 to ITERATIONS) {
+      println(s"On iteration $i")
+      val gradient = DenseVector.zeros[Double](D)
+      for (p <- data) {
+        val scale = (1 / (1 + math.exp(-p.y * (w.dot(p.x)))) - 1) * p.y
+        gradient +=  p.x * scale
+      }
+      w -= gradient
+    }
+
+    println(s"Final w: $w")
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala
new file mode 100644
index 0000000000000000000000000000000000000000..a93c15c85cfc19cc146cd7cffc65d92c03a020eb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import scala.math.random
+
+object LocalPi {
+  def main(args: Array[String]) {
+    var count = 0
+    for (i <- 1 to 100000) {
+      val x = random * 2 - 1
+      val y = random * 2 - 1
+      if (x*x + y*y <= 1) count += 1
+    }
+    println(s"Pi is roughly ${4 * count / 100000.0}")
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala
new file mode 100644
index 0000000000000000000000000000000000000000..c55b68e033964073946ff144338d9ced7c5c0202
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+/**
+ * Executes a roll-up-style query against Apache logs.
+ *
+ * Usage: LogQuery [logFile]
+ */
+object LogQuery {
+  val exampleApacheLogs = List(
+    """10.10.10.10 - "FRED" [18/Jan/2013:17:56:07 +1100] "GET http://images.com/2013/Generic.jpg
+      | HTTP/1.1" 304 315 "http://referall.com/" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;
+      | GTB7.4; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648; .NET CLR
+      | 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 1.0.3705; .NET CLR 1.1.4322; .NET CLR
+      | 3.5.30729; Release=ARP)" "UD-1" - "image/jpeg" "whatever" 0.350 "-" - "" 265 923 934 ""
+      | 62.24.11.25 images.com 1358492167 - Whatup""".stripMargin.lines.mkString,
+    """10.10.10.10 - "FRED" [18/Jan/2013:18:02:37 +1100] "GET http://images.com/2013/Generic.jpg
+      | HTTP/1.1" 304 306 "http:/referall.com" "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;
+      | GTB7.4; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30; .NET CLR 3.0.04506.648; .NET CLR
+      | 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 1.0.3705; .NET CLR 1.1.4322; .NET CLR
+      | 3.5.30729; Release=ARP)" "UD-1" - "image/jpeg" "whatever" 0.352 "-" - "" 256 977 988 ""
+      | 0 73.23.2.15 images.com 1358492557 - Whatup""".stripMargin.lines.mkString
+  )
+
+  def main(args: Array[String]) {
+
+    val sparkConf = new SparkConf().setAppName("Log Query")
+    val sc = new SparkContext(sparkConf)
+
+    val dataSet =
+      if (args.length == 1) sc.textFile(args(0)) else sc.parallelize(exampleApacheLogs)
+    // scalastyle:off
+    val apacheLogRegex =
+      """^([\d.]+) (\S+) (\S+) \[([\w\d:/]+\s[+\-]\d{4})\] "(.+?)" (\d{3}) ([\d\-]+) "([^"]+)" "([^"]+)".*""".r
+    // scalastyle:on
+    /** Tracks the total query count and number of aggregate bytes for a particular group. */
+    class Stats(val count: Int, val numBytes: Int) extends Serializable {
+      def merge(other: Stats): Stats = new Stats(count + other.count, numBytes + other.numBytes)
+      override def toString: String = "bytes=%s\tn=%s".format(numBytes, count)
+    }
+
+    def extractKey(line: String): (String, String, String) = {
+      apacheLogRegex.findFirstIn(line) match {
+        case Some(apacheLogRegex(ip, _, user, dateTime, query, status, bytes, referer, ua)) =>
+          if (user != "\"-\"") (ip, user, query)
+          else (null, null, null)
+        case _ => (null, null, null)
+      }
+    }
+
+    def extractStats(line: String): Stats = {
+      apacheLogRegex.findFirstIn(line) match {
+        case Some(apacheLogRegex(ip, _, user, dateTime, query, status, bytes, referer, ua)) =>
+          new Stats(1, bytes.toInt)
+        case _ => new Stats(1, 0)
+      }
+    }
+
+    dataSet.map(line => (extractKey(line), extractStats(line)))
+      .reduceByKey((a, b) => a.merge(b))
+      .collect().foreach {
+        case (key, stats) => println("%s\t%s".format(key, stats))
+      }
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
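To see what the regular expression actually extracts, here is a small, self-contained sketch applying it to a simplified synthetic log line (shorter than the examples above, but in the same shape):
{{{
object LogRegexDemo {
  def main(args: Array[String]): Unit = {
    val apacheLogRegex =
      """^([\d.]+) (\S+) (\S+) \[([\w\d:/]+\s[+\-]\d{4})\] "(.+?)" (\d{3}) ([\d\-]+) "([^"]+)" "([^"]+)".*""".r
    val line =
      """10.10.10.10 - "FRED" [18/Jan/2013:17:56:07 +1100] "GET /x.jpg HTTP/1.1" 304 315 "http://referall.com/" "Mozilla/4.0" trailing"""
    line match {
      case apacheLogRegex(ip, _, user, dateTime, query, status, bytes, referer, ua) =>
        println(s"ip=$ip user=$user query=$query status=$status bytes=$bytes")
      case _ =>
        println("line did not match")
    }
  }
}
}}}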
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..e6f33b7adf5d1bf186e0abdf338b5f16fb5b244b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SparkSession
+
+
+/**
+ * Usage: MultiBroadcastTest [partitions] [numElem]
+ */
+object MultiBroadcastTest {
+  def main(args: Array[String]) {
+
+    val spark = SparkSession
+      .builder
+      .appName("Multi-Broadcast Test")
+      .getOrCreate()
+
+    val slices = if (args.length > 0) args(0).toInt else 2
+    val num = if (args.length > 1) args(1).toInt else 1000000
+
+    val arr1 = new Array[Int](num)
+    for (i <- 0 until arr1.length) {
+      arr1(i) = i
+    }
+
+    val arr2 = new Array[Int](num)
+    for (i <- 0 until arr2.length) {
+      arr2(i) = i
+    }
+
+    val barr1 = spark.sparkContext.broadcast(arr1)
+    val barr2 = spark.sparkContext.broadcast(arr2)
+    val observedSizes: RDD[(Int, Int)] = spark.sparkContext.parallelize(1 to 10, slices).map { _ =>
+      (barr1.value.length, barr2.value.length)
+    }
+    // Collect the small RDD so we can print the observed sizes locally.
+    observedSizes.collect().foreach(i => println(i))
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
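Related housekeeping that the example skips: broadcast variables can be released explicitly once no further jobs need them. A minimal, hypothetical sketch:
{{{
import org.apache.spark.sql.SparkSession

object BroadcastCleanupDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.appName("Broadcast Cleanup Demo").getOrCreate()
    val barr = spark.sparkContext.broadcast(Array.fill(1000000)(1))
    val sizes = spark.sparkContext.parallelize(1 to 10).map(_ => barr.value.length).collect()
    println(sizes.mkString(", "))
    barr.unpersist()  // drop cached copies on executors; destroy() would also forbid reuse
    spark.stop()
  }
}
}}}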
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2332a661f26a03c77fd2786d7c115cac485093de
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Usage: SimpleSkewedGroupByTest [numMappers] [numKVPairs] [valSize] [numReducers] [ratio]
+ */
+object SimpleSkewedGroupByTest {
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("SimpleSkewedGroupByTest")
+      .getOrCreate()
+
+    val numMappers = if (args.length > 0) args(0).toInt else 2
+    val numKVPairs = if (args.length > 1) args(1).toInt else 1000
+    val valSize = if (args.length > 2) args(2).toInt else 1000
+    val numReducers = if (args.length > 3) args(3).toInt else numMappers
+    val ratio = if (args.length > 4) args(4).toDouble else 5.0
+
+    val pairs1 = spark.sparkContext.parallelize(0 until numMappers, numMappers).flatMap { p =>
+      val ranGen = new Random
+      val result = new Array[(Int, Array[Byte])](numKVPairs)
+      for (i <- 0 until numKVPairs) {
+        val byteArr = new Array[Byte](valSize)
+        ranGen.nextBytes(byteArr)
+        val offset = ranGen.nextInt(1000) * numReducers
+        if (ranGen.nextDouble < ratio / (numReducers + ratio - 1)) {
+          // give a `ratio`-times higher chance of generating a key that hashes to reducer 0
+          result(i) = (offset, byteArr)
+        } else {
+          // generate a key for one of the other reducers
+          val key = 1 + ranGen.nextInt(numReducers-1) + offset
+          result(i) = (key, byteArr)
+        }
+      }
+      result
+    }.cache
+    // Force evaluation so that everything has been computed and cached
+    pairs1.count
+
+    println(s"RESULT: ${pairs1.groupByKey(numReducers).count}")
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
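The skew comes from the branch probability ratio / (numReducers + ratio - 1): with the defaults above (numReducers = 2, ratio = 5) and Spark's default hash partitioning, reducer 0 receives about five times as many keys as any other reducer. A quick check in plain Scala:
{{{
object SkewRatioCheck {
  def main(args: Array[String]): Unit = {
    val numReducers = 2
    val ratio = 5.0
    val pReducer0 = ratio / (numReducers + ratio - 1)     // = 5/6
    val pEachOther = (1 - pReducer0) / (numReducers - 1)  // = 1/6
    println(s"reducer 0 gets ${pReducer0 / pEachOther}x the keys of any other reducer")
  }
}
}}}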
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4d3c34041bc17227db849a9ea6c0fa5ea7efa632
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Usage: SkewedGroupByTest [numMappers] [numKVPairs] [valSize] [numReducers]
+ */
+object SkewedGroupByTest {
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("GroupBy Test")
+      .getOrCreate()
+
+    val numMappers = if (args.length > 0) args(0).toInt else 2
+    var numKVPairs = if (args.length > 1) args(1).toInt else 1000
+    val valSize = if (args.length > 2) args(2).toInt else 1000
+    val numReducers = if (args.length > 3) args(3).toInt else numMappers
+
+    val pairs1 = spark.sparkContext.parallelize(0 until numMappers, numMappers).flatMap { p =>
+      val ranGen = new Random
+
+      // map output sizes increase linearly from the first mapper to the last
+      numKVPairs = (1.0 * (p + 1) / numMappers * numKVPairs).toInt
+
+      val arr1 = new Array[(Int, Array[Byte])](numKVPairs)
+      for (i <- 0 until numKVPairs) {
+        val byteArr = new Array[Byte](valSize)
+        ranGen.nextBytes(byteArr)
+        arr1(i) = (ranGen.nextInt(Int.MaxValue), byteArr)
+      }
+      arr1
+    }.cache()
+    // Force evaluation so that everything has been computed and cached
+    pairs1.count()
+
+    println(pairs1.groupByKey(numReducers).count())
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d3e7b7a967de78100c641dbf65020a22244626a0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import org.apache.commons.math3.linear._
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Alternating least squares matrix factorization.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.recommendation.ALS.
+ */
+object SparkALS {
+
+  // Parameters set through command line arguments
+  var M = 0 // Number of movies
+  var U = 0 // Number of users
+  var F = 0 // Number of features
+  var ITERATIONS = 0
+  val LAMBDA = 0.01 // Regularization coefficient
+
+  def generateR(): RealMatrix = {
+    val mh = randomMatrix(M, F)
+    val uh = randomMatrix(U, F)
+    mh.multiply(uh.transpose())
+  }
+
+  def rmse(targetR: RealMatrix, ms: Array[RealVector], us: Array[RealVector]): Double = {
+    val r = new Array2DRowRealMatrix(M, U)
+    for (i <- 0 until M; j <- 0 until U) {
+      r.setEntry(i, j, ms(i).dotProduct(us(j)))
+    }
+    val diffs = r.subtract(targetR)
+    var sumSqs = 0.0
+    for (i <- 0 until M; j <- 0 until U) {
+      val diff = diffs.getEntry(i, j)
+      sumSqs += diff * diff
+    }
+    math.sqrt(sumSqs / (M.toDouble * U.toDouble))
+  }
+
+  def update(i: Int, m: RealVector, us: Array[RealVector], R: RealMatrix) : RealVector = {
+    val U = us.length
+    val F = us(0).getDimension
+    var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
+    var Xty: RealVector = new ArrayRealVector(F)
+    // For each user that rated the movie
+    for (j <- 0 until U) {
+      val u = us(j)
+      // Add u * u^t to XtX
+      XtX = XtX.add(u.outerProduct(u))
+      // Add u * rating to Xty
+      Xty = Xty.add(u.mapMultiply(R.getEntry(i, j)))
+    }
+    // Add regularization coefs to diagonal terms
+    for (d <- 0 until F) {
+      XtX.addToEntry(d, d, LAMBDA * U)
+    }
+    // Solve it with Cholesky
+    new CholeskyDecomposition(XtX).getSolver.solve(Xty)
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of ALS and is given as an example!
+        |Please use org.apache.spark.ml.recommendation.ALS
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    var slices = 0
+
+    val options = (0 to 4).map(i => if (i < args.length) Some(args(i)) else None)
+
+    options.toArray match {
+      case Array(m, u, f, iters, slices_) =>
+        M = m.getOrElse("100").toInt
+        U = u.getOrElse("500").toInt
+        F = f.getOrElse("10").toInt
+        ITERATIONS = iters.getOrElse("5").toInt
+        slices = slices_.getOrElse("2").toInt
+      case _ =>
+        System.err.println("Usage: SparkALS [M] [U] [F] [iters] [partitions]")
+        System.exit(1)
+    }
+
+    showWarning()
+
+    println(s"Running with M=$M, U=$U, F=$F, iters=$ITERATIONS")
+
+    val spark = SparkSession
+      .builder
+      .appName("SparkALS")
+      .getOrCreate()
+
+    val sc = spark.sparkContext
+
+    val R = generateR()
+
+    // Initialize m and u randomly
+    var ms = Array.fill(M)(randomVector(F))
+    var us = Array.fill(U)(randomVector(F))
+
+    // Iteratively update movies then users
+    val Rc = sc.broadcast(R)
+    var msb = sc.broadcast(ms)
+    var usb = sc.broadcast(us)
+    for (iter <- 1 to ITERATIONS) {
+      println(s"Iteration $iter:")
+      ms = sc.parallelize(0 until M, slices)
+                .map(i => update(i, msb.value(i), usb.value, Rc.value))
+                .collect()
+      msb = sc.broadcast(ms) // Re-broadcast ms because it was updated
+      us = sc.parallelize(0 until U, slices)
+                .map(i => update(i, usb.value(i), msb.value, Rc.value.transpose()))
+                .collect()
+      usb = sc.broadcast(us) // Re-broadcast us because it was updated
+      println(s"RMSE = ${rmse(R, ms, us)}")
+    }
+    spark.stop()
+  }
+
+  private def randomVector(n: Int): RealVector =
+    new ArrayRealVector(Array.fill(n)(math.random))
+
+  private def randomMatrix(rows: Int, cols: Int): RealMatrix =
+    new Array2DRowRealMatrix(Array.fill(rows, cols)(math.random))
+
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
new file mode 100644
index 0000000000000000000000000000000000000000..23eaa879114a94e1a918a5e934c9a93e4d9383e2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import scala.math.exp
+
+import breeze.linalg.{DenseVector, Vector}
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Logistic regression based classification.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.classification.LogisticRegression.
+ */
+object SparkHdfsLR {
+  val D = 10   // Number of dimensions
+  val rand = new Random(42)
+
+  case class DataPoint(x: Vector[Double], y: Double)
+
+  def parsePoint(line: String): DataPoint = {
+    val tok = new java.util.StringTokenizer(line, " ")
+    val y = tok.nextToken.toDouble
+    val x = new Array[Double](D)
+    var i = 0
+    while (i < D) {
+      x(i) = tok.nextToken.toDouble; i += 1
+    }
+    DataPoint(new DenseVector(x), y)
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use org.apache.spark.ml.classification.LogisticRegression
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    if (args.length < 2) {
+      System.err.println("Usage: SparkHdfsLR <file> <iters>")
+      System.exit(1)
+    }
+
+    showWarning()
+
+    val spark = SparkSession
+      .builder
+      .appName("SparkHdfsLR")
+      .getOrCreate()
+
+    val inputPath = args(0)
+    val lines = spark.read.textFile(inputPath).rdd
+
+    val points = lines.map(parsePoint).cache()
+    val ITERATIONS = args(1).toInt
+
+    // Initialize w to a random value
+    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
+    println(s"Initial w: $w")
+
+    for (i <- 1 to ITERATIONS) {
+      println(s"On iteration $i")
+      val gradient = points.map { p =>
+        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
+      }.reduce(_ + _)
+      w -= gradient
+    }
+
+    println(s"Final w: $w")
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b005cb6971c160988b76c289f3fcb61e3099cba5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import breeze.linalg.{squaredDistance, DenseVector, Vector}
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * K-means clustering.
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.clustering.KMeans.
+ */
+object SparkKMeans {
+
+  def parseVector(line: String): Vector[Double] = {
+    DenseVector(line.split(' ').map(_.toDouble))
+  }
+
+  def closestPoint(p: Vector[Double], centers: Array[Vector[Double]]): Int = {
+    var bestIndex = 0
+    var closest = Double.PositiveInfinity
+
+    for (i <- 0 until centers.length) {
+      val tempDist = squaredDistance(p, centers(i))
+      if (tempDist < closest) {
+        closest = tempDist
+        bestIndex = i
+      }
+    }
+
+    bestIndex
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
+        |Please use org.apache.spark.ml.clustering.KMeans
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    if (args.length < 3) {
+      System.err.println("Usage: SparkKMeans <file> <k> <convergeDist>")
+      System.exit(1)
+    }
+
+    showWarning()
+
+    val spark = SparkSession
+      .builder
+      .appName("SparkKMeans")
+      .getOrCreate()
+
+    val lines = spark.read.textFile(args(0)).rdd
+    val data = lines.map(parseVector _).cache()
+    val K = args(1).toInt
+    val convergeDist = args(2).toDouble
+
+    val kPoints = data.takeSample(withReplacement = false, K, 42)
+    var tempDist = 1.0
+
+    while(tempDist > convergeDist) {
+      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))
+
+      val pointStats = closest.reduceByKey{case ((p1, c1), (p2, c2)) => (p1 + p2, c1 + c2)}
+
+      val newPoints = pointStats.map {pair =>
+        (pair._1, pair._2._1 * (1.0 / pair._2._2))}.collectAsMap()
+
+      tempDist = 0.0
+      for (i <- 0 until K) {
+        tempDist += squaredDistance(kPoints(i), newPoints(i))
+      }
+
+      for (newP <- newPoints) {
+        kPoints(newP._1) = newP._2
+      }
+      println(s"Finished iteration (delta = $tempDist)")
+    }
+
+    println("Final centers:")
+    kPoints.foreach(println)
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4b1497345af829db3f7db7e95bee7cac632ffe4f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.util.Random
+
+import scala.math.exp
+
+import breeze.linalg.{DenseVector, Vector}
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Logistic regression based classification.
+ * Usage: SparkLR [partitions]
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.ml.classification.LogisticRegression.
+ */
+object SparkLR {
+  val N = 10000  // Number of data points
+  val D = 10   // Number of dimensions
+  val R = 0.7  // Scaling factor
+  val ITERATIONS = 5
+  val rand = new Random(42)
+
+  case class DataPoint(x: Vector[Double], y: Double)
+
+  def generateData: Array[DataPoint] = {
+    def generatePoint(i: Int): DataPoint = {
+      val y = if (i % 2 == 0) -1 else 1
+      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
+      DataPoint(x, y)
+    }
+    Array.tabulate(N)(generatePoint)
+  }
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
+        |Please use org.apache.spark.ml.classification.LogisticRegression
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+
+    showWarning()
+
+    val spark = SparkSession
+      .builder
+      .appName("SparkLR")
+      .getOrCreate()
+
+    val numSlices = if (args.length > 0) args(0).toInt else 2
+    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()
+
+    // Initialize w to a random value
+    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
+    println(s"Initial w: $w")
+
+    for (i <- 1 to ITERATIONS) {
+      println(s"On iteration $i")
+      val gradient = points.map { p =>
+        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
+      }.reduce(_ + _)
+      w -= gradient
+    }
+
+    println(s"Final w: $w")
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9299bad5d3290d74f39605375d9831065130f453
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Computes the PageRank of URLs from an input file. The input file should
+ * be in the format:
+ * URL         neighbor URL
+ * URL         neighbor URL
+ * URL         neighbor URL
+ * ...
+ * where each URL and its neighbors are separated by space(s).
+ *
+ * This is an example implementation for learning how to use Spark. For more conventional use,
+ * please refer to org.apache.spark.graphx.lib.PageRank
+ *
+ * Example Usage:
+ * {{{
+ * bin/run-example SparkPageRank data/mllib/pagerank_data.txt 10
+ * }}}
+ */
+object SparkPageRank {
+
+  def showWarning() {
+    System.err.println(
+      """WARN: This is a naive implementation of PageRank and is given as an example!
+        |Please use the PageRank implementation found in org.apache.spark.graphx.lib.PageRank
+        |for more conventional use.
+      """.stripMargin)
+  }
+
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      System.err.println("Usage: SparkPageRank <file> <iter>")
+      System.exit(1)
+    }
+
+    showWarning()
+
+    val spark = SparkSession
+      .builder
+      .appName("SparkPageRank")
+      .getOrCreate()
+
+    val iters = if (args.length > 1) args(1).toInt else 10
+    val lines = spark.read.textFile(args(0)).rdd
+    val links = lines.map{ s =>
+      val parts = s.split("\\s+")
+      (parts(0), parts(1))
+    }.distinct().groupByKey().cache()
+    var ranks = links.mapValues(v => 1.0)
+
+    for (i <- 1 to iters) {
+      val contribs = links.join(ranks).values.flatMap{ case (urls, rank) =>
+        val size = urls.size
+        urls.map(url => (url, rank / size))
+      }
+      ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
+    }
+
+    val output = ranks.collect()
+    output.foreach(tup => println(s"${tup._1} has rank: ${tup._2}."))
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
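The update applied on each iteration is rank(url) = 0.15 + 0.85 * sum of (incoming rank / out-degree). A tiny, Spark-free sketch of the same loop on a three-page graph (hypothetical data), mainly to make the damping step concrete:
{{{
object TinyPageRank {
  def main(args: Array[String]): Unit = {
    val links = Map("a" -> Seq("b", "c"), "b" -> Seq("a"), "c" -> Seq("a"))
    var ranks = Map("a" -> 1.0, "b" -> 1.0, "c" -> 1.0)
    for (_ <- 1 to 10) {
      val contribs = for {
        (url, outLinks) <- links.toSeq
        dest <- outLinks
      } yield (dest, ranks(url) / outLinks.size)
      ranks = contribs.groupBy(_._1).map { case (url, cs) =>
        url -> (0.15 + 0.85 * cs.map(_._2).sum)
      }
    }
    ranks.foreach { case (url, rank) => println(s"$url has rank: $rank") }
  }
}
}}}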
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala
new file mode 100644
index 0000000000000000000000000000000000000000..828d98b5001d768c4de6aff5504d140b533104e4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import scala.math.random
+
+import org.apache.spark.sql.SparkSession
+
+/** Computes an approximation to pi */
+object SparkPi {
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("Spark Pi")
+      .getOrCreate()
+    val slices = if (args.length > 0) args(0).toInt else 2
+    val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow
+    val count = spark.sparkContext.parallelize(1 until n, slices).map { i =>
+      val x = random * 2 - 1
+      val y = random * 2 - 1
+      if (x*x + y*y <= 1) 1 else 0
+    }.reduce(_ + _)
+    println(s"Pi is roughly ${4.0 * count / (n - 1)}")
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala
new file mode 100644
index 0000000000000000000000000000000000000000..64076f2deb706e893a2b75b32f9a1d026624d7fc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import java.io.File
+
+import org.apache.spark.SparkFiles
+import org.apache.spark.sql.SparkSession
+
+/** Usage: SparkRemoteFileTest [file] */
+object SparkRemoteFileTest {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      System.err.println("Usage: SparkRemoteFileTest <file>")
+      System.exit(1)
+    }
+    val spark = SparkSession
+      .builder()
+      .appName("SparkRemoteFileTest")
+      .getOrCreate()
+    val sc = spark.sparkContext
+    val rdd = sc.parallelize(Seq(1)).map(_ => {
+      val localLocation = SparkFiles.get(args(0))
+      println(s"${args(0)} is stored at: $localLocation")
+      new File(localLocation).isFile
+    })
+    val truthCheck = rdd.collect().head
+    println(s"Mounting of ${args(0)} was $truthCheck")
+    spark.stop()
+  }
+}
+// scalastyle:on println
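The example assumes the file has already been shipped to the executors (for instance with spark-submit's --files option); the programmatic counterpart is SparkContext.addFile, sketched below with a hypothetical local path:
{{{
import java.io.File

import org.apache.spark.SparkFiles
import org.apache.spark.sql.SparkSession

object AddFileDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("AddFileDemo").getOrCreate()
    val sc = spark.sparkContext
    sc.addFile("/tmp/some-local-file.txt")  // hypothetical path
    val visible = sc.parallelize(Seq(1)).map { _ =>
      new File(SparkFiles.get("some-local-file.txt")).isFile
    }.collect().head
    println(s"File visible on executors: $visible")
    spark.stop()
  }
}
}}}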
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f5d42141f5dd28e701c374260ed53e38e6673026
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples
+
+import scala.collection.mutable
+import scala.util.Random
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Transitive closure on a graph.
+ */
+object SparkTC {
+  val numEdges = 200
+  val numVertices = 100
+  val rand = new Random(42)
+
+  def generateGraph: Seq[(Int, Int)] = {
+    val edges: mutable.Set[(Int, Int)] = mutable.Set.empty
+    while (edges.size < numEdges) {
+      val from = rand.nextInt(numVertices)
+      val to = rand.nextInt(numVertices)
+      if (from != to) edges.+=((from, to))
+    }
+    edges.toSeq
+  }
+
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("SparkTC")
+      .getOrCreate()
+    val slices = if (args.length > 0) args(0).toInt else 2
+    var tc = spark.sparkContext.parallelize(generateGraph, slices).cache()
+
+    // Linear transitive closure: each round grows paths by one edge,
+    // by joining the graph's edges with the already-discovered paths.
+    // e.g. join the path (y, z) from the TC with the edge (x, y) from
+    // the graph to obtain the path (x, z).
+
+    // Because join() joins on keys, the edges are stored in reversed order.
+    val edges = tc.map(x => (x._2, x._1))
+
+    // This join is iterated until a fixed point is reached.
+    var oldCount = 0L
+    var nextCount = tc.count()
+    do {
+      oldCount = nextCount
+      // Perform the join, obtaining an RDD of (y, (z, x)) pairs,
+      // then project the result to obtain the new (x, z) paths.
+      tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache()
+      nextCount = tc.count()
+    } while (nextCount != oldCount)
+
+    println(s"TC has ${tc.count()} edges.")
+    spark.stop()
+  }
+}
+// scalastyle:on println
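The loop above repeatedly joins newly found paths with the original edges until the path count stops growing. The same fixed-point idea on plain Scala collections, for a three-node chain (hypothetical data):
{{{
object TinyTransitiveClosure {
  def main(args: Array[String]): Unit = {
    val edges = Set((1, 2), (2, 3))
    var tc = edges
    var oldSize = 0
    while (tc.size != oldSize) {
      oldSize = tc.size
      // join paths (x, y) with edges (y, z) to add paths (x, z)
      tc = tc ++ (for ((x, y) <- tc; (y2, z) <- edges if y == y2) yield (x, z))
    }
    println(tc)  // includes (1, 3) in addition to the original edges
  }
}
}}}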
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/AggregateMessagesExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/AggregateMessagesExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8441b5a9dd8aaf80a758130398c4c3c1631c0181
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/AggregateMessagesExample.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+// $example on$
+import org.apache.spark.graphx.{Graph, VertexRDD}
+import org.apache.spark.graphx.util.GraphGenerators
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example using the [`aggregateMessages`][Graph.aggregateMessages] operator to
+ * compute the average age of the more senior followers of each user.
+ * Run with
+ * {{{
+ * bin/run-example graphx.AggregateMessagesExample
+ * }}}
+ */
+object AggregateMessagesExample {
+
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession.
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // $example on$
+    // Create a graph with "age" as the vertex property.
+    // Here we use a random graph for simplicity.
+    val graph: Graph[Double, Int] =
+      GraphGenerators.logNormalGraph(sc, numVertices = 100).mapVertices( (id, _) => id.toDouble )
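+    // Here a vertex's "age" is simply its id, so e.g. vertex 90 counts as older than vertex 10.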
+    // Compute the number of older followers and their total age
+    val olderFollowers: VertexRDD[(Int, Double)] = graph.aggregateMessages[(Int, Double)](
+      triplet => { // Map Function
+        if (triplet.srcAttr > triplet.dstAttr) {
+          // Send message to destination vertex containing counter and age
+          triplet.sendToDst((1, triplet.srcAttr))
+        }
+      },
+      // Add counter and age
+      (a, b) => (a._1 + b._1, a._2 + b._2) // Reduce Function
+    )
+    // Divide total age by number of older followers to get average age of older followers
+    val avgAgeOfOlderFollowers: VertexRDD[Double] =
+      olderFollowers.mapValues( (id, value) =>
+        value match { case (count, totalAge) => totalAge / count } )
+    // Display the results
+    avgAgeOfOlderFollowers.collect.foreach(println(_))
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/Analytics.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/Analytics.scala
new file mode 100644
index 0000000000000000000000000000000000000000..815404d1218b78574670e723e6db99de1f80ed9f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/Analytics.scala
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+import scala.collection.mutable
+
+import org.apache.spark._
+import org.apache.spark.graphx._
+import org.apache.spark.graphx.PartitionStrategy._
+import org.apache.spark.graphx.lib._
+import org.apache.spark.internal.Logging
+import org.apache.spark.storage.StorageLevel
+
+
+/**
+ * Driver program for running graph algorithms.
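+ * Run with, for example:
+ * {{{
+ * bin/run-example graphx.Analytics pagerank data/graphx/followers.txt --numEPart=2
+ * }}}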
+ */
+object Analytics extends Logging {
+
+  def main(args: Array[String]): Unit = {
+    if (args.length < 2) {
+      val usage = """Usage: Analytics <taskType> <file> --numEPart=<num_edge_partitions>
+      |  [other options]
+      |Supported 'taskType' values:
+      |pagerank    Compute PageRank
+      |cc          Compute the connected components of vertices
+      |triangles   Count the number of triangles""".stripMargin
+      System.err.println(usage)
+      System.exit(1)
+    }
+
+    val taskType = args(0)
+    val fname = args(1)
+    val optionsList = args.drop(2).map { arg =>
+      arg.dropWhile(_ == '-').split('=') match {
+        case Array(opt, v) => (opt -> v)
+        case _ => throw new IllegalArgumentException(s"Invalid argument: $arg")
+      }
+    }
+    val options = mutable.Map(optionsList: _*)
+
+    val conf = new SparkConf()
+    GraphXUtils.registerKryoClasses(conf)
+
+    val numEPart = options.remove("numEPart").map(_.toInt).getOrElse {
+      println("Set the number of edge partitions using --numEPart.")
+      sys.exit(1)
+    }
+    val partitionStrategy: Option[PartitionStrategy] = options.remove("partStrategy")
+      .map(PartitionStrategy.fromString(_))
+    val edgeStorageLevel = options.remove("edgeStorageLevel")
+      .map(StorageLevel.fromString(_)).getOrElse(StorageLevel.MEMORY_ONLY)
+    val vertexStorageLevel = options.remove("vertexStorageLevel")
+      .map(StorageLevel.fromString(_)).getOrElse(StorageLevel.MEMORY_ONLY)
+
+    taskType match {
+      case "pagerank" =>
+        val tol = options.remove("tol").map(_.toFloat).getOrElse(0.001F)
+        val outFname = options.remove("output").getOrElse("")
+        val numIterOpt = options.remove("numIter").map(_.toInt)
+
+        options.foreach {
+          case (opt, _) => throw new IllegalArgumentException(s"Invalid option: $opt")
+        }
+
+        println("======================================")
+        println("|             PageRank               |")
+        println("======================================")
+
+        val sc = new SparkContext(conf.setAppName(s"PageRank($fname)"))
+
+        val unpartitionedGraph = GraphLoader.edgeListFile(sc, fname,
+          numEdgePartitions = numEPart,
+          edgeStorageLevel = edgeStorageLevel,
+          vertexStorageLevel = vertexStorageLevel).cache()
+        val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_))
+
+        println(s"GRAPHX: Number of vertices ${graph.vertices.count}")
+        println(s"GRAPHX: Number of edges ${graph.edges.count}")
+
+        val pr = (numIterOpt match {
+          case Some(numIter) => PageRank.run(graph, numIter)
+          case None => PageRank.runUntilConvergence(graph, tol)
+        }).vertices.cache()
+
+        println(s"GRAPHX: Total rank: ${pr.map(_._2).reduce(_ + _)}")
+
+        if (!outFname.isEmpty) {
+          logWarning(s"Saving pageranks of pages to $outFname")
+          pr.map { case (id, r) => id + "\t" + r }.saveAsTextFile(outFname)
+        }
+
+        sc.stop()
+
+      case "cc" =>
+        options.foreach {
+          case (opt, _) => throw new IllegalArgumentException(s"Invalid option: $opt")
+        }
+
+        println("======================================")
+        println("|      Connected Components          |")
+        println("======================================")
+
+        val sc = new SparkContext(conf.setAppName(s"ConnectedComponents($fname)"))
+        val unpartitionedGraph = GraphLoader.edgeListFile(sc, fname,
+          numEdgePartitions = numEPart,
+          edgeStorageLevel = edgeStorageLevel,
+          vertexStorageLevel = vertexStorageLevel).cache()
+        val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_))
+
+        val cc = ConnectedComponents.run(graph)
+        println(s"Components: ${cc.vertices.map { case (vid, data) => data }.distinct()}")
+        sc.stop()
+
+      case "triangles" =>
+        options.foreach {
+          case (opt, _) => throw new IllegalArgumentException(s"Invalid option: $opt")
+        }
+
+        println("======================================")
+        println("|      Triangle Count                |")
+        println("======================================")
+
+        val sc = new SparkContext(conf.setAppName(s"TriangleCount($fname)"))
+        val graph = GraphLoader.edgeListFile(sc, fname,
+          canonicalOrientation = true,
+          numEdgePartitions = numEPart,
+          edgeStorageLevel = edgeStorageLevel,
+          vertexStorageLevel = vertexStorageLevel)
+          // TriangleCount requires the graph to be partitioned
+          .partitionBy(partitionStrategy.getOrElse(RandomVertexCut)).cache()
+        val triangles = TriangleCount.run(graph)
+        val numTriangles = triangles.vertices.map {
+          case (vid, data) => data.toLong
+        }.reduce(_ + _) / 3
+
+        println(s"Triangles: $numTriangles")
+        sc.stop()
+
+      case _ =>
+        println("Invalid task type.")
+    }
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/ComprehensiveExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/ComprehensiveExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..6598863bd2ea086ce914d3db71da354f740aee02
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/ComprehensiveExample.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+// $example on$
+import org.apache.spark.graphx.GraphLoader
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Suppose I want to build a graph from some text files, restrict the graph
+ * to important relationships and users, run PageRank on the subgraph, and
+ * then finally return attributes associated with the top users.
+ * This example does all of this in just a few lines with GraphX.
+ *
+ * Run with
+ * {{{
+ * bin/run-example graphx.ComprehensiveExample
+ * }}}
+ */
+object ComprehensiveExample {
+
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession.
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // $example on$
+    // Load my user data and parse into tuples of user id and attribute list
+    val users = (sc.textFile("data/graphx/users.txt")
+      .map(line => line.split(",")).map( parts => (parts.head.toLong, parts.tail) ))
+
+    // Parse the edge data which is already in userId -> userId format
+    val followerGraph = GraphLoader.edgeListFile(sc, "data/graphx/followers.txt")
+
+    // Attach the user attributes
+    val graph = followerGraph.outerJoinVertices(users) {
+      case (uid, deg, Some(attrList)) => attrList
+      // Some users may not have attributes so we set them as empty
+      case (uid, deg, None) => Array.empty[String]
+    }
+
+    // Restrict the graph to users with usernames and names
+    val subgraph = graph.subgraph(vpred = (vid, attr) => attr.size == 2)
+
+    // Compute the PageRank
+    val pagerankGraph = subgraph.pageRank(0.001)
+
+    // Get the attributes of the top pagerank users
+    val userInfoWithPageRank = subgraph.outerJoinVertices(pagerankGraph.vertices) {
+      case (uid, attrList, Some(pr)) => (pr, attrList.toList)
+      case (uid, attrList, None) => (0.0, attrList.toList)
+    }
+
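+    // Print the five highest-ranked users, ordered by the PageRank value stored in _._2._1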
+    println(userInfoWithPageRank.vertices.top(5)(Ordering.by(_._2._1)).mkString("\n"))
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/ConnectedComponentsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/ConnectedComponentsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5377ddb3594b0a79d6ad5707c464b71d9ef3e63e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/ConnectedComponentsExample.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+// $example on$
+import org.apache.spark.graphx.GraphLoader
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * A connected components algorithm example.
+ * The connected components algorithm labels each connected component of the graph
+ * with the ID of its lowest-numbered vertex.
+ * For example, in a social network, connected components can approximate clusters.
+ * GraphX contains an implementation of the algorithm in the
+ * [`ConnectedComponents` object][ConnectedComponents],
+ * and we compute the connected components of the example social network dataset.
+ *
+ * Run with
+ * {{{
+ * bin/run-example graphx.ConnectedComponentsExample
+ * }}}
+ */
+object ConnectedComponentsExample {
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession.
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // $example on$
+    // Load the graph as in the PageRank example
+    val graph = GraphLoader.edgeListFile(sc, "data/graphx/followers.txt")
+    // Find the connected components
+    val cc = graph.connectedComponents().vertices
+    // Join the connected components with the usernames
+    val users = sc.textFile("data/graphx/users.txt").map { line =>
+      val fields = line.split(",")
+      (fields(0).toLong, fields(1))
+    }
+    val ccByUsername = users.join(cc).map {
+      case (id, (username, cc)) => (username, cc)
+    }
+    // Print the result
+    println(ccByUsername.collect().mkString("\n"))
+    // $example off$
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/LiveJournalPageRank.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/LiveJournalPageRank.scala
new file mode 100644
index 0000000000000000000000000000000000000000..da3ffca1a6f2a74d3d9d720a66fe711000726b81
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/LiveJournalPageRank.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+/**
+ * Uses GraphX to run PageRank on a LiveJournal social network graph. Download the dataset from
+ * http://snap.stanford.edu/data/soc-LiveJournal1.html.
+ */
+object LiveJournalPageRank {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      System.err.println(
+        "Usage: LiveJournalPageRank <edge_list_file>\n" +
+          "    --numEPart=<num_edge_partitions>\n" +
+          "        The number of partitions for the graph's edge RDD.\n" +
+          "    [--tol=<tolerance>]\n" +
+          "        The tolerance allowed at convergence (smaller => more accurate). Default is " +
+          "0.001.\n" +
+          "    [--output=<output_file>]\n" +
+          "        If specified, the file to write the ranks to.\n" +
+          "    [--partStrategy=RandomVertexCut | EdgePartition1D | EdgePartition2D | " +
+          "CanonicalRandomVertexCut]\n" +
+          "        The way edges are assigned to edge partitions. Default is RandomVertexCut.")
+      System.exit(-1)
+    }
+
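+    // Prepend the "pagerank" task type to the arguments and delegate to Analytics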
+    Analytics.main(args.patch(0, List("pagerank"), 0))
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/PageRankExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/PageRankExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9e9affca07a18edb7ef5469d7a544573aa101d76
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/PageRankExample.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+// $example on$
+import org.apache.spark.graphx.GraphLoader
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * A PageRank example on a social network dataset.
+ * Run with
+ * {{{
+ * bin/run-example graphx.PageRankExample
+ * }}}
+ */
+object PageRankExample {
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession.
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // $example on$
+    // Load the edges as a graph
+    val graph = GraphLoader.edgeListFile(sc, "data/graphx/followers.txt")
+    // Run PageRank
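+    // (0.0001 is the convergence tolerance: dynamic PageRank iterates until the ranks
+    // change by less than this amount)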
+    val ranks = graph.pageRank(0.0001).vertices
+    // Join the ranks with the usernames
+    val users = sc.textFile("data/graphx/users.txt").map { line =>
+      val fields = line.split(",")
+      (fields(0).toLong, fields(1))
+    }
+    val ranksByUsername = users.join(ranks).map {
+      case (id, (username, rank)) => (username, rank)
+    }
+    // Print the result
+    println(ranksByUsername.collect().mkString("\n"))
+    // $example off$
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/SSSPExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/SSSPExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5e8b19671de7a1a78dea0fdee002710b26a027e2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/SSSPExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+// $example on$
+import org.apache.spark.graphx.{Graph, VertexId}
+import org.apache.spark.graphx.util.GraphGenerators
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example using the Pregel operator to express computation
+ * such as single-source shortest paths.
+ * Run with
+ * {{{
+ * bin/run-example graphx.SSSPExample
+ * }}}
+ */
+object SSSPExample {
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession.
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // $example on$
+    // A graph with edge attributes containing distances
+    val graph: Graph[Long, Double] =
+      GraphGenerators.logNormalGraph(sc, numVertices = 100).mapEdges(e => e.attr.toDouble)
+    val sourceId: VertexId = 42 // The ultimate source
+    // Initialize the graph such that all vertices except the root have distance infinity.
+    val initialGraph = graph.mapVertices((id, _) =>
+        if (id == sourceId) 0.0 else Double.PositiveInfinity)
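+    // Pregel first delivers the initial message (PositiveInfinity) to every vertex, then
+    // repeatedly applies the vertex program, sends messages along edges that offer a
+    // shorter path, and merges concurrent messages with min, until no messages remain.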
+    val sssp = initialGraph.pregel(Double.PositiveInfinity)(
+      (id, dist, newDist) => math.min(dist, newDist), // Vertex Program
+      triplet => {  // Send Message
+        if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
+          Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
+        } else {
+          Iterator.empty
+        }
+      },
+      (a, b) => math.min(a, b) // Merge Message
+    )
+    println(sssp.vertices.collect.mkString("\n"))
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala
new file mode 100644
index 0000000000000000000000000000000000000000..57b2edf992208c5343d2da47c5fc2e943bec33e0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+import java.io.{FileOutputStream, PrintWriter}
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.graphx.{GraphXUtils, PartitionStrategy}
+import org.apache.spark.graphx.util.GraphGenerators
+
+/**
+ * The SynthBenchmark application can be used to run various GraphX algorithms on
+ * synthetic log-normal graphs.  The intent of this code is to enable users to
+ * profile the GraphX system without access to large graph datasets.
+ */
+object SynthBenchmark {
+
+  /**
+   * To run this program use the following:
+   *
+   * MASTER=spark://foobar bin/run-example graphx.SynthBenchmark -app=pagerank
+   *
+   * Options:
+   *   -app "pagerank" or "cc" for pagerank or connected components. (Default: pagerank)
+   *   -niters the number of iterations of pagerank to use (Default: 10)
+   *   -nverts the number of vertices in the graph (Default: 100000)
+   *   -numEPart the number of edge partitions in the graph (Default: number of cores)
+   *   -partStrategy the graph partitioning strategy to use
+   *   -mu the mean parameter for the log-normal graph (Default: 4.0)
+   *   -sigma the stdev parameter for the log-normal graph (Default: 1.3)
+   *   -degFile the local file to save the degree information (Default: Empty)
+   *   -seed seed to use for RNGs (Default: -1, picks seed randomly)
+   */
+  def main(args: Array[String]) {
+    val options = args.map {
+      arg =>
+        arg.dropWhile(_ == '-').split('=') match {
+          case Array(opt, v) => (opt -> v)
+          case _ => throw new IllegalArgumentException(s"Invalid argument: $arg")
+        }
+    }
+
+    var app = "pagerank"
+    var niter = 10
+    var numVertices = 100000
+    var numEPart: Option[Int] = None
+    var partitionStrategy: Option[PartitionStrategy] = None
+    var mu: Double = 4.0
+    var sigma: Double = 1.3
+    var degFile: String = ""
+    var seed: Int = -1
+
+    options.foreach {
+      case ("app", v) => app = v
+      case ("niters", v) => niter = v.toInt
+      case ("nverts", v) => numVertices = v.toInt
+      case ("numEPart", v) => numEPart = Some(v.toInt)
+      case ("partStrategy", v) => partitionStrategy = Some(PartitionStrategy.fromString(v))
+      case ("mu", v) => mu = v.toDouble
+      case ("sigma", v) => sigma = v.toDouble
+      case ("degFile", v) => degFile = v
+      case ("seed", v) => seed = v.toInt
+      case (opt, _) => throw new IllegalArgumentException(s"Invalid option: $opt")
+    }
+
+    val conf = new SparkConf()
+      .setAppName(s"GraphX Synth Benchmark (nverts = $numVertices, app = $app)")
+    GraphXUtils.registerKryoClasses(conf)
+
+    val sc = new SparkContext(conf)
+
+    // Create the graph
+    println("Creating graph...")
+    val unpartitionedGraph = GraphGenerators.logNormalGraph(sc, numVertices,
+      numEPart.getOrElse(sc.defaultParallelism), mu, sigma, seed)
+    // Repartition the graph
+    val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)).cache()
+
+    var startTime = System.currentTimeMillis()
+    val numEdges = graph.edges.count()
+    println(s"Done creating graph. Num Vertices = $numVertices, Num Edges = $numEdges")
+    val loadTime = System.currentTimeMillis() - startTime
+
+    // Collect the degree distribution (if desired)
+    if (!degFile.isEmpty) {
+      val fos = new FileOutputStream(degFile)
+      val pos = new PrintWriter(fos)
+      val hist = graph.vertices.leftJoin(graph.degrees)((id, _, optDeg) => optDeg.getOrElse(0))
+        .map(p => p._2).countByValue()
+      hist.foreach {
+        case (deg, count) => pos.println(s"$deg \t $count")
+      }
+      pos.close()
+    }
+
+    // Run PageRank
+    startTime = System.currentTimeMillis()
+    if (app == "pagerank") {
+      println("Running PageRank")
+      val totalPR = graph.staticPageRank(niter).vertices.map(_._2).sum()
+      println(s"Total PageRank = $totalPR")
+    } else if (app == "cc") {
+      println("Running Connected Components")
+      val numComponents = graph.connectedComponents.vertices.map(_._2).distinct().count()
+      println(s"Number of components = $numComponents")
+    }
+    val runTime = System.currentTimeMillis() - startTime
+
+    println(s"Num Vertices = $numVertices")
+    println(s"Num Edges = $numEdges")
+    println(s"Creation time = ${loadTime/1000.0} seconds")
+    println(s"Run time = ${runTime/1000.0} seconds")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/TriangleCountingExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/TriangleCountingExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b9bff69086cc1ac63c05537599ddf520f4f8ae81
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/graphx/TriangleCountingExample.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.graphx
+
+// $example on$
+import org.apache.spark.graphx.{GraphLoader, PartitionStrategy}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * A vertex is part of a triangle when it has two adjacent vertices with an edge between them.
+ * GraphX implements a triangle counting algorithm in the [`TriangleCount` object][TriangleCount]
+ * that determines the number of triangles passing through each vertex,
+ * providing a measure of clustering.
+ * We compute the triangle count of the social network dataset.
+ *
+ * Note that `TriangleCount` requires the edges to be in canonical orientation (`srcId < dstId`)
+ * and the graph to be partitioned using [`Graph.partitionBy`][Graph.partitionBy].
+ *
+ * Run with
+ * {{{
+ * bin/run-example graphx.TriangleCountingExample
+ * }}}
+ */
+object TriangleCountingExample {
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession.
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    val sc = spark.sparkContext
+
+    // $example on$
+    // Load the edges in canonical orientation and partition the graph for triangle counting
+    val graph = GraphLoader.edgeListFile(sc, "data/graphx/followers.txt", true)
+      .partitionBy(PartitionStrategy.RandomVertexCut)
+    // Find the triangle count for each vertex
+    val triCounts = graph.triangleCount().vertices
+    // Join the triangle counts with the usernames
+    val users = sc.textFile("data/graphx/users.txt").map { line =>
+      val fields = line.split(",")
+      (fields(0).toLong, fields(1))
+    }
+    val triCountByUsername = users.join(triCounts).map { case (id, (username, tc)) =>
+      (username, tc)
+    }
+    // Print the result
+    println(triCountByUsername.collect().mkString("\n"))
+    // $example off$
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..cdb33f4d6d2107a041d1d48ea56dae62b22905a9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.linalg.Vectors
+import org.apache.spark.ml.regression.AFTSurvivalRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating AFTSurvivalRegression.
+ * Run with
+ * {{{
+ * bin/run-example ml.AFTSurvivalRegressionExample
+ * }}}
+ */
+object AFTSurvivalRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("AFTSurvivalRegressionExample")
+      .getOrCreate()
+
+    // $example on$
+    val training = spark.createDataFrame(Seq(
+      (1.218, 1.0, Vectors.dense(1.560, -0.605)),
+      (2.949, 0.0, Vectors.dense(0.346, 2.158)),
+      (3.627, 0.0, Vectors.dense(1.380, 0.231)),
+      (0.273, 1.0, Vectors.dense(0.520, 1.151)),
+      (4.199, 0.0, Vectors.dense(0.795, -0.226))
+    )).toDF("label", "censor", "features")
+    val quantileProbabilities = Array(0.3, 0.6)
+    val aft = new AFTSurvivalRegression()
+      .setQuantileProbabilities(quantileProbabilities)
+      .setQuantilesCol("quantiles")
+
+    val model = aft.fit(training)
+
+    // Print the coefficients, intercept and scale parameter for AFT survival regression
+    println(s"Coefficients: ${model.coefficients}")
+    println(s"Intercept: ${model.intercept}")
+    println(s"Scale: ${model.scale}")
+    model.transform(training).show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8091838a2301ecbe1c628edf3dca6271309bc309
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.recommendation.ALS
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating ALS.
+ * Run with
+ * {{{
+ * bin/run-example ml.ALSExample
+ * }}}
+ */
+object ALSExample {
+
+  // $example on$
+  case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)
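+  // Each input line has the form "userId::movieId::rating::timestamp"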
+  def parseRating(str: String): Rating = {
+    val fields = str.split("::")
+    assert(fields.size == 4)
+    Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat, fields(3).toLong)
+  }
+  // $example off$
+
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("ALSExample")
+      .getOrCreate()
+    import spark.implicits._
+
+    // $example on$
+    val ratings = spark.read.textFile("data/mllib/als/sample_movielens_ratings.txt")
+      .map(parseRating)
+      .toDF()
+    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))
+
+    // Build the recommendation model using ALS on the training data
+    val als = new ALS()
+      .setMaxIter(5)
+      .setRegParam(0.01)
+      .setUserCol("userId")
+      .setItemCol("movieId")
+      .setRatingCol("rating")
+    val model = als.fit(training)
+
+    // Evaluate the model by computing the RMSE on the test data
+    // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
+    model.setColdStartStrategy("drop")
+    val predictions = model.transform(test)
+
+    val evaluator = new RegressionEvaluator()
+      .setMetricName("rmse")
+      .setLabelCol("rating")
+      .setPredictionCol("prediction")
+    val rmse = evaluator.evaluate(predictions)
+    println(s"Root-mean-square error = $rmse")
+
+    // Generate top 10 movie recommendations for each user
+    val userRecs = model.recommendForAllUsers(10)
+    // Generate top 10 user recommendations for each movie
+    val movieRecs = model.recommendForAllItems(10)
+
+    // Generate top 10 movie recommendations for a specified set of users
+    val users = ratings.select(als.getUserCol).distinct().limit(3)
+    val userSubsetRecs = model.recommendForUserSubset(users, 10)
+    // Generate top 10 user recommendations for a specified set of movies
+    val movies = ratings.select(als.getItemCol).distinct().limit(3)
+    val movieSubSetRecs = model.recommendForItemSubset(movies, 10)
+    // $example off$
+    userRecs.show()
+    movieRecs.show()
+    userSubsetRecs.show()
+    movieSubSetRecs.show()
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..c2852aacb05d5583c03e849647e4d8d89b4cde16
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Binarizer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object BinarizerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("BinarizerExample")
+      .getOrCreate()
+
+    // $example on$
+    val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
+    val dataFrame = spark.createDataFrame(data).toDF("id", "feature")
+
+    val binarizer: Binarizer = new Binarizer()
+      .setInputCol("feature")
+      .setOutputCol("binarized_feature")
+      .setThreshold(0.5)
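+    // Values greater than the threshold (0.5) become 1.0; values at or below it become 0.0.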
+
+    val binarizedDataFrame = binarizer.transform(dataFrame)
+
+    println(s"Binarizer output with Threshold = ${binarizer.getThreshold}")
+    binarizedDataFrame.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5f8f2c99cbaf4e3206bd263520de7070f056e3b1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// scalastyle:off println
+
+// $example on$
+import org.apache.spark.ml.clustering.BisectingKMeans
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating bisecting k-means clustering.
+ * Run with
+ * {{{
+ * bin/run-example ml.BisectingKMeansExample
+ * }}}
+ */
+object BisectingKMeansExample {
+
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession
+    val spark = SparkSession
+      .builder
+      .appName("BisectingKMeansExample")
+      .getOrCreate()
+
+    // $example on$
+    // Loads data.
+    val dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
+
+    // Trains a bisecting k-means model.
+    val bkm = new BisectingKMeans().setK(2).setSeed(1)
+    val model = bkm.fit(dataset)
+
+    // Evaluate clustering.
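+    // computeCost returns the sum of squared distances between each point and its
+    // nearest cluster center.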
+    val cost = model.computeCost(dataset)
+    println(s"Within Set Sum of Squared Errors = $cost")
+
+    // Shows the result.
+    println("Cluster Centers: ")
+    val centers = model.clusterCenters
+    centers.foreach(println)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BucketedRandomProjectionLSHExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BucketedRandomProjectionLSHExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..58f9fb30078dc244767aa489656fea8c4a109e1a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BucketedRandomProjectionLSHExample.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.BucketedRandomProjectionLSH
+import org.apache.spark.ml.linalg.Vectors
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions.col
+// $example off$
+
+/**
+ * An example demonstrating BucketedRandomProjectionLSH.
+ * Run with:
+ *   bin/run-example ml.BucketedRandomProjectionLSHExample
+ */
+object BucketedRandomProjectionLSHExample {
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession
+    val spark = SparkSession
+      .builder
+      .appName("BucketedRandomProjectionLSHExample")
+      .getOrCreate()
+
+    // $example on$
+    val dfA = spark.createDataFrame(Seq(
+      (0, Vectors.dense(1.0, 1.0)),
+      (1, Vectors.dense(1.0, -1.0)),
+      (2, Vectors.dense(-1.0, -1.0)),
+      (3, Vectors.dense(-1.0, 1.0))
+    )).toDF("id", "features")
+
+    val dfB = spark.createDataFrame(Seq(
+      (4, Vectors.dense(1.0, 0.0)),
+      (5, Vectors.dense(-1.0, 0.0)),
+      (6, Vectors.dense(0.0, 1.0)),
+      (7, Vectors.dense(0.0, -1.0))
+    )).toDF("id", "features")
+
+    val key = Vectors.dense(1.0, 0.0)
+
+    val brp = new BucketedRandomProjectionLSH()
+      .setBucketLength(2.0)
+      .setNumHashTables(3)
+      .setInputCol("features")
+      .setOutputCol("hashes")
+
+    val model = brp.fit(dfA)
+
+    // Feature Transformation
+    println("The hashed dataset where hashed values are stored in the column 'hashes':")
+    model.transform(dfA).show()
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate
+    // similarity join.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxSimilarityJoin(transformedA, transformedB, 1.5)`
+    println("Approximately joining dfA and dfB on Euclidean distance smaller than 1.5:")
+    model.approxSimilarityJoin(dfA, dfB, 1.5, "EuclideanDistance")
+      .select(col("datasetA.id").alias("idA"),
+        col("datasetB.id").alias("idB"),
+        col("EuclideanDistance")).show()
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate nearest
+    // neighbor search.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxNearestNeighbors(transformedA, key, 2)`
+    println("Approximately searching dfA for 2 nearest neighbors of the key:")
+    model.approxNearestNeighbors(dfA, key, 2).show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..7e65f9c88907d7e0d9452a05a2aece8a489b67bc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Bucketizer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example for Bucketizer.
+ * Run with
+ * {{{
+ * bin/run-example ml.BucketizerExample
+ * }}}
+ */
+object BucketizerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("BucketizerExample")
+      .getOrCreate()
+
+    // $example on$
+    val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
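+    // n+1 split points define n buckets; the infinite endpoints ensure out-of-range values
+    // such as -999.9 and 999.9 still fall into the outermost buckets.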
+
+    val data = Array(-999.9, -0.5, -0.3, 0.0, 0.2, 999.9)
+    val dataFrame = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+    val bucketizer = new Bucketizer()
+      .setInputCol("features")
+      .setOutputCol("bucketedFeatures")
+      .setSplits(splits)
+
+    // Transform original data into its bucket index.
+    val bucketedData = bucketizer.transform(dataFrame)
+
+    println(s"Bucketizer output with ${bucketizer.getSplits.length-1} buckets")
+    bucketedData.show()
+    // $example off$
+
+    // $example on$
+    val splitsArray = Array(
+      Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity),
+      Array(Double.NegativeInfinity, -0.3, 0.0, 0.3, Double.PositiveInfinity))
+
+    val data2 = Array(
+      (-999.9, -999.9),
+      (-0.5, -0.2),
+      (-0.3, -0.1),
+      (0.0, 0.0),
+      (0.2, 0.4),
+      (999.9, 999.9))
+    val dataFrame2 = spark.createDataFrame(data2).toDF("features1", "features2")
+
+    val bucketizer2 = new Bucketizer()
+      .setInputCols(Array("features1", "features2"))
+      .setOutputCols(Array("bucketedFeatures1", "bucketedFeatures2"))
+      .setSplitsArray(splitsArray)
+
+    // Transform original data into its bucket index.
+    val bucketedData2 = bucketizer2.transform(dataFrame2)
+
+    println(s"Bucketizer output with [" +
+      s"${bucketizer2.getSplitsArray(0).length-1}, " +
+      s"${bucketizer2.getSplitsArray(1).length-1}] buckets for each input column")
+    bucketedData2.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5638e66b8792ae0636a2165cffdc44c0b80595ab
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.ChiSqSelector
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object ChiSqSelectorExample {
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder
+      .appName("ChiSqSelectorExample")
+      .getOrCreate()
+    import spark.implicits._
+
+    // $example on$
+    val data = Seq(
+      (7, Vectors.dense(0.0, 0.0, 18.0, 1.0), 1.0),
+      (8, Vectors.dense(0.0, 1.0, 12.0, 0.0), 0.0),
+      (9, Vectors.dense(1.0, 0.0, 15.0, 0.1), 0.0)
+    )
+
+    val df = spark.createDataset(data).toDF("id", "features", "clicked")
+
+    val selector = new ChiSqSelector()
+      .setNumTopFeatures(1)
+      .setFeaturesCol("features")
+      .setLabelCol("clicked")
+      .setOutputCol("selectedFeatures")
+
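+    // With numTopFeatures = 1, only the feature most strongly associated with the
+    // "clicked" label (according to the chi-squared test) is kept.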
+    val result = selector.fit(df).transform(df)
+
+    println(s"ChiSqSelector output with top ${selector.getNumTopFeatures} features selected")
+    result.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ChiSquareTestExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ChiSquareTestExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5146fd03164674db66c1ed9cfb2dba28cc696fce
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ChiSquareTestExample.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.linalg.{Vector, Vectors}
+import org.apache.spark.ml.stat.ChiSquareTest
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example for Chi-square hypothesis testing.
+ * Run with
+ * {{{
+ * bin/run-example ml.ChiSquareTestExample
+ * }}}
+ */
+object ChiSquareTestExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("ChiSquareTestExample")
+      .getOrCreate()
+    import spark.implicits._
+
+    // $example on$
+    val data = Seq(
+      (0.0, Vectors.dense(0.5, 10.0)),
+      (0.0, Vectors.dense(1.5, 20.0)),
+      (1.0, Vectors.dense(1.5, 30.0)),
+      (0.0, Vectors.dense(3.5, 30.0)),
+      (0.0, Vectors.dense(3.5, 40.0)),
+      (1.0, Vectors.dense(3.5, 40.0))
+    )
+
+    val df = data.toDF("label", "features")
+    val chi = ChiSquareTest.test(df, "features", "label").head
+    println(s"pValues = ${chi.getAs[Vector](0)}")
+    println(s"degreesOfFreedom ${chi.getSeq[Int](1).mkString("[", ",", "]")}")
+    println(s"statistics ${chi.getAs[Vector](2)}")
+    // $example off$
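+    // A small aside (not in the upstream example): the test result can also be read by
+    // column name rather than by position; this assumes the same df as above.
+    ChiSquareTest.test(df, "features", "label")
+      .select("pValues", "degreesOfFreedom", "statistics")
+      .show(false)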
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/CorrelationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/CorrelationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d7f1fc8ed74d79b8268c2e29e7734e4cba9e85bd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/CorrelationExample.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.linalg.{Matrix, Vectors}
+import org.apache.spark.ml.stat.Correlation
+import org.apache.spark.sql.Row
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example for computing correlation matrix.
+ * Run with
+ * {{{
+ * bin/run-example ml.CorrelationExample
+ * }}}
+ */
+object CorrelationExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("CorrelationExample")
+      .getOrCreate()
+    import spark.implicits._
+
+    // $example on$
+    val data = Seq(
+      Vectors.sparse(4, Seq((0, 1.0), (3, -2.0))),
+      Vectors.dense(4.0, 5.0, 0.0, 3.0),
+      Vectors.dense(6.0, 7.0, 0.0, 8.0),
+      Vectors.sparse(4, Seq((0, 9.0), (3, 1.0)))
+    )
+
+    val df = data.map(Tuple1.apply).toDF("features")
+    val Row(coeff1: Matrix) = Correlation.corr(df, "features").head
+    println(s"Pearson correlation matrix:\n $coeff1")
+
+    val Row(coeff2: Matrix) = Correlation.corr(df, "features", "spearman").head
+    println(s"Spearman correlation matrix:\n $coeff2")
+    // $example off$
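+    // A hedged aside (not part of the upstream example): individual coefficients can be
+    // read from the returned Matrix by (row, column) index, e.g. features 0 and 3.
+    println(s"Pearson corr(feature 0, feature 3) = ${coeff1(0, 3)}")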
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..91d861dd4380aad3793f46aedabf4e3ba442c57b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object CountVectorizerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("CountVectorizerExample")
+      .getOrCreate()
+
+    // $example on$
+    val df = spark.createDataFrame(Seq(
+      (0, Array("a", "b", "c")),
+      (1, Array("a", "b", "b", "c", "a"))
+    )).toDF("id", "words")
+
+    // fit a CountVectorizerModel from the corpus
+    val cvModel: CountVectorizerModel = new CountVectorizer()
+      .setInputCol("words")
+      .setOutputCol("features")
+      .setVocabSize(3)
+      .setMinDF(2)
+      .fit(df)
+
+    // alternatively, define a CountVectorizerModel with an a priori vocabulary
+    val cvm = new CountVectorizerModel(Array("a", "b", "c"))
+      .setInputCol("words")
+      .setOutputCol("features")
+
+    cvModel.transform(df).show(false)
+    // $example off$
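+    // A hedged aside (not in the upstream example): the a priori model `cvm` defined above
+    // is never applied there; it can be used exactly like the fitted model, and the learned
+    // vocabulary of `cvModel` can be inspected directly.
+    cvm.transform(df).show(false)
+    println(s"Fitted vocabulary: ${cvModel.vocabulary.mkString("[", ", ", "]")}")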
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3383171303ecae404f928f248b6da5f37009dc00
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.DCT
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object DCTExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("DCTExample")
+      .getOrCreate()
+
+    // $example on$
+    val data = Seq(
+      Vectors.dense(0.0, 1.0, -2.0, 3.0),
+      Vectors.dense(-1.0, 2.0, 4.0, -7.0),
+      Vectors.dense(14.0, -2.0, -5.0, 1.0))
+
+    val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+    val dct = new DCT()
+      .setInputCol("features")
+      .setOutputCol("featuresDCT")
+      .setInverse(false)
+
+    val dctDf = dct.transform(df)
+    dctDf.select("featuresDCT").show(false)
+    // $example off$
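+    // A hedged aside (not part of the upstream example): setting inverse = true applies the
+    // inverse DCT, which should reconstruct (up to rounding) the original feature vectors.
+    val idct = new DCT()
+      .setInputCol("featuresDCT")
+      .setOutputCol("reconstructed")
+      .setInverse(true)
+    idct.transform(dctDf).select("reconstructed").show(false)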
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ee4469faab3a0cdc46d0dffb5e447ef8dc334949
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import java.io.File
+
+import scopt.OptionParser
+
+import org.apache.spark.examples.mllib.AbstractParams
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
+import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+import org.apache.spark.util.Utils
+
+/**
+ * An example of how to use [[DataFrame]] for ML. Run with
+ * {{{
+ * ./bin/run-example ml.DataFrameExample [options]
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object DataFrameExample {
+
+  case class Params(input: String = "data/mllib/sample_libsvm_data.txt")
+    extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("DataFrameExample") {
+      head("DataFrameExample: an example app using DataFrame for ML.")
+      opt[String]("input")
+        .text("input path to dataframe")
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        success
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"DataFrameExample with $params")
+      .getOrCreate()
+
+    // Load input data
+    println(s"Loading LIBSVM file with UDT from ${params.input}.")
+    val df: DataFrame = spark.read.format("libsvm").load(params.input).cache()
+    println("Schema from LIBSVM:")
+    df.printSchema()
+    println(s"Loaded training data as a DataFrame with ${df.count()} records.")
+
+    // Show statistical summary of labels.
+    val labelSummary = df.describe("label")
+    labelSummary.show()
+
+    // Convert features column to an RDD of vectors.
+    val features = df.select("features").rdd.map { case Row(v: Vector) => v }
+    val featureSummary = features.aggregate(new MultivariateOnlineSummarizer())(
+      (summary, feat) => summary.add(Vectors.fromML(feat)),
+      (sum1, sum2) => sum1.merge(sum2))
+    println(s"Selected features column with average values:\n ${featureSummary.mean.toString}")
+
+    // Save the records in a parquet file.
+    val tmpDir = Utils.createTempDir()
+    val outputDir = new File(tmpDir, "dataframe").toString
+    println(s"Saving to $outputDir as Parquet file.")
+    df.write.parquet(outputDir)
+
+    // Load the records back.
+    println(s"Loading Parquet file with UDT from $outputDir.")
+    val newDF = spark.read.parquet(outputDir)
+    println("Schema from Parquet:")
+    newDF.printSchema()
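+    // A small sanity check (not in the upstream example): the reloaded DataFrame should
+    // contain the same number of records as the one that was written.
+    println(s"Reloaded ${newDF.count()} records from Parquet.")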
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..276cedab11abc4f88d3fa659dbe7b702b5b6a554
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeClassificationExample.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.classification.DecisionTreeClassificationModel
+import org.apache.spark.ml.classification.DecisionTreeClassifier
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
+import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object DecisionTreeClassificationExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("DecisionTreeClassificationExample")
+      .getOrCreate()
+    // $example on$
+    // Load the data stored in LIBSVM format as a DataFrame.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Index labels, adding metadata to the label column.
+    // Fit on whole dataset to include all labels in index.
+    val labelIndexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("indexedLabel")
+      .fit(data)
+    // Automatically identify categorical features, and index them.
+    val featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4) // features with > 4 distinct values are treated as continuous.
+      .fit(data)
+
+    // Split the data into training and test sets (30% held out for testing).
+    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
+
+    // Train a DecisionTree model.
+    val dt = new DecisionTreeClassifier()
+      .setLabelCol("indexedLabel")
+      .setFeaturesCol("indexedFeatures")
+
+    // Convert indexed labels back to original labels.
+    val labelConverter = new IndexToString()
+      .setInputCol("prediction")
+      .setOutputCol("predictedLabel")
+      .setLabels(labelIndexer.labels)
+
+    // Chain indexers and tree in a Pipeline.
+    val pipeline = new Pipeline()
+      .setStages(Array(labelIndexer, featureIndexer, dt, labelConverter))
+
+    // Train model. This also runs the indexers.
+    val model = pipeline.fit(trainingData)
+
+    // Make predictions.
+    val predictions = model.transform(testData)
+
+    // Select example rows to display.
+    predictions.select("predictedLabel", "label", "features").show(5)
+
+    // Select (prediction, true label) and compute test error.
+    val evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("indexedLabel")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy")
+    val accuracy = evaluator.evaluate(predictions)
+    println(s"Test Error = ${(1.0 - accuracy)}")
+
+    val treeModel = model.stages(2).asInstanceOf[DecisionTreeClassificationModel]
+    println(s"Learned classification tree model:\n ${treeModel.toDebugString}")
+    // $example off$
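+    // A hedged aside (not part of the upstream example): the fitted tree also exposes
+    // per-feature importances as a sparse Vector.
+    println(s"Feature importances: ${treeModel.featureImportances}")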
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..19f2d7751bc5424c1c55011e8e2c2f859d12571a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import java.util.Locale
+
+import scala.collection.mutable
+
+import scopt.OptionParser
+
+import org.apache.spark.examples.mllib.AbstractParams
+import org.apache.spark.ml.{Pipeline, PipelineStage, Transformer}
+import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, DecisionTreeClassifier}
+import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, DecisionTreeRegressor}
+import org.apache.spark.ml.util.MetadataUtils
+import org.apache.spark.mllib.evaluation.{MulticlassMetrics, RegressionMetrics}
+import org.apache.spark.mllib.util.MLUtils
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+/**
+ * An example runner for decision trees. Run with
+ * {{{
+ * ./bin/run-example ml.DecisionTreeExample [options]
+ * }}}
+ * Note that Decision Trees can take a large amount of memory. If the run-example command above
+ * fails, try running via spark-submit and specifying the amount of memory as at least 1g.
+ * For local mode, run
+ * {{{
+ * ./bin/spark-submit --class org.apache.spark.examples.ml.DecisionTreeExample --driver-memory 1g
+ *   [examples JAR path] [options]
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object DecisionTreeExample {
+
+  case class Params(
+      input: String = null,
+      testInput: String = "",
+      dataFormat: String = "libsvm",
+      algo: String = "Classification",
+      maxDepth: Int = 5,
+      maxBins: Int = 32,
+      minInstancesPerNode: Int = 1,
+      minInfoGain: Double = 0.0,
+      fracTest: Double = 0.2,
+      cacheNodeIds: Boolean = false,
+      checkpointDir: Option[String] = None,
+      checkpointInterval: Int = 10) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("DecisionTreeExample") {
+      head("DecisionTreeExample: an example decision tree app.")
+      opt[String]("algo")
+        .text(s"algorithm (classification, regression), default: ${defaultParams.algo}")
+        .action((x, c) => c.copy(algo = x))
+      opt[Int]("maxDepth")
+        .text(s"max depth of the tree, default: ${defaultParams.maxDepth}")
+        .action((x, c) => c.copy(maxDepth = x))
+      opt[Int]("maxBins")
+        .text(s"max number of bins, default: ${defaultParams.maxBins}")
+        .action((x, c) => c.copy(maxBins = x))
+      opt[Int]("minInstancesPerNode")
+        .text(s"min number of instances required at child nodes to create the parent split," +
+          s" default: ${defaultParams.minInstancesPerNode}")
+        .action((x, c) => c.copy(minInstancesPerNode = x))
+      opt[Double]("minInfoGain")
+        .text(s"min info gain required to create a split, default: ${defaultParams.minInfoGain}")
+        .action((x, c) => c.copy(minInfoGain = x))
+      opt[Double]("fracTest")
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
+          s"this option is ignored. default: ${defaultParams.fracTest}")
+        .action((x, c) => c.copy(fracTest = x))
+      opt[Boolean]("cacheNodeIds")
+        .text(s"whether to use node Id cache during training, " +
+          s"default: ${defaultParams.cacheNodeIds}")
+        .action((x, c) => c.copy(cacheNodeIds = x))
+      opt[String]("checkpointDir")
+        .text(s"checkpoint directory where intermediate node Id caches will be stored, " +
+         s"default: ${defaultParams.checkpointDir match {
+           case Some(strVal) => strVal
+           case None => "None"
+         }}")
+        .action((x, c) => c.copy(checkpointDir = Some(x)))
+      opt[Int]("checkpointInterval")
+        .text(s"how often to checkpoint the node Id cache, " +
+         s"default: ${defaultParams.checkpointInterval}")
+        .action((x, c) => c.copy(checkpointInterval = x))
+      opt[String]("testInput")
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
+          s" default: ${defaultParams.testInput}")
+        .action((x, c) => c.copy(testInput = x))
+      opt[String]("dataFormat")
+        .text("data format: libsvm (default), dense (deprecated in Spark v1.1)")
+        .action((x, c) => c.copy(dataFormat = x))
+      arg[String]("<input>")
+        .text("input path to labeled examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        if (params.fracTest < 0 || params.fracTest >= 1) {
+          failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).")
+        } else {
+          success
+        }
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  /** Load a dataset from the given path, using the given format */
+  private[ml] def loadData(
+      spark: SparkSession,
+      path: String,
+      format: String,
+      expectedNumFeatures: Option[Int] = None): DataFrame = {
+    import spark.implicits._
+
+    format match {
+      case "dense" => MLUtils.loadLabeledPoints(spark.sparkContext, path).toDF()
+      case "libsvm" => expectedNumFeatures match {
+        case Some(numFeatures) => spark.read.option("numFeatures", numFeatures.toString)
+          .format("libsvm").load(path)
+        case None => spark.read.format("libsvm").load(path)
+      }
+      case _ => throw new IllegalArgumentException(s"Bad data format: $format")
+    }
+  }
+
+  /**
+   * Load training and test data from files.
+   * @param input  Path to input dataset.
+   * @param dataFormat  "libsvm" or "dense"
+   * @param testInput  Path to test dataset.
+   * @param algo  Classification or Regression
+   * @param fracTest  Fraction of input data to hold out for testing. Ignored if testInput given.
+   * @return  (training dataset, test dataset)
+   */
+  private[ml] def loadDatasets(
+      input: String,
+      dataFormat: String,
+      testInput: String,
+      algo: String,
+      fracTest: Double): (DataFrame, DataFrame) = {
+    val spark = SparkSession
+      .builder
+      .getOrCreate()
+
+    // Load training data
+    val origExamples: DataFrame = loadData(spark, input, dataFormat)
+
+    // Load or create test set
+    val dataframes: Array[DataFrame] = if (testInput != "") {
+      // Load testInput.
+      val numFeatures = origExamples.first().getAs[Vector](1).size
+      val origTestExamples: DataFrame =
+        loadData(spark, testInput, dataFormat, Some(numFeatures))
+      Array(origExamples, origTestExamples)
+    } else {
+      // Split input into training, test.
+      origExamples.randomSplit(Array(1.0 - fracTest, fracTest), seed = 12345)
+    }
+
+    val training = dataframes(0).cache()
+    val test = dataframes(1).cache()
+
+    val numTraining = training.count()
+    val numTest = test.count()
+    val numFeatures = training.select("features").first().getAs[Vector](0).size
+    println("Loaded data:")
+    println(s"  numTraining = $numTraining, numTest = $numTest")
+    println(s"  numFeatures = $numFeatures")
+
+    (training, test)
+  }
+
+  def run(params: Params): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"DecisionTreeExample with $params")
+      .getOrCreate()
+
+    params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
+    val algo = params.algo.toLowerCase(Locale.ROOT)
+
+    println(s"DecisionTreeExample with parameters:\n$params")
+
+    // Load training and test data and cache it.
+    val (training: DataFrame, test: DataFrame) =
+      loadDatasets(params.input, params.dataFormat, params.testInput, algo, params.fracTest)
+
+    // Set up Pipeline.
+    val stages = new mutable.ArrayBuffer[PipelineStage]()
+    // (1) For classification, re-index classes.
+    val labelColName = if (algo == "classification") "indexedLabel" else "label"
+    if (algo == "classification") {
+      val labelIndexer = new StringIndexer()
+        .setInputCol("label")
+        .setOutputCol(labelColName)
+      stages += labelIndexer
+    }
+    // (2) Identify categorical features using VectorIndexer.
+    //     Features with more than maxCategories values will be treated as continuous.
+    val featuresIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(10)
+    stages += featuresIndexer
+    // (3) Learn Decision Tree.
+    val dt = algo match {
+      case "classification" =>
+        new DecisionTreeClassifier()
+          .setFeaturesCol("indexedFeatures")
+          .setLabelCol(labelColName)
+          .setMaxDepth(params.maxDepth)
+          .setMaxBins(params.maxBins)
+          .setMinInstancesPerNode(params.minInstancesPerNode)
+          .setMinInfoGain(params.minInfoGain)
+          .setCacheNodeIds(params.cacheNodeIds)
+          .setCheckpointInterval(params.checkpointInterval)
+      case "regression" =>
+        new DecisionTreeRegressor()
+          .setFeaturesCol("indexedFeatures")
+          .setLabelCol(labelColName)
+          .setMaxDepth(params.maxDepth)
+          .setMaxBins(params.maxBins)
+          .setMinInstancesPerNode(params.minInstancesPerNode)
+          .setMinInfoGain(params.minInfoGain)
+          .setCacheNodeIds(params.cacheNodeIds)
+          .setCheckpointInterval(params.checkpointInterval)
+      case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+    stages += dt
+    val pipeline = new Pipeline().setStages(stages.toArray)
+
+    // Fit the Pipeline.
+    val startTime = System.nanoTime()
+    val pipelineModel = pipeline.fit(training)
+    val elapsedTime = (System.nanoTime() - startTime) / 1e9
+    println(s"Training time: $elapsedTime seconds")
+
+    // Get the trained Decision Tree from the fitted PipelineModel.
+    algo match {
+      case "classification" =>
+        val treeModel = pipelineModel.stages.last.asInstanceOf[DecisionTreeClassificationModel]
+        if (treeModel.numNodes < 20) {
+          println(treeModel.toDebugString) // Print full model.
+        } else {
+          println(treeModel) // Print model summary.
+        }
+      case "regression" =>
+        val treeModel = pipelineModel.stages.last.asInstanceOf[DecisionTreeRegressionModel]
+        if (treeModel.numNodes < 20) {
+          println(treeModel.toDebugString) // Print full model.
+        } else {
+          println(treeModel) // Print model summary.
+        }
+      case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+
+    // Evaluate model on training, test data.
+    algo match {
+      case "classification" =>
+        println("Training data results:")
+        evaluateClassificationModel(pipelineModel, training, labelColName)
+        println("Test data results:")
+        evaluateClassificationModel(pipelineModel, test, labelColName)
+      case "regression" =>
+        println("Training data results:")
+        evaluateRegressionModel(pipelineModel, training, labelColName)
+        println("Test data results:")
+        evaluateRegressionModel(pipelineModel, test, labelColName)
+      case _ =>
+        throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+
+    spark.stop()
+  }
+
+  /**
+   * Evaluate the given ClassificationModel on data. Print the results.
+   * @param model  Must fit ClassificationModel abstraction
+   * @param data  DataFrame with "prediction" and labelColName columns
+   * @param labelColName  Name of the labelCol parameter for the model
+   *
+   * TODO: Change model type to ClassificationModel once that API is public. SPARK-5995
+   */
+  private[ml] def evaluateClassificationModel(
+      model: Transformer,
+      data: DataFrame,
+      labelColName: String): Unit = {
+    val fullPredictions = model.transform(data).cache()
+    val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
+    val labels = fullPredictions.select(labelColName).rdd.map(_.getDouble(0))
+    // Print number of classes for reference.
+    val numClasses = MetadataUtils.getNumClasses(fullPredictions.schema(labelColName)) match {
+      case Some(n) => n
+      case None => throw new RuntimeException(
+        "Unknown failure when indexing labels for classification.")
+    }
+    val accuracy = new MulticlassMetrics(predictions.zip(labels)).accuracy
+    println(s"  Accuracy ($numClasses classes): $accuracy")
+  }
+
+  /**
+   * Evaluate the given RegressionModel on data. Print the results.
+   * @param model  Must fit RegressionModel abstraction
+   * @param data  DataFrame with "prediction" and labelColName columns
+   * @param labelColName  Name of the labelCol parameter for the model
+   *
+   * TODO: Change model type to RegressionModel once that API is public. SPARK-5995
+   */
+  private[ml] def evaluateRegressionModel(
+      model: Transformer,
+      data: DataFrame,
+      labelColName: String): Unit = {
+    val fullPredictions = model.transform(data).cache()
+    val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
+    val labels = fullPredictions.select(labelColName).rdd.map(_.getDouble(0))
+    val rmse = new RegressionMetrics(predictions.zip(labels)).rootMeanSquaredError
+    println(s"  Root mean squared error (RMSE): $rmse")
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..aaaecaea470818e080c96a9b3c600993f9d994fe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeRegressionExample.scala
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.feature.VectorIndexer
+import org.apache.spark.ml.regression.DecisionTreeRegressionModel
+import org.apache.spark.ml.regression.DecisionTreeRegressor
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object DecisionTreeRegressionExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("DecisionTreeRegressionExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load the data stored in LIBSVM format as a DataFrame.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Automatically identify categorical features, and index them.
+    // Here, we treat features with > 4 distinct values as continuous.
+    val featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data)
+
+    // Split the data into training and test sets (30% held out for testing).
+    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
+
+    // Train a DecisionTree model.
+    val dt = new DecisionTreeRegressor()
+      .setLabelCol("label")
+      .setFeaturesCol("indexedFeatures")
+
+    // Chain indexer and tree in a Pipeline.
+    val pipeline = new Pipeline()
+      .setStages(Array(featureIndexer, dt))
+
+    // Train model. This also runs the indexer.
+    val model = pipeline.fit(trainingData)
+
+    // Make predictions.
+    val predictions = model.transform(testData)
+
+    // Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5)
+
+    // Select (prediction, true label) and compute test error.
+    val evaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("rmse")
+    val rmse = evaluator.evaluate(predictions)
+    println(s"Root Mean Squared Error (RMSE) on test data = $rmse")
+
+    val treeModel = model.stages(1).asInstanceOf[DecisionTreeRegressionModel]
+    println(s"Learned regression tree model:\n ${treeModel.toDebugString}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2dc11b07d88efb30e22917406a282585e93804ae
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.ml.classification.{ClassificationModel, Classifier, ClassifierParams}
+import org.apache.spark.ml.feature.LabeledPoint
+import org.apache.spark.ml.linalg.{BLAS, Vector, Vectors}
+import org.apache.spark.ml.param.{IntParam, ParamMap}
+import org.apache.spark.ml.util.Identifiable
+import org.apache.spark.sql.{Dataset, Row, SparkSession}
+
+/**
+ * A simple example demonstrating how to write your own learning algorithm using Estimator,
+ * Transformer, and other abstractions.
+ * This mimics [[org.apache.spark.ml.classification.LogisticRegression]].
+ * Run with
+ * {{{
+ * bin/run-example ml.DeveloperApiExample
+ * }}}
+ */
+object DeveloperApiExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("DeveloperApiExample")
+      .getOrCreate()
+    import spark.implicits._
+
+    // Prepare training data.
+    val training = spark.createDataFrame(Seq(
+      LabeledPoint(1.0, Vectors.dense(0.0, 1.1, 0.1)),
+      LabeledPoint(0.0, Vectors.dense(2.0, 1.0, -1.0)),
+      LabeledPoint(0.0, Vectors.dense(2.0, 1.3, 1.0)),
+      LabeledPoint(1.0, Vectors.dense(0.0, 1.2, -0.5))))
+
+    // Create a LogisticRegression instance. This instance is an Estimator.
+    val lr = new MyLogisticRegression()
+    // Print out the parameters, documentation, and any default values.
+    println(s"MyLogisticRegression parameters:\n ${lr.explainParams()}")
+
+    // We may set parameters using setter methods.
+    lr.setMaxIter(10)
+
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
+    val model = lr.fit(training.toDF())
+
+    // Prepare test data.
+    val test = spark.createDataFrame(Seq(
+      LabeledPoint(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
+      LabeledPoint(0.0, Vectors.dense(3.0, 2.0, -0.1)),
+      LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5))))
+
+    // Make predictions on test data.
+    val sumPredictions: Double = model.transform(test)
+      .select("features", "label", "prediction")
+      .collect()
+      .map { case Row(features: Vector, label: Double, prediction: Double) =>
+        prediction
+      }.sum
+    assert(sumPredictions == 0.0,
+      "MyLogisticRegression predicted something other than 0, even though all coefficients are 0!")
+
+    spark.stop()
+  }
+}
+
+/**
+ * Example of defining a parameter trait for a user-defined type of [[Classifier]].
+ *
+ * NOTE: This is private since it is an example. In practice, you may not want it to be private.
+ */
+private trait MyLogisticRegressionParams extends ClassifierParams {
+
+  /**
+   * Param for max number of iterations
+   *
+   * NOTE: The usual way to add a parameter to a model or algorithm is to include:
+   *   - val myParamName: ParamType
+   *   - def getMyParamName
+   *   - def setMyParamName
+   * Here, we have a trait to be mixed in with the Estimator and Model (MyLogisticRegression
+   * and MyLogisticRegressionModel). We place the setter (setMaxIter) method in the Estimator
+   * class since the maxIter parameter is only used during training (not in the Model).
+   */
+  val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations")
+  def getMaxIter: Int = $(maxIter)
+}
+
+/**
+ * Example of defining a type of [[Classifier]].
+ *
+ * NOTE: This is private since it is an example. In practice, you may not want it to be private.
+ */
+private class MyLogisticRegression(override val uid: String)
+  extends Classifier[Vector, MyLogisticRegression, MyLogisticRegressionModel]
+  with MyLogisticRegressionParams {
+
+  def this() = this(Identifiable.randomUID("myLogReg"))
+
+  setMaxIter(100) // Initialize
+
+  // The parameter setter is in this class since it should return type MyLogisticRegression.
+  def setMaxIter(value: Int): this.type = set(maxIter, value)
+
+  // This method is used by fit()
+  override protected def train(dataset: Dataset[_]): MyLogisticRegressionModel = {
+    // Extract columns from data using helper method.
+    val oldDataset = extractLabeledPoints(dataset)
+
+    // Do learning to estimate the coefficients vector.
+    val numFeatures = oldDataset.take(1)(0).features.size
+    val coefficients = Vectors.zeros(numFeatures) // Learning would happen here.
+
+    // Create a model, and return it.
+    new MyLogisticRegressionModel(uid, coefficients).setParent(this)
+  }
+
+  override def copy(extra: ParamMap): MyLogisticRegression = defaultCopy(extra)
+}
+
+/**
+ * Example of defining a type of [[ClassificationModel]].
+ *
+ * NOTE: This is private since it is an example. In practice, you may not want it to be private.
+ */
+private class MyLogisticRegressionModel(
+    override val uid: String,
+    val coefficients: Vector)
+  extends ClassificationModel[Vector, MyLogisticRegressionModel]
+  with MyLogisticRegressionParams {
+
+  // This uses the default implementation of transform(), which reads column "features" and outputs
+  // columns "prediction" and "rawPrediction."
+
+  // This uses the default implementation of predict(), which chooses the label corresponding to
+  // the maximum value returned by [[predictRaw()]].
+
+  /**
+   * Raw prediction for each possible label.
+   * The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives
+   * a measure of confidence in each possible label (where larger = more confident).
+   * This internal method is used to implement [[transform()]] and output [[rawPredictionCol]].
+   *
+   * @return  vector where element i is the raw prediction for label i.
+   *          This raw prediction may be any real number, where a larger value indicates greater
+   *          confidence for that label.
+   */
+  override protected def predictRaw(features: Vector): Vector = {
+    val margin = BLAS.dot(features, coefficients)
+    // There are 2 classes (binary classification), so we return a length-2 vector,
+    // where index i corresponds to class i (i = 0, 1).
+    Vectors.dense(-margin, margin)
+  }
+
+  // Number of classes the label can take. 2 indicates binary classification.
+  override val numClasses: Int = 2
+
+  // Number of features the model was trained on.
+  override val numFeatures: Int = coefficients.size
+
+  /**
+   * Create a copy of the model.
+   * The copy is shallow, except for the embedded paramMap, which gets a deep copy.
+   *
+   * This is used for the default implementation of [[transform()]].
+   */
+  override def copy(extra: ParamMap): MyLogisticRegressionModel = {
+    copyValues(new MyLogisticRegressionModel(uid, coefficients), extra).setParent(parent)
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..c0ffc01934b6fb262f803a80b02e2560825ee218
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ElementwiseProductExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.ElementwiseProduct
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object ElementwiseProductExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("ElementwiseProductExample")
+      .getOrCreate()
+
+    // $example on$
+    // Create some vector data; also works for sparse vectors
+    val dataFrame = spark.createDataFrame(Seq(
+      ("a", Vectors.dense(1.0, 2.0, 3.0)),
+      ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector")
+
+    val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
+    val transformer = new ElementwiseProduct()
+      .setScalingVec(transformingVector)
+      .setInputCol("vector")
+      .setOutputCol("transformedVector")
+
+    // Batch transform the vectors to create new column:
+    transformer.transform(dataFrame).show()
+    // $example off$
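+    // A hedged aside (not in the upstream example): as the comment above notes, the same
+    // transformer also handles sparse vectors.
+    val sparseDF = spark.createDataFrame(Seq(
+      ("c", Vectors.sparse(3, Seq((0, 7.0), (2, 9.0))))
+    )).toDF("id", "vector")
+    transformer.transform(sparseDF).show()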
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..e5d91f132a3f2158aa988fbf466e63fd4ab2f22f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression
+import org.apache.spark.ml.linalg.{Vector, Vectors}
+import org.apache.spark.ml.param.ParamMap
+import org.apache.spark.sql.Row
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object EstimatorTransformerParamExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("EstimatorTransformerParamExample")
+      .getOrCreate()
+
+    // $example on$
+    // Prepare training data from a list of (label, features) tuples.
+    val training = spark.createDataFrame(Seq(
+      (1.0, Vectors.dense(0.0, 1.1, 0.1)),
+      (0.0, Vectors.dense(2.0, 1.0, -1.0)),
+      (0.0, Vectors.dense(2.0, 1.3, 1.0)),
+      (1.0, Vectors.dense(0.0, 1.2, -0.5))
+    )).toDF("label", "features")
+
+    // Create a LogisticRegression instance. This instance is an Estimator.
+    val lr = new LogisticRegression()
+    // Print out the parameters, documentation, and any default values.
+    println(s"LogisticRegression parameters:\n ${lr.explainParams()}\n")
+
+    // We may set parameters using setter methods.
+    lr.setMaxIter(10)
+      .setRegParam(0.01)
+
+    // Learn a LogisticRegression model. This uses the parameters stored in lr.
+    val model1 = lr.fit(training)
+    // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
+    // we can view the parameters it used during fit().
+    // This prints the parameter (name: value) pairs, where names are unique IDs for this
+    // LogisticRegression instance.
+    println(s"Model 1 was fit using parameters: ${model1.parent.extractParamMap}")
+
+    // We may alternatively specify parameters using a ParamMap,
+    // which supports several methods for specifying parameters.
+    val paramMap = ParamMap(lr.maxIter -> 20)
+      .put(lr.maxIter, 30)  // Specify 1 Param. This overwrites the original maxIter.
+      .put(lr.regParam -> 0.1, lr.threshold -> 0.55)  // Specify multiple Params.
+
+    // One can also combine ParamMaps.
+    val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability")  // Change output column name.
+    val paramMapCombined = paramMap ++ paramMap2
+
+    // Now learn a new model using the paramMapCombined parameters.
+    // paramMapCombined overrides all parameters set earlier via lr.set* methods.
+    val model2 = lr.fit(training, paramMapCombined)
+    println(s"Model 2 was fit using parameters: ${model2.parent.extractParamMap}")
+
+    // Prepare test data.
+    val test = spark.createDataFrame(Seq(
+      (1.0, Vectors.dense(-1.0, 1.5, 1.3)),
+      (0.0, Vectors.dense(3.0, 2.0, -0.1)),
+      (1.0, Vectors.dense(0.0, 2.2, -1.5))
+    )).toDF("label", "features")
+
+    // Make predictions on test data using the Transformer.transform() method.
+    // LogisticRegression.transform will only use the 'features' column.
+    // Note that model2.transform() outputs a 'myProbability' column instead of the usual
+    // 'probability' column since we renamed the lr.probabilityCol parameter previously.
+    model2.transform(test)
+      .select("features", "label", "myProbability", "prediction")
+      .collect()
+      .foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) =>
+        println(s"($features, $label) -> prob=$prob, prediction=$prediction")
+      }
+    // $example off$
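+    // A hedged aside (not in the upstream example): parameter values can also be read back
+    // from the fitted model through its generated getters.
+    println(s"model2 maxIter = ${model2.getMaxIter}, regParam = ${model2.getRegParam}")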
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..59110d70de550791286a12c8925a5fcaf4f06ba1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// scalastyle:off println
+
+// $example on$
+import org.apache.spark.ml.fpm.FPGrowth
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating FP-Growth.
+ * Run with
+ * {{{
+ * bin/run-example ml.FPGrowthExample
+ * }}}
+ */
+object FPGrowthExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    import spark.implicits._
+
+    // $example on$
+    val dataset = spark.createDataset(Seq(
+      "1 2 5",
+      "1 2 3 5",
+      "1 2")
+    ).map(t => t.split(" ")).toDF("items")
+
+    val fpgrowth = new FPGrowth().setItemsCol("items").setMinSupport(0.5).setMinConfidence(0.6)
+    val model = fpgrowth.fit(dataset)
+
+    // Display frequent itemsets.
+    model.freqItemsets.show()
+
+    // Display generated association rules.
+    model.associationRules.show()
+
+    // transform examines the input items against all the association rules and summarizes the
+    // consequents as the prediction
+    model.transform(dataset).show()
+    // $example off$
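+    // A hedged aside (not in the upstream example): the fitted model can also score new
+    // transactions, built here in the same way as the training dataset above.
+    val newTransactions = spark.createDataset(Seq("1 5", "2")).map(_.split(" ")).toDF("items")
+    model.transform(newTransactions).show(false)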
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/FeatureHasherExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/FeatureHasherExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1aed10bfb2d384e30c2f123d66169fabc35df405
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/FeatureHasherExample.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.FeatureHasher
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object FeatureHasherExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("FeatureHasherExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataset = spark.createDataFrame(Seq(
+      (2.2, true, "1", "foo"),
+      (3.3, false, "2", "bar"),
+      (4.4, false, "3", "baz"),
+      (5.5, false, "4", "foo")
+    )).toDF("real", "bool", "stringNum", "string")
+
+    val hasher = new FeatureHasher()
+      .setInputCols("real", "bool", "stringNum", "string")
+      .setOutputCol("features")
+
+    val featurized = hasher.transform(dataset)
+    featurized.show(false)
+    // $example off$
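+    // A hedged aside (not in the upstream example): the dimensionality of the hashed
+    // feature vector can be controlled explicitly via setNumFeatures.
+    val smallHasher = new FeatureHasher()
+      .setInputCols("real", "bool", "stringNum", "string")
+      .setOutputCol("features")
+      .setNumFeatures(256)
+    smallHasher.transform(dataset).show(false)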
+
+    spark.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8f3ce4b315bd3d5929fbeae451249a728cd8cc82
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import java.util.Locale
+
+import scala.collection.mutable
+
+import scopt.OptionParser
+
+import org.apache.spark.examples.mllib.AbstractParams
+import org.apache.spark.ml.{Pipeline, PipelineStage}
+import org.apache.spark.ml.classification.{GBTClassificationModel, GBTClassifier}
+import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
+import org.apache.spark.ml.regression.{GBTRegressionModel, GBTRegressor}
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+
+/**
+ * An example runner for Gradient-Boosted Trees (GBTs). Run with
+ * {{{
+ * ./bin/run-example ml.GBTExample [options]
+ * }}}
+ * Decision Trees and ensembles can take a large amount of memory. If the run-example command
+ * above fails, try running via spark-submit and specifying the amount of memory as at least 1g.
+ * For local mode, run
+ * {{{
+ * ./bin/spark-submit --class org.apache.spark.examples.ml.GBTExample --driver-memory 1g
+ *   [examples JAR path] [options]
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object GBTExample {
+
+  case class Params(
+      input: String = null,
+      testInput: String = "",
+      dataFormat: String = "libsvm",
+      algo: String = "classification",
+      maxDepth: Int = 5,
+      maxBins: Int = 32,
+      minInstancesPerNode: Int = 1,
+      minInfoGain: Double = 0.0,
+      maxIter: Int = 10,
+      fracTest: Double = 0.2,
+      cacheNodeIds: Boolean = false,
+      checkpointDir: Option[String] = None,
+      checkpointInterval: Int = 10) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("GBTExample") {
+      head("GBTExample: an example Gradient-Boosted Trees app.")
+      opt[String]("algo")
+        .text(s"algorithm (classification, regression), default: ${defaultParams.algo}")
+        .action((x, c) => c.copy(algo = x))
+      opt[Int]("maxDepth")
+        .text(s"max depth of the tree, default: ${defaultParams.maxDepth}")
+        .action((x, c) => c.copy(maxDepth = x))
+      opt[Int]("maxBins")
+        .text(s"max number of bins, default: ${defaultParams.maxBins}")
+        .action((x, c) => c.copy(maxBins = x))
+      opt[Int]("minInstancesPerNode")
+        .text(s"min number of instances required at child nodes to create the parent split," +
+        s" default: ${defaultParams.minInstancesPerNode}")
+        .action((x, c) => c.copy(minInstancesPerNode = x))
+      opt[Double]("minInfoGain")
+        .text(s"min info gain required to create a split, default: ${defaultParams.minInfoGain}")
+        .action((x, c) => c.copy(minInfoGain = x))
+      opt[Int]("maxIter")
+        .text(s"number of trees in ensemble, default: ${defaultParams.maxIter}")
+        .action((x, c) => c.copy(maxIter = x))
+      opt[Double]("fracTest")
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
+        s"this option is ignored. default: ${defaultParams.fracTest}")
+        .action((x, c) => c.copy(fracTest = x))
+      opt[Boolean]("cacheNodeIds")
+        .text(s"whether to use node Id cache during training, " +
+        s"default: ${defaultParams.cacheNodeIds}")
+        .action((x, c) => c.copy(cacheNodeIds = x))
+      opt[String]("checkpointDir")
+        .text(s"checkpoint directory where intermediate node Id caches will be stored, " +
+        s"default: ${
+          defaultParams.checkpointDir match {
+            case Some(strVal) => strVal
+            case None => "None"
+          }
+        }")
+        .action((x, c) => c.copy(checkpointDir = Some(x)))
+      opt[Int]("checkpointInterval")
+        .text(s"how often to checkpoint the node Id cache, " +
+        s"default: ${defaultParams.checkpointInterval}")
+        .action((x, c) => c.copy(checkpointInterval = x))
+      opt[String]("testInput")
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
+        s" default: ${defaultParams.testInput}")
+        .action((x, c) => c.copy(testInput = x))
+      opt[String]("dataFormat")
+        .text("data format: libsvm (default), dense (deprecated in Spark v1.1)")
+        .action((x, c) => c.copy(dataFormat = x))
+      arg[String]("<input>")
+        .text("input path to labeled examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        if (params.fracTest < 0 || params.fracTest >= 1) {
+          failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).")
+        } else {
+          success
+        }
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"GBTExample with $params")
+      .getOrCreate()
+
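+    // If a checkpoint directory was supplied, use it for periodic checkpointing of the
+    // node Id cache during training.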
+    params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
+    val algo = params.algo.toLowerCase(Locale.ROOT)
+
+    println(s"GBTExample with parameters:\n$params")
+
+    // Load training and test data and cache it.
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
+      params.dataFormat, params.testInput, algo, params.fracTest)
+
+    // Set up Pipeline
+    val stages = new mutable.ArrayBuffer[PipelineStage]()
+    // (1) For classification, re-index classes.
+    val labelColName = if (algo == "classification") "indexedLabel" else "label"
+    if (algo == "classification") {
+      val labelIndexer = new StringIndexer()
+        .setInputCol("label")
+        .setOutputCol(labelColName)
+      stages += labelIndexer
+    }
+    // (2) Identify categorical features using VectorIndexer.
+    //     Features with more than maxCategories values will be treated as continuous.
+    val featuresIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(10)
+    stages += featuresIndexer
+    // (3) Learn GBT.
+    val gbt = algo match {
+      case "classification" =>
+        new GBTClassifier()
+          .setFeaturesCol("indexedFeatures")
+          .setLabelCol(labelColName)
+          .setMaxDepth(params.maxDepth)
+          .setMaxBins(params.maxBins)
+          .setMinInstancesPerNode(params.minInstancesPerNode)
+          .setMinInfoGain(params.minInfoGain)
+          .setCacheNodeIds(params.cacheNodeIds)
+          .setCheckpointInterval(params.checkpointInterval)
+          .setMaxIter(params.maxIter)
+      case "regression" =>
+        new GBTRegressor()
+          .setFeaturesCol("indexedFeatures")
+          .setLabelCol(labelColName)
+          .setMaxDepth(params.maxDepth)
+          .setMaxBins(params.maxBins)
+          .setMinInstancesPerNode(params.minInstancesPerNode)
+          .setMinInfoGain(params.minInfoGain)
+          .setCacheNodeIds(params.cacheNodeIds)
+          .setCheckpointInterval(params.checkpointInterval)
+          .setMaxIter(params.maxIter)
+      case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+    stages += gbt
+    val pipeline = new Pipeline().setStages(stages.toArray)
+
+    // Fit the Pipeline.
+    val startTime = System.nanoTime()
+    val pipelineModel = pipeline.fit(training)
+    val elapsedTime = (System.nanoTime() - startTime) / 1e9
+    println(s"Training time: $elapsedTime seconds")
+
+    // Get the trained GBT from the fitted PipelineModel.
+    algo match {
+      case "classification" =>
+        val gbtModel = pipelineModel.stages.last.asInstanceOf[GBTClassificationModel]
+        if (gbtModel.totalNumNodes < 30) {
+          println(gbtModel.toDebugString) // Print full model.
+        } else {
+          println(gbtModel) // Print model summary.
+        }
+      case "regression" =>
+        val gbtModel = pipelineModel.stages.last.asInstanceOf[GBTRegressionModel]
+        if (gbtModel.totalNumNodes < 30) {
+          println(gbtModel.toDebugString) // Print full model.
+        } else {
+          println(gbtModel) // Print model summary.
+        }
+      case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+
+    // Evaluate model on training, test data.
+    algo match {
+      case "classification" =>
+        println("Training data results:")
+        DecisionTreeExample.evaluateClassificationModel(pipelineModel, training, labelColName)
+        println("Test data results:")
+        DecisionTreeExample.evaluateClassificationModel(pipelineModel, test, labelColName)
+      case "regression" =>
+        println("Training data results:")
+        DecisionTreeExample.evaluateRegressionModel(pipelineModel, training, labelColName)
+        println("Test data results:")
+        DecisionTreeExample.evaluateRegressionModel(pipelineModel, test, labelColName)
+      case _ =>
+        throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5e4bea4c4fb66a2b09d770e94b660eeb1eafa637
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GaussianMixtureExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// scalastyle:off println
+
+// $example on$
+import org.apache.spark.ml.clustering.GaussianMixture
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating Gaussian Mixture Model (GMM).
+ * Run with
+ * {{{
+ * bin/run-example ml.GaussianMixtureExample
+ * }}}
+ */
+object GaussianMixtureExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+        .builder
+        .appName(s"${this.getClass.getSimpleName}")
+        .getOrCreate()
+
+    // $example on$
+    // Loads data
+    val dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
+
+    // Trains a Gaussian Mixture Model with k = 2 components.
+    val gmm = new GaussianMixture()
+      .setK(2)
+    val model = gmm.fit(dataset)
+
+    // Output the parameters of the fitted mixture model.
+    for (i <- 0 until model.getK) {
+      println(s"Gaussian $i:\nweight=${model.weights(i)}\n" +
+          s"mu=${model.gaussians(i).mean}\nsigma=\n${model.gaussians(i).cov}\n")
+    }
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1b86d7cad0b383236a1ec48c01cd2c73291ad1d5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.regression.GeneralizedLinearRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating generalized linear regression.
+ * Run with
+ * {{{
+ * bin/run-example ml.GeneralizedLinearRegressionExample
+ * }}}
+ */
+
+object GeneralizedLinearRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("GeneralizedLinearRegressionExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load training data
+    val dataset = spark.read.format("libsvm")
+      .load("data/mllib/sample_linear_regression_data.txt")
+
+    val glr = new GeneralizedLinearRegression()
+      .setFamily("gaussian")
+      .setLink("identity")
+      .setMaxIter(10)
+      .setRegParam(0.3)
+
+    // Fit the model
+    val model = glr.fit(dataset)
+
+    // Print the coefficients and intercept for generalized linear regression model
+    println(s"Coefficients: ${model.coefficients}")
+    println(s"Intercept: ${model.intercept}")
+
+    // Summarize the model over the training set and print out some metrics
+    val summary = model.summary
+    println(s"Coefficient Standard Errors: ${summary.coefficientStandardErrors.mkString(",")}")
+    println(s"T Values: ${summary.tValues.mkString(",")}")
+    println(s"P Values: ${summary.pValues.mkString(",")}")
+    println(s"Dispersion: ${summary.dispersion}")
+    println(s"Null Deviance: ${summary.nullDeviance}")
+    println(s"Residual Degree Of Freedom Null: ${summary.residualDegreeOfFreedomNull}")
+    println(s"Deviance: ${summary.deviance}")
+    println(s"Residual Degree Of Freedom: ${summary.residualDegreeOfFreedom}")
+    println(s"AIC: ${summary.aic}")
+    println("Deviance Residuals: ")
+    summary.residuals().show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ef78c0a1145ef95c17c49ca99c41bf606e49d0a0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeClassifierExample.scala
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.classification.{GBTClassificationModel, GBTClassifier}
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
+import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object GradientBoostedTreeClassifierExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("GradientBoostedTreeClassifierExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Index labels, adding metadata to the label column.
+    // Fit on whole dataset to include all labels in index.
+    val labelIndexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("indexedLabel")
+      .fit(data)
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    val featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data)
+
+    // Split the data into training and test sets (30% held out for testing).
+    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
+
+    // Train a GBT model.
+    val gbt = new GBTClassifier()
+      .setLabelCol("indexedLabel")
+      .setFeaturesCol("indexedFeatures")
+      .setMaxIter(10)
+      .setFeatureSubsetStrategy("auto")
+
+    // Convert indexed labels back to original labels.
+    val labelConverter = new IndexToString()
+      .setInputCol("prediction")
+      .setOutputCol("predictedLabel")
+      .setLabels(labelIndexer.labels)
+
+    // Chain indexers and GBT in a Pipeline.
+    val pipeline = new Pipeline()
+      .setStages(Array(labelIndexer, featureIndexer, gbt, labelConverter))
+
+    // Train model. This also runs the indexers.
+    val model = pipeline.fit(trainingData)
+
+    // Make predictions.
+    val predictions = model.transform(testData)
+
+    // Select example rows to display.
+    predictions.select("predictedLabel", "label", "features").show(5)
+
+    // Select (prediction, true label) and compute test error.
+    val evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("indexedLabel")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy")
+    val accuracy = evaluator.evaluate(predictions)
+    println(s"Test Error = ${1.0 - accuracy}")
+
+    val gbtModel = model.stages(2).asInstanceOf[GBTClassificationModel]
+    println(s"Learned classification GBT model:\n ${gbtModel.toDebugString}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3feb2343f6a85a55b3bc347154dfd393dd8c42dd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/GradientBoostedTreeRegressorExample.scala
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.feature.VectorIndexer
+import org.apache.spark.ml.regression.{GBTRegressionModel, GBTRegressor}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object GradientBoostedTreeRegressorExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("GradientBoostedTreeRegressorExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    val featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data)
+
+    // Split the data into training and test sets (30% held out for testing).
+    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
+
+    // Train a GBT model.
+    val gbt = new GBTRegressor()
+      .setLabelCol("label")
+      .setFeaturesCol("indexedFeatures")
+      .setMaxIter(10)
+
+    // Chain indexer and GBT in a Pipeline.
+    val pipeline = new Pipeline()
+      .setStages(Array(featureIndexer, gbt))
+
+    // Train model. This also runs the indexer.
+    val model = pipeline.fit(trainingData)
+
+    // Make predictions.
+    val predictions = model.transform(testData)
+
+    // Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5)
+
+    // Select (prediction, true label) and compute test error.
+    val evaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("rmse")
+    val rmse = evaluator.evaluate(predictions)
+    println(s"Root Mean Squared Error (RMSE) on test data = $rmse")
+
+    val gbtModel = model.stages(1).asInstanceOf[GBTRegressionModel]
+    println(s"Learned regression GBT model:\n ${gbtModel.toDebugString}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ImputerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ImputerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..49e98d0c622ca13bea8b0c2f00d7060d5163aaab
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ImputerExample.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Imputer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating Imputer.
+ * Run with:
+ *   bin/run-example ml.ImputerExample
+ */
+object ImputerExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession.builder
+      .appName("ImputerExample")
+      .getOrCreate()
+
+    // $example on$
+    val df = spark.createDataFrame(Seq(
+      (1.0, Double.NaN),
+      (2.0, Double.NaN),
+      (Double.NaN, 3.0),
+      (4.0, 4.0),
+      (5.0, 5.0)
+    )).toDF("a", "b")
+
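+    // Impute the missing values (NaN by default) in columns "a" and "b" with each
+    // column's mean (the default strategy), writing the results to "out_a" and "out_b".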
+    val imputer = new Imputer()
+      .setInputCols(Array("a", "b"))
+      .setOutputCols(Array("out_a", "out_b"))
+
+    val model = imputer.fit(df)
+    model.transform(df).show()
+    // $example off$
+
+    spark.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2940682c32801d0611619bf2dd781839f1017614
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.attribute.Attribute
+import org.apache.spark.ml.feature.{IndexToString, StringIndexer}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object IndexToStringExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("IndexToStringExample")
+      .getOrCreate()
+
+    // $example on$
+    val df = spark.createDataFrame(Seq(
+      (0, "a"),
+      (1, "b"),
+      (2, "c"),
+      (3, "a"),
+      (4, "a"),
+      (5, "c")
+    )).toDF("id", "category")
+
+    val indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df)
+    val indexed = indexer.transform(df)
+
+    println(s"Transformed string column '${indexer.getInputCol}' " +
+        s"to indexed column '${indexer.getOutputCol}'")
+    indexed.show()
+
+    val outputColSchema = indexed.schema(indexer.getOutputCol)
+    println(s"StringIndexer will store labels in output column metadata: " +
+        s"${Attribute.fromStructField(outputColSchema).toString}\n")
+
+    val converter = new IndexToString()
+      .setInputCol("categoryIndex")
+      .setOutputCol("originalCategory")
+
+    val converted = converter.transform(indexed)
+
+    println(s"Transformed indexed column '${converter.getInputCol}' back to original string " +
+        s"column '${converter.getOutputCol}' using labels in metadata")
+    converted.select("id", "categoryIndex", "originalCategory").show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8113c992b1d69e854a5cdc8344cc4d7acb6d2be1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/InteractionExample.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Interaction
+import org.apache.spark.ml.feature.VectorAssembler
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object InteractionExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("InteractionExample")
+      .getOrCreate()
+
+    // $example on$
+    val df = spark.createDataFrame(Seq(
+      (1, 1, 2, 3, 8, 4, 5),
+      (2, 4, 3, 8, 7, 9, 8),
+      (3, 6, 1, 9, 2, 3, 6),
+      (4, 10, 8, 6, 9, 4, 5),
+      (5, 9, 2, 7, 10, 7, 3),
+      (6, 1, 1, 4, 2, 8, 4)
+    )).toDF("id1", "id2", "id3", "id4", "id5", "id6", "id7")
+
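+    // Assemble the raw id columns into two vector columns that can be fed,
+    // together with the scalar "id1", to the Interaction transformer below.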
+    val assembler1 = new VectorAssembler().
+      setInputCols(Array("id2", "id3", "id4")).
+      setOutputCol("vec1")
+
+    val assembled1 = assembler1.transform(df)
+
+    val assembler2 = new VectorAssembler().
+      setInputCols(Array("id5", "id6", "id7")).
+      setOutputCol("vec2")
+
+    val assembled2 = assembler2.transform(assembled1).select("id1", "vec1", "vec2")
+
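+    // Interaction outputs the products of all combinations of one value taken from
+    // each input column, here the scalar "id1" and the two assembled vectors.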
+    val interaction = new Interaction()
+      .setInputCols(Array("id1", "vec1", "vec2"))
+      .setOutputCol("interactedCol")
+
+    val interacted = interaction.transform(assembled2)
+
+    interacted.show(truncate = false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9bac16ec769a6c6e8548eb5db992b75f781a5bd7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/IsotonicRegressionExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.regression.IsotonicRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating Isotonic Regression.
+ * Run with
+ * {{{
+ * bin/run-example ml.IsotonicRegressionExample
+ * }}}
+ */
+object IsotonicRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+
+    // $example on$
+    // Loads data.
+    val dataset = spark.read.format("libsvm")
+      .load("data/mllib/sample_isotonic_regression_libsvm_data.txt")
+
+    // Trains an isotonic regression model.
+    val ir = new IsotonicRegression()
+    val model = ir.fit(dataset)
+
+    println(s"Boundaries in increasing order: ${model.boundaries}\n")
+    println(s"Predictions associated with the boundaries: ${model.predictions}\n")
+
+    // Makes predictions.
+    model.transform(dataset).show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2bc8184e623ff4545b36122bcb3593a7e750bfd0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// scalastyle:off println
+
+// $example on$
+import org.apache.spark.ml.clustering.KMeans
+import org.apache.spark.ml.evaluation.ClusteringEvaluator
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating k-means clustering.
+ * Run with
+ * {{{
+ * bin/run-example ml.KMeansExample
+ * }}}
+ */
+object KMeansExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+
+    // $example on$
+    // Loads data.
+    val dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")
+
+    // Trains a k-means model.
+    val kmeans = new KMeans().setK(2).setSeed(1L)
+    val model = kmeans.fit(dataset)
+
+    // Make predictions
+    val predictions = model.transform(dataset)
+
+    // Evaluate clustering by computing Silhouette score
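+    // (Silhouette ranges from -1 to 1; values close to 1 indicate well-separated clusters.)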
+    val evaluator = new ClusteringEvaluator()
+
+    val silhouette = evaluator.evaluate(predictions)
+    println(s"Silhouette with squared euclidean distance = $silhouette")
+
+    // Shows the result.
+    println("Cluster Centers: ")
+    model.clusterCenters.foreach(println)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4215d37cb59d51d478c3491fd2f7469fd91e4c7d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LDAExample.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// scalastyle:off println
+// $example on$
+import org.apache.spark.ml.clustering.LDA
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating LDA.
+ * Run with
+ * {{{
+ * bin/run-example ml.LDAExample
+ * }}}
+ */
+object LDAExample {
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+
+    // $example on$
+    // Loads data.
+    val dataset = spark.read.format("libsvm")
+      .load("data/mllib/sample_lda_libsvm_data.txt")
+
+    // Trains a LDA model.
+    val lda = new LDA().setK(10).setMaxIter(10)
+    val model = lda.fit(dataset)
+
+    val ll = model.logLikelihood(dataset)
+    val lp = model.logPerplexity(dataset)
+    println(s"The lower bound on the log likelihood of the entire corpus: $ll")
+    println(s"The upper bound on perplexity: $lp")
+
+    // Describe topics.
+    val topics = model.describeTopics(3)
+    println("The topics described by their top-weighted terms:")
+    topics.show(false)
+
+    // Shows the result.
+    val transformed = model.transform(dataset)
+    transformed.show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..6903a1c298ced50542cc8dc9b87e22a547cc2d26
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import scopt.OptionParser
+
+import org.apache.spark.examples.mllib.AbstractParams
+import org.apache.spark.ml.regression.LinearRegression
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+/**
+ * An example runner for linear regression with elastic-net (mixing L1/L2) regularization.
+ * Run with
+ * {{{
+ * bin/run-example ml.LinearRegressionExample [options]
+ * }}}
+ * A synthetic dataset can be found at `data/mllib/sample_linear_regression_data.txt`, on which a
+ * model can be trained with
+ * {{{
+ * bin/run-example ml.LinearRegressionExample --regParam 0.15 --elasticNetParam 1.0 \
+ *   data/mllib/sample_linear_regression_data.txt
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object LinearRegressionExample {
+
+  case class Params(
+      input: String = null,
+      testInput: String = "",
+      dataFormat: String = "libsvm",
+      regParam: Double = 0.0,
+      elasticNetParam: Double = 0.0,
+      maxIter: Int = 100,
+      tol: Double = 1E-6,
+      fracTest: Double = 0.2) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("LinearRegressionExample") {
+      head("LinearRegressionExample: an example Linear Regression with Elastic-Net app.")
+      opt[Double]("regParam")
+        .text(s"regularization parameter, default: ${defaultParams.regParam}")
+        .action((x, c) => c.copy(regParam = x))
+      opt[Double]("elasticNetParam")
+        .text(s"ElasticNet mixing parameter. For alpha = 0, the penalty is an L2 penalty. " +
+        s"For alpha = 1, it is an L1 penalty. For 0 < alpha < 1, the penalty is a combination of " +
+        s"L1 and L2, default: ${defaultParams.elasticNetParam}")
+        .action((x, c) => c.copy(elasticNetParam = x))
+      opt[Int]("maxIter")
+        .text(s"maximum number of iterations, default: ${defaultParams.maxIter}")
+        .action((x, c) => c.copy(maxIter = x))
+      opt[Double]("tol")
+        .text(s"the convergence tolerance of iterations, Smaller value will lead " +
+        s"to higher accuracy with the cost of more iterations, default: ${defaultParams.tol}")
+        .action((x, c) => c.copy(tol = x))
+      opt[Double]("fracTest")
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
+        s"this option is ignored. default: ${defaultParams.fracTest}")
+        .action((x, c) => c.copy(fracTest = x))
+      opt[String]("testInput")
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
+        s" default: ${defaultParams.testInput}")
+        .action((x, c) => c.copy(testInput = x))
+      opt[String]("dataFormat")
+        .text("data format: libsvm (default), dense (deprecated in Spark v1.1)")
+        .action((x, c) => c.copy(dataFormat = x))
+      arg[String]("<input>")
+        .text("input path to labeled examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        if (params.fracTest < 0 || params.fracTest >= 1) {
+          failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).")
+        } else {
+          success
+        }
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"LinearRegressionExample with $params")
+      .getOrCreate()
+
+    println(s"LinearRegressionExample with parameters:\n$params")
+
+    // Load training and test data and cache it.
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
+      params.dataFormat, params.testInput, "regression", params.fracTest)
+
+    val lir = new LinearRegression()
+      .setFeaturesCol("features")
+      .setLabelCol("label")
+      .setRegParam(params.regParam)
+      .setElasticNetParam(params.elasticNetParam)
+      .setMaxIter(params.maxIter)
+      .setTol(params.tol)
+
+    // Train the model
+    val startTime = System.nanoTime()
+    val lirModel = lir.fit(training)
+    val elapsedTime = (System.nanoTime() - startTime) / 1e9
+    println(s"Training time: $elapsedTime seconds")
+
+    // Print the weights and intercept for linear regression.
+    println(s"Weights: ${lirModel.coefficients} Intercept: ${lirModel.intercept}")
+
+    println("Training data results:")
+    DecisionTreeExample.evaluateRegressionModel(lirModel, training, "label")
+    println("Test data results:")
+    DecisionTreeExample.evaluateRegressionModel(lirModel, test, "label")
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4540a8d72812a604848c2c0b2ec0c9cab802fe32
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionWithElasticNetExample.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.regression.LinearRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object LinearRegressionWithElasticNetExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("LinearRegressionWithElasticNetExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load training data
+    val training = spark.read.format("libsvm")
+      .load("data/mllib/sample_linear_regression_data.txt")
+
+    val lr = new LinearRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8)
+
+    // Fit the model
+    val lrModel = lr.fit(training)
+
+    // Print the coefficients and intercept for linear regression
+    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
+
+    // Summarize the model over the training set and print out some metrics
+    val trainingSummary = lrModel.summary
+    println(s"numIterations: ${trainingSummary.totalIterations}")
+    println(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
+    trainingSummary.residuals.show()
+    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
+    println(s"r2: ${trainingSummary.r2}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearSVCExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearSVCExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5f43e65712b5d8634c995a45430c1e7eac9cbe1a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LinearSVCExample.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.LinearSVC
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object LinearSVCExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("LinearSVCExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load training data
+    val training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val lsvc = new LinearSVC()
+      .setMaxIter(10)
+      .setRegParam(0.1)
+
+    // Fit the model
+    val lsvcModel = lsvc.fit(training)
+
+    // Print the coefficients and intercept for linear svc
+    println(s"Coefficients: ${lsvcModel.coefficients} Intercept: ${lsvcModel.intercept}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..bd6cc8cff2348b6d124a27f942c26904452b72ef
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import scala.collection.mutable
+
+import scopt.OptionParser
+
+import org.apache.spark.examples.mllib.AbstractParams
+import org.apache.spark.ml.{Pipeline, PipelineStage}
+import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
+import org.apache.spark.ml.feature.StringIndexer
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+/**
+ * An example runner for logistic regression with elastic-net (mixing L1/L2) regularization.
+ * Run with
+ * {{{
+ * bin/run-example ml.LogisticRegressionExample [options]
+ * }}}
+ * A synthetic dataset can be found at `data/mllib/sample_libsvm_data.txt`, on which a
+ * model can be trained with
+ * {{{
+ * bin/run-example ml.LogisticRegressionExample --regParam 0.3 --elasticNetParam 0.8 \
+ *   data/mllib/sample_libsvm_data.txt
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object LogisticRegressionExample {
+
+  case class Params(
+      input: String = null,
+      testInput: String = "",
+      dataFormat: String = "libsvm",
+      regParam: Double = 0.0,
+      elasticNetParam: Double = 0.0,
+      maxIter: Int = 100,
+      fitIntercept: Boolean = true,
+      tol: Double = 1E-6,
+      fracTest: Double = 0.2) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("LogisticRegressionExample") {
+      head("LogisticRegressionExample: an example Logistic Regression with Elastic-Net app.")
+      opt[Double]("regParam")
+        .text(s"regularization parameter, default: ${defaultParams.regParam}")
+        .action((x, c) => c.copy(regParam = x))
+      opt[Double]("elasticNetParam")
+        .text(s"ElasticNet mixing parameter. For alpha = 0, the penalty is an L2 penalty. " +
+        s"For alpha = 1, it is an L1 penalty. For 0 < alpha < 1, the penalty is a combination of " +
+        s"L1 and L2, default: ${defaultParams.elasticNetParam}")
+        .action((x, c) => c.copy(elasticNetParam = x))
+      opt[Int]("maxIter")
+        .text(s"maximum number of iterations, default: ${defaultParams.maxIter}")
+        .action((x, c) => c.copy(maxIter = x))
+      opt[Boolean]("fitIntercept")
+        .text(s"whether to fit an intercept term, default: ${defaultParams.fitIntercept}")
+        .action((x, c) => c.copy(fitIntercept = x))
+      opt[Double]("tol")
+        .text(s"the convergence tolerance of iterations, Smaller value will lead " +
+        s"to higher accuracy with the cost of more iterations, default: ${defaultParams.tol}")
+        .action((x, c) => c.copy(tol = x))
+      opt[Double]("fracTest")
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
+        s"this option is ignored. default: ${defaultParams.fracTest}")
+        .action((x, c) => c.copy(fracTest = x))
+      opt[String]("testInput")
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
+        s" default: ${defaultParams.testInput}")
+        .action((x, c) => c.copy(testInput = x))
+      opt[String]("dataFormat")
+        .text("data format: libsvm (default), dense (deprecated in Spark v1.1)")
+        .action((x, c) => c.copy(dataFormat = x))
+      arg[String]("<input>")
+        .text("input path to labeled examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        if (params.fracTest < 0 || params.fracTest >= 1) {
+          failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).")
+        } else {
+          success
+        }
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"LogisticRegressionExample with $params")
+      .getOrCreate()
+
+    println(s"LogisticRegressionExample with parameters:\n$params")
+
+    // Load training and test data and cache it.
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
+      params.dataFormat, params.testInput, "classification", params.fracTest)
+
+    // Set up Pipeline.
+    val stages = new mutable.ArrayBuffer[PipelineStage]()
+
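+    // Index the raw labels into indices 0, 1, ... (most frequent label first), as
+    // expected by LogisticRegression.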
+    val labelIndexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("indexedLabel")
+    stages += labelIndexer
+
+    val lor = new LogisticRegression()
+      .setFeaturesCol("features")
+      .setLabelCol("indexedLabel")
+      .setRegParam(params.regParam)
+      .setElasticNetParam(params.elasticNetParam)
+      .setMaxIter(params.maxIter)
+      .setTol(params.tol)
+      .setFitIntercept(params.fitIntercept)
+
+    stages += lor
+    val pipeline = new Pipeline().setStages(stages.toArray)
+
+    // Fit the Pipeline.
+    val startTime = System.nanoTime()
+    val pipelineModel = pipeline.fit(training)
+    val elapsedTime = (System.nanoTime() - startTime) / 1e9
+    println(s"Training time: $elapsedTime seconds")
+
+    val lorModel = pipelineModel.stages.last.asInstanceOf[LogisticRegressionModel]
+    // Print the weights and intercept for logistic regression.
+    println(s"Weights: ${lorModel.coefficients} Intercept: ${lorModel.intercept}")
+
+    println("Training data results:")
+    DecisionTreeExample.evaluateClassificationModel(pipelineModel, training, "indexedLabel")
+    println("Test data results:")
+    DecisionTreeExample.evaluateClassificationModel(pipelineModel, test, "indexedLabel")
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..0368dcba460b5d4f652000a7aa27649a90ae1a6c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionSummaryExample.scala
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions.max
+
+object LogisticRegressionSummaryExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("LogisticRegressionSummaryExample")
+      .getOrCreate()
+    import spark.implicits._
+
+    // Load training data
+    val training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8)
+
+    // Fit the model
+    val lrModel = lr.fit(training)
+
+    // $example on$
+    // Extract the summary from the LogisticRegressionModel instance trained above.
+    val trainingSummary = lrModel.binarySummary
+
+    // Obtain the objective per iteration.
+    val objectiveHistory = trainingSummary.objectiveHistory
+    println("objectiveHistory:")
+    objectiveHistory.foreach(loss => println(loss))
+
+    // Obtain the receiver-operating characteristic as a DataFrame and areaUnderROC.
+    val roc = trainingSummary.roc
+    roc.show()
+    println(s"areaUnderROC: ${trainingSummary.areaUnderROC}")
+
+    // Set the model threshold to maximize F-Measure
+    val fMeasure = trainingSummary.fMeasureByThreshold
+    val maxFMeasure = fMeasure.select(max("F-Measure")).head().getDouble(0)
+    val bestThreshold = fMeasure.where($"F-Measure" === maxFMeasure)
+      .select("threshold").head().getDouble(0)
+    lrModel.setThreshold(bestThreshold)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..18471049087d9707c8a8d3abdbaaddc49a55376f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionWithElasticNetExample.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object LogisticRegressionWithElasticNetExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("LogisticRegressionWithElasticNetExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load training data
+    val training = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8)
+
+    // Fit the model
+    val lrModel = lr.fit(training)
+
+    // Print the coefficients and intercept for logistic regression
+    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
+
+    // We can also use the multinomial family for binary classification
+    val mlr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8)
+      .setFamily("multinomial")
+
+    val mlrModel = mlr.fit(training)
+
+    // Print the coefficients and intercepts for logistic regression with multinomial family
+    println(s"Multinomial coefficients: ${mlrModel.coefficientMatrix}")
+    println(s"Multinomial intercepts: ${mlrModel.interceptVector}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MaxAbsScalerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MaxAbsScalerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..85d071369d9c9dcab8fb09be9401aefe6a9f0a91
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MaxAbsScalerExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.MaxAbsScaler
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object MaxAbsScalerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("MaxAbsScalerExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataFrame = spark.createDataFrame(Seq(
+      (0, Vectors.dense(1.0, 0.1, -8.0)),
+      (1, Vectors.dense(2.0, 1.0, -4.0)),
+      (2, Vectors.dense(4.0, 10.0, 8.0))
+    )).toDF("id", "features")
+
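+    // MaxAbsScaler divides each feature by its maximum absolute value, mapping values into
+    // [-1, 1] without shifting or centering the data, so sparsity is preserved.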
+    val scaler = new MaxAbsScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+
+    // Compute summary statistics and generate MaxAbsScalerModel
+    val scalerModel = scaler.fit(dataFrame)
+
+    // rescale each feature to range [-1, 1]
+    val scaledData = scalerModel.transform(dataFrame)
+    scaledData.select("features", "scaledFeatures").show()
+    // $example off$
+
+    spark.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MinHashLSHExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MinHashLSHExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8515821a9413aa20364d136d4370ab7b3d876fe1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MinHashLSHExample.scala
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.MinHashLSH
+import org.apache.spark.ml.linalg.Vectors
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions.col
+// $example off$
+
+/**
+ * An example demonstrating MinHashLSH.
+ * Run with:
+ *   bin/run-example ml.MinHashLSHExample
+ */
+object MinHashLSHExample {
+  def main(args: Array[String]): Unit = {
+    // Creates a SparkSession
+    val spark = SparkSession
+      .builder
+      .appName("MinHashLSHExample")
+      .getOrCreate()
+
+    // $example on$
+    val dfA = spark.createDataFrame(Seq(
+      (0, Vectors.sparse(6, Seq((0, 1.0), (1, 1.0), (2, 1.0)))),
+      (1, Vectors.sparse(6, Seq((2, 1.0), (3, 1.0), (4, 1.0)))),
+      (2, Vectors.sparse(6, Seq((0, 1.0), (2, 1.0), (4, 1.0))))
+    )).toDF("id", "features")
+
+    val dfB = spark.createDataFrame(Seq(
+      (3, Vectors.sparse(6, Seq((1, 1.0), (3, 1.0), (5, 1.0)))),
+      (4, Vectors.sparse(6, Seq((2, 1.0), (3, 1.0), (5, 1.0)))),
+      (5, Vectors.sparse(6, Seq((1, 1.0), (2, 1.0), (4, 1.0))))
+    )).toDF("id", "features")
+
+    val key = Vectors.sparse(6, Seq((1, 1.0), (3, 1.0)))
+
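+    // MinHashLSH treats the non-zero indices of each vector as a set and approximates
+    // Jaccard distance, i.e. 1 - |A intersect B| / |A union B|.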
+    val mh = new MinHashLSH()
+      .setNumHashTables(5)
+      .setInputCol("features")
+      .setOutputCol("hashes")
+
+    val model = mh.fit(dfA)
+
+    // Feature Transformation
+    println("The hashed dataset where hashed values are stored in the column 'hashes':")
+    model.transform(dfA).show()
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate
+    // similarity join.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxSimilarityJoin(transformedA, transformedB, 0.6)`
+    println("Approximately joining dfA and dfB on Jaccard distance smaller than 0.6:")
+    model.approxSimilarityJoin(dfA, dfB, 0.6, "JaccardDistance")
+      .select(col("datasetA.id").alias("idA"),
+        col("datasetB.id").alias("idB"),
+        col("JaccardDistance")).show()
+
+    // Compute the locality sensitive hashes for the input rows, then perform approximate nearest
+    // neighbor search.
+    // We could avoid computing hashes by passing in the already-transformed dataset, e.g.
+    // `model.approxNearestNeighbors(transformedA, key, 2)`
+    // It may return fewer than 2 rows when not enough approximate near-neighbor candidates are
+    // found.
+    println("Approximately searching dfA for 2 nearest neighbors of the key:")
+    model.approxNearestNeighbors(dfA, key, 2).show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9ee6d9b44934c267f721af692c5cbaffcebd2140
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.MinMaxScaler
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object MinMaxScalerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("MinMaxScalerExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataFrame = spark.createDataFrame(Seq(
+      (0, Vectors.dense(1.0, 0.1, -1.0)),
+      (1, Vectors.dense(2.0, 1.1, 1.0)),
+      (2, Vectors.dense(3.0, 10.1, 3.0))
+    )).toDF("id", "features")
+
+    val scaler = new MinMaxScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
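+    // With the default range [0.0, 1.0], each feature x is rescaled as
+    // (x - columnMin) / (columnMax - columnMin).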
+
+    // Compute summary statistics and generate MinMaxScalerModel
+    val scalerModel = scaler.fit(dataFrame)
+
+    // rescale each feature to range [min, max].
+    val scaledData = scalerModel.transform(dataFrame)
+    println(s"Features scaled to range: [${scaler.getMin}, ${scaler.getMax}]")
+    scaledData.select("features", "scaledFeatures").show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaCrossValidationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaCrossValidationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..87d96dd51eb94f6603d611a3bb95fe2585d24ad6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaCrossValidationExample.scala
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.classification.LogisticRegression
+import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
+import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}
+import org.apache.spark.sql.Row
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * A simple example demonstrating model selection using CrossValidator.
+ * This example also demonstrates how Pipelines are Estimators.
+ *
+ * Run with
+ * {{{
+ * bin/run-example ml.ModelSelectionViaCrossValidationExample
+ * }}}
+ */
+object ModelSelectionViaCrossValidationExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("ModelSelectionViaCrossValidationExample")
+      .getOrCreate()
+
+    // $example on$
+    // Prepare training data from a list of (id, text, label) tuples.
+    val training = spark.createDataFrame(Seq(
+      (0L, "a b c d e spark", 1.0),
+      (1L, "b d", 0.0),
+      (2L, "spark f g h", 1.0),
+      (3L, "hadoop mapreduce", 0.0),
+      (4L, "b spark who", 1.0),
+      (5L, "g d a y", 0.0),
+      (6L, "spark fly", 1.0),
+      (7L, "was mapreduce", 0.0),
+      (8L, "e spark program", 1.0),
+      (9L, "a e c l", 0.0),
+      (10L, "spark compile", 1.0),
+      (11L, "hadoop software", 0.0)
+    )).toDF("id", "text", "label")
+
+    // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
+    val tokenizer = new Tokenizer()
+      .setInputCol("text")
+      .setOutputCol("words")
+    val hashingTF = new HashingTF()
+      .setInputCol(tokenizer.getOutputCol)
+      .setOutputCol("features")
+    val lr = new LogisticRegression()
+      .setMaxIter(10)
+    val pipeline = new Pipeline()
+      .setStages(Array(tokenizer, hashingTF, lr))
+
+    // We use a ParamGridBuilder to construct a grid of parameters to search over.
+    // With 3 values for hashingTF.numFeatures and 2 values for lr.regParam,
+    // this grid will have 3 x 2 = 6 parameter settings for CrossValidator to choose from.
+    val paramGrid = new ParamGridBuilder()
+      .addGrid(hashingTF.numFeatures, Array(10, 100, 1000))
+      .addGrid(lr.regParam, Array(0.1, 0.01))
+      .build()
+
+    // We now treat the Pipeline as an Estimator, wrapping it in a CrossValidator instance.
+    // This will allow us to jointly choose parameters for all Pipeline stages.
+    // A CrossValidator requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.
+    // Note that the evaluator here is a BinaryClassificationEvaluator and its default metric
+    // is areaUnderROC.
+    val cv = new CrossValidator()
+      .setEstimator(pipeline)
+      .setEvaluator(new BinaryClassificationEvaluator)
+      .setEstimatorParamMaps(paramGrid)
+      .setNumFolds(2)  // Use 3+ in practice
+      .setParallelism(2)  // Evaluate up to 2 parameter settings in parallel
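+    // With 2 folds and 6 parameter settings, cross-validation fits 2 x 6 = 12 models and then
+    // refits the best setting on the full training set.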
+
+    // Run cross-validation, and choose the best set of parameters.
+    val cvModel = cv.fit(training)
+
+    // Prepare test documents, which are unlabeled (id, text) tuples.
+    val test = spark.createDataFrame(Seq(
+      (4L, "spark i j k"),
+      (5L, "l m n"),
+      (6L, "mapreduce spark"),
+      (7L, "apache hadoop")
+    )).toDF("id", "text")
+
+    // Make predictions on test documents. cvModel uses the best model found (lrModel).
+    cvModel.transform(test)
+      .select("id", "text", "probability", "prediction")
+      .collect()
+      .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
+        println(s"($id, $text) --> prob=$prob, prediction=$prediction")
+      }
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..71e41e7298c733df516a2d1624a1aecf08b0420c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.regression.LinearRegression
+import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * A simple example demonstrating model selection using TrainValidationSplit.
+ *
+ * Run with
+ * {{{
+ * bin/run-example ml.ModelSelectionViaTrainValidationSplitExample
+ * }}}
+ */
+object ModelSelectionViaTrainValidationSplitExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("ModelSelectionViaTrainValidationSplitExample")
+      .getOrCreate()
+
+    // $example on$
+    // Prepare training and test data.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_linear_regression_data.txt")
+    val Array(training, test) = data.randomSplit(Array(0.9, 0.1), seed = 12345)
+
+    val lr = new LinearRegression()
+        .setMaxIter(10)
+
+    // We use a ParamGridBuilder to construct a grid of parameters to search over.
+    // TrainValidationSplit will try all combinations of values and determine best model using
+    // the evaluator.
+    val paramGrid = new ParamGridBuilder()
+      .addGrid(lr.regParam, Array(0.1, 0.01))
+      .addGrid(lr.fitIntercept)
+      .addGrid(lr.elasticNetParam, Array(0.0, 0.5, 1.0))
+      .build()
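+    // 2 regParam values x 2 fitIntercept values x 3 elasticNetParam values = 12 settings, each
+    // trained once on the training portion and scored once on the validation portion.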
+
+    // In this case the estimator is simply the linear regression.
+    // A TrainValidationSplit requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.
+    val trainValidationSplit = new TrainValidationSplit()
+      .setEstimator(lr)
+      .setEvaluator(new RegressionEvaluator)
+      .setEstimatorParamMaps(paramGrid)
+      // 80% of the data will be used for training and the remaining 20% for validation.
+      .setTrainRatio(0.8)
+      // Evaluate up to 2 parameter settings in parallel
+      .setParallelism(2)
+
+    // Run train validation split, and choose the best set of parameters.
+    val model = trainValidationSplit.fit(training)
+
+    // Make predictions on test data. model is the model with combination of parameters
+    // that performed best.
+    model.transform(test)
+      .select("features", "label", "prediction")
+      .show()
+    // $example off$
+
+    spark.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MulticlassLogisticRegressionWithElasticNetExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MulticlassLogisticRegressionWithElasticNetExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1f7dbddd454e814343394b0c56ae394def7fed72
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MulticlassLogisticRegressionWithElasticNetExample.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.LogisticRegression
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object MulticlassLogisticRegressionWithElasticNetExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("MulticlassLogisticRegressionWithElasticNetExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load training data
+    val training = spark
+      .read
+      .format("libsvm")
+      .load("data/mllib/sample_multiclass_classification_data.txt")
+
+    val lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.3)
+      .setElasticNetParam(0.8)
+
+    // Fit the model
+    val lrModel = lr.fit(training)
+
+    // Print the coefficients and intercept for multinomial logistic regression
+    println(s"Coefficients: \n${lrModel.coefficientMatrix}")
+    println(s"Intercepts: \n${lrModel.interceptVector}")
+
+    val trainingSummary = lrModel.summary
+
+    // Obtain the objective per iteration
+    val objectiveHistory = trainingSummary.objectiveHistory
+    println("objectiveHistory:")
+    objectiveHistory.foreach(println)
+
+    // for multiclass, we can inspect metrics on a per-label basis
+    println("False positive rate by label:")
+    trainingSummary.falsePositiveRateByLabel.zipWithIndex.foreach { case (rate, label) =>
+      println(s"label $label: $rate")
+    }
+
+    println("True positive rate by label:")
+    trainingSummary.truePositiveRateByLabel.zipWithIndex.foreach { case (rate, label) =>
+      println(s"label $label: $rate")
+    }
+
+    println("Precision by label:")
+    trainingSummary.precisionByLabel.zipWithIndex.foreach { case (prec, label) =>
+      println(s"label $label: $prec")
+    }
+
+    println("Recall by label:")
+    trainingSummary.recallByLabel.zipWithIndex.foreach { case (rec, label) =>
+      println(s"label $label: $rec")
+    }
+
+    println("F-measure by label:")
+    trainingSummary.fMeasureByLabel.zipWithIndex.foreach { case (f, label) =>
+      println(s"label $label: $f")
+    }
+
+    val accuracy = trainingSummary.accuracy
+    val falsePositiveRate = trainingSummary.weightedFalsePositiveRate
+    val truePositiveRate = trainingSummary.weightedTruePositiveRate
+    val fMeasure = trainingSummary.weightedFMeasure
+    val precision = trainingSummary.weightedPrecision
+    val recall = trainingSummary.weightedRecall
+    println(s"Accuracy: $accuracy\nFPR: $falsePositiveRate\nTPR: $truePositiveRate\n" +
+      s"F-measure: $fMeasure\nPrecision: $precision\nRecall: $recall")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..646f46a925062267e97988ee8eb848e3d59230ca
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/MultilayerPerceptronClassifierExample.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example for Multilayer Perceptron Classification.
+ */
+object MultilayerPerceptronClassifierExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("MultilayerPerceptronClassifierExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load the data stored in LIBSVM format as a DataFrame.
+    val data = spark.read.format("libsvm")
+      .load("data/mllib/sample_multiclass_classification_data.txt")
+
+    // Split the data into train and test
+    val splits = data.randomSplit(Array(0.6, 0.4), seed = 1234L)
+    val train = splits(0)
+    val test = splits(1)
+
+    // specify layers for the neural network:
+    // input layer of size 4 (features), two intermediate layers of sizes 5 and 4,
+    // and an output layer of size 3 (classes)
+    val layers = Array[Int](4, 5, 4, 3)
+
+    // create the trainer and set its parameters
+    val trainer = new MultilayerPerceptronClassifier()
+      .setLayers(layers)
+      .setBlockSize(128)
+      .setSeed(1234L)
+      .setMaxIter(100)
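+    // blockSize controls how many input rows are stacked into a matrix for the underlying
+    // linear algebra; it mainly affects performance, and 128 is also the default.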
+
+    // train the model
+    val model = trainer.fit(train)
+
+    // compute accuracy on the test set
+    val result = model.transform(test)
+    val predictionAndLabels = result.select("prediction", "label")
+    val evaluator = new MulticlassClassificationEvaluator()
+      .setMetricName("accuracy")
+
+    println(s"Test set accuracy = ${evaluator.evaluate(predictionAndLabels)}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d2183d6b4956cf43c323242a47f3d98265d5793c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.NGram
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object NGramExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("NGramExample")
+      .getOrCreate()
+
+    // $example on$
+    val wordDataFrame = spark.createDataFrame(Seq(
+      (0, Array("Hi", "I", "heard", "about", "Spark")),
+      (1, Array("I", "wish", "Java", "could", "use", "case", "classes")),
+      (2, Array("Logistic", "regression", "models", "are", "neat"))
+    )).toDF("id", "words")
+
+    val ngram = new NGram().setN(2).setInputCol("words").setOutputCol("ngrams")
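+    // With n = 2, each row of words becomes its consecutive bigrams; rows with fewer than
+    // 2 words produce an empty array.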
+
+    val ngramDataFrame = ngram.transform(wordDataFrame)
+    ngramDataFrame.select("ngrams").show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..50c70c626b128dc989dff0b4ba0ba6e0d53a3fea
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.NaiveBayes
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object NaiveBayesExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("NaiveBayesExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load the data stored in LIBSVM format as a DataFrame.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Split the data into training and test sets (30% held out for testing)
+    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3), seed = 1234L)
+
+    // Train a NaiveBayes model.
+    val model = new NaiveBayes()
+      .fit(trainingData)
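+    // The default model is multinomial naive Bayes with smoothing = 1.0;
+    // use setModelType("bernoulli") for binary/boolean features.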
+
+    // Select example rows to display.
+    val predictions = model.transform(testData)
+    predictions.show()
+
+    // Select (prediction, true label) and compute test error
+    val evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy")
+    val accuracy = evaluator.evaluate(predictions)
+    println(s"Test set accuracy = $accuracy")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..989d250c17715f6f78a444606b41cff843b7432f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Normalizer
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object NormalizerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("NormalizerExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataFrame = spark.createDataFrame(Seq(
+      (0, Vectors.dense(1.0, 0.5, -1.0)),
+      (1, Vectors.dense(2.0, 1.0, 1.0)),
+      (2, Vectors.dense(4.0, 10.0, 2.0))
+    )).toDF("id", "features")
+
+    // Normalize each Vector using $L^1$ norm.
+    val normalizer = new Normalizer()
+      .setInputCol("features")
+      .setOutputCol("normFeatures")
+      .setP(1.0)
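+    // setP chooses the p-norm used to scale each row; with p = 1.0 every vector is divided by
+    // the sum of the absolute values of its components.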
+
+    val l1NormData = normalizer.transform(dataFrame)
+    println("Normalized using L^1 norm")
+    l1NormData.show()
+
+    // Normalize each Vector using $L^\infty$ norm.
+    val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity)
+    println("Normalized using L^inf norm")
+    lInfNormData.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderEstimatorExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderEstimatorExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..45d816808ed8e41f121eef8f540082d1d0df5ae2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderEstimatorExample.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.OneHotEncoderEstimator
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object OneHotEncoderEstimatorExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("OneHotEncoderEstimatorExample")
+      .getOrCreate()
+
+    // Note: categorical features are usually first encoded with StringIndexer
+    // $example on$
+    val df = spark.createDataFrame(Seq(
+      (0.0, 1.0),
+      (1.0, 0.0),
+      (2.0, 1.0),
+      (0.0, 2.0),
+      (0.0, 1.0),
+      (2.0, 0.0)
+    )).toDF("categoryIndex1", "categoryIndex2")
+
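+    // By default dropLast = true, so a column with 3 distinct category indices is encoded as a
+    // 2-element sparse vector; the last category maps to the all-zero vector.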
+    val encoder = new OneHotEncoderEstimator()
+      .setInputCols(Array("categoryIndex1", "categoryIndex2"))
+      .setOutputCols(Array("categoryVec1", "categoryVec2"))
+    val model = encoder.fit(df)
+
+    val encoded = model.transform(df)
+    encoded.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4ad6c7c3ef2025ec38acc11fe82e47bac7bb5e02
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example of Multiclass to Binary Reduction with One Vs Rest,
+ * using Logistic Regression as the base classifier.
+ * Run with
+ * {{{
+ * ./bin/run-example ml.OneVsRestExample
+ * }}}
+ */
+
+object OneVsRestExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"OneVsRestExample")
+      .getOrCreate()
+
+    // $example on$
+    // load data file.
+    val inputData = spark.read.format("libsvm")
+      .load("data/mllib/sample_multiclass_classification_data.txt")
+
+    // generate the train/test split.
+    val Array(train, test) = inputData.randomSplit(Array(0.8, 0.2))
+
+    // instantiate the base classifier
+    val classifier = new LogisticRegression()
+      .setMaxIter(10)
+      .setTol(1E-6)
+      .setFitIntercept(true)
+
+    // instantiate the One Vs Rest Classifier.
+    val ovr = new OneVsRest().setClassifier(classifier)
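+    // OneVsRest fits one binary copy of the base classifier per class and predicts the class
+    // whose classifier produces the highest confidence.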
+
+    // train the multiclass model.
+    val ovrModel = ovr.fit(train)
+
+    // score the model on test data.
+    val predictions = ovrModel.transform(test)
+
+    // obtain evaluator.
+    val evaluator = new MulticlassClassificationEvaluator()
+      .setMetricName("accuracy")
+
+    // compute the classification error on test data.
+    val accuracy = evaluator.evaluate(predictions)
+    println(s"Test Error = ${1 - accuracy}")
+    // $example off$
+
+    spark.stop()
+  }
+
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4e1d7cdbabdb9b77e3f0db677821307c30f81524
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.PCA
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object PCAExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("PCAExample")
+      .getOrCreate()
+
+    // $example on$
+    val data = Array(
+      Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
+      Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+      Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
+    )
+    val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
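+    // setK(3) projects the 5-dimensional input vectors onto their top 3 principal components.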
+    val pca = new PCA()
+      .setInputCol("features")
+      .setOutputCol("pcaFeatures")
+      .setK(3)
+      .fit(df)
+
+    val result = pca.transform(df).select("pcaFeatures")
+    result.show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..12f8663b9ce55a0a25f409957d3790981d22a2ad
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PipelineExample.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.{Pipeline, PipelineModel}
+import org.apache.spark.ml.classification.LogisticRegression
+import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.sql.Row
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object PipelineExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("PipelineExample")
+      .getOrCreate()
+
+    // $example on$
+    // Prepare training documents from a list of (id, text, label) tuples.
+    val training = spark.createDataFrame(Seq(
+      (0L, "a b c d e spark", 1.0),
+      (1L, "b d", 0.0),
+      (2L, "spark f g h", 1.0),
+      (3L, "hadoop mapreduce", 0.0)
+    )).toDF("id", "text", "label")
+
+    // Configure an ML pipeline, which consists of three stages: tokenizer, hashingTF, and lr.
+    val tokenizer = new Tokenizer()
+      .setInputCol("text")
+      .setOutputCol("words")
+    val hashingTF = new HashingTF()
+      .setNumFeatures(1000)
+      .setInputCol(tokenizer.getOutputCol)
+      .setOutputCol("features")
+    val lr = new LogisticRegression()
+      .setMaxIter(10)
+      .setRegParam(0.001)
+    val pipeline = new Pipeline()
+      .setStages(Array(tokenizer, hashingTF, lr))
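+    // Fitting runs the stages in order: Tokenizer and HashingTF transform the data directly,
+    // while LogisticRegression is fit and its model is stored in the resulting PipelineModel.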
+
+    // Fit the pipeline to training documents.
+    val model = pipeline.fit(training)
+
+    // Now we can optionally save the fitted pipeline to disk
+    model.write.overwrite().save("/tmp/spark-logistic-regression-model")
+
+    // We can also save this unfit pipeline to disk
+    pipeline.write.overwrite().save("/tmp/unfit-lr-model")
+
+    // And load it back in during production
+    val sameModel = PipelineModel.load("/tmp/spark-logistic-regression-model")
+
+    // Prepare test documents, which are unlabeled (id, text) tuples.
+    val test = spark.createDataFrame(Seq(
+      (4L, "spark i j k"),
+      (5L, "l m n"),
+      (6L, "spark hadoop spark"),
+      (7L, "apache hadoop")
+    )).toDF("id", "text")
+
+    // Make predictions on test documents.
+    model.transform(test)
+      .select("id", "text", "probability", "prediction")
+      .collect()
+      .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) =>
+        println(s"($id, $text) --> prob=$prob, prediction=$prediction")
+      }
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f117b03ab217b7d806c47976cb3da6bd1398274d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.PolynomialExpansion
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object PolynomialExpansionExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("PolynomialExpansionExample")
+      .getOrCreate()
+
+    // $example on$
+    val data = Array(
+      Vectors.dense(2.0, 1.0),
+      Vectors.dense(0.0, 0.0),
+      Vectors.dense(3.0, -1.0)
+    )
+    val df = spark.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
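+    // Expanding 2 input features to degree 3 yields 9 output features (all monomials of the
+    // two variables up to degree 3, excluding the constant term).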
+    val polyExpansion = new PolynomialExpansion()
+      .setInputCol("features")
+      .setOutputCol("polyFeatures")
+      .setDegree(3)
+
+    val polyDF = polyExpansion.transform(df)
+    polyDF.show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ca8f7affb14e84a08f356341cf258f5698483461
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PowerIterationClusteringExample.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.clustering.PowerIterationClustering
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object PowerIterationClusteringExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+
+    // $example on$
+    val dataset = spark.createDataFrame(Seq(
+      (0L, 1L, 1.0),
+      (0L, 2L, 1.0),
+      (1L, 2L, 1.0),
+      (3L, 4L, 1.0),
+      (4L, 0L, 0.1)
+    )).toDF("src", "dst", "weight")
+
+    val model = new PowerIterationClustering()
+      .setK(2)
+      .setMaxIter(20)
+      .setInitMode("degree")
+      .setWeightCol("weight")
+
+    val prediction = model.assignClusters(dataset).select("id", "cluster")
+
+    // Shows the cluster assignment
+    prediction.show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..0a2d31097a024530203f6bcd3c3895129f72a694
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/PrefixSpanExample.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// scalastyle:off println
+
+// $example on$
+import org.apache.spark.ml.fpm.PrefixSpan
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating PrefixSpan.
+ * Run with
+ * {{{
+ * bin/run-example ml.PrefixSpanExample
+ * }}}
+ */
+object PrefixSpanExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"${this.getClass.getSimpleName}")
+      .getOrCreate()
+    import spark.implicits._
+
+    // $example on$
+    val smallTestData = Seq(
+      Seq(Seq(1, 2), Seq(3)),
+      Seq(Seq(1), Seq(3, 2), Seq(1, 2)),
+      Seq(Seq(1, 2), Seq(5)),
+      Seq(Seq(6)))
+
+    val df = smallTestData.toDF("sequence")
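+    // minSupport = 0.5 keeps only patterns occurring in at least half of the 4 input sequences,
+    // i.e. in at least 2 of them.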
+    val result = new PrefixSpan()
+      .setMinSupport(0.5)
+      .setMaxPatternLength(5)
+      .setMaxLocalProjDBSize(32000000)
+      .findFrequentSequentialPatterns(df)
+
+    result.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..0fe16fb6dfa9fa1030c0b5fac05a8af1bc5964b1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.QuantileDiscretizer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object QuantileDiscretizerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("QuantileDiscretizerExample")
+      .getOrCreate()
+
+    // $example on$
+    val data = Array((0, 18.0), (1, 19.0), (2, 8.0), (3, 5.0), (4, 2.2))
+    val df = spark.createDataFrame(data).toDF("id", "hour")
+    // $example off$
+    // The output of QuantileDiscretizer on such a small dataset can depend on the number of
+    // partitions, so a single partition is forced here to keep the results consistent.
+    // The repartition call below continues the definition of `df` above; it is not needed
+    // for normal use cases.
+      .repartition(1)
+
+    // $example on$
+    val discretizer = new QuantileDiscretizer()
+      .setInputCol("hour")
+      .setOutputCol("result")
+      .setNumBuckets(3)
+
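+    // fit() chooses approximate quantile split points for the "hour" column (two internal
+    // splits for three buckets); transform() then maps each value to a bucket index.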
+    val result = discretizer.fit(df).transform(df)
+    result.show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3498fa8a50c69b4b926d1329000ec403e2e5367d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.RFormula
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object RFormulaExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("RFormulaExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataset = spark.createDataFrame(Seq(
+      (7, "US", 18, 1.0),
+      (8, "CA", 12, 0.0),
+      (9, "NZ", 15, 0.0)
+    )).toDF("id", "country", "hour", "clicked")
+
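+    // "clicked ~ country + hour" models the clicked column as a function of country and hour;
+    // the string column country is one-hot encoded before being assembled into "features".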
+    val formula = new RFormula()
+      .setFormula("clicked ~ country + hour")
+      .setFeaturesCol("features")
+      .setLabelCol("label")
+
+    val output = formula.fit(dataset).transform(dataset)
+    output.select("features", "label").show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..6265f839025282dab2f02d7d7898879152cd784e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestClassifierExample.scala
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
+import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
+import org.apache.spark.ml.feature.{IndexToString, StringIndexer, VectorIndexer}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object RandomForestClassifierExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("RandomForestClassifierExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Index labels, adding metadata to the label column.
+    // Fit on whole dataset to include all labels in index.
+    val labelIndexer = new StringIndexer()
+      .setInputCol("label")
+      .setOutputCol("indexedLabel")
+      .fit(data)
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    val featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data)
+
+    // Split the data into training and test sets (30% held out for testing).
+    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
+
+    // Train a RandomForest model.
+    val rf = new RandomForestClassifier()
+      .setLabelCol("indexedLabel")
+      .setFeaturesCol("indexedFeatures")
+      .setNumTrees(10)
+
+    // Convert indexed labels back to original labels.
+    val labelConverter = new IndexToString()
+      .setInputCol("prediction")
+      .setOutputCol("predictedLabel")
+      .setLabels(labelIndexer.labels)
+
+    // Chain indexers and forest in a Pipeline.
+    val pipeline = new Pipeline()
+      .setStages(Array(labelIndexer, featureIndexer, rf, labelConverter))
+
+    // Train model. This also runs the indexers.
+    val model = pipeline.fit(trainingData)
+
+    // Make predictions.
+    val predictions = model.transform(testData)
+
+    // Select example rows to display.
+    predictions.select("predictedLabel", "label", "features").show(5)
+
+    // Select (prediction, true label) and compute test error.
+    val evaluator = new MulticlassClassificationEvaluator()
+      .setLabelCol("indexedLabel")
+      .setPredictionCol("prediction")
+      .setMetricName("accuracy")
+    val accuracy = evaluator.evaluate(predictions)
+    println(s"Test Error = ${(1.0 - accuracy)}")
+
+    val rfModel = model.stages(2).asInstanceOf[RandomForestClassificationModel]
+    println(s"Learned classification forest model:\n ${rfModel.toDebugString}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3c127a46e1f1034276a78cd6cc49d23cd0f27bd8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import java.util.Locale
+
+import scala.collection.mutable
+
+import scopt.OptionParser
+
+import org.apache.spark.examples.mllib.AbstractParams
+import org.apache.spark.ml.{Pipeline, PipelineStage}
+import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
+import org.apache.spark.ml.feature.{StringIndexer, VectorIndexer}
+import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
+import org.apache.spark.sql.{DataFrame, SparkSession}
+
+
+/**
+ * An example runner for random forests. Run with
+ * {{{
+ * ./bin/run-example ml.RandomForestExample [options]
+ * }}}
+ * Decision Trees and ensembles can take a large amount of memory. If the run-example command
+ * above fails, try running via spark-submit and specifying the amount of memory as at least 1g.
+ * For local mode, run
+ * {{{
+ * ./bin/spark-submit --class org.apache.spark.examples.ml.RandomForestExample --driver-memory 1g
+ *   [examples JAR path] [options]
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object RandomForestExample {
+
+  case class Params(
+      input: String = null,
+      testInput: String = "",
+      dataFormat: String = "libsvm",
+      algo: String = "classification",
+      maxDepth: Int = 5,
+      maxBins: Int = 32,
+      minInstancesPerNode: Int = 1,
+      minInfoGain: Double = 0.0,
+      numTrees: Int = 10,
+      featureSubsetStrategy: String = "auto",
+      fracTest: Double = 0.2,
+      cacheNodeIds: Boolean = false,
+      checkpointDir: Option[String] = None,
+      checkpointInterval: Int = 10) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("RandomForestExample") {
+      head("RandomForestExample: an example random forest app.")
+      opt[String]("algo")
+        .text(s"algorithm (classification, regression), default: ${defaultParams.algo}")
+        .action((x, c) => c.copy(algo = x))
+      opt[Int]("maxDepth")
+        .text(s"max depth of the tree, default: ${defaultParams.maxDepth}")
+        .action((x, c) => c.copy(maxDepth = x))
+      opt[Int]("maxBins")
+        .text(s"max number of bins, default: ${defaultParams.maxBins}")
+        .action((x, c) => c.copy(maxBins = x))
+      opt[Int]("minInstancesPerNode")
+        .text(s"min number of instances required at child nodes to create the parent split," +
+        s" default: ${defaultParams.minInstancesPerNode}")
+        .action((x, c) => c.copy(minInstancesPerNode = x))
+      opt[Double]("minInfoGain")
+        .text(s"min info gain required to create a split, default: ${defaultParams.minInfoGain}")
+        .action((x, c) => c.copy(minInfoGain = x))
+      opt[Int]("numTrees")
+        .text(s"number of trees in ensemble, default: ${defaultParams.numTrees}")
+        .action((x, c) => c.copy(numTrees = x))
+      opt[String]("featureSubsetStrategy")
+        .text(s"number of features to use per node (supported:" +
+        s" ${RandomForestClassifier.supportedFeatureSubsetStrategies.mkString(",")})," +
+        s" default: ${defaultParams.numTrees}")
+        .action((x, c) => c.copy(featureSubsetStrategy = x))
+      opt[Double]("fracTest")
+        .text(s"fraction of data to hold out for testing. If given option testInput, " +
+        s"this option is ignored. default: ${defaultParams.fracTest}")
+        .action((x, c) => c.copy(fracTest = x))
+      opt[Boolean]("cacheNodeIds")
+        .text(s"whether to use node Id cache during training, " +
+        s"default: ${defaultParams.cacheNodeIds}")
+        .action((x, c) => c.copy(cacheNodeIds = x))
+      opt[String]("checkpointDir")
+        .text(s"checkpoint directory where intermediate node Id caches will be stored, " +
+        s"default: ${
+          defaultParams.checkpointDir match {
+            case Some(strVal) => strVal
+            case None => "None"
+          }
+        }")
+        .action((x, c) => c.copy(checkpointDir = Some(x)))
+      opt[Int]("checkpointInterval")
+        .text(s"how often to checkpoint the node Id cache, " +
+        s"default: ${defaultParams.checkpointInterval}")
+        .action((x, c) => c.copy(checkpointInterval = x))
+      opt[String]("testInput")
+        .text(s"input path to test dataset. If given, option fracTest is ignored." +
+        s" default: ${defaultParams.testInput}")
+        .action((x, c) => c.copy(testInput = x))
+      opt[String]("dataFormat")
+        .text("data format: libsvm (default), dense (deprecated in Spark v1.1)")
+        .action((x, c) => c.copy(dataFormat = x))
+      arg[String]("<input>")
+        .text("input path to labeled examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        if (params.fracTest < 0 || params.fracTest >= 1) {
+          failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).")
+        } else {
+          success
+        }
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName(s"RandomForestExample with $params")
+      .getOrCreate()
+
+    params.checkpointDir.foreach(spark.sparkContext.setCheckpointDir)
+    val algo = params.algo.toLowerCase(Locale.ROOT)
+
+    println(s"RandomForestExample with parameters:\n$params")
+
+    // Load training and test data and cache it.
+    val (training: DataFrame, test: DataFrame) = DecisionTreeExample.loadDatasets(params.input,
+      params.dataFormat, params.testInput, algo, params.fracTest)
+
+    // Set up Pipeline.
+    val stages = new mutable.ArrayBuffer[PipelineStage]()
+    // (1) For classification, re-index classes.
+    val labelColName = if (algo == "classification") "indexedLabel" else "label"
+    if (algo == "classification") {
+      val labelIndexer = new StringIndexer()
+        .setInputCol("label")
+        .setOutputCol(labelColName)
+      stages += labelIndexer
+    }
+    // (2) Identify categorical features using VectorIndexer.
+    //     Features with more than maxCategories values will be treated as continuous.
+    val featuresIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(10)
+    stages += featuresIndexer
+    // (3) Learn Random Forest.
+    val rf = algo match {
+      case "classification" =>
+        new RandomForestClassifier()
+          .setFeaturesCol("indexedFeatures")
+          .setLabelCol(labelColName)
+          .setMaxDepth(params.maxDepth)
+          .setMaxBins(params.maxBins)
+          .setMinInstancesPerNode(params.minInstancesPerNode)
+          .setMinInfoGain(params.minInfoGain)
+          .setCacheNodeIds(params.cacheNodeIds)
+          .setCheckpointInterval(params.checkpointInterval)
+          .setFeatureSubsetStrategy(params.featureSubsetStrategy)
+          .setNumTrees(params.numTrees)
+      case "regression" =>
+        new RandomForestRegressor()
+          .setFeaturesCol("indexedFeatures")
+          .setLabelCol(labelColName)
+          .setMaxDepth(params.maxDepth)
+          .setMaxBins(params.maxBins)
+          .setMinInstancesPerNode(params.minInstancesPerNode)
+          .setMinInfoGain(params.minInfoGain)
+          .setCacheNodeIds(params.cacheNodeIds)
+          .setCheckpointInterval(params.checkpointInterval)
+          .setFeatureSubsetStrategy(params.featureSubsetStrategy)
+          .setNumTrees(params.numTrees)
+      case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+    stages += rf
+    val pipeline = new Pipeline().setStages(stages.toArray)
+
+    // Fit the Pipeline.
+    val startTime = System.nanoTime()
+    val pipelineModel = pipeline.fit(training)
+    val elapsedTime = (System.nanoTime() - startTime) / 1e9
+    println(s"Training time: $elapsedTime seconds")
+
+    // Get the trained Random Forest from the fitted PipelineModel.
+    algo match {
+      case "classification" =>
+        val rfModel = pipelineModel.stages.last.asInstanceOf[RandomForestClassificationModel]
+        if (rfModel.totalNumNodes < 30) {
+          println(rfModel.toDebugString) // Print full model.
+        } else {
+          println(rfModel) // Print model summary.
+        }
+      case "regression" =>
+        val rfModel = pipelineModel.stages.last.asInstanceOf[RandomForestRegressionModel]
+        if (rfModel.totalNumNodes < 30) {
+          println(rfModel.toDebugString) // Print full model.
+        } else {
+          println(rfModel) // Print model summary.
+        }
+      case _ => throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+
+    // Evaluate model on training, test data.
+    algo match {
+      case "classification" =>
+        println("Training data results:")
+        DecisionTreeExample.evaluateClassificationModel(pipelineModel, training, labelColName)
+        println("Test data results:")
+        DecisionTreeExample.evaluateClassificationModel(pipelineModel, test, labelColName)
+      case "regression" =>
+        println("Training data results:")
+        DecisionTreeExample.evaluateRegressionModel(pipelineModel, training, labelColName)
+        println("Test data results:")
+        DecisionTreeExample.evaluateRegressionModel(pipelineModel, test, labelColName)
+      case _ =>
+        throw new IllegalArgumentException(s"Algo ${params.algo} not supported.")
+    }
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2679fcb353a8a748f14205099ac8f69cd4793849
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestRegressorExample.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.feature.VectorIndexer
+import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object RandomForestRegressorExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("RandomForestRegressorExample")
+      .getOrCreate()
+
+    // $example on$
+    // Load and parse the data file, converting it to a DataFrame.
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    // Automatically identify categorical features, and index them.
+    // Set maxCategories so features with > 4 distinct values are treated as continuous.
+    val featureIndexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexedFeatures")
+      .setMaxCategories(4)
+      .fit(data)
+
+    // Split the data into training and test sets (30% held out for testing).
+    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))
+
+    // Train a RandomForest model.
+    val rf = new RandomForestRegressor()
+      .setLabelCol("label")
+      .setFeaturesCol("indexedFeatures")
+
+    // Chain indexer and forest in a Pipeline.
+    val pipeline = new Pipeline()
+      .setStages(Array(featureIndexer, rf))
+
+    // Train model. This also runs the indexer.
+    val model = pipeline.fit(trainingData)
+
+    // Make predictions.
+    val predictions = model.transform(testData)
+
+    // Select example rows to display.
+    predictions.select("prediction", "label", "features").show(5)
+
+    // Select (prediction, true label) and compute test error.
+    val evaluator = new RegressionEvaluator()
+      .setLabelCol("label")
+      .setPredictionCol("prediction")
+      .setMetricName("rmse")
+    val rmse = evaluator.evaluate(predictions)
+    println(s"Root Mean Squared Error (RMSE) on test data = $rmse")
+
+    val rfModel = model.stages(1).asInstanceOf[RandomForestRegressionModel]
+    println(s"Learned regression forest model:\n ${rfModel.toDebugString}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..bb4587b82cb37cffdd30bb56ad63d6ff14ac0737
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.SQLTransformer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object SQLTransformerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("SQLTransformerExample")
+      .getOrCreate()
+
+    // $example on$
+    val df = spark.createDataFrame(
+      Seq((0, 1.0, 3.0), (2, 2.0, 5.0))).toDF("id", "v1", "v2")
+
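+    // "__THIS__" is a placeholder for the underlying table of the input dataset; the
+    // statement below keeps all columns and derives v3 and v4 from v1 and v2.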
+    val sqlTrans = new SQLTransformer().setStatement(
+      "SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
+
+    sqlTrans.transform(df).show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4d668e8ab9670f6c737eb16b44cabda2890d40a9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.StandardScaler
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object StandardScalerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("StandardScalerExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataFrame = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
+    val scaler = new StandardScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+      .setWithStd(true)
+      .setWithMean(false)
+
+    // Compute summary statistics by fitting the StandardScaler.
+    val scalerModel = scaler.fit(dataFrame)
+
+    // Normalize each feature to have unit standard deviation.
+    val scaledData = scalerModel.transform(dataFrame)
+    scaledData.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..369a6fffd79b596b742f041721d9bf8a629396e7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.StopWordsRemover
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object StopWordsRemoverExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("StopWordsRemoverExample")
+      .getOrCreate()
+
+    // $example on$
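+    // StopWordsRemover uses its built-in English stop word list by default.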
+    val remover = new StopWordsRemover()
+      .setInputCol("raw")
+      .setOutputCol("filtered")
+
+    val dataSet = spark.createDataFrame(Seq(
+      (0, Seq("I", "saw", "the", "red", "balloon")),
+      (1, Seq("Mary", "had", "a", "little", "lamb"))
+    )).toDF("id", "raw")
+
+    remover.transform(dataSet).show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..63f273e87a209412c26564a6361fe8344394b38f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.StringIndexer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object StringIndexerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("StringIndexerExample")
+      .getOrCreate()
+
+    // $example on$
+    val df = spark.createDataFrame(
+      Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
+    ).toDF("id", "category")
+
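+    // By default StringIndexer orders labels by descending frequency, so the most frequent
+    // category ("a") is assigned index 0.0.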
+    val indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+
+    val indexed = indexer.fit(df).transform(df)
+    indexed.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/SummarizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/SummarizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2f54d1d81bc482f26dde1ce66e9590a8844ad4cd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/SummarizerExample.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.linalg.{Vector, Vectors}
+import org.apache.spark.ml.stat.Summarizer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object SummarizerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("SummarizerExample")
+      .getOrCreate()
+
+    import spark.implicits._
+    import Summarizer._
+
+    // $example on$
+    val data = Seq(
+      (Vectors.dense(2.0, 3.0, 5.0), 1.0),
+      (Vectors.dense(4.0, 6.0, 7.0), 2.0)
+    )
+
+    val df = data.toDF("features", "weight")
+
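+    // Summarizer.metrics builds a summarizer that computes all requested statistics
+    // (here mean and variance) in a single pass over the data, optionally weighted.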
+    val (meanVal, varianceVal) = df.select(metrics("mean", "variance")
+      .summary($"features", $"weight").as("summary"))
+      .select("summary.mean", "summary.variance")
+      .as[(Vector, Vector)].first()
+
+    println(s"with weight: mean = ${meanVal}, variance = ${varianceVal}")
+
+    val (meanVal2, varianceVal2) = df.select(mean($"features"), variance($"features"))
+      .as[(Vector, Vector)].first()
+
+    println(s"without weight: mean = ${meanVal2}, sum = ${varianceVal2}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ec2df2ef876baa6ef6dd7f6f7cc959c24c01d10e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object TfIdfExample {
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("TfIdfExample")
+      .getOrCreate()
+
+    // $example on$
+    val sentenceData = spark.createDataFrame(Seq(
+      (0.0, "Hi I heard about Spark"),
+      (0.0, "I wish Java could use case classes"),
+      (1.0, "Logistic regression models are neat")
+    )).toDF("label", "sentence")
+
+    val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
+    val wordsData = tokenizer.transform(sentenceData)
+
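+    // A feature dimension of 20 keeps the demo output small but makes hash collisions
+    // likely; HashingTF defaults to 2^18 features.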
+    val hashingTF = new HashingTF()
+      .setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(20)
+
+    val featurizedData = hashingTF.transform(wordsData)
+    // alternatively, CountVectorizer can also be used to get term frequency vectors
+
+    val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features")
+    val idfModel = idf.fit(featurizedData)
+
+    val rescaledData = idfModel.transform(featurizedData)
+    rescaledData.select("label", "features").show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..154777690f0031100b0288e3d82e09a39bf1c255
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions._
+// $example off$
+
+object TokenizerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("TokenizerExample")
+      .getOrCreate()
+
+    // $example on$
+    val sentenceDataFrame = spark.createDataFrame(Seq(
+      (0, "Hi I heard about Spark"),
+      (1, "I wish Java could use case classes"),
+      (2, "Logistic,regression,models,are,neat")
+    )).toDF("id", "sentence")
+
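+    // Tokenizer lowercases the text and splits on whitespace; RegexTokenizer splits on the
+    // supplied regex pattern (here, non-word characters).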
+    val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
+    val regexTokenizer = new RegexTokenizer()
+      .setInputCol("sentence")
+      .setOutputCol("words")
+      .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false)
+
+    val countTokens = udf { (words: Seq[String]) => words.length }
+
+    val tokenized = tokenizer.transform(sentenceDataFrame)
+    tokenized.select("sentence", "words")
+        .withColumn("tokens", countTokens(col("words"))).show(false)
+
+    val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
+    regexTokenized.select("sentence", "words")
+        .withColumn("tokens", countTokens(col("words"))).show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b4179ecc1e56db9d407b176a8d6564ebc97d3503
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.UnaryTransformer
+import org.apache.spark.ml.param.DoubleParam
+import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable}
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions.col
+import org.apache.spark.sql.types.{DataType, DataTypes}
+import org.apache.spark.util.Utils
+// $example off$
+
+/**
+ * An example demonstrating creating a custom [[org.apache.spark.ml.Transformer]] using
+ * the [[UnaryTransformer]] abstraction.
+ *
+ * Run with
+ * {{{
+ * bin/run-example ml.UnaryTransformerExample
+ * }}}
+ */
+object UnaryTransformerExample {
+
+  // $example on$
+  /**
+   * Simple Transformer which adds a constant value to input Doubles.
+   *
+   * [[UnaryTransformer]] can be used to create a stage usable within Pipelines.
+   * It defines parameters for specifying input and output columns:
+   * [[UnaryTransformer.inputCol]] and [[UnaryTransformer.outputCol]].
+   * It can optionally handle schema validation.
+   *
+   * [[DefaultParamsWritable]] provides a default implementation for persisting instances
+   * of this Transformer.
+   */
+  class MyTransformer(override val uid: String)
+    extends UnaryTransformer[Double, Double, MyTransformer] with DefaultParamsWritable {
+
+    final val shift: DoubleParam = new DoubleParam(this, "shift", "Value added to input")
+
+    def getShift: Double = $(shift)
+
+    def setShift(value: Double): this.type = set(shift, value)
+
+    def this() = this(Identifiable.randomUID("myT"))
+
+    override protected def createTransformFunc: Double => Double = (input: Double) => {
+      input + $(shift)
+    }
+
+    override protected def outputDataType: DataType = DataTypes.DoubleType
+
+    override protected def validateInputType(inputType: DataType): Unit = {
+      require(inputType == DataTypes.DoubleType, s"Bad input type: $inputType. Requires Double.")
+    }
+  }
+
+  /**
+   * Companion object for our simple Transformer.
+   *
+   * [[DefaultParamsReadable]] provides a default implementation for loading instances
+   * of this Transformer which were persisted using [[DefaultParamsWritable]].
+   */
+  object MyTransformer extends DefaultParamsReadable[MyTransformer]
+  // $example off$
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName("UnaryTransformerExample")
+      .getOrCreate()
+
+    // $example on$
+    val myTransformer = new MyTransformer()
+      .setShift(0.5)
+      .setInputCol("input")
+      .setOutputCol("output")
+
+    // Create data, transform, and display it.
+    val data = spark.range(0, 5).toDF("input")
+      .select(col("input").cast("double").as("input"))
+    val result = myTransformer.transform(data)
+    println("Transformed by adding constant value")
+    result.show()
+
+    // Save and load the Transformer.
+    val tmpDir = Utils.createTempDir()
+    val dirName = tmpDir.getCanonicalPath
+    myTransformer.write.overwrite().save(dirName)
+    val sameTransformer = MyTransformer.load(dirName)
+
+    // Transform the data to show the results are identical.
+    println("Same transform applied from loaded model")
+    val sameResult = sameTransformer.transform(data)
+    sameResult.show()
+
+    Utils.deleteRecursively(tmpDir)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3d5c7efb2053bcad5db1f2bdc69fc9fdfdde6ba1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object VectorAssemblerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("VectorAssemblerExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataset = spark.createDataFrame(
+      Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
+    ).toDF("id", "hour", "mobile", "userFeatures", "clicked")
+
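+    // VectorAssembler concatenates the given columns, in order, into a single vector column,
+    // so the row above becomes [18.0, 1.0, 0.0, 10.0, 0.5].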
+    val assembler = new VectorAssembler()
+      .setInputCols(Array("hour", "mobile", "userFeatures"))
+      .setOutputCol("features")
+
+    val output = assembler.transform(dataset)
+    println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'")
+    output.select("features", "clicked").show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..96bb8ea2338af5735e67af972d1b33739a57c8e2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.VectorIndexer
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object VectorIndexerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("VectorIndexerExample")
+      .getOrCreate()
+
+    // $example on$
+    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
+
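+    // Features with at most 10 distinct values are treated as categorical and re-indexed;
+    // the remaining features are left as continuous.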
+    val indexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexed")
+      .setMaxCategories(10)
+
+    val indexerModel = indexer.fit(data)
+
+    val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet
+    println(s"Chose ${categoricalFeatures.size} " +
+      s"categorical features: ${categoricalFeatures.mkString(", ")}")
+
+    // Create new column "indexed" with categorical values transformed to indices
+    val indexedData = indexerModel.transform(data)
+    indexedData.show()
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorSizeHintExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorSizeHintExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..688731a791f35b6a7b4974f76a3f9a19114d7db6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorSizeHintExample.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.{VectorAssembler, VectorSizeHint}
+import org.apache.spark.ml.linalg.Vectors
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object VectorSizeHintExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("VectorSizeHintExample")
+      .getOrCreate()
+
+    // $example on$
+    val dataset = spark.createDataFrame(
+      Seq(
+        (0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0),
+        (0, 18, 1.0, Vectors.dense(0.0, 10.0), 0.0))
+    ).toDF("id", "hour", "mobile", "userFeatures", "clicked")
+
+    val sizeHint = new VectorSizeHint()
+      .setInputCol("userFeatures")
+      .setHandleInvalid("skip")
+      .setSize(3)
+
+    val datasetWithSize = sizeHint.transform(dataset)
+    println("Rows where 'userFeatures' is not the right size are filtered out")
+    datasetWithSize.show(false)
+
+    val assembler = new VectorAssembler()
+      .setInputCols(Array("hour", "mobile", "userFeatures"))
+      .setOutputCol("features")
+
+    // This dataframe can be used by downstream transformers as before
+    val output = assembler.transform(datasetWithSize)
+    println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column 'features'")
+    output.select("features", "clicked").show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9a0af5d7b3585923e42c40d4ad26127e86a85ab5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/VectorSlicerExample.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import java.util.Arrays
+
+import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute}
+import org.apache.spark.ml.feature.VectorSlicer
+import org.apache.spark.ml.linalg.Vectors
+import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.types.StructType
+// $example off$
+
+object VectorSlicerExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("VectorSlicerExample")
+      .getOrCreate()
+
+    // $example on$
+    val data = Arrays.asList(
+      Row(Vectors.sparse(3, Seq((0, -2.0), (1, 2.3)))),
+      Row(Vectors.dense(-2.0, 2.3, 0.0))
+    )
+
+    val defaultAttr = NumericAttribute.defaultAttr
+    val attrs = Array("f1", "f2", "f3").map(defaultAttr.withName)
+    val attrGroup = new AttributeGroup("userFeatures", attrs.asInstanceOf[Array[Attribute]])
+
+    val dataset = spark.createDataFrame(data, StructType(Array(attrGroup.toStructField())))
+
+    val slicer = new VectorSlicer().setInputCol("userFeatures").setOutputCol("features")
+
+    slicer.setIndices(Array(1)).setNames(Array("f3"))
+    // or slicer.setIndices(Array(1, 2)), or slicer.setNames(Array("f2", "f3"))
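+    // The output vector keeps the union of the selected indices and names ("f2" and "f3"
+    // here), so each row of "features" has two elements.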
+
+    val output = slicer.transform(dataset)
+    output.show(false)
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4bcc6ac6a01f589c2010260ef343567b9f74ee11
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.feature.Word2Vec
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.sql.Row
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object Word2VecExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("Word2Vec example")
+      .getOrCreate()
+
+    // $example on$
+    // Input data: Each row is a bag of words from a sentence or document.
+    val documentDF = spark.createDataFrame(Seq(
+      "Hi I heard about Spark".split(" "),
+      "I wish Java could use case classes".split(" "),
+      "Logistic regression models are neat".split(" ")
+    ).map(Tuple1.apply)).toDF("text")
+
+    // Learn a mapping from words to Vectors.
+    val word2Vec = new Word2Vec()
+      .setInputCol("text")
+      .setOutputCol("result")
+      .setVectorSize(3)
+      .setMinCount(0)
+    val model = word2Vec.fit(documentDF)
+
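+    // The model maps each word to a 3-dimensional vector and represents a document by
+    // averaging the vectors of its words.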
+    val result = model.transform(documentDF)
+    result.collect().foreach { case Row(text: Seq[_], features: Vector) =>
+      println(s"Text: [${text.mkString(", ")}] => \nVector: $features\n") }
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/AbstractParams.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/AbstractParams.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8985c8565c5317e039a1d4a6b1e9059f49dd1fce
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/AbstractParams.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib
+
+import scala.reflect.runtime.universe._
+
+/**
+ * Abstract class for parameter case classes.
+ * This overrides the [[toString]] method to print all case class fields by name and value.
+ * @tparam T  Concrete parameter class.
+ */
+abstract class AbstractParams[T: TypeTag] {
+
+  private def tag: TypeTag[T] = typeTag[T]
+
+  /**
+   * Finds all case class fields in concrete class instance, and outputs them in JSON-style format:
+   * {
+   *   [field name]:\t[field value]\n
+   *   [field name]:\t[field value]\n
+   *   ...
+   * }
+   */
+  override def toString: String = {
+    val tpe = tag.tpe
+    val allAccessors = tpe.decls.collect {
+      case m: MethodSymbol if m.isCaseAccessor => m
+    }
+    val mirror = runtimeMirror(getClass.getClassLoader)
+    val instanceMirror = mirror.reflect(this)
+    allAccessors.map { f =>
+      val paramName = f.name.toString
+      val fieldMirror = instanceMirror.reflectField(f)
+      val paramValue = fieldMirror.get
+      s"  $paramName:\t$paramValue"
+    }.mkString("{\n", ",\n", "\n}")
+  }
+}
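+
+// Illustrative usage (hypothetical, not one of the bundled examples): a parameter case class like
+//   case class MyParams(input: String = "data.txt", k: Int = 3) extends AbstractParams[MyParams]
+// picks up the reflective toString above and prints each constructor field as "name:\tvalue"
+// inside braces.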
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/AssociationRulesExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/AssociationRulesExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..a07535bb5a38d39ec031caf46fc062d10e026159
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/AssociationRulesExample.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.fpm.AssociationRules
+import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset
+// $example off$
+
+object AssociationRulesExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("AssociationRulesExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val freqItemsets = sc.parallelize(Seq(
+      new FreqItemset(Array("a"), 15L),
+      new FreqItemset(Array("b"), 35L),
+      new FreqItemset(Array("a", "b"), 12L)
+    ))
+
+    val ar = new AssociationRules()
+      .setMinConfidence(0.8)
+    val results = ar.run(freqItemsets)
+
+    results.collect().foreach { rule =>
+      println(s"[${rule.antecedent.mkString(",")}=>${rule.consequent.mkString(",")}]" +
+        s" ${rule.confidence}")
+    }
+    // $example off$
+
+    sc.stop()
+  }
+
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala
new file mode 100644
index 0000000000000000000000000000000000000000..a1a5b5915264febccfd097c7edd48267ec7af003
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.log4j.{Level, Logger}
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.classification.{LogisticRegressionWithLBFGS, SVMWithSGD}
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+import org.apache.spark.mllib.optimization.{L1Updater, SquaredL2Updater}
+import org.apache.spark.mllib.util.MLUtils
+
+/**
+ * An example app for binary classification. Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.BinaryClassification
+ * }}}
+ * A synthetic dataset is located at `data/mllib/sample_binary_classification_data.txt`.
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object BinaryClassification {
+
+  object Algorithm extends Enumeration {
+    type Algorithm = Value
+    val SVM, LR = Value
+  }
+
+  object RegType extends Enumeration {
+    type RegType = Value
+    val L1, L2 = Value
+  }
+
+  import Algorithm._
+  import RegType._
+
+  case class Params(
+      input: String = null,
+      numIterations: Int = 100,
+      stepSize: Double = 1.0,
+      algorithm: Algorithm = LR,
+      regType: RegType = L2,
+      regParam: Double = 0.01) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("BinaryClassification") {
+      head("BinaryClassification: an example app for binary classification.")
+      opt[Int]("numIterations")
+        .text("number of iterations")
+        .action((x, c) => c.copy(numIterations = x))
+      opt[Double]("stepSize")
+        .text("initial step size (ignored by logistic regression), " +
+          s"default: ${defaultParams.stepSize}")
+        .action((x, c) => c.copy(stepSize = x))
+      opt[String]("algorithm")
+        .text(s"algorithm (${Algorithm.values.mkString(",")}), " +
+        s"default: ${defaultParams.algorithm}")
+        .action((x, c) => c.copy(algorithm = Algorithm.withName(x)))
+      opt[String]("regType")
+        .text(s"regularization type (${RegType.values.mkString(",")}), " +
+        s"default: ${defaultParams.regType}")
+        .action((x, c) => c.copy(regType = RegType.withName(x)))
+      opt[Double]("regParam")
+        .text(s"regularization parameter, default: ${defaultParams.regParam}")
+        .action((x, c) => c.copy(regParam = x))
+      arg[String]("<input>")
+        .required()
+        .text("input paths to labeled examples in LIBSVM format")
+        .action((x, c) => c.copy(input = x))
+      note(
+        """
+          |For example, the following command runs this app on a synthetic dataset:
+          |
+          | bin/spark-submit --class org.apache.spark.examples.mllib.BinaryClassification \
+          |  examples/target/scala-*/spark-examples-*.jar \
+          |  --algorithm LR --regType L2 --regParam 1.0 \
+          |  data/mllib/sample_binary_classification_data.txt
+        """.stripMargin)
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"BinaryClassification with $params")
+    val sc = new SparkContext(conf)
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    val examples = MLUtils.loadLibSVMFile(sc, params.input).cache()
+
+    val splits = examples.randomSplit(Array(0.8, 0.2))
+    val training = splits(0).cache()
+    val test = splits(1).cache()
+
+    val numTraining = training.count()
+    val numTest = test.count()
+    println(s"Training: $numTraining, test: $numTest.")
+
+    examples.unpersist(blocking = false)
+
+    val updater = params.regType match {
+      case L1 => new L1Updater()
+      case L2 => new SquaredL2Updater()
+    }
+
+    val model = params.algorithm match {
+      case LR =>
+        val algorithm = new LogisticRegressionWithLBFGS()
+        algorithm.optimizer
+          .setNumIterations(params.numIterations)
+          .setUpdater(updater)
+          .setRegParam(params.regParam)
+        algorithm.run(training).clearThreshold()
+      case SVM =>
+        val algorithm = new SVMWithSGD()
+        algorithm.optimizer
+          .setNumIterations(params.numIterations)
+          .setStepSize(params.stepSize)
+          .setUpdater(updater)
+          .setRegParam(params.regParam)
+        algorithm.run(training).clearThreshold()
+    }
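+    // clearThreshold() above makes predict() return raw scores instead of 0/1 labels, which
+    // BinaryClassificationMetrics needs in order to sweep thresholds for areaUnderPR/areaUnderROC.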
+
+    val prediction = model.predict(test.map(_.features))
+    val predictionAndLabel = prediction.zip(test.map(_.label))
+
+    val metrics = new BinaryClassificationMetrics(predictionAndLabel)
+
+    println(s"Test areaUnderPR = ${metrics.areaUnderPR()}.")
+    println(s"Test areaUnderROC = ${metrics.areaUnderROC()}.")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..c6312d71cc912a489903dd67754449b77a74bfb6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassificationMetricsExample.scala
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object BinaryClassificationMetricsExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("BinaryClassificationMetricsExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    // Load training data in LIBSVM format
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_binary_classification_data.txt")
+
+    // Split data into training (60%) and test (40%)
+    val Array(training, test) = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    training.cache()
+
+    // Run training algorithm to build the model
+    val model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(2)
+      .run(training)
+
+    // Clear the prediction threshold so the model will return probabilities
+    model.clearThreshold
+
+    // Compute raw scores on the test set
+    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
+      val prediction = model.predict(features)
+      (prediction, label)
+    }
+
+    // Instantiate metrics object
+    val metrics = new BinaryClassificationMetrics(predictionAndLabels)
+
+    // Precision by threshold
+    val precision = metrics.precisionByThreshold
+    precision.foreach { case (t, p) =>
+      println(s"Threshold: $t, Precision: $p")
+    }
+
+    // Recall by threshold
+    val recall = metrics.recallByThreshold
+    recall.foreach { case (t, r) =>
+      println(s"Threshold: $t, Recall: $r")
+    }
+
+    // Precision-Recall Curve
+    val PRC = metrics.pr
+
+    // F-measure
+    val f1Score = metrics.fMeasureByThreshold
+    f1Score.foreach { case (t, f) =>
+      println(s"Threshold: $t, F-score: $f, Beta = 1")
+    }
+
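+    // F-measure for a general beta:
+    // F_beta = (1 + beta^2) * precision * recall / (beta^2 * precision + recall)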
+    val beta = 0.5
+    val fScore = metrics.fMeasureByThreshold(beta)
+    fScore.foreach { case (t, f) =>
+      println(s"Threshold: $t, F-score: $f, Beta = 0.5")
+    }
+
+    // AUPRC
+    val auPRC = metrics.areaUnderPR
+    println(s"Area under precision-recall curve = $auPRC")
+
+    // Compute thresholds used in ROC and PR curves
+    val thresholds = precision.map(_._1)
+
+    // ROC Curve
+    val roc = metrics.roc
+
+    // AUROC
+    val auROC = metrics.areaUnderROC
+    println(s"Area under ROC = $auROC")
+    // $example off$
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BisectingKMeansExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BisectingKMeansExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..53d0b8fc208efca7ceac00e216e535405ef61bb7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/BisectingKMeansExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib
+
+// scalastyle:off println
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.clustering.BisectingKMeans
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
+// $example off$
+
+/**
+ * An example demonstrating bisecting k-means clustering in spark.mllib.
+ *
+ * Run with
+ * {{{
+ * bin/run-example mllib.BisectingKMeansExample
+ * }}}
+ */
+object BisectingKMeansExample {
+
+  def main(args: Array[String]): Unit = {
+    val sparkConf = new SparkConf().setAppName("mllib.BisectingKMeansExample")
+    val sc = new SparkContext(sparkConf)
+
+    // $example on$
+    // Loads and parses data
+    def parse(line: String): Vector = Vectors.dense(line.split(" ").map(_.toDouble))
+    val data = sc.textFile("data/mllib/kmeans_data.txt").map(parse).cache()
+
+    // Clustering the data into 6 clusters by BisectingKMeans.
+    val bkm = new BisectingKMeans().setK(6)
+    val model = bkm.run(data)
+
+    // Show the compute cost and the cluster centers
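+    // (the compute cost is the sum of squared distances from each point to its assigned center)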
+    println(s"Compute Cost: ${model.computeCost(data)}")
+    model.clusterCenters.zipWithIndex.foreach { case (center, idx) =>
+      println(s"Cluster Center ${idx}: ${center}")
+    }
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/ChiSqSelectorExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/ChiSqSelectorExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..5e400b7d715b4131cfa1e3504f905addab4838fe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/ChiSqSelectorExample.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.ChiSqSelector
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object ChiSqSelectorExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("ChiSqSelectorExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load some data in libsvm format
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // Discretize data in 16 equal bins since ChiSqSelector requires categorical features
+    // Even though features are doubles, the ChiSqSelector treats each unique value as a category
+    val discretizedData = data.map { lp =>
+      LabeledPoint(lp.label, Vectors.dense(lp.features.toArray.map { x => (x / 16).floor }))
+    }
+    // Create ChiSqSelector that will select top 50 of 692 features
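+    // (features are ranked by a chi-squared independence test between each feature and the label)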
+    val selector = new ChiSqSelector(50)
+    // Create ChiSqSelector model (selecting features)
+    val transformer = selector.fit(discretizedData)
+    // Filter the top 50 features from each feature vector
+    val filteredData = discretizedData.map { lp =>
+      LabeledPoint(lp.label, transformer.transform(lp.features))
+    }
+    // $example off$
+
+    println("filtered data: ")
+    filteredData.foreach(x => println(x))
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala
new file mode 100644
index 0000000000000000000000000000000000000000..0b44c339ef139d8178489fa27ac1300e9d2985b0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.stat.Statistics
+import org.apache.spark.mllib.util.MLUtils
+
+/**
+ * An example app for computing label--feature correlations in a dataset. Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.Correlations
+ * }}}
+ * By default, this loads a synthetic dataset from `data/mllib/sample_linear_regression_data.txt`.
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object Correlations {
+
+  case class Params(input: String = "data/mllib/sample_linear_regression_data.txt")
+    extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("Correlations") {
+      head("Correlations: an example app for computing correlations")
+      opt[String]("input")
+        .text(s"Input path to labeled examples in LIBSVM format, default: ${defaultParams.input}")
+        .action((x, c) => c.copy(input = x))
+      note(
+        """
+        |For example, the following command runs this app on a synthetic dataset:
+        |
+        | bin/spark-submit --class org.apache.spark.examples.mllib.Correlations \
+        |  examples/target/scala-*/spark-examples-*.jar \
+        |  --input data/mllib/sample_linear_regression_data.txt
+        """.stripMargin)
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"Correlations with $params")
+    val sc = new SparkContext(conf)
+
+    val examples = MLUtils.loadLibSVMFile(sc, params.input).cache()
+
+    println(s"Summary of data file: ${params.input}")
+    println(s"${examples.count()} data points")
+
+    // Calculate label -- feature correlations
+    val labelRDD = examples.map(_.label)
+    val numFeatures = examples.take(1)(0).features.size
+    val corrType = "pearson"
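+    // Pearson correlation: corr(X, Y) = cov(X, Y) / (stdev(X) * stdev(Y)), ranging over [-1, 1]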
+    println()
+    println(s"Correlation ($corrType) between label and each feature")
+    println(s"Feature\tCorrelation")
+    var feature = 0
+    while (feature < numFeatures) {
+      val featureRDD = examples.map(_.features(feature))
+      val corr = Statistics.corr(labelRDD, featureRDD)
+      println(s"$feature\t$corr")
+      feature += 1
+    }
+    println()
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1202caf534e953e6a5f809e32466b46c5e13761f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/CorrelationsExample.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg._
+import org.apache.spark.mllib.stat.Statistics
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object CorrelationsExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("CorrelationsExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val seriesX: RDD[Double] = sc.parallelize(Array(1, 2, 3, 3, 5))  // a series
+    // must have the same number of partitions and cardinality as seriesX
+    val seriesY: RDD[Double] = sc.parallelize(Array(11, 22, 33, 33, 555))
+
+    // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a
+    // method is not specified, Pearson's method will be used by default.
+    val correlation: Double = Statistics.corr(seriesX, seriesY, "pearson")
+    println(s"Correlation is: $correlation")
+
+    val data: RDD[Vector] = sc.parallelize(
+      Seq(
+        Vectors.dense(1.0, 10.0, 100.0),
+        Vectors.dense(2.0, 20.0, 200.0),
+        Vectors.dense(5.0, 33.0, 366.0))
+    )  // note that each Vector is a row and not a column
+
+    // calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method
+    // If a method is not specified, Pearson's method will be used by default.
+    val correlMatrix: Matrix = Statistics.corr(data, "pearson")
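+    // correlMatrix(i, j) is the Pearson correlation between column i and column j of the input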
+    println(correlMatrix.toString)
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/CosineSimilarity.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/CosineSimilarity.scala
new file mode 100644
index 0000000000000000000000000000000000000000..681465d2176d439398bba5c57fa8309456d385f1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/CosineSimilarity.scala
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.linalg.distributed.{MatrixEntry, RowMatrix}
+
+/**
+ * Compute the similar columns of a matrix, using cosine similarity.
+ *
+ * The input matrix must be stored in row-oriented dense format, one line per row with its entries
+ * separated by spaces. For example,
+ * {{{
+ * 0.5 1.0
+ * 2.0 3.0
+ * 4.0 5.0
+ * }}}
+ * represents a 3-by-2 matrix, whose first row is (0.5, 1.0).
+ *
+ * Example invocation:
+ *
+ * bin/run-example mllib.CosineSimilarity \
+ * --threshold 0.1 data/mllib/sample_svm_data.txt
+ */
+object CosineSimilarity {
+  case class Params(inputFile: String = null, threshold: Double = 0.1)
+    extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("CosineSimilarity") {
+      head("CosineSimilarity: an example app.")
+      opt[Double]("threshold")
+        .required()
+        .text("similarity threshold, trading off computation cost against estimate quality")
+        .action((x, c) => c.copy(threshold = x))
+      arg[String]("<inputFile>")
+        .required()
+        .text(s"input file, one row per line, space-separated")
+        .action((x, c) => c.copy(inputFile = x))
+      note(
+        """
+          |For example, the following command runs this app on a dataset:
+          |
+          | ./bin/spark-submit  --class org.apache.spark.examples.mllib.CosineSimilarity \
+          | examplesjar.jar \
+          | --threshold 0.1 data/mllib/sample_svm_data.txt
+        """.stripMargin)
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName("CosineSimilarity")
+    val sc = new SparkContext(conf)
+
+    // Load and parse the data file.
+    val rows = sc.textFile(params.inputFile).map { line =>
+      val values = line.split(' ').map(_.toDouble)
+      Vectors.dense(values)
+    }.cache()
+    val mat = new RowMatrix(rows)
+
+    // Compute similar columns perfectly, with brute force.
+    val exact = mat.columnSimilarities()
+
+    // Compute similar columns with estimation using DIMSUM
+    val approx = mat.columnSimilarities(params.threshold)
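+    // Both compute cosine similarity: sim(i, j) = (c_i . c_j) / (||c_i|| * ||c_j||).
+    // The thresholded variant samples entries (DIMSUM), trading accuracy on pairs below
+    // the threshold for substantially less computation.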
+
+    val exactEntries = exact.entries.map { case MatrixEntry(i, j, u) => ((i, j), u) }
+    val approxEntries = approx.entries.map { case MatrixEntry(i, j, v) => ((i, j), v) }
+    val MAE = exactEntries.leftOuterJoin(approxEntries).values.map {
+      case (u, Some(v)) =>
+        math.abs(u - v)
+      case (u, None) =>
+        math.abs(u)
+    }.mean()
+
+    println(s"Average absolute error in estimate is: $MAE")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeClassificationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeClassificationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..c2f89b72c9a2e88179dc78ed3354f42dbeab31f1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeClassificationExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.tree.DecisionTree
+import org.apache.spark.mllib.tree.model.DecisionTreeModel
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object DecisionTreeClassificationExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("DecisionTreeClassificationExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data file.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // Split the data into training and test sets (30% held out for testing)
+    val splits = data.randomSplit(Array(0.7, 0.3))
+    val (trainingData, testData) = (splits(0), splits(1))
+
+    // Train a DecisionTree model.
+    //  Empty categoricalFeaturesInfo indicates all features are continuous.
+    val numClasses = 2
+    val categoricalFeaturesInfo = Map[Int, Int]()
+    val impurity = "gini"
+    val maxDepth = 5
+    val maxBins = 32
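+    // Gini impurity of a node is 1 - sum_k p_k^2 over its class frequencies p_k ("entropy" is the
+    // other supported classification impurity).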
+
+    val model = DecisionTree.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo,
+      impurity, maxDepth, maxBins)
+
+    // Evaluate model on test instances and compute test error
+    val labelAndPreds = testData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val testErr = labelAndPreds.filter(r => r._1 != r._2).count().toDouble / testData.count()
+    println(s"Test Error = $testErr")
+    println(s"Learned classification tree model:\n ${model.toDebugString}")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myDecisionTreeClassificationModel")
+    val sameModel = DecisionTreeModel.load(sc, "target/tmp/myDecisionTreeClassificationModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1ecf6426e1f95e8777188d16f5d80260f7cf02e1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRegressionExample.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.tree.DecisionTree
+import org.apache.spark.mllib.tree.model.DecisionTreeModel
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object DecisionTreeRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("DecisionTreeRegressionExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data file.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // Split the data into training and test sets (30% held out for testing)
+    val splits = data.randomSplit(Array(0.7, 0.3))
+    val (trainingData, testData) = (splits(0), splits(1))
+
+    // Train a DecisionTree model.
+    //  Empty categoricalFeaturesInfo indicates all features are continuous.
+    val categoricalFeaturesInfo = Map[Int, Int]()
+    val impurity = "variance"
+    val maxDepth = 5
+    val maxBins = 32
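+    // "variance" impurity scores a node by the variance of its labels:
+    // (1/N) * sum_i (y_i - mean(y))^2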
+
+    val model = DecisionTree.trainRegressor(trainingData, categoricalFeaturesInfo, impurity,
+      maxDepth, maxBins)
+
+    // Evaluate model on test instances and compute test error
+    val labelsAndPredictions = testData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val testMSE = labelsAndPredictions.map { case (v, p) => math.pow(v - p, 2) }.mean()
+    println(s"Test Mean Squared Error = $testMSE")
+    println(s"Learned regression tree model:\n ${model.toDebugString}")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myDecisionTreeRegressionModel")
+    val sameModel = DecisionTreeModel.load(sc, "target/tmp/myDecisionTreeRegressionModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala
new file mode 100644
index 0000000000000000000000000000000000000000..fa47e12857f0cc4bf145b0980be401fe16d3e89a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scala.language.reflectiveCalls
+
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.evaluation.MulticlassMetrics
+import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.tree.{impurity, DecisionTree, RandomForest}
+import org.apache.spark.mllib.tree.configuration.{Algo, Strategy}
+import org.apache.spark.mllib.tree.configuration.Algo._
+import org.apache.spark.mllib.util.MLUtils
+import org.apache.spark.rdd.RDD
+import org.apache.spark.util.Utils
+
+/**
+ * An example runner for decision trees and random forests. Run with
+ * {{{
+ * ./bin/run-example org.apache.spark.examples.mllib.DecisionTreeRunner [options]
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ *
+ * Note: This script treats all features as real-valued (not categorical).
+ *       To include categorical features, modify categoricalFeaturesInfo.
+ */
+object DecisionTreeRunner {
+
+  object ImpurityType extends Enumeration {
+    type ImpurityType = Value
+    val Gini, Entropy, Variance = Value
+  }
+
+  import ImpurityType._
+
+  case class Params(
+      input: String = null,
+      testInput: String = "",
+      dataFormat: String = "libsvm",
+      algo: Algo = Classification,
+      maxDepth: Int = 5,
+      impurity: ImpurityType = Gini,
+      maxBins: Int = 32,
+      minInstancesPerNode: Int = 1,
+      minInfoGain: Double = 0.0,
+      numTrees: Int = 1,
+      featureSubsetStrategy: String = "auto",
+      fracTest: Double = 0.2,
+      useNodeIdCache: Boolean = false,
+      checkpointDir: Option[String] = None,
+      checkpointInterval: Int = 10) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("DecisionTreeRunner") {
+      head("DecisionTreeRunner: an example decision tree app.")
+      opt[String]("algo")
+        .text(s"algorithm (${Algo.values.mkString(",")}), default: ${defaultParams.algo}")
+        .action((x, c) => c.copy(algo = Algo.withName(x)))
+      opt[String]("impurity")
+        .text(s"impurity type (${ImpurityType.values.mkString(",")}), " +
+          s"default: ${defaultParams.impurity}")
+        .action((x, c) => c.copy(impurity = ImpurityType.withName(x)))
+      opt[Int]("maxDepth")
+        .text(s"max depth of the tree, default: ${defaultParams.maxDepth}")
+        .action((x, c) => c.copy(maxDepth = x))
+      opt[Int]("maxBins")
+        .text(s"max number of bins, default: ${defaultParams.maxBins}")
+        .action((x, c) => c.copy(maxBins = x))
+      opt[Int]("minInstancesPerNode")
+        .text(s"min number of instances required at child nodes to create the parent split," +
+          s" default: ${defaultParams.minInstancesPerNode}")
+        .action((x, c) => c.copy(minInstancesPerNode = x))
+      opt[Double]("minInfoGain")
+        .text(s"min info gain required to create a split, default: ${defaultParams.minInfoGain}")
+        .action((x, c) => c.copy(minInfoGain = x))
+      opt[Int]("numTrees")
+        .text(s"number of trees (1 = decision tree, 2+ = random forest)," +
+          s" default: ${defaultParams.numTrees}")
+        .action((x, c) => c.copy(numTrees = x))
+      opt[String]("featureSubsetStrategy")
+        .text(s"feature subset sampling strategy" +
+          s" (${RandomForest.supportedFeatureSubsetStrategies.mkString(", ")}), " +
+          s"default: ${defaultParams.featureSubsetStrategy}")
+        .action((x, c) => c.copy(featureSubsetStrategy = x))
+      opt[Double]("fracTest")
+        .text(s"fraction of data to hold out for testing. If option testInput is given, " +
+          s"this option is ignored. default: ${defaultParams.fracTest}")
+        .action((x, c) => c.copy(fracTest = x))
+      opt[Boolean]("useNodeIdCache")
+        .text(s"whether to use node Id cache during training, " +
+          s"default: ${defaultParams.useNodeIdCache}")
+        .action((x, c) => c.copy(useNodeIdCache = x))
+      opt[String]("checkpointDir")
+        .text(s"checkpoint directory where intermediate node Id caches will be stored, " +
+         s"default: ${defaultParams.checkpointDir match {
+           case Some(strVal) => strVal
+           case None => "None"
+         }}")
+        .action((x, c) => c.copy(checkpointDir = Some(x)))
+      opt[Int]("checkpointInterval")
+        .text(s"how often to checkpoint the node Id cache, " +
+         s"default: ${defaultParams.checkpointInterval}")
+        .action((x, c) => c.copy(checkpointInterval = x))
+      opt[String]("testInput")
+        .text(s"input path to test dataset.  If given, option fracTest is ignored." +
+          s" default: ${defaultParams.testInput}")
+        .action((x, c) => c.copy(testInput = x))
+      opt[String]("dataFormat")
+        .text("data format: libsvm (default), dense (deprecated in Spark v1.1)")
+        .action((x, c) => c.copy(dataFormat = x))
+      arg[String]("<input>")
+        .text("input path to labeled examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        if (params.fracTest < 0 || params.fracTest > 1) {
+          failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1].")
+        } else {
+          if (params.algo == Classification &&
+            (params.impurity == Gini || params.impurity == Entropy)) {
+            success
+          } else if (params.algo == Regression && params.impurity == Variance) {
+            success
+          } else {
+            failure(s"Algo ${params.algo} is not compatible with impurity ${params.impurity}.")
+          }
+        }
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  /**
+   * Load training and test data from files.
+   * @param input  Path to input dataset.
+   * @param dataFormat  "libsvm" or "dense"
+   * @param testInput  Path to test dataset.
+   * @param algo  Classification or Regression
+   * @param fracTest  Fraction of input data to hold out for testing.  Ignored if testInput given.
+   * @return  (training dataset, test dataset, number of classes),
+   *          where the number of classes is inferred from data (and set to 0 for Regression)
+   */
+  private[mllib] def loadDatasets(
+      sc: SparkContext,
+      input: String,
+      dataFormat: String,
+      testInput: String,
+      algo: Algo,
+      fracTest: Double): (RDD[LabeledPoint], RDD[LabeledPoint], Int) = {
+    // Load training data and cache it.
+    val origExamples = dataFormat match {
+      case "dense" => MLUtils.loadLabeledPoints(sc, input).cache()
+      case "libsvm" => MLUtils.loadLibSVMFile(sc, input).cache()
+    }
+    // For classification, re-index classes if needed.
+    val (examples, classIndexMap, numClasses) = algo match {
+      case Classification =>
+        // classCounts: class --> # examples in class
+        val classCounts = origExamples.map(_.label).countByValue()
+        val sortedClasses = classCounts.keys.toList.sorted
+        val numClasses = classCounts.size
+        // classIndexMap: class --> index in 0,...,numClasses-1
+        val classIndexMap = {
+          if (classCounts.keySet != Set(0.0, 1.0)) {
+            sortedClasses.zipWithIndex.toMap
+          } else {
+            Map[Double, Int]()
+          }
+        }
+        val examples = {
+          if (classIndexMap.isEmpty) {
+            origExamples
+          } else {
+            origExamples.map(lp => LabeledPoint(classIndexMap(lp.label), lp.features))
+          }
+        }
+        val numExamples = examples.count()
+        println(s"numClasses = $numClasses.")
+        println(s"Per-class example fractions, counts:")
+        println(s"Class\tFrac\tCount")
+        sortedClasses.foreach { c =>
+          val frac = classCounts(c) / numExamples.toDouble
+          println(s"$c\t$frac\t${classCounts(c)}")
+        }
+        (examples, classIndexMap, numClasses)
+      case Regression =>
+        (origExamples, null, 0)
+      case _ =>
+        throw new IllegalArgumentException(s"Algo $algo not supported.")
+    }
+
+    // Create training, test sets.
+    val splits = if (testInput != "") {
+      // Load testInput.
+      val numFeatures = examples.take(1)(0).features.size
+      val origTestExamples = dataFormat match {
+        case "dense" => MLUtils.loadLabeledPoints(sc, testInput)
+        case "libsvm" => MLUtils.loadLibSVMFile(sc, testInput, numFeatures)
+      }
+      algo match {
+        case Classification =>
+          // classCounts: class --> # examples in class
+          val testExamples = {
+            if (classIndexMap.isEmpty) {
+              origTestExamples
+            } else {
+              origTestExamples.map(lp => LabeledPoint(classIndexMap(lp.label), lp.features))
+            }
+          }
+          Array(examples, testExamples)
+        case Regression =>
+          Array(examples, origTestExamples)
+      }
+    } else {
+      // Split input into training, test.
+      examples.randomSplit(Array(1.0 - fracTest, fracTest))
+    }
+    val training = splits(0).cache()
+    val test = splits(1).cache()
+
+    val numTraining = training.count()
+    val numTest = test.count()
+    println(s"numTraining = $numTraining, numTest = $numTest.")
+
+    examples.unpersist(blocking = false)
+
+    (training, test, numClasses)
+  }
+
+  def run(params: Params): Unit = {
+
+    val conf = new SparkConf().setAppName(s"DecisionTreeRunner with $params")
+    val sc = new SparkContext(conf)
+
+    println(s"DecisionTreeRunner with parameters:\n$params")
+
+    // Load training and test data and cache it.
+    val (training, test, numClasses) = loadDatasets(sc, params.input, params.dataFormat,
+      params.testInput, params.algo, params.fracTest)
+
+    val impurityCalculator = params.impurity match {
+      case Gini => impurity.Gini
+      case Entropy => impurity.Entropy
+      case Variance => impurity.Variance
+    }
+
+    params.checkpointDir.foreach(sc.setCheckpointDir)
+
+    val strategy = new Strategy(
+          algo = params.algo,
+          impurity = impurityCalculator,
+          maxDepth = params.maxDepth,
+          maxBins = params.maxBins,
+          numClasses = numClasses,
+          minInstancesPerNode = params.minInstancesPerNode,
+          minInfoGain = params.minInfoGain,
+          useNodeIdCache = params.useNodeIdCache,
+          checkpointInterval = params.checkpointInterval)
+    if (params.numTrees == 1) {
+      val startTime = System.nanoTime()
+      val model = DecisionTree.train(training, strategy)
+      val elapsedTime = (System.nanoTime() - startTime) / 1e9
+      println(s"Training time: $elapsedTime seconds")
+      if (model.numNodes < 20) {
+        println(model.toDebugString) // Print full model.
+      } else {
+        println(model) // Print model summary.
+      }
+      if (params.algo == Classification) {
+        val trainAccuracy =
+          new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy
+        println(s"Train accuracy = $trainAccuracy")
+        val testAccuracy =
+          new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy
+        println(s"Test accuracy = $testAccuracy")
+      }
+      if (params.algo == Regression) {
+        val trainMSE = meanSquaredError(model, training)
+        println(s"Train mean squared error = $trainMSE")
+        val testMSE = meanSquaredError(model, test)
+        println(s"Test mean squared error = $testMSE")
+      }
+    } else {
+      val randomSeed = Utils.random.nextInt()
+      if (params.algo == Classification) {
+        val startTime = System.nanoTime()
+        val model = RandomForest.trainClassifier(training, strategy, params.numTrees,
+          params.featureSubsetStrategy, randomSeed)
+        val elapsedTime = (System.nanoTime() - startTime) / 1e9
+        println(s"Training time: $elapsedTime seconds")
+        if (model.totalNumNodes < 30) {
+          println(model.toDebugString) // Print full model.
+        } else {
+          println(model) // Print model summary.
+        }
+        val trainAccuracy =
+          new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy
+        println(s"Train accuracy = $trainAccuracy")
+        val testAccuracy =
+          new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy
+        println(s"Test accuracy = $testAccuracy")
+      }
+      if (params.algo == Regression) {
+        val startTime = System.nanoTime()
+        val model = RandomForest.trainRegressor(training, strategy, params.numTrees,
+          params.featureSubsetStrategy, randomSeed)
+        val elapsedTime = (System.nanoTime() - startTime) / 1e9
+        println(s"Training time: $elapsedTime seconds")
+        if (model.totalNumNodes < 30) {
+          println(model.toDebugString) // Print full model.
+        } else {
+          println(model) // Print model summary.
+        }
+        val trainMSE = meanSquaredError(model, training)
+        println(s"Train mean squared error = $trainMSE")
+        val testMSE = meanSquaredError(model, test)
+        println(s"Test mean squared error = $testMSE")
+      }
+    }
+
+    sc.stop()
+  }
+
+  /**
+   * Calculates the mean squared error for regression.
+   *
+   * This is just for demo purpose. In general, don't copy this code because it is NOT efficient
+   * due to the use of structural types, which leads to one reflection call per record.
+   */
+  // scalastyle:off structural.type
+  private[mllib] def meanSquaredError(
+      model: { def predict(features: Vector): Double },
+      data: RDD[LabeledPoint]): Double = {
+    data.map { y =>
+      val err = model.predict(y.features) - y.label
+      err * err
+    }.mean()
+  }
+  // scalastyle:on structural.type
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b228827e5886f807d5d3051d52cf3245eab1db3e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.log4j.{Level, Logger}
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.clustering.KMeans
+import org.apache.spark.mllib.linalg.Vectors
+
+/**
+ * An example k-means app. Run with
+ * {{{
+ * ./bin/run-example org.apache.spark.examples.mllib.DenseKMeans [options] <input>
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object DenseKMeans {
+
+  object InitializationMode extends Enumeration {
+    type InitializationMode = Value
+    val Random, Parallel = Value
+  }
+
+  import InitializationMode._
+
+  case class Params(
+      input: String = null,
+      k: Int = -1,
+      numIterations: Int = 10,
+      initializationMode: InitializationMode = Parallel) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("DenseKMeans") {
+      head("DenseKMeans: an example k-means app for dense data.")
+      opt[Int]('k', "k")
+        .required()
+        .text(s"number of clusters, required")
+        .action((x, c) => c.copy(k = x))
+      opt[Int]("numIterations")
+        .text(s"number of iterations, default: ${defaultParams.numIterations}")
+        .action((x, c) => c.copy(numIterations = x))
+      opt[String]("initMode")
+        .text(s"initialization mode (${InitializationMode.values.mkString(",")}), " +
+        s"default: ${defaultParams.initializationMode}")
+        .action((x, c) => c.copy(initializationMode = InitializationMode.withName(x)))
+      arg[String]("<input>")
+        .text("input paths to examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"DenseKMeans with $params")
+    val sc = new SparkContext(conf)
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    val examples = sc.textFile(params.input).map { line =>
+      Vectors.dense(line.split(' ').map(_.toDouble))
+    }.cache()
+
+    val numExamples = examples.count()
+
+    println(s"numExamples = $numExamples.")
+
+    val initMode = params.initializationMode match {
+      case Random => KMeans.RANDOM
+      case Parallel => KMeans.K_MEANS_PARALLEL
+    }
+
+    val model = new KMeans()
+      .setInitializationMode(initMode)
+      .setK(params.k)
+      .setMaxIterations(params.numIterations)
+      .run(examples)
+
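+    // computeCost is the within-set sum of squared errors (WSSSE): the sum of squared distances
+    // from each point to its nearest cluster center.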
+    val cost = model.computeCost(examples)
+
+    println(s"Total cost = $cost.")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/ElementwiseProductExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/ElementwiseProductExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1e4e3543194e236eeca2393005a8287084cace89
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/ElementwiseProductExample.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.ElementwiseProduct
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+
+object ElementwiseProductExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("ElementwiseProductExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Create some vector data; also works for sparse vectors
+    val data = sc.parallelize(Array(Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0)))
+
+    val transformingVector = Vectors.dense(0.0, 1.0, 2.0)
+    val transformer = new ElementwiseProduct(transformingVector)
+
+    // Batch transform and per-row transform give the same results:
+    val transformedData = transformer.transform(data)
+    val transformedData2 = data.map(x => transformer.transform(x))
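+    // For the vectors above, both approaches yield [0.0,2.0,6.0] and [0.0,5.0,12.0].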
+    // $example off$
+
+    println("transformedData: ")
+    transformedData.foreach(x => println(x))
+
+    println("transformedData2: ")
+    transformedData2.foreach(x => println(x))
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/FPGrowthExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/FPGrowthExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f724ee1030f040c9ce2afdb99d15a6375fbf89aa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/FPGrowthExample.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.fpm.FPGrowth
+
+/**
+ * Example for mining frequent itemsets using FP-growth.
+ * Example usage: ./bin/run-example mllib.FPGrowthExample \
+ *   --minSupport 0.8 --numPartition 2 ./data/mllib/sample_fpgrowth.txt
+ */
+object FPGrowthExample {
+
+  case class Params(
+    input: String = null,
+    minSupport: Double = 0.3,
+    numPartition: Int = -1) extends AbstractParams[Params]
+
+  def main(args: Array[String]) {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("FPGrowthExample") {
+      head("FPGrowth: an example FP-growth app.")
+      opt[Double]("minSupport")
+        .text(s"minimal support level, default: ${defaultParams.minSupport}")
+        .action((x, c) => c.copy(minSupport = x))
+      opt[Int]("numPartition")
+        .text(s"number of partition, default: ${defaultParams.numPartition}")
+        .action((x, c) => c.copy(numPartition = x))
+      arg[String]("<input>")
+        .text("input paths to input data set, whose file format is that each line " +
+          "contains a transaction with each item in String and separated by a space")
+        .required()
+        .action((x, c) => c.copy(input = x))
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"FPGrowthExample with $params")
+    val sc = new SparkContext(conf)
+    val transactions = sc.textFile(params.input).map(_.split(" ")).cache()
+
+    println(s"Number of transactions: ${transactions.count()}")
+
+    val model = new FPGrowth()
+      .setMinSupport(params.minSupport)
+      .setNumPartitions(params.numPartition)
+      .run(transactions)
+
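+    // If association rules are also of interest, they could be generated from the fitted model,
+    // e.g. model.generateAssociationRules(0.8).collect().foreach(println) (minimum confidence 0.8).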
+    println(s"Number of frequent itemsets: ${model.freqItemsets.count()}")
+
+    model.freqItemsets.collect().foreach { itemset =>
+      println(s"${itemset.items.mkString("[", ",", "]")}, ${itemset.freq}")
+    }
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GaussianMixtureExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GaussianMixtureExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b1b3a79d87ae1010f4338c3a536879e8c5856208
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GaussianMixtureExample.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.clustering.{GaussianMixture, GaussianMixtureModel}
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+
+object GaussianMixtureExample {
+
+  def main(args: Array[String]) {
+
+    val conf = new SparkConf().setAppName("GaussianMixtureExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data
+    val data = sc.textFile("data/mllib/gmm_data.txt")
+    val parsedData = data.map(s => Vectors.dense(s.trim.split(' ').map(_.toDouble))).cache()
+
+    // Cluster the data into two classes using GaussianMixture
+    val gmm = new GaussianMixture().setK(2).run(parsedData)
+
+    // Save and load model
+    gmm.save(sc, "target/org/apache/spark/GaussianMixtureExample/GaussianMixtureModel")
+    val sameModel = GaussianMixtureModel.load(sc,
+      "target/org/apache/spark/GaussianMixtureExample/GaussianMixtureModel")
+
+    // output parameters of max-likelihood model
+    for (i <- 0 until gmm.k) {
+      println("weight=%f\nmu=%s\nsigma=\n%s\n" format
+        (gmm.weights(i), gmm.gaussians(i).mu, gmm.gaussians(i).sigma))
+    }
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala
new file mode 100644
index 0000000000000000000000000000000000000000..4020c6b6bca7fec0fbe6f2d263f2816ef98350ad
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.evaluation.MulticlassMetrics
+import org.apache.spark.mllib.tree.GradientBoostedTrees
+import org.apache.spark.mllib.tree.configuration.{Algo, BoostingStrategy}
+import org.apache.spark.util.Utils
+
+/**
+ * An example runner for Gradient Boosting using decision trees as weak learners. Run with
+ * {{{
+ * ./bin/run-example mllib.GradientBoostedTreesRunner [options]
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ *
+ * Note: This script treats all features as real-valued (not categorical).
+ *       To include categorical features, modify categoricalFeaturesInfo.
+ */
+object GradientBoostedTreesRunner {
+
+  case class Params(
+      input: String = null,
+      testInput: String = "",
+      dataFormat: String = "libsvm",
+      algo: String = "Classification",
+      maxDepth: Int = 5,
+      numIterations: Int = 10,
+      fracTest: Double = 0.2) extends AbstractParams[Params]
+
+  def main(args: Array[String]) {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("GradientBoostedTrees") {
+      head("GradientBoostedTrees: an example decision tree app.")
+      opt[String]("algo")
+        .text(s"algorithm (${Algo.values.mkString(",")}), default: ${defaultParams.algo}")
+        .action((x, c) => c.copy(algo = x))
+      opt[Int]("maxDepth")
+        .text(s"max depth of the tree, default: ${defaultParams.maxDepth}")
+        .action((x, c) => c.copy(maxDepth = x))
+      opt[Int]("numIterations")
+        .text(s"number of iterations of boosting," + s" default: ${defaultParams.numIterations}")
+        .action((x, c) => c.copy(numIterations = x))
+      opt[Double]("fracTest")
+        .text(s"fraction of data to hold out for testing.  If given option testInput, " +
+          s"this option is ignored. default: ${defaultParams.fracTest}")
+        .action((x, c) => c.copy(fracTest = x))
+      opt[String]("testInput")
+        .text(s"input path to test dataset.  If given, option fracTest is ignored." +
+          s" default: ${defaultParams.testInput}")
+        .action((x, c) => c.copy(testInput = x))
+      opt[String]("dataFormat")
+        .text("data format: libsvm (default), dense (deprecated in Spark v1.1)")
+        .action((x, c) => c.copy(dataFormat = x))
+      arg[String]("<input>")
+        .text("input path to labeled examples")
+        .required()
+        .action((x, c) => c.copy(input = x))
+      checkConfig { params =>
+        if (params.fracTest < 0 || params.fracTest > 1) {
+          failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1].")
+        } else {
+          success
+        }
+      }
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+
+    val conf = new SparkConf().setAppName(s"GradientBoostedTreesRunner with $params")
+    val sc = new SparkContext(conf)
+
+    println(s"GradientBoostedTreesRunner with parameters:\n$params")
+
+    // Load training and test data and cache it.
+    val (training, test, numClasses) = DecisionTreeRunner.loadDatasets(sc, params.input,
+      params.dataFormat, params.testInput, Algo.withName(params.algo), params.fracTest)
+
+    val boostingStrategy = BoostingStrategy.defaultParams(params.algo)
+    boostingStrategy.treeStrategy.numClasses = numClasses
+    boostingStrategy.numIterations = params.numIterations
+    boostingStrategy.treeStrategy.maxDepth = params.maxDepth
+
+    val randomSeed = Utils.random.nextInt()
+    if (params.algo == "Classification") {
+      val startTime = System.nanoTime()
+      val model = GradientBoostedTrees.train(training, boostingStrategy)
+      val elapsedTime = (System.nanoTime() - startTime) / 1e9
+      println(s"Training time: $elapsedTime seconds")
+      if (model.totalNumNodes < 30) {
+        println(model.toDebugString) // Print full model.
+      } else {
+        println(model) // Print model summary.
+      }
+      val trainAccuracy =
+        new MulticlassMetrics(training.map(lp => (model.predict(lp.features), lp.label))).accuracy
+      println(s"Train accuracy = $trainAccuracy")
+      val testAccuracy =
+        new MulticlassMetrics(test.map(lp => (model.predict(lp.features), lp.label))).accuracy
+      println(s"Test accuracy = $testAccuracy")
+    } else if (params.algo == "Regression") {
+      val startTime = System.nanoTime()
+      val model = GradientBoostedTrees.train(training, boostingStrategy)
+      val elapsedTime = (System.nanoTime() - startTime) / 1e9
+      println(s"Training time: $elapsedTime seconds")
+      if (model.totalNumNodes < 30) {
+        println(model.toDebugString) // Print full model.
+      } else {
+        println(model) // Print model summary.
+      }
+      val trainMSE = DecisionTreeRunner.meanSquaredError(model, training)
+      println(s"Train mean squared error = $trainMSE")
+      val testMSE = DecisionTreeRunner.meanSquaredError(model, test)
+      println(s"Test mean squared error = $testMSE")
+    }
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostingClassificationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostingClassificationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3c56e1941aecae044a37fbd246b3b2ab887610f5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostingClassificationExample.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.tree.GradientBoostedTrees
+import org.apache.spark.mllib.tree.configuration.BoostingStrategy
+import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object GradientBoostingClassificationExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("GradientBoostedTreesClassificationExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    // Load and parse the data file.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // Split the data into training and test sets (30% held out for testing)
+    val splits = data.randomSplit(Array(0.7, 0.3))
+    val (trainingData, testData) = (splits(0), splits(1))
+
+    // Train a GradientBoostedTrees model.
+    // The defaultParams for Classification use LogLoss by default.
+    val boostingStrategy = BoostingStrategy.defaultParams("Classification")
+    boostingStrategy.numIterations = 3 // Note: Use more iterations in practice.
+    boostingStrategy.treeStrategy.numClasses = 2
+    boostingStrategy.treeStrategy.maxDepth = 5
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    boostingStrategy.treeStrategy.categoricalFeaturesInfo = Map[Int, Int]()
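+    // (For categorical features, map feature index -> number of categories, e.g. Map(0 -> 2, 4 -> 10).)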
+
+    val model = GradientBoostedTrees.train(trainingData, boostingStrategy)
+
+    // Evaluate model on test instances and compute test error
+    val labelAndPreds = testData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData.count()
+    println(s"Test Error = $testErr")
+    println(s"Learned classification GBT model:\n ${model.toDebugString}")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myGradientBoostingClassificationModel")
+    val sameModel = GradientBoostedTreesModel.load(sc,
+      "target/tmp/myGradientBoostingClassificationModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostingRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostingRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..c288bf29bf25535b2a06ed00ec03b966390bc7af
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostingRegressionExample.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.tree.GradientBoostedTrees
+import org.apache.spark.mllib.tree.configuration.BoostingStrategy
+import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object GradientBoostingRegressionExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("GradientBoostedTreesRegressionExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    // Load and parse the data file.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // Split the data into training and test sets (30% held out for testing)
+    val splits = data.randomSplit(Array(0.7, 0.3))
+    val (trainingData, testData) = (splits(0), splits(1))
+
+    // Train a GradientBoostedTrees model.
+    // The defaultParams for Regression use SquaredError by default.
+    val boostingStrategy = BoostingStrategy.defaultParams("Regression")
+    boostingStrategy.numIterations = 3 // Note: Use more iterations in practice.
+    boostingStrategy.treeStrategy.maxDepth = 5
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    boostingStrategy.treeStrategy.categoricalFeaturesInfo = Map[Int, Int]()
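+    // (For categorical features, map feature index -> number of categories, e.g. Map(0 -> 2, 4 -> 10).)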
+
+    val model = GradientBoostedTrees.train(trainingData, boostingStrategy)
+
+    // Evaluate model on test instances and compute test error
+    val labelsAndPredictions = testData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val testMSE = labelsAndPredictions.map { case (v, p) => math.pow(v - p, 2) }.mean()
+    println(s"Test Mean Squared Error = $testMSE")
+    println(s"Learned regression GBT model:\n ${model.toDebugString}")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myGradientBoostingRegressionModel")
+    val sameModel = GradientBoostedTreesModel.load(sc,
+      "target/tmp/myGradientBoostingRegressionModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9b3c3266ee30a07008f9c963ff031b530a1ad198
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg._
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.stat.Statistics
+import org.apache.spark.mllib.stat.test.ChiSqTestResult
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object HypothesisTestingExample {
+
+  def main(args: Array[String]) {
+
+    val conf = new SparkConf().setAppName("HypothesisTestingExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // a vector composed of the frequencies of events
+    val vec: Vector = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)
+
+    // compute the goodness of fit. If a second vector to test against is not supplied
+    // as a parameter, the test runs against a uniform distribution.
+    val goodnessOfFitTestResult = Statistics.chiSqTest(vec)
+    // summary of the test including the p-value, degrees of freedom, test statistic, the method
+    // used, and the null hypothesis.
+    println(s"$goodnessOfFitTestResult\n")
+
+    // a contingency matrix. Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
+    val mat: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
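+    // (Matrices.dense is column-major: the array lists column 1 = (1,3,5), then column 2 = (2,4,6).)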
+
+    // conduct Pearson's independence test on the input contingency matrix
+    val independenceTestResult = Statistics.chiSqTest(mat)
+    // summary of the test including the p-value, degrees of freedom
+    println(s"$independenceTestResult\n")
+
+    val obs: RDD[LabeledPoint] =
+      sc.parallelize(
+        Seq(
+          LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
+          LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
+          LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5))
+        )
+      ) // (label, feature) pairs.
+
+    // The contingency table is constructed from the raw (label, feature) pairs and used to conduct
+    // the independence test. Returns an array containing the ChiSquaredTestResult for every feature
+    // against the label.
+    val featureTestResults: Array[ChiSqTestResult] = Statistics.chiSqTest(obs)
+    featureTestResults.zipWithIndex.foreach { case (result, i) =>
+      println(s"Column ${i + 1}:")
+      println(result)
+    }  // summary of the test for each feature against the label
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..840874cf3c2fe193627d21c6f666fe28059d875e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingKolmogorovSmirnovTestExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.stat.Statistics
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object HypothesisTestingKolmogorovSmirnovTestExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("HypothesisTestingKolmogorovSmirnovTestExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data: RDD[Double] = sc.parallelize(Seq(0.1, 0.15, 0.2, 0.3, 0.25))  // an RDD of sample data
+
+    // run a KS test for the sample versus a standard normal distribution
+    val testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
+    // summary of the test including the p-value, test statistic, and null hypothesis.
+    // If the p-value indicates significance, we can reject the null hypothesis.
+    println(testResult)
+    println()
+
+    // perform a KS test using a user-defined cumulative distribution function
+    val myCDF = Map(0.1 -> 0.2, 0.15 -> 0.6, 0.2 -> 0.05, 0.3 -> 0.05, 0.25 -> 0.1)
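+    // (A Map[Double, Double] works here because it is also a Double => Double giving the CDF at each sample point.)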
+    val testResult2 = Statistics.kolmogorovSmirnovTest(data, myCDF)
+    println(testResult2)
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/IsotonicRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/IsotonicRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..a10d6f0dda8808d0a99f76585290ea02655d3295
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/IsotonicRegressionExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.regression.{IsotonicRegression, IsotonicRegressionModel}
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object IsotonicRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("IsotonicRegressionExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    val data = MLUtils.loadLibSVMFile(sc,
+      "data/mllib/sample_isotonic_regression_libsvm_data.txt").cache()
+
+    // Create label, feature, weight tuples from input data with weight set to default value 1.0.
+    val parsedData = data.map { labeledPoint =>
+      (labeledPoint.label, labeledPoint.features(0), 1.0)
+    }
+
+    // Split data into training (60%) and test (40%) sets.
+    val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)
+    val training = splits(0)
+    val test = splits(1)
+
+    // Create isotonic regression model from training data.
+    // Isotonic parameter defaults to true so it is only shown for demonstration
+    val model = new IsotonicRegression().setIsotonic(true).run(training)
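+    // The fitted step function can be inspected via model.boundaries and model.predictions.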
+
+    // Create tuples of predicted and real labels.
+    val predictionAndLabel = test.map { point =>
+      val predictedLabel = model.predict(point._2)
+      (predictedLabel, point._1)
+    }
+
+    // Calculate mean squared error between predicted and real labels.
+    val meanSquaredError = predictionAndLabel.map { case (p, l) => math.pow((p - l), 2) }.mean()
+    println(s"Mean Squared Error = $meanSquaredError")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myIsotonicRegressionModel")
+    val sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/KMeansExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/KMeansExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b0a6f1671a89877987228b7f4f220e55936fab8e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/KMeansExample.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+
+object KMeansExample {
+
+  def main(args: Array[String]) {
+
+    val conf = new SparkConf().setAppName("KMeansExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data
+    val data = sc.textFile("data/mllib/kmeans_data.txt")
+    val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache()
+
+    // Cluster the data into two classes using KMeans
+    val numClusters = 2
+    val numIterations = 20
+    val clusters = KMeans.train(parsedData, numClusters, numIterations)
+
+    // Evaluate clustering by computing Within Set Sum of Squared Errors
+    val WSSSE = clusters.computeCost(parsedData)
+    println(s"Within Set Sum of Squared Errors = $WSSSE")
+
+    // Save and load model
+    clusters.save(sc, "target/org/apache/spark/KMeansExample/KMeansModel")
+    val sameModel = KMeansModel.load(sc, "target/org/apache/spark/KMeansExample/KMeansModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..cc5d159b36cc9986baf2a3e875ef6a2d8528a7c6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/KernelDensityEstimationExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.stat.KernelDensity
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object KernelDensityEstimationExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("KernelDensityEstimationExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // an RDD of sample data
+    val data: RDD[Double] = sc.parallelize(Seq(1, 1, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 9))
+
+    // Construct the density estimator with the sample data and a standard deviation
+    // for the Gaussian kernels
+    val kd = new KernelDensity()
+      .setSample(data)
+      .setBandwidth(3.0)
+
+    // Find density estimates for the given values
+    val densities = kd.estimate(Array(-1.0, 2.0, 5.0))
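+    // densities(i) is the estimated probability density at -1.0, 2.0 and 5.0, respectively.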
+    // $example off$
+
+    densities.foreach(println)
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LBFGSExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LBFGSExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..123782fa6b9cff8dd2ba8b5e2b77d4d9382e1f43
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LBFGSExample.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.LogisticRegressionModel
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object LBFGSExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("LBFGSExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    val numFeatures = data.take(1)(0).features.size
+
+    // Split data into training (60%) and test (40%).
+    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+
+    // Append a bias term of 1.0 to each training vector so the model can learn an intercept.
+    val training = splits(0).map(x => (x.label, MLUtils.appendBias(x.features))).cache()
+
+    val test = splits(1)
+
+    // Run training algorithm to build the model
+    val numCorrections = 10
+    val convergenceTol = 1e-4
+    val maxNumIterations = 20
+    val regParam = 0.1
+    val initialWeightsWithIntercept = Vectors.dense(new Array[Double](numFeatures + 1))
+
+    val (weightsWithIntercept, loss) = LBFGS.runLBFGS(
+      training,
+      new LogisticGradient(),
+      new SquaredL2Updater(),
+      numCorrections,
+      convergenceTol,
+      maxNumIterations,
+      regParam,
+      initialWeightsWithIntercept)
+
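+    // runLBFGS returns the learned weights (with the appended bias as the last element) and the per-iteration loss.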
+    val model = new LogisticRegressionModel(
+      Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1)),
+      weightsWithIntercept(weightsWithIntercept.size - 1))
+
+    // Clear the default threshold.
+    model.clearThreshold()
+
+    // Compute raw scores on the test set.
+    val scoreAndLabels = test.map { point =>
+      val score = model.predict(point.features)
+      (score, point.label)
+    }
+
+    // Get evaluation metrics.
+    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
+    val auROC = metrics.areaUnderROC()
+
+    println("Loss of each step in training process")
+    loss.foreach(println)
+    println(s"Area under ROC = $auROC")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..cd77ecf990b3b492ddd2dc867fd6de1af64cf2eb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import java.util.Locale
+
+import org.apache.log4j.{Level, Logger}
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel, RegexTokenizer, StopWordsRemover}
+import org.apache.spark.ml.linalg.{Vector => MLVector}
+import org.apache.spark.mllib.clustering.{DistributedLDAModel, EMLDAOptimizer, LDA, OnlineLDAOptimizer}
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.{Row, SparkSession}
+
+/**
+ * An example Latent Dirichlet Allocation (LDA) app. Run with
+ * {{{
+ * ./bin/run-example mllib.LDAExample [options] <input>
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object LDAExample {
+
+  private case class Params(
+      input: Seq[String] = Seq.empty,
+      k: Int = 20,
+      maxIterations: Int = 10,
+      docConcentration: Double = -1,
+      topicConcentration: Double = -1,
+      vocabSize: Int = 10000,
+      stopwordFile: String = "",
+      algorithm: String = "em",
+      checkpointDir: Option[String] = None,
+      checkpointInterval: Int = 10) extends AbstractParams[Params]
+
+  def main(args: Array[String]) {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("LDAExample") {
+      head("LDAExample: an example LDA app for plain text data.")
+      opt[Int]("k")
+        .text(s"number of topics. default: ${defaultParams.k}")
+        .action((x, c) => c.copy(k = x))
+      opt[Int]("maxIterations")
+        .text(s"number of iterations of learning. default: ${defaultParams.maxIterations}")
+        .action((x, c) => c.copy(maxIterations = x))
+      opt[Double]("docConcentration")
+        .text(s"amount of topic smoothing to use (> 1.0) (-1=auto)." +
+        s"  default: ${defaultParams.docConcentration}")
+        .action((x, c) => c.copy(docConcentration = x))
+      opt[Double]("topicConcentration")
+        .text(s"amount of term (word) smoothing to use (> 1.0) (-1=auto)." +
+        s"  default: ${defaultParams.topicConcentration}")
+        .action((x, c) => c.copy(topicConcentration = x))
+      opt[Int]("vocabSize")
+        .text(s"number of distinct word types to use, chosen by frequency. (-1=all)" +
+          s"  default: ${defaultParams.vocabSize}")
+        .action((x, c) => c.copy(vocabSize = x))
+      opt[String]("stopwordFile")
+        .text(s"filepath for a list of stopwords. Note: This must fit on a single machine." +
+        s"  default: ${defaultParams.stopwordFile}")
+        .action((x, c) => c.copy(stopwordFile = x))
+      opt[String]("algorithm")
+        .text(s"inference algorithm to use. em and online are supported." +
+        s" default: ${defaultParams.algorithm}")
+        .action((x, c) => c.copy(algorithm = x))
+      opt[String]("checkpointDir")
+        .text(s"Directory for checkpointing intermediate results." +
+        s"  Checkpointing helps with recovery and eliminates temporary shuffle files on disk." +
+        s"  default: ${defaultParams.checkpointDir}")
+        .action((x, c) => c.copy(checkpointDir = Some(x)))
+      opt[Int]("checkpointInterval")
+        .text(s"Iterations between each checkpoint.  Only used if checkpointDir is set." +
+        s" default: ${defaultParams.checkpointInterval}")
+        .action((x, c) => c.copy(checkpointInterval = x))
+      arg[String]("<input>...")
+        .text("input paths (directories) to plain text corpora." +
+        "  Each text file line should hold 1 document.")
+        .unbounded()
+        .required()
+        .action((x, c) => c.copy(input = c.input :+ x))
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  private def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"LDAExample with $params")
+    val sc = new SparkContext(conf)
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    // Load documents, and prepare them for LDA.
+    val preprocessStart = System.nanoTime()
+    val (corpus, vocabArray, actualNumTokens) =
+      preprocess(sc, params.input, params.vocabSize, params.stopwordFile)
+    corpus.cache()
+    val actualCorpusSize = corpus.count()
+    val actualVocabSize = vocabArray.length
+    val preprocessElapsed = (System.nanoTime() - preprocessStart) / 1e9
+
+    println()
+    println(s"Corpus summary:")
+    println(s"\t Training set size: $actualCorpusSize documents")
+    println(s"\t Vocabulary size: $actualVocabSize terms")
+    println(s"\t Training set size: $actualNumTokens tokens")
+    println(s"\t Preprocessing time: $preprocessElapsed sec")
+    println()
+
+    // Run LDA.
+    val lda = new LDA()
+
+    val optimizer = params.algorithm.toLowerCase(Locale.ROOT) match {
+      case "em" => new EMLDAOptimizer
+      // add (1.0 / actualCorpusSize) to MiniBatchFraction to be more robust on tiny datasets.
+      case "online" => new OnlineLDAOptimizer().setMiniBatchFraction(0.05 + 1.0 / actualCorpusSize)
+      case _ => throw new IllegalArgumentException(
+        s"Only em, online are supported but got ${params.algorithm}.")
+    }
+
+    lda.setOptimizer(optimizer)
+      .setK(params.k)
+      .setMaxIterations(params.maxIterations)
+      .setDocConcentration(params.docConcentration)
+      .setTopicConcentration(params.topicConcentration)
+      .setCheckpointInterval(params.checkpointInterval)
+    if (params.checkpointDir.nonEmpty) {
+      sc.setCheckpointDir(params.checkpointDir.get)
+    }
+    val startTime = System.nanoTime()
+    val ldaModel = lda.run(corpus)
+    val elapsed = (System.nanoTime() - startTime) / 1e9
+
+    println(s"Finished training LDA model.  Summary:")
+    println(s"\t Training time: $elapsed sec")
+
+    if (ldaModel.isInstanceOf[DistributedLDAModel]) {
+      val distLDAModel = ldaModel.asInstanceOf[DistributedLDAModel]
+      val avgLogLikelihood = distLDAModel.logLikelihood / actualCorpusSize.toDouble
+      println(s"\t Training data average log likelihood: $avgLogLikelihood")
+      println()
+    }
+
+    // Print the topics, showing the top-weighted terms for each topic.
+    val topicIndices = ldaModel.describeTopics(maxTermsPerTopic = 10)
+    val topics = topicIndices.map { case (terms, termWeights) =>
+      terms.zip(termWeights).map { case (term, weight) => (vocabArray(term.toInt), weight) }
+    }
+    println(s"${params.k} topics:")
+    topics.zipWithIndex.foreach { case (topic, i) =>
+      println(s"TOPIC $i")
+      topic.foreach { case (term, weight) =>
+        println(s"$term\t$weight")
+      }
+      println()
+    }
+    sc.stop()
+  }
+
+  /**
+   * Load documents, tokenize them, create vocabulary, and prepare documents as term count vectors.
+   * @return (corpus, vocabulary as array, total token count in corpus)
+   */
+  private def preprocess(
+      sc: SparkContext,
+      paths: Seq[String],
+      vocabSize: Int,
+      stopwordFile: String): (RDD[(Long, Vector)], Array[String], Long) = {
+
+    val spark = SparkSession
+      .builder
+      .sparkContext(sc)
+      .getOrCreate()
+    import spark.implicits._
+
+    // Get dataset of document texts
+    // One document per line in each text file. If the input consists of many small files,
+    // this can result in a large number of small partitions, which can degrade performance.
+    // In this case, consider using coalesce() to create fewer, larger partitions.
+    val df = sc.textFile(paths.mkString(",")).toDF("docs")
+    val customizedStopWords: Array[String] = if (stopwordFile.isEmpty) {
+      Array.empty[String]
+    } else {
+      val stopWordText = sc.textFile(stopwordFile).collect()
+      stopWordText.flatMap(_.stripMargin.split("\\s+"))
+    }
+    val tokenizer = new RegexTokenizer()
+      .setInputCol("docs")
+      .setOutputCol("rawTokens")
+    val stopWordsRemover = new StopWordsRemover()
+      .setInputCol("rawTokens")
+      .setOutputCol("tokens")
+    stopWordsRemover.setStopWords(stopWordsRemover.getStopWords ++ customizedStopWords)
+    val countVectorizer = new CountVectorizer()
+      .setVocabSize(vocabSize)
+      .setInputCol("tokens")
+      .setOutputCol("features")
+
+    val pipeline = new Pipeline()
+      .setStages(Array(tokenizer, stopWordsRemover, countVectorizer))
+
+    val model = pipeline.fit(df)
+    val documents = model.transform(df)
+      .select("features")
+      .rdd
+      .map { case Row(features: MLVector) => Vectors.fromML(features) }
+      .zipWithIndex()
+      .map(_.swap)
+
+    (documents,
+      model.stages(2).asInstanceOf[CountVectorizerModel].vocabulary,  // vocabulary
+      documents.map(_._2.numActives).sum().toLong) // total token count
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LatentDirichletAllocationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LatentDirichletAllocationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d25962c5500ed94b05628918a1bac594b38f64af
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LatentDirichletAllocationExample.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.clustering.{DistributedLDAModel, LDA}
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+
+object LatentDirichletAllocationExample {
+
+  def main(args: Array[String]) {
+
+    val conf = new SparkConf().setAppName("LatentDirichletAllocationExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data
+    val data = sc.textFile("data/mllib/sample_lda_data.txt")
+    val parsedData = data.map(s => Vectors.dense(s.trim.split(' ').map(_.toDouble)))
+    // Index documents with unique IDs
+    val corpus = parsedData.zipWithIndex.map(_.swap).cache()
+
+    // Cluster the documents into three topics using LDA
+    val ldaModel = new LDA().setK(3).run(corpus)
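+    // The top-weighted terms per topic could also be inspected with ldaModel.describeTopics(maxTermsPerTopic = 5).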
+
+    // Output topics. Each is a distribution over words (matching word count vectors)
+    println(s"Learned topics (as distributions over vocab of ${ldaModel.vocabSize} words):")
+    val topics = ldaModel.topicsMatrix
+    for (topic <- Range(0, 3)) {
+      print(s"Topic $topic :")
+      for (word <- Range(0, ldaModel.vocabSize)) {
+        print(s"${topics(word, topic)}")
+      }
+      println()
+    }
+
+    // Save and load model.
+    ldaModel.save(sc, "target/org/apache/spark/LatentDirichletAllocationExample/LDAModel")
+    val sameModel = DistributedLDAModel.load(sc,
+      "target/org/apache/spark/LatentDirichletAllocationExample/LDAModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala
new file mode 100644
index 0000000000000000000000000000000000000000..86aec363ea4214d144a22698c5c4c00c49d6bd57
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.log4j.{Level, Logger}
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.optimization.{L1Updater, SimpleUpdater, SquaredL2Updater}
+import org.apache.spark.mllib.regression.LinearRegressionWithSGD
+import org.apache.spark.mllib.util.MLUtils
+
+/**
+ * An example app for linear regression. Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.LinearRegression
+ * }}}
+ * A synthetic dataset can be found at `data/mllib/sample_linear_regression_data.txt`.
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
+object LinearRegression {
+
+  object RegType extends Enumeration {
+    type RegType = Value
+    val NONE, L1, L2 = Value
+  }
+
+  import RegType._
+
+  case class Params(
+      input: String = null,
+      numIterations: Int = 100,
+      stepSize: Double = 1.0,
+      regType: RegType = L2,
+      regParam: Double = 0.01) extends AbstractParams[Params]
+
+  def main(args: Array[String]) {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("LinearRegression") {
+      head("LinearRegression: an example app for linear regression.")
+      opt[Int]("numIterations")
+        .text("number of iterations")
+        .action((x, c) => c.copy(numIterations = x))
+      opt[Double]("stepSize")
+        .text(s"initial step size, default: ${defaultParams.stepSize}")
+        .action((x, c) => c.copy(stepSize = x))
+      opt[String]("regType")
+        .text(s"regularization type (${RegType.values.mkString(",")}), " +
+        s"default: ${defaultParams.regType}")
+        .action((x, c) => c.copy(regType = RegType.withName(x)))
+      opt[Double]("regParam")
+        .text(s"regularization parameter, default: ${defaultParams.regParam}")
+      arg[String]("<input>")
+        .required()
+        .text("input paths to labeled examples in LIBSVM format")
+        .action((x, c) => c.copy(input = x))
+      note(
+        """
+          |For example, the following command runs this app on a synthetic dataset:
+          |
+          | bin/spark-submit --class org.apache.spark.examples.mllib.LinearRegression \
+          |  examples/target/scala-*/spark-examples-*.jar \
+          |  data/mllib/sample_linear_regression_data.txt
+        """.stripMargin)
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"LinearRegression with $params")
+    val sc = new SparkContext(conf)
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    val examples = MLUtils.loadLibSVMFile(sc, params.input).cache()
+
+    val splits = examples.randomSplit(Array(0.8, 0.2))
+    val training = splits(0).cache()
+    val test = splits(1).cache()
+
+    val numTraining = training.count()
+    val numTest = test.count()
+    println(s"Training: $numTraining, test: $numTest.")
+
+    examples.unpersist(blocking = false)
+
+    val updater = params.regType match {
+      case NONE => new SimpleUpdater()
+      case L1 => new L1Updater()
+      case L2 => new SquaredL2Updater()
+    }
+
+    val algorithm = new LinearRegressionWithSGD()
+    algorithm.optimizer
+      .setNumIterations(params.numIterations)
+      .setStepSize(params.stepSize)
+      .setUpdater(updater)
+      .setRegParam(params.regParam)
+
+    val model = algorithm.run(training)
+
+    val prediction = model.predict(test.map(_.features))
+    val predictionAndLabel = prediction.zip(test.map(_.label))
+
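+    // Squared error summed over the test set; RMSE is the square root of its mean.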
+    val loss = predictionAndLabel.map { case (p, l) =>
+      val err = p - l
+      err * err
+    }.reduce(_ + _)
+    val rmse = math.sqrt(loss / numTest)
+
+    println(s"Test RMSE = $rmse.")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..449b725d1d173f7a35309e3becf1aa1e44c16d1d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.LinearRegressionModel
+import org.apache.spark.mllib.regression.LinearRegressionWithSGD
+// $example off$
+
+@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0")
+object LinearRegressionWithSGDExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("LinearRegressionWithSGDExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data
+    val data = sc.textFile("data/mllib/ridge-data/lpsa.data")
+    val parsedData = data.map { line =>
+      val parts = line.split(',')
+      LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
+    }.cache()
+
+    // Building the model
+    val numIterations = 100
+    val stepSize = 0.00000001
+    val model = LinearRegressionWithSGD.train(parsedData, numIterations, stepSize)
+
+    // Evaluate model on training examples and compute training error
+    val valuesAndPreds = parsedData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val MSE = valuesAndPreds.map { case (v, p) => math.pow((v - p), 2) }.mean()
+    println(s"training Mean Squared Error $MSE")
+
+    // Save and load model
+    model.save(sc, "target/tmp/scalaLinearRegressionWithSGDModel")
+    val sameModel = LinearRegressionModel.load(sc, "target/tmp/scalaLinearRegressionWithSGDModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..31ba740ad4af0a007da907344787eda82359e7b5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/LogisticRegressionWithLBFGSExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.{LogisticRegressionModel, LogisticRegressionWithLBFGS}
+import org.apache.spark.mllib.evaluation.MulticlassMetrics
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object LogisticRegressionWithLBFGSExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("LogisticRegressionWithLBFGSExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load training data in LIBSVM format.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    // Split data into training (60%) and test (40%).
+    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    val training = splits(0).cache()
+    val test = splits(1)
+
+    // Run training algorithm to build the model
+    val model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(10)
+      .run(training)
+
+    // Compute raw scores on the test set.
+    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
+      val prediction = model.predict(features)
+      (prediction, label)
+    }
+
+    // Get evaluation metrics.
+    val metrics = new MulticlassMetrics(predictionAndLabels)
+    val accuracy = metrics.accuracy
+    println(s"Accuracy = $accuracy")
+
+    // Save and load model
+    model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel")
+    val sameModel = LogisticRegressionModel.load(sc,
+      "target/tmp/scalaLogisticRegressionWithLBFGSModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
new file mode 100644
index 0000000000000000000000000000000000000000..9bd6927fb7fc0a434387c8a5ecb0c188bbe7d6e7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scala.collection.mutable
+
+import org.apache.log4j.{Level, Logger}
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating}
+import org.apache.spark.rdd.RDD
+
+/**
+ * An example app for ALS on MovieLens data (http://grouplens.org/datasets/movielens/).
+ * Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.MovieLensALS
+ * }}}
+ * A synthetic dataset in MovieLens format can be found at `data/mllib/sample_movielens_data.txt`.
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object MovieLensALS {
+
+  case class Params(
+      input: String = null,
+      kryo: Boolean = false,
+      numIterations: Int = 20,
+      lambda: Double = 1.0,
+      rank: Int = 10,
+      numUserBlocks: Int = -1,
+      numProductBlocks: Int = -1,
+      implicitPrefs: Boolean = false) extends AbstractParams[Params]
+
+  def main(args: Array[String]) {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("MovieLensALS") {
+      head("MovieLensALS: an example app for ALS on MovieLens data.")
+      opt[Int]("rank")
+        .text(s"rank, default: ${defaultParams.rank}")
+        .action((x, c) => c.copy(rank = x))
+      opt[Int]("numIterations")
+        .text(s"number of iterations, default: ${defaultParams.numIterations}")
+        .action((x, c) => c.copy(numIterations = x))
+      opt[Double]("lambda")
+        .text(s"lambda (smoothing constant), default: ${defaultParams.lambda}")
+        .action((x, c) => c.copy(lambda = x))
+      opt[Unit]("kryo")
+        .text("use Kryo serialization")
+        .action((_, c) => c.copy(kryo = true))
+      opt[Int]("numUserBlocks")
+        .text(s"number of user blocks, default: ${defaultParams.numUserBlocks} (auto)")
+        .action((x, c) => c.copy(numUserBlocks = x))
+      opt[Int]("numProductBlocks")
+        .text(s"number of product blocks, default: ${defaultParams.numProductBlocks} (auto)")
+        .action((x, c) => c.copy(numProductBlocks = x))
+      opt[Unit]("implicitPrefs")
+        .text("use implicit preference")
+        .action((_, c) => c.copy(implicitPrefs = true))
+      arg[String]("<input>")
+        .required()
+        .text("input paths to a MovieLens dataset of ratings")
+        .action((x, c) => c.copy(input = x))
+      note(
+        """
+          |For example, the following command runs this app on a synthetic dataset:
+          |
+          | bin/spark-submit --class org.apache.spark.examples.mllib.MovieLensALS \
+          |  examples/target/scala-*/spark-examples-*.jar \
+          |  --rank 5 --numIterations 20 --lambda 1.0 --kryo \
+          |  data/mllib/sample_movielens_data.txt
+        """.stripMargin)
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"MovieLensALS with $params")
+    if (params.kryo) {
+      conf.registerKryoClasses(Array(classOf[mutable.BitSet], classOf[Rating]))
+        .set("spark.kryoserializer.buffer", "8m")
+    }
+    val sc = new SparkContext(conf)
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    val implicitPrefs = params.implicitPrefs
+
+    val ratings = sc.textFile(params.input).map { line =>
+      val fields = line.split("::")
+      if (implicitPrefs) {
+        /*
+         * MovieLens ratings are on a scale of 1-5:
+         * 5: Must see
+         * 4: Will enjoy
+         * 3: It's okay
+         * 2: Fairly bad
+         * 1: Awful
+         * So we should not recommend a movie if the predicted rating is less than 3.
+         * To map ratings to confidence scores, we use
+         * 5 -> 2.5, 4 -> 1.5, 3 -> 0.5, 2 -> -0.5, 1 -> -1.5. This mapping means unobserved
+         * entries are generally between It's okay and Fairly bad.
+         * The semantics of 0 in this expanded world of non-positive weights
+         * are "the same as never having interacted at all".
+         */
+        Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble - 2.5)
+      } else {
+        Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble)
+      }
+    }.cache()
+
+    val numRatings = ratings.count()
+    val numUsers = ratings.map(_.user).distinct().count()
+    val numMovies = ratings.map(_.product).distinct().count()
+
+    println(s"Got $numRatings ratings from $numUsers users on $numMovies movies.")
+
+    val splits = ratings.randomSplit(Array(0.8, 0.2))
+    val training = splits(0).cache()
+    val test = if (params.implicitPrefs) {
+      /*
+       * 0 means "don't know" and positive values mean "confident that the prediction should be 1".
+       * Negative values mean "confident that the prediction should be 0".
+       * For evaluation, predictions are clamped to [0, 1] and the error is the difference
+       * between the clamped prediction and either 1 or 0, depending on whether the rating
+       * is positive or negative (see computeRmse below).
+       */
+      splits(1).map(x => Rating(x.user, x.product, if (x.rating > 0) 1.0 else 0.0))
+    } else {
+      splits(1)
+    }.cache()
+
+    val numTraining = training.count()
+    val numTest = test.count()
+    println(s"Training: $numTraining, test: $numTest.")
+
+    ratings.unpersist(blocking = false)
+
+    val model = new ALS()
+      .setRank(params.rank)
+      .setIterations(params.numIterations)
+      .setLambda(params.lambda)
+      .setImplicitPrefs(params.implicitPrefs)
+      .setUserBlocks(params.numUserBlocks)
+      .setProductBlocks(params.numProductBlocks)
+      .run(training)
+
+    val rmse = computeRmse(model, test, params.implicitPrefs)
+
+    println(s"Test RMSE = $rmse.")
+
+    sc.stop()
+  }
+
+  /** Compute RMSE (Root Mean Squared Error). */
+  def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], implicitPrefs: Boolean)
+    : Double = {
+
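+    // For implicit preferences, clamp predictions to [0, 1] so they are comparable with the
+    // binarized test ratings; explicit predictions are left unchanged.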
+    def mapPredictedRating(r: Double): Double = {
+      if (implicitPrefs) math.max(math.min(r, 1.0), 0.0) else r
+    }
+
+    val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product)))
+    val predictionsAndRatings = predictions.map { x =>
+      ((x.user, x.product), mapPredictedRating(x.rating))
+    }.join(data.map(x => ((x.user, x.product), x.rating))).values
+    math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).mean())
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MultiLabelMetricsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MultiLabelMetricsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ebab81b334a59e77b661827c059cce70e89e273c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MultiLabelMetricsExample.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.evaluation.MultilabelMetrics
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object MultiLabelMetricsExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("MultiLabelMetricsExample")
+    val sc = new SparkContext(conf)
+    // $example on$
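+    // Each element pairs the predicted labels with the true labels for one record.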
+    val scoreAndLabels: RDD[(Array[Double], Array[Double])] = sc.parallelize(
+      Seq((Array(0.0, 1.0), Array(0.0, 2.0)),
+        (Array(0.0, 2.0), Array(0.0, 1.0)),
+        (Array.empty[Double], Array(0.0)),
+        (Array(2.0), Array(2.0)),
+        (Array(2.0, 0.0), Array(2.0, 0.0)),
+        (Array(0.0, 1.0, 2.0), Array(0.0, 1.0)),
+        (Array(1.0), Array(1.0, 2.0))), 2)
+
+    // Instantiate metrics object
+    val metrics = new MultilabelMetrics(scoreAndLabels)
+
+    // Summary stats
+    println(s"Recall = ${metrics.recall}")
+    println(s"Precision = ${metrics.precision}")
+    println(s"F1 measure = ${metrics.f1Measure}")
+    println(s"Accuracy = ${metrics.accuracy}")
+
+    // Individual label stats
+    metrics.labels.foreach(label =>
+      println(s"Class $label precision = ${metrics.precision(label)}"))
+    metrics.labels.foreach(label => println(s"Class $label recall = ${metrics.recall(label)}"))
+    metrics.labels.foreach(label => println(s"Class $label F1-score = ${metrics.f1Measure(label)}"))
+
+    // Micro stats
+    println(s"Micro recall = ${metrics.microRecall}")
+    println(s"Micro precision = ${metrics.microPrecision}")
+    println(s"Micro F1 measure = ${metrics.microF1Measure}")
+
+    // Hamming loss
+    println(s"Hamming loss = ${metrics.hammingLoss}")
+
+    // Subset accuracy
+    println(s"Subset accuracy = ${metrics.subsetAccuracy}")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..e0b98eeb446ba912ccc2cf955178c589db9d9f8a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MulticlassMetricsExample.scala
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
+import org.apache.spark.mllib.evaluation.MulticlassMetrics
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object MulticlassMetricsExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("MulticlassMetricsExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load training data in LIBSVM format
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt")
+
+    // Split data into training (60%) and test (40%)
+    val Array(training, test) = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    training.cache()
+
+    // Run training algorithm to build the model
+    val model = new LogisticRegressionWithLBFGS()
+      .setNumClasses(3)
+      .run(training)
+
+    // Compute raw scores on the test set
+    val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
+      val prediction = model.predict(features)
+      (prediction, label)
+    }
+
+    // Instantiate metrics object
+    val metrics = new MulticlassMetrics(predictionAndLabels)
+
+    // Confusion matrix
+    println("Confusion matrix:")
+    println(metrics.confusionMatrix)
+
+    // Overall Statistics
+    val accuracy = metrics.accuracy
+    println("Summary Statistics")
+    println(s"Accuracy = $accuracy")
+
+    // Precision by label
+    val labels = metrics.labels
+    labels.foreach { l =>
+      println(s"Precision($l) = " + metrics.precision(l))
+    }
+
+    // Recall by label
+    labels.foreach { l =>
+      println(s"Recall($l) = " + metrics.recall(l))
+    }
+
+    // False positive rate by label
+    labels.foreach { l =>
+      println(s"FPR($l) = " + metrics.falsePositiveRate(l))
+    }
+
+    // F-measure by label
+    labels.foreach { l =>
+      println(s"F1-Score($l) = " + metrics.fMeasure(l))
+    }
+
+    // Weighted stats
+    println(s"Weighted precision: ${metrics.weightedPrecision}")
+    println(s"Weighted recall: ${metrics.weightedRecall}")
+    println(s"Weighted F1 score: ${metrics.weightedFMeasure}")
+    println(s"Weighted false positive rate: ${metrics.weightedFalsePositiveRate}")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MultivariateSummarizer.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MultivariateSummarizer.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f9e47e485e72f735c546715d215ec1b0103c9d72
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/MultivariateSummarizer.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
+import org.apache.spark.mllib.util.MLUtils
+
+/**
+ * An example app for summarizing multivariate data from a file. Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.MultivariateSummarizer
+ * }}}
+ * By default, this loads a synthetic dataset from `data/mllib/sample_linear_regression_data.txt`.
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object MultivariateSummarizer {
+
+  case class Params(input: String = "data/mllib/sample_linear_regression_data.txt")
+    extends AbstractParams[Params]
+
+  def main(args: Array[String]) {
+
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("MultivariateSummarizer") {
+      head("MultivariateSummarizer: an example app for MultivariateOnlineSummarizer")
+      opt[String]("input")
+        .text(s"Input path to labeled examples in LIBSVM format, default: ${defaultParams.input}")
+        .action((x, c) => c.copy(input = x))
+      note(
+        """
+        |For example, the following command runs this app on a synthetic dataset:
+        |
+        | bin/spark-submit --class org.apache.spark.examples.mllib.MultivariateSummarizer \
+        |  examples/target/scala-*/spark-examples-*.jar \
+        |  --input data/mllib/sample_linear_regression_data.txt
+        """.stripMargin)
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"MultivariateSummarizer with $params")
+    val sc = new SparkContext(conf)
+
+    val examples = MLUtils.loadLibSVMFile(sc, params.input).cache()
+
+    println(s"Summary of data file: ${params.input}")
+    println(s"${examples.count()} data points")
+
+    // Summarize labels
+    val labelSummary = examples.aggregate(new MultivariateOnlineSummarizer())(
+      (summary, lp) => summary.add(Vectors.dense(lp.label)),
+      (sum1, sum2) => sum1.merge(sum2))
+
+    // Summarize features
+    val featureSummary = examples.aggregate(new MultivariateOnlineSummarizer())(
+      (summary, lp) => summary.add(lp.features),
+      (sum1, sum2) => sum1.merge(sum2))
+
+    println()
+    println(s"Summary statistics")
+    println(s"\tLabel\tFeatures")
+    println(s"mean\t${labelSummary.mean(0)}\t${featureSummary.mean.toArray.mkString("\t")}")
+    println(s"var\t${labelSummary.variance(0)}\t${featureSummary.variance.toArray.mkString("\t")}")
+    println(
+      s"nnz\t${labelSummary.numNonzeros(0)}\t${featureSummary.numNonzeros.toArray.mkString("\t")}")
+    println(s"max\t${labelSummary.max(0)}\t${featureSummary.max.toArray.mkString("\t")}")
+    println(s"min\t${labelSummary.min(0)}\t${featureSummary.min.toArray.mkString("\t")}")
+    println()
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..24c8e3445e5330d063219ea677815db03d52b30e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/NaiveBayesExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object NaiveBayesExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("NaiveBayesExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    // Load and parse the data file.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    // Split data into training (60%) and test (40%).
+    val Array(training, test) = data.randomSplit(Array(0.6, 0.4))
+
+    val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")
+
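+    // Accuracy is the fraction of test points whose predicted label matches the true label.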
+    val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
+    val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()
+
+    // Save and load model
+    model.save(sc, "target/tmp/myNaiveBayesModel")
+    val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/NormalizerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/NormalizerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b3a9604c2be3e29f1cb57ca7f0d56c8af5a11ba9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/NormalizerExample.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.Normalizer
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object NormalizerExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("NormalizerExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    val normalizer1 = new Normalizer()
+    val normalizer2 = new Normalizer(p = Double.PositiveInfinity)
+
+    // Each sample in data1 will be normalized using $L^2$ norm.
+    val data1 = data.map(x => (x.label, normalizer1.transform(x.features)))
+
+    // Each sample in data2 will be normalized using $L^\infty$ norm.
+    val data2 = data.map(x => (x.label, normalizer2.transform(x.features)))
+    // $example off$
+
+    println("data1: ")
+    data1.foreach(x => println(x))
+
+    println("data2: ")
+    data2.foreach(x => println(x))
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..eff2393cc3abea45c3e46d3a2077001eda9188c1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.PCA
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD}
+// $example off$
+
+@deprecated("Deprecated since LinearRegressionWithSGD is deprecated.  Use ml.feature.PCA", "2.0.0")
+object PCAExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("PCAExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line =>
+      val parts = line.split(',')
+      LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
+    }.cache()
+
+    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    val training = splits(0).cache()
+    val test = splits(1)
+
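+    // Fit PCA keeping half as many components as the original feature dimension.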
+    val pca = new PCA(training.first().features.size / 2).fit(data.map(_.features))
+    val training_pca = training.map(p => p.copy(features = pca.transform(p.features)))
+    val test_pca = test.map(p => p.copy(features = pca.transform(p.features)))
+
+    val numIterations = 100
+    val model = LinearRegressionWithSGD.train(training, numIterations)
+    val model_pca = LinearRegressionWithSGD.train(training_pca, numIterations)
+
+    val valuesAndPreds = test.map { point =>
+      val score = model.predict(point.features)
+      (score, point.label)
+    }
+
+    val valuesAndPreds_pca = test_pca.map { point =>
+      val score = model_pca.predict(point.features)
+      (score, point.label)
+    }
+
+    val MSE = valuesAndPreds.map { case (v, p) => math.pow((v - p), 2) }.mean()
+    val MSE_pca = valuesAndPreds_pca.map { case (v, p) => math.pow((v - p), 2) }.mean()
+
+    println(s"Mean Squared Error = $MSE")
+    println(s"PCA Mean Squared Error = $MSE_pca")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAOnRowMatrixExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAOnRowMatrixExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..da43a8d9c7e80fa6d13be8e2ed748a4b70f293b4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAOnRowMatrixExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.linalg.Matrix
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.linalg.distributed.RowMatrix
+// $example off$
+
+object PCAOnRowMatrixExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("PCAOnRowMatrixExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data = Array(
+      Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
+      Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+      Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
+
+    val rows = sc.parallelize(data)
+
+    val mat: RowMatrix = new RowMatrix(rows)
+
+    // Compute the top 4 principal components.
+    // Principal components are stored in a local dense matrix.
+    val pc: Matrix = mat.computePrincipalComponents(4)
+
+    // Project the rows to the linear space spanned by the top 4 principal components.
+    val projected: RowMatrix = mat.multiply(pc)
+    // $example off$
+    val collect = projected.rows.collect()
+    println("Projected Row Matrix of principal component:")
+    collect.foreach { vector => println(vector) }
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAOnSourceVectorExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAOnSourceVectorExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..cef5402581f5ea47244d2fc0cf9f600f6e6b5cb2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PCAOnSourceVectorExample.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.PCA
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object PCAOnSourceVectorExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("PCAOnSourceVectorExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data: RDD[LabeledPoint] = sc.parallelize(Seq(
+      new LabeledPoint(0, Vectors.dense(1, 0, 0, 0, 1)),
+      new LabeledPoint(1, Vectors.dense(1, 1, 0, 1, 0)),
+      new LabeledPoint(1, Vectors.dense(1, 1, 0, 0, 0)),
+      new LabeledPoint(0, Vectors.dense(1, 0, 0, 0, 0)),
+      new LabeledPoint(1, Vectors.dense(1, 1, 0, 0, 0))))
+
+    // Compute the top 5 principal components.
+    val pca = new PCA(5).fit(data.map(_.features))
+
+    // Project vectors to the linear space spanned by the top 5 principal
+    // components, keeping the label
+    val projected = data.map(p => p.copy(features = pca.transform(p.features)))
+    // $example off$
+    val collect = projected.collect()
+    println("Projected vector of principal component:")
+    collect.foreach { vector => println(vector) }
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PMMLModelExportExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PMMLModelExportExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..96deafd469bc79c97a8dc1ec2be9d0ceb97c7fec
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PMMLModelExportExample.scala
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.clustering.KMeans
+import org.apache.spark.mllib.linalg.Vectors
+// $example off$
+
+object PMMLModelExportExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("PMMLModelExportExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load and parse the data
+    val data = sc.textFile("data/mllib/kmeans_data.txt")
+    val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache()
+
+    // Cluster the data into two classes using KMeans
+    val numClusters = 2
+    val numIterations = 20
+    val clusters = KMeans.train(parsedData, numClusters, numIterations)
+
+    // Export to PMML to a String in PMML format
+    println(s"PMML Model:\n ${clusters.toPMML}")
+
+    // Export the model to a local file in PMML format
+    clusters.toPMML("/tmp/kmeans.xml")
+
+    // Export the model to a directory on a distributed file system in PMML format
+    clusters.toPMML(sc, "/tmp/kmeans")
+
+    // Export the model to the OutputStream in PMML format
+    clusters.toPMML(System.out)
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..65603252c43841391db4ab60a85cb3af36c5ba95
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.log4j.{Level, Logger}
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.clustering.PowerIterationClustering
+// $example off$
+import org.apache.spark.rdd.RDD
+
+/**
+ * An example Power Iteration Clustering app.
+ * http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf
+ * Takes an input of K concentric circles and the number of points in the innermost circle.
+ * The output should be K clusters, each containing exactly the points sampled from one of
+ * the input circles.
+ *
+ * Run with
+ * {{{
+ * ./bin/run-example mllib.PowerIterationClusteringExample [options]
+ *
+ * Where options include:
+ *   k:  Number of circles/clusters
+ *   n:  Number of sampled points on the innermost circle. There are proportionally more points
+ *      within the outer/larger circles
+ *   maxIterations:   Number of Power Iterations
+ * }}}
+ *
+ * Here is a sample run and output:
+ *
+ * ./bin/run-example mllib.PowerIterationClusteringExample -k 2 --n 10 --maxIterations 15
+ *
+ * Cluster assignments: 1 -> [0,1,2,3,4,5,6,7,8,9],
+ *   0 -> [10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
+ *
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object PowerIterationClusteringExample {
+
+  case class Params(
+      k: Int = 2,
+      numPoints: Int = 10,
+      maxIterations: Int = 15
+    ) extends AbstractParams[Params]
+
+  def main(args: Array[String]) {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("PowerIterationClusteringExample") {
+      head("PowerIterationClusteringExample: an example PIC app using concentric circles.")
+      opt[Int]('k', "k")
+        .text(s"number of circles (clusters), default: ${defaultParams.k}")
+        .action((x, c) => c.copy(k = x))
+      opt[Int]('n', "n")
+        .text(s"number of points in smallest circle, default: ${defaultParams.numPoints}")
+        .action((x, c) => c.copy(numPoints = x))
+      opt[Int]("maxIterations")
+        .text(s"number of iterations, default: ${defaultParams.maxIterations}")
+        .action((x, c) => c.copy(maxIterations = x))
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf()
+      .setMaster("local")
+      .setAppName(s"PowerIterationClustering with $params")
+    val sc = new SparkContext(conf)
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    // $example on$
+    val circlesRdd = generateCirclesRdd(sc, params.k, params.numPoints)
+    val model = new PowerIterationClustering()
+      .setK(params.k)
+      .setMaxIterations(params.maxIterations)
+      .setInitializationMode("degree")
+      .run(circlesRdd)
+
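+    // Group point ids by assigned cluster and report each cluster's membership and size.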
+    val clusters = model.assignments.collect().groupBy(_.cluster).mapValues(_.map(_.id))
+    val assignments = clusters.toList.sortBy { case (k, v) => v.length }
+    val assignmentsStr = assignments
+      .map { case (k, v) =>
+        s"$k -> ${v.sorted.mkString("[", ",", "]")}"
+      }.mkString(", ")
+    val sizesStr = assignments.map {
+      _._2.length
+    }.sorted.mkString("(", ",", ")")
+    println(s"Cluster assignments: $assignmentsStr\ncluster sizes: $sizesStr")
+    // $example off$
+
+    sc.stop()
+  }
+
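+  /** Samples `n` points evenly spaced on a circle of the given radius, centered at the origin. */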
+  def generateCircle(radius: Double, n: Int): Seq[(Double, Double)] = {
+    Seq.tabulate(n) { i =>
+      val theta = 2.0 * math.Pi * i / n
+      (radius * math.cos(theta), radius * math.sin(theta))
+    }
+  }
+
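+  /**
+   * Generates the pairwise affinity graph for points sampled on `nCircles` concentric circles:
+   * one (srcId, dstId, similarity) tuple per unordered pair of points.
+   */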
+  def generateCirclesRdd(
+      sc: SparkContext,
+      nCircles: Int,
+      nPoints: Int): RDD[(Long, Long, Double)] = {
+    val points = (1 to nCircles).flatMap { i =>
+      generateCircle(i, i * nPoints)
+    }.zipWithIndex
+    val rdd = sc.parallelize(points)
+    val distancesRdd = rdd.cartesian(rdd).flatMap { case (((x0, y0), i0), ((x1, y1), i1)) =>
+      if (i0 < i1) {
+        Some((i0.toLong, i1.toLong, gaussianSimilarity((x0, y0), (x1, y1))))
+      } else {
+        None
+      }
+    }
+    distancesRdd
+  }
+
+  /**
+   * Gaussian (RBF) similarity, exp(-||p1 - p2||^2 / 2):
+   * http://en.wikipedia.org/wiki/Radial_basis_function_kernel
+   */
+  def gaussianSimilarity(p1: (Double, Double), p2: (Double, Double)): Double = {
+    val ssquares = (p1._1 - p2._1) * (p1._1 - p2._1) + (p1._2 - p2._2) * (p1._2 - p2._2)
+    math.exp(-ssquares / 2.0)
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PrefixSpanExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PrefixSpanExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8b789277774af6b09a71d8371c4ceb46bcc888cb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/PrefixSpanExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.fpm.PrefixSpan
+// $example off$
+
+object PrefixSpanExample {
+
+  def main(args: Array[String]) {
+    val conf = new SparkConf().setAppName("PrefixSpanExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
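+    // A sequence database: each sequence is an Array of itemsets, and each itemset an Array[Int].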
+    val sequences = sc.parallelize(Seq(
+      Array(Array(1, 2), Array(3)),
+      Array(Array(1), Array(3, 2), Array(1, 2)),
+      Array(Array(1, 2), Array(5)),
+      Array(Array(6))
+    ), 2).cache()
+    val prefixSpan = new PrefixSpan()
+      .setMinSupport(0.5)
+      .setMaxPatternLength(5)
+    val model = prefixSpan.run(sequences)
+    model.freqSequences.collect().foreach { freqSequence =>
+      println(
+        s"${freqSequence.sequence.map(_.mkString("[", ", ", "]")).mkString("[", ", ", "]")}," +
+          s" ${freqSequence.freq}")
+    }
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomForestClassificationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomForestClassificationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..246e71de25615cb55aadc86bc9468b250a10f277
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomForestClassificationExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.tree.RandomForest
+import org.apache.spark.mllib.tree.model.RandomForestModel
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object RandomForestClassificationExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("RandomForestClassificationExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    // Load and parse the data file.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // Split the data into training and test sets (30% held out for testing)
+    val splits = data.randomSplit(Array(0.7, 0.3))
+    val (trainingData, testData) = (splits(0), splits(1))
+
+    // Train a RandomForest model.
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    val numClasses = 2
+    val categoricalFeaturesInfo = Map[Int, Int]()
+    val numTrees = 3 // Use more in practice.
+    val featureSubsetStrategy = "auto" // Let the algorithm choose.
+    val impurity = "gini"
+    val maxDepth = 4
+    val maxBins = 32
+
+    val model = RandomForest.trainClassifier(trainingData, numClasses, categoricalFeaturesInfo,
+      numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins)
+
+    // Evaluate model on test instances and compute test error
+    val labelAndPreds = testData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val testErr = labelAndPreds.filter(r => r._1 != r._2).count.toDouble / testData.count()
+    println(s"Test Error = $testErr")
+    println(s"Learned classification forest model:\n ${model.toDebugString}")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myRandomForestClassificationModel")
+    val sameModel = RandomForestModel.load(sc, "target/tmp/myRandomForestClassificationModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomForestRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomForestRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..770e30276bc309a0d4a46d39e6be17d3d495e82b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomForestRegressionExample.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.tree.RandomForest
+import org.apache.spark.mllib.tree.model.RandomForestModel
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object RandomForestRegressionExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("RandomForestRegressionExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    // Load and parse the data file.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+    // Split the data into training and test sets (30% held out for testing)
+    val splits = data.randomSplit(Array(0.7, 0.3))
+    val (trainingData, testData) = (splits(0), splits(1))
+
+    // Train a RandomForest model.
+    // Empty categoricalFeaturesInfo indicates all features are continuous.
+    val numClasses = 2
+    val categoricalFeaturesInfo = Map[Int, Int]()
+    val numTrees = 3 // Use more in practice.
+    val featureSubsetStrategy = "auto" // Let the algorithm choose.
+    val impurity = "variance"
+    val maxDepth = 4
+    val maxBins = 32
+
+    val model = RandomForest.trainRegressor(trainingData, categoricalFeaturesInfo,
+      numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins)
+
+    // Evaluate model on test instances and compute test error
+    val labelsAndPredictions = testData.map { point =>
+      val prediction = model.predict(point.features)
+      (point.label, prediction)
+    }
+    val testMSE = labelsAndPredictions.map { case (v, p) => math.pow(v - p, 2) }.mean()
+    println(s"Test Mean Squared Error = $testMSE")
+    println(s"Learned regression forest model:\n ${model.toDebugString}")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myRandomForestRegressionModel")
+    val sameModel = RandomForestModel.load(sc, "target/tmp/myRandomForestRegressionModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomRDDGeneration.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomRDDGeneration.scala
new file mode 100644
index 0000000000000000000000000000000000000000..7ccbb5a0640cd6c5ec6e42c9a43fd4506aebf08b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RandomRDDGeneration.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.random.RandomRDDs
+import org.apache.spark.rdd.RDD
+
+/**
+ * An example app for randomly generated RDDs. Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.RandomRDDGeneration
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object RandomRDDGeneration {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("RandomRDDGeneration")
+    val sc = new SparkContext(conf)
+
+    val numExamples = 10000 // number of examples to generate
+    val fraction = 0.1 // fraction of data to sample
+
+    // Example: RandomRDDs.normalRDD
+    val normalRDD: RDD[Double] = RandomRDDs.normalRDD(sc, numExamples)
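+    // The values follow N(0, 1); to sample from N(mean, sigma^2), shift and scale the output,
+    // e.g. normalRDD.map(x => mean + sigma * x).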
+    println(s"Generated RDD of ${normalRDD.count()}" +
+      " examples sampled from the standard normal distribution")
+    println("  First 5 samples:")
+    normalRDD.take(5).foreach(x => println(s"    $x"))
+
+    // Example: RandomRDDs.normalVectorRDD
+    val normalVectorRDD = RandomRDDs.normalVectorRDD(sc, numRows = numExamples, numCols = 2)
+    println(s"Generated RDD of ${normalVectorRDD.count()} examples of length-2 vectors.")
+    println("  First 5 samples:")
+    normalVectorRDD.take(5).foreach(x => println(s"    $x"))
+
+    println()
+
+    sc.stop()
+  }
+
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..d514891da78fcd5627eee0dc1e3d287c122b8a6f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+// $example on$
+import org.apache.spark.mllib.evaluation.{RankingMetrics, RegressionMetrics}
+import org.apache.spark.mllib.recommendation.{ALS, Rating}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+object RankingMetricsExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("RankingMetricsExample")
+      .getOrCreate()
+    import spark.implicits._
+    // $example on$
+    // Read in the ratings data
+    val ratings = spark.read.textFile("data/mllib/sample_movielens_data.txt").rdd.map { line =>
+      val fields = line.split("::")
+      Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble - 2.5)
+    }.cache()
+
+    // Map ratings to 1 or 0, 1 indicating a movie that should be recommended
+    val binarizedRatings = ratings.map(r => Rating(r.user, r.product,
+      if (r.rating > 0) 1.0 else 0.0)).cache()
+
+    // Summarize ratings
+    val numRatings = ratings.count()
+    val numUsers = ratings.map(_.user).distinct().count()
+    val numMovies = ratings.map(_.product).distinct().count()
+    println(s"Got $numRatings ratings from $numUsers users on $numMovies movies.")
+
+    // Build the model
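+    // rank is the number of latent factors, numIterations the number of ALS sweeps, and
+    // lambda the regularization parameter.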
+    val numIterations = 10
+    val rank = 10
+    val lambda = 0.01
+    val model = ALS.train(ratings, rank, numIterations, lambda)
+
+    // Define a function to clamp predicted ratings to the range [0, 1]
+    def scaledRating(r: Rating): Rating = {
+      val scaledRating = math.max(math.min(r.rating, 1.0), 0.0)
+      Rating(r.user, r.product, scaledRating)
+    }
+
+    // Get the sorted top-ten predictions for each user, then clamp the ratings to [0, 1]
+    val userRecommended = model.recommendProductsForUsers(10).map { case (user, recs) =>
+      (user, recs.map(scaledRating))
+    }
+
+    // Assume that any movie a user rated 3 or higher (which maps to a 1) is a relevant document
+    // Compare with top ten most relevant documents
+    val userMovies = binarizedRatings.groupBy(_.user)
+    val relevantDocuments = userMovies.join(userRecommended).map { case (user, (actual,
+    predictions)) =>
+      (predictions.map(_.product), actual.filter(_.rating > 0.0).map(_.product).toArray)
+    }
+
+    // Instantiate metrics object
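+    // RankingMetrics expects an RDD of (predicted ranking, ground-truth set) pairs, here
+    // (recommended product ids, relevant product ids) per user.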
+    val metrics = new RankingMetrics(relevantDocuments)
+
+    // Precision at K
+    Array(1, 3, 5).foreach { k =>
+      println(s"Precision at $k = ${metrics.precisionAt(k)}")
+    }
+
+    // Mean average precision
+    println(s"Mean average precision = ${metrics.meanAveragePrecision}")
+
+    // Normalized discounted cumulative gain
+    Array(1, 3, 5).foreach { k =>
+      println(s"NDCG at $k = ${metrics.ndcgAt(k)}")
+    }
+
+    // Get predictions for each data point
+    val allPredictions = model.predict(ratings.map(r => (r.user, r.product))).map(r => ((r.user,
+      r.product), r.rating))
+    val allRatings = ratings.map(r => ((r.user, r.product), r.rating))
+    val predictionsAndLabels = allPredictions.join(allRatings).map { case ((user, product),
+    (predicted, actual)) =>
+      (predicted, actual)
+    }
+
+    // Get the RMSE using regression metrics
+    val regressionMetrics = new RegressionMetrics(predictionsAndLabels)
+    println(s"RMSE = ${regressionMetrics.rootMeanSquaredError}")
+
+    // R-squared
+    println(s"R-squared = ${regressionMetrics.r2}")
+    // $example off$
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RecommendationExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RecommendationExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..0bb2b8c8c2b4340a40ac48574c16fb9d59988e2a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RecommendationExample.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.recommendation.ALS
+import org.apache.spark.mllib.recommendation.MatrixFactorizationModel
+import org.apache.spark.mllib.recommendation.Rating
+// $example off$
+
+object RecommendationExample {
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("CollaborativeFilteringExample")
+    val sc = new SparkContext(conf)
+    // $example on$
+    // Load and parse the data
+    val data = sc.textFile("data/mllib/als/test.data")
+    val ratings = data.map(_.split(',') match { case Array(user, item, rate) =>
+      Rating(user.toInt, item.toInt, rate.toDouble)
+    })
+
+    // Build the recommendation model using ALS
+    val rank = 10
+    val numIterations = 10
+    val model = ALS.train(ratings, rank, numIterations, 0.01)
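+    // The last argument (0.01) is the regularization parameter lambda.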
+
+    // Evaluate the model on rating data
+    val usersProducts = ratings.map { case Rating(user, product, rate) =>
+      (user, product)
+    }
+    val predictions =
+      model.predict(usersProducts).map { case Rating(user, product, rate) =>
+        ((user, product), rate)
+      }
+    val ratesAndPreds = ratings.map { case Rating(user, product, rate) =>
+      ((user, product), rate)
+    }.join(predictions)
+    val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) =>
+      val err = (r1 - r2)
+      err * err
+    }.mean()
+    println(s"Mean Squared Error = $MSE")
+
+    // Save and load model
+    model.save(sc, "target/tmp/myCollaborativeFilter")
+    val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..76cfb804e18f3ac608b291b819b663b07ae24968
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// scalastyle:off println
+
+package org.apache.spark.examples.mllib
+
+// $example on$
+import org.apache.spark.mllib.evaluation.RegressionMetrics
+import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD}
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+@deprecated("Use ml.regression.LinearRegression and the resulting model summary for metrics",
+  "2.0.0")
+object RegressionMetricsExample {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder
+      .appName("RegressionMetricsExample")
+      .getOrCreate()
+    // $example on$
+    // Load the data
+    val data = spark
+      .read.format("libsvm").load("data/mllib/sample_linear_regression_data.txt")
+      .rdd.map(row => LabeledPoint(row.getDouble(0), row.get(1).asInstanceOf[Vector]))
+      .cache()
+
+    // Build the model
+    val numIterations = 100
+    val model = LinearRegressionWithSGD.train(data, numIterations)
+
+    // Get predictions
+    val valuesAndPreds = data.map { point =>
+      val prediction = model.predict(point.features)
+      (prediction, point.label)
+    }
+
+    // Instantiate metrics object
+    val metrics = new RegressionMetrics(valuesAndPreds)
+
+    // Squared error
+    println(s"MSE = ${metrics.meanSquaredError}")
+    println(s"RMSE = ${metrics.rootMeanSquaredError}")
+
+    // R-squared
+    println(s"R-squared = ${metrics.r2}")
+
+    // Mean absolute error
+    println(s"MAE = ${metrics.meanAbsoluteError}")
+
+    // Explained variance
+    println(s"Explained variance = ${metrics.explainedVariance}")
+    // $example off$
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SVDExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SVDExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..769ae2a3a88b1ec2f1786d5b3c016c0da1a667e3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SVDExample.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.linalg.Matrix
+import org.apache.spark.mllib.linalg.SingularValueDecomposition
+import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.linalg.distributed.RowMatrix
+// $example off$
+
+/**
+ * Example for SingularValueDecomposition.
+ */
+object SVDExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("SVDExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data = Array(
+      Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
+      Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+      Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0))
+
+    val rows = sc.parallelize(data)
+
+    val mat: RowMatrix = new RowMatrix(rows)
+
+    // Compute the top 5 singular values and corresponding singular vectors.
+    val svd: SingularValueDecomposition[RowMatrix, Matrix] = mat.computeSVD(5, computeU = true)
+    val U: RowMatrix = svd.U  // The U factor is a RowMatrix.
+    val s: Vector = svd.s     // The singular values are stored in a local dense vector.
+    val V: Matrix = svd.V     // The V factor is a local dense matrix.
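+    // The factors satisfy mat ~= U * diag(s) * V.transpose, with the singular values
+    // returned in descending order.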
+    // $example off$
+    val collect = U.rows.collect()
+    println("U factor is:")
+    collect.foreach { vector => println(vector) }
+    println(s"Singular values are: $s")
+    println(s"V factor is:\n$V")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..285e2ce512639d87938a79f6feffbe375aeb0801
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SVMWithSGDExample.scala
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
+import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object SVMWithSGDExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("SVMWithSGDExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load training data in LIBSVM format.
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    // Split data into training (60%) and test (40%).
+    val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L)
+    val training = splits(0).cache()
+    val test = splits(1)
+
+    // Run training algorithm to build the model
+    val numIterations = 100
+    val model = SVMWithSGD.train(training, numIterations)
+
+    // Clear the default threshold.
+    model.clearThreshold()
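+    // With the threshold cleared, predict() returns raw margin scores instead of 0/1 labels,
+    // which is what BinaryClassificationMetrics needs to compute the ROC curve.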
+
+    // Compute raw scores on the test set.
+    val scoreAndLabels = test.map { point =>
+      val score = model.predict(point.features)
+      (score, point.label)
+    }
+
+    // Get evaluation metrics.
+    val metrics = new BinaryClassificationMetrics(scoreAndLabels)
+    val auROC = metrics.areaUnderROC()
+
+    println(s"Area under ROC = $auROC")
+
+    // Save and load model
+    model.save(sc, "target/tmp/scalaSVMWithSGDModel")
+    val sameModel = SVMModel.load(sc, "target/tmp/scalaSVMWithSGDModel")
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ba3deae5d688fc192c56e3cb60c9dbef6c13004b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.util.MLUtils
+
+/**
+ * An example app for randomly generated and sampled RDDs. Run with
+ * {{{
+ * bin/run-example org.apache.spark.examples.mllib.SampledRDDs
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object SampledRDDs {
+
+  case class Params(input: String = "data/mllib/sample_binary_classification_data.txt")
+    extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("SampledRDDs") {
+      head("SampledRDDs: an example app for randomly generated and sampled RDDs.")
+      opt[String]("input")
+        .text(s"Input path to labeled examples in LIBSVM format, default: ${defaultParams.input}")
+        .action((x, c) => c.copy(input = x))
+      note(
+        """
+        |For example, the following command runs this app:
+        |
+        | bin/spark-submit --class org.apache.spark.examples.mllib.SampledRDDs \
+        |  examples/target/scala-*/spark-examples-*.jar
+        """.stripMargin)
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"SampledRDDs with $params")
+    val sc = new SparkContext(conf)
+
+    val fraction = 0.1 // fraction of data to sample
+
+    val examples = MLUtils.loadLibSVMFile(sc, params.input)
+    val numExamples = examples.count()
+    if (numExamples == 0) {
+      throw new RuntimeException("Error: Data file had no samples to load.")
+    }
+    println(s"Loaded data with $numExamples examples from file: ${params.input}")
+
+    // Example: RDD.sample() and RDD.takeSample()
+    val expectedSampleSize = (numExamples * fraction).toInt
+    println(s"Sampling RDD using fraction $fraction.  Expected sample size = $expectedSampleSize.")
+    val sampledRDD = examples.sample(withReplacement = true, fraction = fraction)
+    println(s"  RDD.sample(): sample has ${sampledRDD.count()} examples")
+    val sampledArray = examples.takeSample(withReplacement = true, num = expectedSampleSize)
+    println(s"  RDD.takeSample(): sample has ${sampledArray.length} examples")
+
+    println()
+
+    // Example: RDD.sampleByKey() and RDD.sampleByKeyExact()
+    val keyedRDD = examples.map { lp => (lp.label.toInt, lp.features) }
+    println(s"  Keyed data using label (Int) as key ==> Orig")
+    //  Count examples per label in original data.
+    val keyCounts = keyedRDD.countByKey()
+
+    //  Subsample, and count examples per label in sampled data. (approximate)
+    val fractions = keyCounts.keys.map((_, fraction)).toMap
+    val sampledByKeyRDD = keyedRDD.sampleByKey(withReplacement = true, fractions = fractions)
+    val keyCountsB = sampledByKeyRDD.countByKey()
+    val sizeB = keyCountsB.values.sum
+    println(s"  Sampled $sizeB examples using approximate stratified sampling (by label)." +
+      " ==> Approx Sample")
+
+    //  Subsample, and count examples per label in sampled data. (exact)
+    val sampledByKeyRDDExact =
+      keyedRDD.sampleByKeyExact(withReplacement = true, fractions = fractions)
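+    //  sampleByKeyExact makes additional passes over the data so that the per-stratum sample
+    //  sizes closely match the expected counts; sampleByKey is a cheaper one-pass approximation.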
+    val keyCountsBExact = sampledByKeyRDDExact.countByKey()
+    val sizeBExact = keyCountsBExact.values.sum
+    println(s"  Sampled $sizeBExact examples using exact stratified sampling (by label)." +
+      " ==> Exact Sample")
+
+    //  Compare samples
+    println(s"   \tFractions of examples with key")
+    println(s"Key\tOrig\tApprox Sample\tExact Sample")
+    keyCounts.keys.toSeq.sorted.foreach { key =>
+      val origFrac = keyCounts(key) / numExamples.toDouble
+      val approxFrac = if (sizeB != 0) {
+        keyCountsB.getOrElse(key, 0L) / sizeB.toDouble
+      } else {
+        0.0
+      }
+      val exactFrac = if (sizeBExact != 0) {
+        keyCountsBExact.getOrElse(key, 0L) / sizeBExact.toDouble
+      } else {
+        0.0
+      }
+      println(s"$key\t$origFrac\t$approxFrac\t$exactFrac")
+    }
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SimpleFPGrowth.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SimpleFPGrowth.scala
new file mode 100644
index 0000000000000000000000000000000000000000..694c3bb18b0459b6e35adc53d3fed55390dfdeb2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SimpleFPGrowth.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.fpm.FPGrowth
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object SimpleFPGrowth {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("SimpleFPGrowth")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data = sc.textFile("data/mllib/sample_fpgrowth.txt")
+
+    val transactions: RDD[Array[String]] = data.map(s => s.trim.split(' '))
+
+    val fpg = new FPGrowth()
+      .setMinSupport(0.2)
+      .setNumPartitions(10)
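+    // minSupport = 0.2 keeps itemsets that appear in at least 20% of the transactions;
+    // numPartitions controls the parallelism of the FP-growth computation.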
+    val model = fpg.run(transactions)
+
+    model.freqItemsets.collect().foreach { itemset =>
+      println(s"${itemset.items.mkString("[", ",", "]")},${itemset.freq}")
+    }
+
+    val minConfidence = 0.8
+    model.generateAssociationRules(minConfidence).collect().foreach { rule =>
+      println(s"${rule.antecedent.mkString("[", ",", "]")}=> " +
+        s"${rule.consequent .mkString("[", ",", "]")},${rule.confidence}")
+    }
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b76add2f9bc991090dc30b38593e8927fbb8415c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.log4j.{Level, Logger}
+import scopt.OptionParser
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.classification.NaiveBayes
+import org.apache.spark.mllib.util.MLUtils
+
+/**
+ * An example naive Bayes app. Run with
+ * {{{
+ * ./bin/run-example org.apache.spark.examples.mllib.SparseNaiveBayes [options] <input>
+ * }}}
+ * If you use it as a template to create your own app, please use `spark-submit` to submit your app.
+ */
+object SparseNaiveBayes {
+
+  case class Params(
+      input: String = null,
+      minPartitions: Int = 0,
+      numFeatures: Int = -1,
+      lambda: Double = 1.0) extends AbstractParams[Params]
+
+  def main(args: Array[String]): Unit = {
+    val defaultParams = Params()
+
+    val parser = new OptionParser[Params]("SparseNaiveBayes") {
+      head("SparseNaiveBayes: an example naive Bayes app for LIBSVM data.")
+      opt[Int]("numPartitions")
+        .text("min number of partitions")
+        .action((x, c) => c.copy(minPartitions = x))
+      opt[Int]("numFeatures")
+        .text("number of features")
+        .action((x, c) => c.copy(numFeatures = x))
+      opt[Double]("lambda")
+        .text(s"lambda (smoothing constant), default: ${defaultParams.lambda}")
+        .action((x, c) => c.copy(lambda = x))
+      arg[String]("<input>")
+        .text("input paths to labeled examples in LIBSVM format")
+        .required()
+        .action((x, c) => c.copy(input = x))
+    }
+
+    parser.parse(args, defaultParams) match {
+      case Some(params) => run(params)
+      case _ => sys.exit(1)
+    }
+  }
+
+  def run(params: Params): Unit = {
+    val conf = new SparkConf().setAppName(s"SparseNaiveBayes with $params")
+    val sc = new SparkContext(conf)
+
+    Logger.getRootLogger.setLevel(Level.WARN)
+
+    val minPartitions =
+      if (params.minPartitions > 0) params.minPartitions else sc.defaultMinPartitions
+
+    val examples =
+      MLUtils.loadLibSVMFile(sc, params.input, params.numFeatures, minPartitions)
+    // Cache examples because it will be used in both training and evaluation.
+    examples.cache()
+
+    val splits = examples.randomSplit(Array(0.8, 0.2))
+    val training = splits(0)
+    val test = splits(1)
+
+    val numTraining = training.count()
+    val numTest = test.count()
+
+    println(s"numTraining = $numTraining, numTest = $numTest.")
+
+    val model = new NaiveBayes().setLambda(params.lambda).run(training)
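+    // lambda is the additive (Laplace) smoothing parameter; the default model type
+    // is multinomial.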
+
+    val prediction = model.predict(test.map(_.features))
+    val predictionAndLabel = prediction.zip(test.map(_.label))
+    val accuracy = predictionAndLabel.filter(x => x._1 == x._2).count().toDouble / numTest
+
+    println(s"Test accuracy = $accuracy.")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..769fc17b3dc655266b90b7d15e960b15330735a5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StandardScalerExample.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.util.MLUtils
+// $example off$
+
+object StandardScalerExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("StandardScalerExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
+
+    val scaler1 = new StandardScaler().fit(data.map(x => x.features))
+    val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(data.map(x => x.features))
+    // scaler3 is an identical model to scaler2, and will produce identical transformations
+    val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)
+
+    // data1 will be unit variance.
+    val data1 = data.map(x => (x.label, scaler1.transform(x.features)))
+
+    // data2 will be unit variance and zero mean.
+    val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
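+    // Centering (withMean = true) cannot preserve sparsity, which is presumably why the example
+    // converts each feature vector to a dense vector before transforming with scaler2.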
+    // $example off$
+
+    println("data1: ")
+    data1.foreach(x => println(x))
+
+    println("data2: ")
+    data2.foreach(x => println(x))
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3d41bef0af88c2d96f8b80403fbb06ff7c852c78
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StratifiedSamplingExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+object StratifiedSamplingExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("StratifiedSamplingExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // an RDD[(K, V)] of any key-value pairs
+    val data = sc.parallelize(
+      Seq((1, 'a'), (1, 'b'), (2, 'c'), (2, 'd'), (2, 'e'), (3, 'f')))
+
+    // specify the exact fraction desired from each key
+    val fractions = Map(1 -> 0.1, 2 -> 0.6, 3 -> 0.3)
+
+    // Get an approximate sample from each stratum
+    val approxSample = data.sampleByKey(withReplacement = false, fractions = fractions)
+    // Get an exact sample from each stratum
+    val exactSample = data.sampleByKeyExact(withReplacement = false, fractions = fractions)
+    // $example off$
+
+    println(s"approxSample size is ${approxSample.collect().size}")
+    approxSample.collect().foreach(println)
+
+    println(s"exactSample its size is ${exactSample.collect().size}")
+    exactSample.collect().foreach(println)
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..7888af79f87f4124f205603dc77c5ec77c480eb5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+// $example on$
+import org.apache.spark.mllib.clustering.StreamingKMeans
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+// $example off$
+
+/**
+ * Estimate clusters on one stream of data and make predictions
+ * on another stream, where the data streams arrive as text files
+ * into two different directories.
+ *
+ * The rows of the training text files must be vector data in the form
+ * `[x1,x2,x3,...,xn]`
+ * Where n is the number of dimensions.
+ *
+ * The rows of the test text files must be labeled data in the form
+ * `(y,[x1,x2,x3,...,xn])`
+ * Where y is some identifier. n must be the same for train and test.
+ *
+ * Usage:
+ *   StreamingKMeansExample <trainingDir> <testDir> <batchDuration> <numClusters> <numDimensions>
+ *
+ * To run on your local machine using the two directories `trainingDir` and `testDir`,
+ * with updates every 5 seconds, 2 dimensions per data point, and 3 clusters, call:
+ *    $ bin/run-example mllib.StreamingKMeansExample trainingDir testDir 5 3 2
+ *
+ * As you add text files to `trainingDir` the clusters will continuously update.
+ * Anytime you add text files to `testDir`, you'll see predicted labels using the current model.
+ *
+ */
+object StreamingKMeansExample {
+
+  def main(args: Array[String]): Unit = {
+    if (args.length != 5) {
+      System.err.println(
+        "Usage: StreamingKMeansExample " +
+          "<trainingDir> <testDir> <batchDuration> <numClusters> <numDimensions>")
+      System.exit(1)
+    }
+
+    // $example on$
+    val conf = new SparkConf().setAppName("StreamingKMeansExample")
+    val ssc = new StreamingContext(conf, Seconds(args(2).toLong))
+
+    val trainingData = ssc.textFileStream(args(0)).map(Vectors.parse)
+    val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse)
+
+    val model = new StreamingKMeans()
+      .setK(args(3).toInt)
+      .setDecayFactor(1.0)
+      .setRandomCenters(args(4).toInt, 0.0)
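+    // A decay factor of 1.0 weights all batches equally (no forgetting), and
+    // setRandomCenters(dim, weight) initializes k random centers of the given
+    // dimension, each with initial weight 0.0.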
+
+    model.trainOn(trainingData)
+    model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print()
+
+    ssc.start()
+    ssc.awaitTermination()
+    // $example off$
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2ba1a62e450ee8ad3bf4698194cc13ac990339c3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLinearRegressionExample.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD
+// $example off$
+import org.apache.spark.streaming._
+
+/**
+ * Train a linear regression model on one stream of data and make predictions
+ * on another stream, where the data streams arrive as text files
+ * into two different directories.
+ *
+ * The rows of the text files must be labeled data points in the form
+ * `(y,[x1,x2,x3,...,xn])`
+ * Where n is the number of features. n must be the same for train and test.
+ *
+ * Usage: StreamingLinearRegressionExample <trainingDir> <testDir>
+ *
+ * To run on your local machine using the two directories `trainingDir` and `testDir`,
+ * with updates every second, and 3 features per data point, call:
+ *    $ bin/run-example mllib.StreamingLinearRegressionExample trainingDir testDir
+ *
+ * As you add text files to `trainingDir` the model will continuously update.
+ * Anytime you add text files to `testDir`, you'll see predictions from the current model.
+ *
+ */
+object StreamingLinearRegressionExample {
+
+  def main(args: Array[String]): Unit = {
+    if (args.length != 2) {
+      System.err.println("Usage: StreamingLinearRegressionExample <trainingDir> <testDir>")
+      System.exit(1)
+    }
+
+    val conf = new SparkConf().setAppName("StreamingLinearRegressionExample")
+    val ssc = new StreamingContext(conf, Seconds(1))
+
+    // $example on$
+    val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse).cache()
+    val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse)
+
+    val numFeatures = 3
+    val model = new StreamingLinearRegressionWithSGD()
+      .setInitialWeights(Vectors.zeros(numFeatures))
+
+    model.trainOn(trainingData)
+    model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print()
+
+    ssc.start()
+    ssc.awaitTermination()
+    // $example off$
+
+    ssc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLogisticRegression.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLogisticRegression.scala
new file mode 100644
index 0000000000000000000000000000000000000000..a8b144a197229a9c603eccf4538a954c7746d5de
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLogisticRegression.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.mllib.classification.StreamingLogisticRegressionWithSGD
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.regression.LabeledPoint
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+/**
+ * Train a logistic regression model on one stream of data and make predictions
+ * on another stream, where the data streams arrive as text files
+ * into two different directories.
+ *
+ * The rows of the text files must be labeled data points in the form
+ * `(y,[x1,x2,x3,...,xn])`
+ * Where n is the number of features, y is a binary label, and
+ * n must be the same for train and test.
+ *
+ * Usage: StreamingLogisticRegression <trainingDir> <testDir> <batchDuration> <numFeatures>
+ *
+ * To run on your local machine using the two directories `trainingDir` and `testDir`,
+ * with updates every 5 seconds, and 2 features per data point, call:
+ *    $ bin/run-example mllib.StreamingLogisticRegression trainingDir testDir 5 2
+ *
+ * As you add text files to `trainingDir` the model will continuously update.
+ * Anytime you add text files to `testDir`, you'll see predictions from the current model.
+ *
+ */
+object StreamingLogisticRegression {
+
+  def main(args: Array[String]): Unit = {
+
+    if (args.length != 4) {
+      System.err.println(
+        "Usage: StreamingLogisticRegression <trainingDir> <testDir> <batchDuration> <numFeatures>")
+      System.exit(1)
+    }
+
+    val conf = new SparkConf().setMaster("local").setAppName("StreamingLogisticRegression")
+    val ssc = new StreamingContext(conf, Seconds(args(2).toLong))
+
+    val trainingData = ssc.textFileStream(args(0)).map(LabeledPoint.parse)
+    val testData = ssc.textFileStream(args(1)).map(LabeledPoint.parse)
+
+    val model = new StreamingLogisticRegressionWithSGD()
+      .setInitialWeights(Vectors.zeros(args(3).toInt))
+
+    model.trainOn(trainingData)
+    model.predictOnValues(testData.map(lp => (lp.label, lp.features))).print()
+
+    ssc.start()
+    ssc.awaitTermination()
+
+  }
+
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingTestExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingTestExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ae4dee24c6474949d4fc63e4e81314d7fcd00c34
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingTestExample.scala
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.mllib.stat.test.{BinarySample, StreamingTest}
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.util.Utils
+
+/**
+ * Perform streaming testing using Welch's 2-sample t-test on a stream of data, where the data
+ * stream arrives as text files in a directory. Stops when the difference between the two groups
+ * becomes statistically significant (p-value < 0.05) or after a user-specified number of batches
+ * (the timeout) has been processed.
+ *
+ * The rows of the text files must be in the form `Boolean, Double`. For example:
+ *   false, -3.92
+ *   true, 99.32
+ *
+ * Usage:
+ *   StreamingTestExample <dataDir> <batchDuration> <numBatchesTimeout>
+ *
+ * To run on your local machine using the directory `dataDir` with 5 seconds between each batch and
+ * a timeout after 100 insignificant batches, call:
+ *    $ bin/run-example mllib.StreamingTestExample dataDir 5 100
+ *
+ * As you add text files to `dataDir` the significance test will continually update every
+ * `batchDuration` seconds until the test becomes significant (p-value < 0.05) or the number of
+ * batches processed exceeds `numBatchesTimeout`.
+ */
+object StreamingTestExample {
+
+  def main(args: Array[String]): Unit = {
+    if (args.length != 3) {
+      // scalastyle:off println
+      System.err.println(
+        "Usage: StreamingTestExample " +
+          "<dataDir> <batchDuration> <numBatchesTimeout>")
+      // scalastyle:on println
+      System.exit(1)
+    }
+    val dataDir = args(0)
+    val batchDuration = Seconds(args(1).toLong)
+    val numBatchesTimeout = args(2).toInt
+
+    val conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample")
+    val ssc = new StreamingContext(conf, batchDuration)
+    ssc.checkpoint {
+      val dir = Utils.createTempDir()
+      dir.toString
+    }
+
+    // $example on$
+    val data = ssc.textFileStream(dataDir).map(line => line.split(",") match {
+      case Array(label, value) => BinarySample(label.toBoolean, value.toDouble)
+    })
+
+    val streamingTest = new StreamingTest()
+      .setPeacePeriod(0)
+      .setWindowSize(0)
+      .setTestMethod("welch")
+
+    val out = streamingTest.registerStream(data)
+    out.print()
+    // $example off$
+
+    // Stop processing if test becomes significant or we time out
+    var timeoutCounter = numBatchesTimeout
+    out.foreachRDD { rdd =>
+      timeoutCounter -= 1
+      val anySignificant = rdd.map(_.pValue < 0.05).fold(false)(_ || _)
+      if (timeoutCounter == 0 || anySignificant) rdd.context.stop()
+    }
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..948b443c0a754043f4e23e4bc67414943d984ed5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/SummaryStatisticsExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+// $example on$
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics}
+// $example off$
+
+object SummaryStatisticsExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("SummaryStatisticsExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val observations = sc.parallelize(
+      Seq(
+        Vectors.dense(1.0, 10.0, 100.0),
+        Vectors.dense(2.0, 20.0, 200.0),
+        Vectors.dense(3.0, 30.0, 300.0)
+      )
+    )
+
+    // Compute column summary statistics.
+    val summary: MultivariateStatisticalSummary = Statistics.colStats(observations)
+    println(summary.mean)  // a dense vector containing the mean value for each column
+    println(summary.variance)  // column-wise variance
+    println(summary.numNonzeros)  // number of nonzeros in each column
+    // $example off$
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..a5bdcd8f2ed32d6e223fc1ce28f512c4f25df819
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TFIDFExample.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.{HashingTF, IDF}
+import org.apache.spark.mllib.linalg.Vector
+import org.apache.spark.rdd.RDD
+// $example off$
+
+object TFIDFExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("TFIDFExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    // Load documents (one per line).
+    val documents: RDD[Seq[String]] = sc.textFile("data/mllib/kmeans_data.txt")
+      .map(_.split(" ").toSeq)
+
+    val hashingTF = new HashingTF()
+    val tf: RDD[Vector] = hashingTF.transform(documents)
+
+    // While applying HashingTF needs only a single pass over the data, applying IDF needs two
+    // passes: first to compute the IDF vector and second to scale the term frequencies by IDF.
+    tf.cache()
+    val idf = new IDF().fit(tf)
+    val tfidf: RDD[Vector] = idf.transform(tf)
+
+    // The spark.mllib IDF implementation provides an option for ignoring terms that occur in
+    // fewer than a minimum number of documents. In such cases, the IDF for these terms is set
+    // to 0. This feature can be used by passing the minDocFreq value to the IDF constructor.
+    val idfIgnore = new IDF(minDocFreq = 2).fit(tf)
+    val tfidfIgnore: RDD[Vector] = idfIgnore.transform(tf)
+    // $example off$
+
+    println("tfidf: ")
+    tfidf.foreach(x => println(x))
+
+    println("tfidfIgnore: ")
+    tfidfIgnore.foreach(x => println(x))
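+
+    // (Illustrative addition, not part of the upstream example.) HashingTF hashes terms into a
+    // fixed-size feature space (2^20 buckets by default in spark.mllib); a smaller dimensionality
+    // can be chosen via the constructor. The value 1000 below is an arbitrary illustration.
+    val smallTF = new HashingTF(numFeatures = 1000).transform(documents)
+    println(s"First hashed vector with 1000 features: ${smallTF.first()}")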
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnyPCA.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnyPCA.scala
new file mode 100644
index 0000000000000000000000000000000000000000..071d341b81614a91b983a53540d4d79572711e7b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnyPCA.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.linalg.distributed.RowMatrix
+
+/**
+ * Compute the principal components of a tall-and-skinny matrix, whose rows are observations.
+ *
+ * The input matrix must be stored in row-oriented dense format, one line per row with its entries
+ * separated by space. For example,
+ * {{{
+ * 0.5 1.0
+ * 2.0 3.0
+ * 4.0 5.0
+ * }}}
+ * represents a 3-by-2 matrix, whose first row is (0.5, 1.0).
+ */
+object TallSkinnyPCA {
+  def main(args: Array[String]) {
+    if (args.length != 1) {
+      System.err.println("Usage: TallSkinnyPCA <input>")
+      System.exit(1)
+    }
+
+    val conf = new SparkConf().setAppName("TallSkinnyPCA")
+    val sc = new SparkContext(conf)
+
+    // Load and parse the data file.
+    val rows = sc.textFile(args(0)).map { line =>
+      val values = line.split(' ').map(_.toDouble)
+      Vectors.dense(values)
+    }
+    val mat = new RowMatrix(rows)
+
+    // Compute principal components.
+    val pc = mat.computePrincipalComponents(mat.numCols().toInt)
+
+    println(s"Principal components are:\n $pc")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnySVD.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnySVD.scala
new file mode 100644
index 0000000000000000000000000000000000000000..8ae6de16d80e7cdb9b079e5bbd3226e71b28af69
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnySVD.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.linalg.distributed.RowMatrix
+
+/**
+ * Compute the singular value decomposition (SVD) of a tall-and-skinny matrix.
+ *
+ * The input matrix must be stored in row-oriented dense format, one line per row with its entries
+ * separated by space. For example,
+ * {{{
+ * 0.5 1.0
+ * 2.0 3.0
+ * 4.0 5.0
+ * }}}
+ * represents a 3-by-2 matrix, whose first row is (0.5, 1.0).
+ */
+object TallSkinnySVD {
+  def main(args: Array[String]) {
+    if (args.length != 1) {
+      System.err.println("Usage: TallSkinnySVD <input>")
+      System.exit(1)
+    }
+
+    val conf = new SparkConf().setAppName("TallSkinnySVD")
+    val sc = new SparkContext(conf)
+
+    // Load and parse the data file.
+    val rows = sc.textFile(args(0)).map { line =>
+      val values = line.split(' ').map(_.toDouble)
+      Vectors.dense(values)
+    }
+    val mat = new RowMatrix(rows)
+
+    // Compute SVD.
+    val svd = mat.computeSVD(mat.numCols().toInt)
+
+    println(s"Singular values are ${svd.s}")
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/Word2VecExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/Word2VecExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ea794c700ae7e5e3b0d169beca51c83ef49d9797
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/mllib/Word2VecExample.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.mllib
+
+import org.apache.spark.SparkConf
+import org.apache.spark.SparkContext
+// $example on$
+import org.apache.spark.mllib.feature.{Word2Vec, Word2VecModel}
+// $example off$
+
+object Word2VecExample {
+
+  def main(args: Array[String]): Unit = {
+
+    val conf = new SparkConf().setAppName("Word2VecExample")
+    val sc = new SparkContext(conf)
+
+    // $example on$
+    val input = sc.textFile("data/mllib/sample_lda_data.txt").map(line => line.split(" ").toSeq)
+
+    val word2vec = new Word2Vec()
+
+    val model = word2vec.fit(input)
+
+    val synonyms = model.findSynonyms("1", 5)
+
+    for ((synonym, cosineSimilarity) <- synonyms) {
+      println(s"$synonym $cosineSimilarity")
+    }
+
+    // Save and load model
+    model.save(sc, "myModelPath")
+    val sameModel = Word2VecModel.load(sc, "myModelPath")
+    // $example off$
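+
+    // (Illustrative addition, not part of the upstream example.) A fitted Word2VecModel can also
+    // map a single in-vocabulary word to its learned vector, as the token "1" is here:
+    println(s"""Vector for "1": ${model.transform("1")}""")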
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala
new file mode 100644
index 0000000000000000000000000000000000000000..6bd96346ca23883b4b466fbaf1ac8b372cd39b58
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.pythonconverters
+
+import java.util.{Collection => JCollection, Map => JMap}
+
+import scala.collection.JavaConverters._
+
+import org.apache.avro.Schema
+import org.apache.avro.Schema.Type._
+import org.apache.avro.generic.{GenericFixed, IndexedRecord}
+import org.apache.avro.mapred.AvroWrapper
+
+import org.apache.spark.SparkException
+import org.apache.spark.api.python.Converter
+
+
+object AvroConversionUtil extends Serializable {
+  def fromAvro(obj: Any, schema: Schema): Any = {
+    if (obj == null) {
+      return null
+    }
+    schema.getType match {
+      case UNION => unpackUnion(obj, schema)
+      case ARRAY => unpackArray(obj, schema)
+      case FIXED => unpackFixed(obj, schema)
+      case MAP => unpackMap(obj, schema)
+      case BYTES => unpackBytes(obj)
+      case RECORD => unpackRecord(obj)
+      case STRING => obj.toString
+      case ENUM => obj.toString
+      case NULL => obj
+      case BOOLEAN => obj
+      case DOUBLE => obj
+      case FLOAT => obj
+      case INT => obj
+      case LONG => obj
+      case other => throw new SparkException(s"Unknown Avro schema type ${other.getName}")
+    }
+  }
+
+  def unpackRecord(obj: Any): JMap[String, Any] = {
+    val map = new java.util.HashMap[String, Any]
+    obj match {
+      case record: IndexedRecord =>
+        record.getSchema.getFields.asScala.zipWithIndex.foreach { case (f, i) =>
+          map.put(f.name, fromAvro(record.get(i), f.schema))
+        }
+      case other => throw new SparkException(
+        s"Unsupported RECORD type ${other.getClass.getName}")
+    }
+    map
+  }
+
+  def unpackMap(obj: Any, schema: Schema): JMap[String, Any] = {
+    obj.asInstanceOf[JMap[_, _]].asScala.map { case (key, value) =>
+      (key.toString, fromAvro(value, schema.getValueType))
+    }.asJava
+  }
+
+  def unpackFixed(obj: Any, schema: Schema): Array[Byte] = {
+    unpackBytes(obj.asInstanceOf[GenericFixed].bytes())
+  }
+
+  def unpackBytes(obj: Any): Array[Byte] = {
+    val bytes: Array[Byte] = obj match {
+      case buf: java.nio.ByteBuffer =>
+        val arr = new Array[Byte](buf.remaining())
+        buf.get(arr)
+        arr
+      case arr: Array[Byte] => arr
+      case other => throw new SparkException(
+        s"Unknown BYTES type ${other.getClass.getName}")
+    }
+    val bytearray = new Array[Byte](bytes.length)
+    System.arraycopy(bytes, 0, bytearray, 0, bytes.length)
+    bytearray
+  }
+
+  def unpackArray(obj: Any, schema: Schema): JCollection[Any] = obj match {
+    case c: JCollection[_] =>
+      c.asScala.map(fromAvro(_, schema.getElementType)).toSeq.asJava
+    case arr: Array[_] if arr.getClass.getComponentType.isPrimitive =>
+      arr.toSeq.asJava.asInstanceOf[JCollection[Any]]
+    case arr: Array[_] =>
+      arr.map(fromAvro(_, schema.getElementType)).toSeq.asJava
+    case other => throw new SparkException(
+      s"Unknown ARRAY type ${other.getClass.getName}")
+  }
+
+  def unpackUnion(obj: Any, schema: Schema): Any = {
+    schema.getTypes.asScala.toList match {
+      case List(s) => fromAvro(obj, s)
+      case List(n, s) if n.getType == NULL => fromAvro(obj, s)
+      case List(s, n) if n.getType == NULL => fromAvro(obj, s)
+      case _ => throw new SparkException(
+        "Unions may only consist of a concrete type and null")
+    }
+  }
+}
+
+/**
+ * Implementation of [[org.apache.spark.api.python.Converter]] that converts
+ * an Avro IndexedRecord (e.g., derived from AvroParquetInputFormat) to a Java Map.
+ */
+class IndexedRecordToJavaConverter extends Converter[IndexedRecord, JMap[String, Any]] {
+  override def convert(record: IndexedRecord): JMap[String, Any] = {
+    if (record == null) {
+      return null
+    }
+    // Delegate to the shared conversion utility, which builds and returns the Java map.
+    AvroConversionUtil.unpackRecord(record)
+  }
+}
+
+/**
+ * Implementation of [[org.apache.spark.api.python.Converter]] that converts
+ * an Avro Record wrapped in an AvroKey (or AvroValue) to a Java Map. It tries
+ * to work with all 3 Avro data mappings (Generic, Specific and Reflect).
+ */
+class AvroWrapperToJavaConverter extends Converter[Any, Any] {
+  override def convert(obj: Any): Any = {
+    if (obj == null) {
+      return null
+    }
+    obj.asInstanceOf[AvroWrapper[_]].datum() match {
+      case null => null
+      case record: IndexedRecord => AvroConversionUtil.unpackRecord(record)
+      case other => throw new SparkException(
+        s"Unsupported top-level Avro data type ${other.getClass.getName}")
+    }
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
new file mode 100644
index 0000000000000000000000000000000000000000..deaa9f252b9b06a9d1a169307bea37984ff44f40
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.sql
+
+import org.apache.spark.sql.SaveMode
+// $example on:init_session$
+import org.apache.spark.sql.SparkSession
+// $example off:init_session$
+
+// One method for defining the schema of an RDD is to make a case class with the desired column
+// names and types.
+case class Record(key: Int, value: String)
+
+object RDDRelation {
+  def main(args: Array[String]) {
+    // $example on:init_session$
+    val spark = SparkSession
+      .builder
+      .appName("Spark Examples")
+      .config("spark.some.config.option", "some-value")
+      .getOrCreate()
+
+    // Importing the SparkSession gives access to all the SQL functions and implicit conversions.
+    import spark.implicits._
+    // $example off:init_session$
+
+    val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i")))
+    // Any RDD containing case classes can be used to create a temporary view.  The schema of the
+    // view is automatically inferred using scala reflection.
+    df.createOrReplaceTempView("records")
+
+    // Once tables have been registered, you can run SQL queries over them.
+    println("Result of SELECT *:")
+    spark.sql("SELECT * FROM records").collect().foreach(println)
+
+    // Aggregation queries are also supported.
+    val count = spark.sql("SELECT COUNT(*) FROM records").collect().head.getLong(0)
+    println(s"COUNT(*): $count")
+
+    // The results of SQL queries are themselves DataFrames. The underlying RDD (via `.rdd`)
+    // supports all normal RDD functions; its items are of type Row, which allows you to access
+    // each column by ordinal.
+    val rddFromSql = spark.sql("SELECT key, value FROM records WHERE key < 10")
+
+    println("Result of RDD.map:")
+    rddFromSql.rdd.map(row => s"Key: ${row(0)}, Value: ${row(1)}").collect().foreach(println)
+
+    // Queries can also be written using a LINQ-like Scala DSL.
+    df.where($"key" === 1).orderBy($"value".asc).select($"key").collect().foreach(println)
+
+    // Write out the DataFrame as a Parquet file with overwrite mode.
+    df.write.mode(SaveMode.Overwrite).parquet("pair.parquet")
+
+    // Read in parquet file.  Parquet files are self-describing so the schema is preserved.
+    val parquetFile = spark.read.parquet("pair.parquet")
+
+    // Queries can be run using the DSL on parquet files just like the original RDD.
+    parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println)
+
+    // These files can also be used to create a temporary view.
+    parquetFile.createOrReplaceTempView("parquetFile")
+    spark.sql("SELECT * FROM parquetFile").collect().foreach(println)
+
+    spark.stop()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..49177334ebae4e57733d7c3464bd6d6cae03108d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala
@@ -0,0 +1,225 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql
+
+import java.util.Properties
+
+import org.apache.spark.sql.SparkSession
+
+object SQLDataSourceExample {
+
+  case class Person(name: String, age: Long)
+
+  def main(args: Array[String]) {
+    val spark = SparkSession
+      .builder()
+      .appName("Spark SQL data sources example")
+      .config("spark.some.config.option", "some-value")
+      .getOrCreate()
+
+    runBasicDataSourceExample(spark)
+    runBasicParquetExample(spark)
+    runParquetSchemaMergingExample(spark)
+    runJsonDatasetExample(spark)
+    runJdbcDatasetExample(spark)
+
+    spark.stop()
+  }
+
+  private def runBasicDataSourceExample(spark: SparkSession): Unit = {
+    // $example on:generic_load_save_functions$
+    val usersDF = spark.read.load("examples/src/main/resources/users.parquet")
+    usersDF.select("name", "favorite_color").write.save("namesAndFavColors.parquet")
+    // $example off:generic_load_save_functions$
+    // $example on:manual_load_options$
+    val peopleDF = spark.read.format("json").load("examples/src/main/resources/people.json")
+    peopleDF.select("name", "age").write.format("parquet").save("namesAndAges.parquet")
+    // $example off:manual_load_options$
+    // $example on:manual_load_options_csv$
+    val peopleDFCsv = spark.read.format("csv")
+      .option("sep", ";")
+      .option("inferSchema", "true")
+      .option("header", "true")
+      .load("examples/src/main/resources/people.csv")
+    // $example off:manual_load_options_csv$
+    // $example on:manual_save_options_orc$
+    usersDF.write.format("orc")
+      .option("orc.bloom.filter.columns", "favorite_color")
+      .option("orc.dictionary.key.threshold", "1.0")
+      .save("users_with_options.orc")
+    // $example off:manual_save_options_orc$
+
+    // $example on:direct_sql$
+    val sqlDF = spark.sql("SELECT * FROM parquet.`examples/src/main/resources/users.parquet`")
+    // $example off:direct_sql$
+    // $example on:write_sorting_and_bucketing$
+    peopleDF.write.bucketBy(42, "name").sortBy("age").saveAsTable("people_bucketed")
+    // $example off:write_sorting_and_bucketing$
+    // $example on:write_partitioning$
+    usersDF.write.partitionBy("favorite_color").format("parquet").save("namesPartByColor.parquet")
+    // $example off:write_partitioning$
+    // $example on:write_partition_and_bucket$
+    usersDF
+      .write
+      .partitionBy("favorite_color")
+      .bucketBy(42, "name")
+      .saveAsTable("users_partitioned_bucketed")
+    // $example off:write_partition_and_bucket$
+
+    spark.sql("DROP TABLE IF EXISTS people_bucketed")
+    spark.sql("DROP TABLE IF EXISTS users_partitioned_bucketed")
+  }
+
+  private def runBasicParquetExample(spark: SparkSession): Unit = {
+    // $example on:basic_parquet_example$
+    // Encoders for most common types are automatically provided by importing spark.implicits._
+    import spark.implicits._
+
+    val peopleDF = spark.read.json("examples/src/main/resources/people.json")
+
+    // DataFrames can be saved as Parquet files, maintaining the schema information
+    peopleDF.write.parquet("people.parquet")
+
+    // Read in the parquet file created above
+    // Parquet files are self-describing so the schema is preserved
+    // The result of loading a Parquet file is also a DataFrame
+    val parquetFileDF = spark.read.parquet("people.parquet")
+
+    // Parquet files can also be used to create a temporary view and then used in SQL statements
+    parquetFileDF.createOrReplaceTempView("parquetFile")
+    val namesDF = spark.sql("SELECT name FROM parquetFile WHERE age BETWEEN 13 AND 19")
+    namesDF.map(attributes => "Name: " + attributes(0)).show()
+    // +------------+
+    // |       value|
+    // +------------+
+    // |Name: Justin|
+    // +------------+
+    // $example off:basic_parquet_example$
+  }
+
+  private def runParquetSchemaMergingExample(spark: SparkSession): Unit = {
+    // $example on:schema_merging$
+    // This is used to implicitly convert an RDD to a DataFrame.
+    import spark.implicits._
+
+    // Create a simple DataFrame, store into a partition directory
+    val squaresDF = spark.sparkContext.makeRDD(1 to 5).map(i => (i, i * i)).toDF("value", "square")
+    squaresDF.write.parquet("data/test_table/key=1")
+
+    // Create another DataFrame in a new partition directory,
+    // adding a new column and dropping an existing column
+    val cubesDF = spark.sparkContext.makeRDD(6 to 10).map(i => (i, i * i * i)).toDF("value", "cube")
+    cubesDF.write.parquet("data/test_table/key=2")
+
+    // Read the partitioned table
+    val mergedDF = spark.read.option("mergeSchema", "true").parquet("data/test_table")
+    mergedDF.printSchema()
+
+    // The final schema consists of all 3 columns in the Parquet files together
+    // with the partitioning column that appears in the partition directory paths
+    // root
+    //  |-- value: int (nullable = true)
+    //  |-- square: int (nullable = true)
+    //  |-- cube: int (nullable = true)
+    //  |-- key: int (nullable = true)
+    // $example off:schema_merging$
+  }
+
+  private def runJsonDatasetExample(spark: SparkSession): Unit = {
+    // $example on:json_dataset$
+    // Encoders for primitive types (Int, String, etc.) and Product types (case classes) are
+    // provided by importing this when creating a Dataset.
+    import spark.implicits._
+
+    // A JSON dataset is pointed to by path.
+    // The path can be either a single text file or a directory storing text files
+    val path = "examples/src/main/resources/people.json"
+    val peopleDF = spark.read.json(path)
+
+    // The inferred schema can be visualized using the printSchema() method
+    peopleDF.printSchema()
+    // root
+    //  |-- age: long (nullable = true)
+    //  |-- name: string (nullable = true)
+
+    // Creates a temporary view using the DataFrame
+    peopleDF.createOrReplaceTempView("people")
+
+    // SQL statements can be run by using the sql methods provided by spark
+    val teenagerNamesDF = spark.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19")
+    teenagerNamesDF.show()
+    // +------+
+    // |  name|
+    // +------+
+    // |Justin|
+    // +------+
+
+    // Alternatively, a DataFrame can be created for a JSON dataset represented by
+    // a Dataset[String] storing one JSON object per string
+    val otherPeopleDataset = spark.createDataset(
+      """{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}""" :: Nil)
+    val otherPeople = spark.read.json(otherPeopleDataset)
+    otherPeople.show()
+    // +---------------+----+
+    // |        address|name|
+    // +---------------+----+
+    // |[Columbus,Ohio]| Yin|
+    // +---------------+----+
+    // $example off:json_dataset$
+  }
+
+  private def runJdbcDatasetExample(spark: SparkSession): Unit = {
+    // $example on:jdbc_dataset$
+    // Note: JDBC loading and saving can be achieved via either the load/save or jdbc methods
+    // Loading data from a JDBC source
+    val jdbcDF = spark.read
+      .format("jdbc")
+      .option("url", "jdbc:postgresql:dbserver")
+      .option("dbtable", "schema.tablename")
+      .option("user", "username")
+      .option("password", "password")
+      .load()
+
+    val connectionProperties = new Properties()
+    connectionProperties.put("user", "username")
+    connectionProperties.put("password", "password")
+    val jdbcDF2 = spark.read
+      .jdbc("jdbc:postgresql:dbserver", "schema.tablename", connectionProperties)
+    // Specifying the custom data types of the read schema
+    connectionProperties.put("customSchema", "id DECIMAL(38, 0), name STRING")
+    val jdbcDF3 = spark.read
+      .jdbc("jdbc:postgresql:dbserver", "schema.tablename", connectionProperties)
+
+    // Saving data to a JDBC source
+    jdbcDF.write
+      .format("jdbc")
+      .option("url", "jdbc:postgresql:dbserver")
+      .option("dbtable", "schema.tablename")
+      .option("user", "username")
+      .option("password", "password")
+      .save()
+
+    jdbcDF2.write
+      .jdbc("jdbc:postgresql:dbserver", "schema.tablename", connectionProperties)
+
+    // Specifying create table column data types on write
+    jdbcDF.write
+      .option("createTableColumnTypes", "name CHAR(64), comments VARCHAR(1024)")
+      .jdbc("jdbc:postgresql:dbserver", "schema.tablename", connectionProperties)
+    // $example off:jdbc_dataset$
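+
+    // (Illustrative addition, not part of the upstream example.) Large tables can be read in
+    // parallel by also providing partitioning options; the column name and bounds below are
+    // placeholders and assume a numeric `id` column exists.
+    val jdbcPartitionedDF = spark.read
+      .format("jdbc")
+      .option("url", "jdbc:postgresql:dbserver")
+      .option("dbtable", "schema.tablename")
+      .option("user", "username")
+      .option("password", "password")
+      .option("partitionColumn", "id")
+      .option("lowerBound", "0")
+      .option("upperBound", "1000000")
+      .option("numPartitions", "10")
+      .load()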
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..958361a6684c51a54c7a75342fe159dd69c9519b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql
+
+import org.apache.spark.sql.Row
+// $example on:init_session$
+import org.apache.spark.sql.SparkSession
+// $example off:init_session$
+// $example on:programmatic_schema$
+// $example on:data_types$
+import org.apache.spark.sql.types._
+// $example off:data_types$
+// $example off:programmatic_schema$
+
+object SparkSQLExample {
+
+  // $example on:create_ds$
+  case class Person(name: String, age: Long)
+  // $example off:create_ds$
+
+  def main(args: Array[String]) {
+    // $example on:init_session$
+    val spark = SparkSession
+      .builder()
+      .appName("Spark SQL basic example")
+      .config("spark.some.config.option", "some-value")
+      .getOrCreate()
+
+    // For implicit conversions like converting RDDs to DataFrames
+    import spark.implicits._
+    // $example off:init_session$
+
+    runBasicDataFrameExample(spark)
+    runDatasetCreationExample(spark)
+    runInferSchemaExample(spark)
+    runProgrammaticSchemaExample(spark)
+
+    spark.stop()
+  }
+
+  private def runBasicDataFrameExample(spark: SparkSession): Unit = {
+    // $example on:create_df$
+    val df = spark.read.json("examples/src/main/resources/people.json")
+
+    // Displays the content of the DataFrame to stdout
+    df.show()
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:create_df$
+
+    // $example on:untyped_ops$
+    // This import is needed to use the $-notation
+    import spark.implicits._
+    // Print the schema in a tree format
+    df.printSchema()
+    // root
+    // |-- age: long (nullable = true)
+    // |-- name: string (nullable = true)
+
+    // Select only the "name" column
+    df.select("name").show()
+    // +-------+
+    // |   name|
+    // +-------+
+    // |Michael|
+    // |   Andy|
+    // | Justin|
+    // +-------+
+
+    // Select everybody, but increment the age by 1
+    df.select($"name", $"age" + 1).show()
+    // +-------+---------+
+    // |   name|(age + 1)|
+    // +-------+---------+
+    // |Michael|     null|
+    // |   Andy|       31|
+    // | Justin|       20|
+    // +-------+---------+
+
+    // Select people older than 21
+    df.filter($"age" > 21).show()
+    // +---+----+
+    // |age|name|
+    // +---+----+
+    // | 30|Andy|
+    // +---+----+
+
+    // Count people by age
+    df.groupBy("age").count().show()
+    // +----+-----+
+    // | age|count|
+    // +----+-----+
+    // |  19|    1|
+    // |null|    1|
+    // |  30|    1|
+    // +----+-----+
+    // $example off:untyped_ops$
+
+    // $example on:run_sql$
+    // Register the DataFrame as a SQL temporary view
+    df.createOrReplaceTempView("people")
+
+    val sqlDF = spark.sql("SELECT * FROM people")
+    sqlDF.show()
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:run_sql$
+
+    // $example on:global_temp_view$
+    // Register the DataFrame as a global temporary view
+    df.createGlobalTempView("people")
+
+    // Global temporary view is tied to a system preserved database `global_temp`
+    spark.sql("SELECT * FROM global_temp.people").show()
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+
+    // Global temporary view is cross-session
+    spark.newSession().sql("SELECT * FROM global_temp.people").show()
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:global_temp_view$
+  }
+
+  private def runDatasetCreationExample(spark: SparkSession): Unit = {
+    import spark.implicits._
+    // $example on:create_ds$
+    // Encoders are created for case classes
+    val caseClassDS = Seq(Person("Andy", 32)).toDS()
+    caseClassDS.show()
+    // +----+---+
+    // |name|age|
+    // +----+---+
+    // |Andy| 32|
+    // +----+---+
+
+    // Encoders for most common types are automatically provided by importing spark.implicits._
+    val primitiveDS = Seq(1, 2, 3).toDS()
+    primitiveDS.map(_ + 1).collect() // Returns: Array(2, 3, 4)
+
+    // DataFrames can be converted to a Dataset by providing a class. Mapping will be done by name
+    val path = "examples/src/main/resources/people.json"
+    val peopleDS = spark.read.json(path).as[Person]
+    peopleDS.show()
+    // +----+-------+
+    // | age|   name|
+    // +----+-------+
+    // |null|Michael|
+    // |  30|   Andy|
+    // |  19| Justin|
+    // +----+-------+
+    // $example off:create_ds$
+  }
+
+  private def runInferSchemaExample(spark: SparkSession): Unit = {
+    // $example on:schema_inferring$
+    // For implicit conversions from RDDs to DataFrames
+    import spark.implicits._
+
+    // Create an RDD of Person objects from a text file, convert it to a DataFrame
+    val peopleDF = spark.sparkContext
+      .textFile("examples/src/main/resources/people.txt")
+      .map(_.split(","))
+      .map(attributes => Person(attributes(0), attributes(1).trim.toInt))
+      .toDF()
+    // Register the DataFrame as a temporary view
+    peopleDF.createOrReplaceTempView("people")
+
+    // SQL statements can be run by using the sql methods provided by Spark
+    val teenagersDF = spark.sql("SELECT name, age FROM people WHERE age BETWEEN 13 AND 19")
+
+    // The columns of a row in the result can be accessed by field index
+    teenagersDF.map(teenager => "Name: " + teenager(0)).show()
+    // +------------+
+    // |       value|
+    // +------------+
+    // |Name: Justin|
+    // +------------+
+
+    // or by field name
+    teenagersDF.map(teenager => "Name: " + teenager.getAs[String]("name")).show()
+    // +------------+
+    // |       value|
+    // +------------+
+    // |Name: Justin|
+    // +------------+
+
+    // No pre-defined encoders for Dataset[Map[K,V]], define explicitly
+    implicit val mapEncoder = org.apache.spark.sql.Encoders.kryo[Map[String, Any]]
+    // Primitive types and case classes can also be defined as
+    // implicit val stringIntMapEncoder: Encoder[Map[String, Any]] = ExpressionEncoder()
+
+    // row.getValuesMap[T] retrieves multiple columns at once into a Map[String, T]
+    teenagersDF.map(teenager => teenager.getValuesMap[Any](List("name", "age"))).collect()
+    // Array(Map("name" -> "Justin", "age" -> 19))
+    // $example off:schema_inferring$
+  }
+
+  private def runProgrammaticSchemaExample(spark: SparkSession): Unit = {
+    import spark.implicits._
+    // $example on:programmatic_schema$
+    // Create an RDD
+    val peopleRDD = spark.sparkContext.textFile("examples/src/main/resources/people.txt")
+
+    // The schema is encoded in a string
+    val schemaString = "name age"
+
+    // Generate the schema based on the string of schema
+    val fields = schemaString.split(" ")
+      .map(fieldName => StructField(fieldName, StringType, nullable = true))
+    val schema = StructType(fields)
+
+    // Convert records of the RDD (people) to Rows
+    val rowRDD = peopleRDD
+      .map(_.split(","))
+      .map(attributes => Row(attributes(0), attributes(1).trim))
+
+    // Apply the schema to the RDD
+    val peopleDF = spark.createDataFrame(rowRDD, schema)
+
+    // Creates a temporary view using the DataFrame
+    peopleDF.createOrReplaceTempView("people")
+
+    // SQL can be run over a temporary view created using DataFrames
+    val results = spark.sql("SELECT name FROM people")
+
+    // The results of SQL queries are DataFrames and support all the normal RDD operations
+    // The columns of a row in the result can be accessed by field index or by field name
+    results.map(attributes => "Name: " + attributes(0)).show()
+    // +-------------+
+    // |        value|
+    // +-------------+
+    // |Name: Michael|
+    // |   Name: Andy|
+    // | Name: Justin|
+    // +-------------+
+    // $example off:programmatic_schema$
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedTypedAggregation.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedTypedAggregation.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f04a831487b591f45295141d07cc40e9aa27d308
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedTypedAggregation.scala
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql
+
+// $example on:typed_custom_aggregation$
+import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
+import org.apache.spark.sql.expressions.Aggregator
+// $example off:typed_custom_aggregation$
+
+object UserDefinedTypedAggregation {
+
+  // $example on:typed_custom_aggregation$
+  case class Employee(name: String, salary: Long)
+  case class Average(var sum: Long, var count: Long)
+
+  object MyAverage extends Aggregator[Employee, Average, Double] {
+    // A zero value for this aggregation. Should satisfy the property that any b + zero = b
+    def zero: Average = Average(0L, 0L)
+    // Combine two values to produce a new value. For performance, the function may modify `buffer`
+    // and return it instead of constructing a new object
+    def reduce(buffer: Average, employee: Employee): Average = {
+      buffer.sum += employee.salary
+      buffer.count += 1
+      buffer
+    }
+    // Merge two intermediate values
+    def merge(b1: Average, b2: Average): Average = {
+      b1.sum += b2.sum
+      b1.count += b2.count
+      b1
+    }
+    // Transform the output of the reduction
+    def finish(reduction: Average): Double = reduction.sum.toDouble / reduction.count
+    // Specifies the Encoder for the intermediate value type
+    def bufferEncoder: Encoder[Average] = Encoders.product
+    // Specifies the Encoder for the final output value type
+    def outputEncoder: Encoder[Double] = Encoders.scalaDouble
+  }
+  // $example off:typed_custom_aggregation$
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName("Spark SQL user-defined Datasets aggregation example")
+      .getOrCreate()
+
+    import spark.implicits._
+
+    // $example on:typed_custom_aggregation$
+    val ds = spark.read.json("examples/src/main/resources/employees.json").as[Employee]
+    ds.show()
+    // +-------+------+
+    // |   name|salary|
+    // +-------+------+
+    // |Michael|  3000|
+    // |   Andy|  4500|
+    // | Justin|  3500|
+    // |  Berta|  4000|
+    // +-------+------+
+
+    // Convert the function to a `TypedColumn` and give it a name
+    val averageSalary = MyAverage.toColumn.name("average_salary")
+    val result = ds.select(averageSalary)
+    result.show()
+    // +--------------+
+    // |average_salary|
+    // +--------------+
+    // |        3750.0|
+    // +--------------+
+    // $example off:typed_custom_aggregation$
+
+    spark.stop()
+  }
+
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedUntypedAggregation.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedUntypedAggregation.scala
new file mode 100644
index 0000000000000000000000000000000000000000..3656a84c571db341bfb48f5d0162509391cbb19b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/UserDefinedUntypedAggregation.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql
+
+// $example on:untyped_custom_aggregation$
+import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.expressions.MutableAggregationBuffer
+import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
+import org.apache.spark.sql.types._
+// $example off:untyped_custom_aggregation$
+
+object UserDefinedUntypedAggregation {
+
+  // $example on:untyped_custom_aggregation$
+  object MyAverage extends UserDefinedAggregateFunction {
+    // Data types of input arguments of this aggregate function
+    def inputSchema: StructType = StructType(StructField("inputColumn", LongType) :: Nil)
+    // Data types of values in the aggregation buffer
+    def bufferSchema: StructType = {
+      StructType(StructField("sum", LongType) :: StructField("count", LongType) :: Nil)
+    }
+    // The data type of the returned value
+    def dataType: DataType = DoubleType
+    // Whether this function always returns the same output on the identical input
+    def deterministic: Boolean = true
+    // Initializes the given aggregation buffer. The buffer itself is a `Row` that, in addition to
+    // standard methods like retrieving a value at an index (e.g., get(), getBoolean()), provides
+    // the ability to update its values. Note that arrays and maps inside the buffer are still
+    // immutable.
+    def initialize(buffer: MutableAggregationBuffer): Unit = {
+      buffer(0) = 0L
+      buffer(1) = 0L
+    }
+    // Updates the given aggregation buffer `buffer` with new input data from `input`
+    def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
+      if (!input.isNullAt(0)) {
+        buffer(0) = buffer.getLong(0) + input.getLong(0)
+        buffer(1) = buffer.getLong(1) + 1
+      }
+    }
+    // Merges two aggregation buffers and stores the updated buffer values back to `buffer1`
+    def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
+      buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
+      buffer1(1) = buffer1.getLong(1) + buffer2.getLong(1)
+    }
+    // Calculates the final result
+    def evaluate(buffer: Row): Double = buffer.getLong(0).toDouble / buffer.getLong(1)
+  }
+  // $example off:untyped_custom_aggregation$
+
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession
+      .builder()
+      .appName("Spark SQL user-defined DataFrames aggregation example")
+      .getOrCreate()
+
+    // $example on:untyped_custom_aggregation$
+    // Register the function to access it
+    spark.udf.register("myAverage", MyAverage)
+
+    val df = spark.read.json("examples/src/main/resources/employees.json")
+    df.createOrReplaceTempView("employees")
+    df.show()
+    // +-------+------+
+    // |   name|salary|
+    // +-------+------+
+    // |Michael|  3000|
+    // |   Andy|  4500|
+    // | Justin|  3500|
+    // |  Berta|  4000|
+    // +-------+------+
+
+    val result = spark.sql("SELECT myAverage(salary) as average_salary FROM employees")
+    result.show()
+    // +--------------+
+    // |average_salary|
+    // +--------------+
+    // |        3750.0|
+    // +--------------+
+    // $example off:untyped_custom_aggregation$
+
+    spark.stop()
+  }
+
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala
new file mode 100644
index 0000000000000000000000000000000000000000..70fb5b27366ecdff0be7905ac19eead9d319ae10
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.examples.sql.hive
+
+// $example on:spark_hive$
+import java.io.File
+
+import org.apache.spark.sql.{Row, SaveMode, SparkSession}
+// $example off:spark_hive$
+
+object SparkHiveExample {
+
+  // $example on:spark_hive$
+  case class Record(key: Int, value: String)
+  // $example off:spark_hive$
+
+  def main(args: Array[String]) {
+    // When working with Hive, one must instantiate `SparkSession` with Hive support, including
+    // connectivity to a persistent Hive metastore, support for Hive serdes, and Hive user-defined
+    // functions. Users who do not have an existing Hive deployment can still enable Hive support.
+    // When not configured by hive-site.xml, the context automatically creates `metastore_db`
+    // in the current directory and creates a directory configured by `spark.sql.warehouse.dir`,
+    // which defaults to the directory `spark-warehouse` in the current directory in which the
+    // Spark application is started.
+
+    // $example on:spark_hive$
+    // warehouseLocation points to the default location for managed databases and tables
+    val warehouseLocation = new File("spark-warehouse").getAbsolutePath
+
+    val spark = SparkSession
+      .builder()
+      .appName("Spark Hive Example")
+      .config("spark.sql.warehouse.dir", warehouseLocation)
+      .enableHiveSupport()
+      .getOrCreate()
+
+    import spark.implicits._
+    import spark.sql
+
+    sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) USING hive")
+    sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
+
+    // Queries are expressed in HiveQL
+    sql("SELECT * FROM src").show()
+    // +---+-------+
+    // |key|  value|
+    // +---+-------+
+    // |238|val_238|
+    // | 86| val_86|
+    // |311|val_311|
+    // ...
+
+    // Aggregation queries are also supported.
+    sql("SELECT COUNT(*) FROM src").show()
+    // +--------+
+    // |count(1)|
+    // +--------+
+    // |    500 |
+    // +--------+
+
+    // The results of SQL queries are themselves DataFrames and support all normal functions.
+    val sqlDF = sql("SELECT key, value FROM src WHERE key < 10 ORDER BY key")
+
+    // The items in DataFrames are of type Row, which allows you to access each column by ordinal.
+    val stringsDS = sqlDF.map {
+      case Row(key: Int, value: String) => s"Key: $key, Value: $value"
+    }
+    stringsDS.show()
+    // +--------------------+
+    // |               value|
+    // +--------------------+
+    // |Key: 0, Value: val_0|
+    // |Key: 0, Value: val_0|
+    // |Key: 0, Value: val_0|
+    // ...
+
+    // You can also use DataFrames to create temporary views within a SparkSession.
+    val recordsDF = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i")))
+    recordsDF.createOrReplaceTempView("records")
+
+    // Queries can then join DataFrame data with data stored in Hive.
+    sql("SELECT * FROM records r JOIN src s ON r.key = s.key").show()
+    // +---+------+---+------+
+    // |key| value|key| value|
+    // +---+------+---+------+
+    // |  2| val_2|  2| val_2|
+    // |  4| val_4|  4| val_4|
+    // |  5| val_5|  5| val_5|
+    // ...
+
+    // Create a Hive managed Parquet table, with HQL syntax instead of the Spark SQL native syntax
+    // `USING hive`
+    sql("CREATE TABLE hive_records(key int, value string) STORED AS PARQUET")
+    // Save DataFrame to the Hive managed table
+    val df = spark.table("src")
+    df.write.mode(SaveMode.Overwrite).saveAsTable("hive_records")
+    // After insertion, the Hive managed table has data now
+    sql("SELECT * FROM hive_records").show()
+    // +---+-------+
+    // |key|  value|
+    // +---+-------+
+    // |238|val_238|
+    // | 86| val_86|
+    // |311|val_311|
+    // ...
+
+    // Prepare a Parquet data directory
+    val dataDir = "/tmp/parquet_data"
+    spark.range(10).write.parquet(dataDir)
+    // Create a Hive external Parquet table
+    sql(s"CREATE EXTERNAL TABLE hive_ints(key int) STORED AS PARQUET LOCATION '$dataDir'")
+    // The Hive external table should already have data
+    sql("SELECT * FROM hive_ints").show()
+    // +---+
+    // |key|
+    // +---+
+    // |  0|
+    // |  1|
+    // |  2|
+    // ...
+
+    // Turn on flag for Hive Dynamic Partitioning
+    spark.sqlContext.setConf("hive.exec.dynamic.partition", "true")
+    spark.sqlContext.setConf("hive.exec.dynamic.partition.mode", "nonstrict")
+    // Create a Hive partitioned table using DataFrame API
+    df.write.partitionBy("key").format("hive").saveAsTable("hive_part_tbl")
+    // Partitioned column `key` will be moved to the end of the schema.
+    sql("SELECT * FROM hive_part_tbl").show()
+    // +-------+---+
+    // |  value|key|
+    // +-------+---+
+    // |val_238|238|
+    // | val_86| 86|
+    // |val_311|311|
+    // ...
+
+    spark.stop()
+    // $example off:spark_hive$
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredKafkaWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredKafkaWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2aab49c8891d1559a36481587678a942959a70b5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredKafkaWordCount.scala
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.sql.streaming
+
+import java.util.UUID
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: StructuredKafkaWordCount <bootstrap-servers> <subscribe-type> <topics>
+ *     [<checkpoint-location>]
+ *   <bootstrap-servers> The Kafka "bootstrap.servers" configuration. A
+ *   comma-separated list of host:port.
+ *   <subscribe-type> One of three subscription types: 'assign', 'subscribe' or
+ *   'subscribePattern'.
+ *   |- <assign> Specific TopicPartitions to consume. JSON string, e.g.
+ *   |  {"topicA":[0,1],"topicB":[2,4]}.
+ *   |- <subscribe> The topic list to subscribe to. A comma-separated list of
+ *   |  topics.
+ *   |- <subscribePattern> The pattern used to subscribe to topic(s).
+ *   |  Java regex string.
+ *   |- Only one of "assign", "subscribe" or "subscribePattern" options can be
+ *   |  specified for the Kafka source.
+ *   <topics> Different value format depends on the value of 'subscribe-type'.
+ *   <checkpoint-location> Directory in which to create checkpoints. If not
+ *   provided, defaults to a randomized directory in /tmp.
+ *
+ * Example:
+ *    `$ bin/run-example \
+ *      sql.streaming.StructuredKafkaWordCount host1:port1,host2:port2 \
+ *      subscribe topic1,topic2`
+ */
+object StructuredKafkaWordCount {
+  def main(args: Array[String]): Unit = {
+    if (args.length < 3) {
+      System.err.println("Usage: StructuredKafkaWordCount <bootstrap-servers> " +
+        "<subscribe-type> <topics> [<checkpoint-location>]")
+      System.exit(1)
+    }
+
+    val Array(bootstrapServers, subscribeType, topics, _*) = args
+    val checkpointLocation =
+      if (args.length > 3) args(3) else "/tmp/temporary-" + UUID.randomUUID.toString
+
+    val spark = SparkSession
+      .builder
+      .appName("StructuredKafkaWordCount")
+      .getOrCreate()
+
+    import spark.implicits._
+
+    // Create DataSet representing the stream of input lines from kafka
+    val lines = spark
+      .readStream
+      .format("kafka")
+      .option("kafka.bootstrap.servers", bootstrapServers)
+      .option(subscribeType, topics)
+      .load()
+      .selectExpr("CAST(value AS STRING)")
+      .as[String]
+
+    // Generate running word count
+    val wordCounts = lines.flatMap(_.split(" ")).groupBy("value").count()
+
+    // Start running the query that prints the running counts to the console
+    val query = wordCounts.writeStream
+      .outputMode("complete")
+      .format("console")
+      .option("checkpointLocation", checkpointLocation)
+      .start()
+
+    query.awaitTermination()
+  }
+
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..de477c5ce81616726aabc6b8a9ae81e344137ba8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCount.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.sql.streaming
+
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from the network.
+ *
+ * Usage: StructuredNetworkWordCount <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Structured Streaming
+ * would connect to in order to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example sql.streaming.StructuredNetworkWordCount
+ *    localhost 9999`
+ */
+object StructuredNetworkWordCount {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      System.err.println("Usage: StructuredNetworkWordCount <hostname> <port>")
+      System.exit(1)
+    }
+
+    val host = args(0)
+    val port = args(1).toInt
+
+    val spark = SparkSession
+      .builder
+      .appName("StructuredNetworkWordCount")
+      .getOrCreate()
+
+    import spark.implicits._
+
+    // Create DataFrame representing the stream of input lines from connection to host:port
+    val lines = spark.readStream
+      .format("socket")
+      .option("host", host)
+      .option("port", port)
+      .load()
+
+    // Split the lines into words
+    val words = lines.as[String].flatMap(_.split(" "))
+
+    // Generate running word count
+    val wordCounts = words.groupBy("value").count()
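+    // For illustration only (not used here), the equivalent typed aggregation would be
+    //   words.groupByKey(identity).count()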
+
+    // Start running the query that prints the running counts to the console
+    val query = wordCounts.writeStream
+      .outputMode("complete")
+      .format("console")
+      .start()
+
+    query.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b4dad21dd75b0049bcf08b667df77f8aa2b88b2a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.sql.streaming
+
+import java.sql.Timestamp
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions._
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from the network over a
+ * sliding window of configurable duration. Each line from the network is tagged
+ * with a timestamp that is used to determine the windows into which it falls.
+ *
+ * Usage: StructuredNetworkWordCountWindowed <hostname> <port> <window duration>
+ *   [<slide duration>]
+ * <hostname> and <port> describe the TCP server that Structured Streaming
+ * would connect to in order to receive data.
+ * <window duration> gives the size of window, specified as integer number of seconds
+ * <slide duration> gives the amount of time successive windows are offset from one another,
+ * given in the same units as above. <slide duration> should be less than or equal to
+ * <window duration>. If the two are equal, successive windows have no overlap. If
+ * <slide duration> is not provided, it defaults to <window duration>.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example sql.streaming.StructuredNetworkWordCountWindowed
+ *    localhost 9999 <window duration in seconds> [<slide duration in seconds>]`
+ *
+ * One recommended <window duration>, <slide duration> pair is 10, 5
+ */
+object StructuredNetworkWordCountWindowed {
+
+  def main(args: Array[String]) {
+    if (args.length < 3) {
+      System.err.println("Usage: StructuredNetworkWordCountWindowed <hostname> <port>" +
+        " <window duration in seconds> [<slide duration in seconds>]")
+      System.exit(1)
+    }
+
+    val host = args(0)
+    val port = args(1).toInt
+    val windowSize = args(2).toInt
+    val slideSize = if (args.length == 3) windowSize else args(3).toInt
+    if (slideSize > windowSize) {
+      System.err.println("<slide duration> must be less than or equal to <window duration>")
+      System.exit(1)
+    }
+    val windowDuration = s"$windowSize seconds"
+    val slideDuration = s"$slideSize seconds"
+
+    val spark = SparkSession
+      .builder
+      .appName("StructuredNetworkWordCountWindowed")
+      .getOrCreate()
+
+    import spark.implicits._
+
+    // Create DataFrame representing the stream of input lines from connection to host:port
+    val lines = spark.readStream
+      .format("socket")
+      .option("host", host)
+      .option("port", port)
+      .option("includeTimestamp", true)
+      .load()
+
+    // Split the lines into words, retaining timestamps
+    val words = lines.as[(String, Timestamp)].flatMap(line =>
+      line._1.split(" ").map(word => (word, line._2))
+    ).toDF("word", "timestamp")
+
+    // Group the data by window and word and compute the count of each group
+    val windowedCounts = words.groupBy(
+      window($"timestamp", windowDuration, slideDuration), $"word"
+    ).count().orderBy("window")
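+    // Sketch (not used here): bounding late data with a watermark would also permit the weaker
+    // "update" output mode; the orderBy would have to be dropped, since sorting is only
+    // supported in complete mode:
+    //   words.withWatermark("timestamp", "10 seconds")
+    //     .groupBy(window($"timestamp", windowDuration, slideDuration), $"word")
+    //     .count()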
+
+    // Start running the query that prints the windowed word counts to the console
+    val query = windowedCounts.writeStream
+      .outputMode("complete")
+      .format("console")
+      .option("truncate", "false")
+      .start()
+
+    query.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredSessionization.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredSessionization.scala
new file mode 100644
index 0000000000000000000000000000000000000000..ed63fb677b9ef02b058b0d90455a17a5d9b2c7b7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredSessionization.scala
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.sql.streaming
+
+import java.sql.Timestamp
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.streaming._
+
+
+/**
+ * Sessionizes events parsed from UTF8 encoded, '\n' delimited text received from the network,
+ * treating each word as a session ID.
+ *
+ * Usage: StructuredSessionization <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Structured Streaming
+ * would connect to in order to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ * `$ nc -lk 9999`
+ * and then run the example
+ * `$ bin/run-example sql.streaming.StructuredSessionization
+ * localhost 9999`
+ */
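+// This example expires sessions with processing-time timeouts. A sketch of an event-time
+// variant (not used below) would watermark the events and switch the timeout type:
+//   events.withWatermark("timestamp", "10 seconds")
+//     .groupByKey(_.sessionId)
+//     .mapGroupsWithState[SessionInfo, SessionUpdate](GroupStateTimeout.EventTimeTimeout) { ... }
+// calling state.setTimeoutTimestamp(...) instead of state.setTimeoutDuration(...).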
+object StructuredSessionization {
+
+  def main(args: Array[String]): Unit = {
+    if (args.length < 2) {
+      System.err.println("Usage: StructuredSessionization <hostname> <port>")
+      System.exit(1)
+    }
+
+    val host = args(0)
+    val port = args(1).toInt
+
+    val spark = SparkSession
+      .builder
+      .appName("StructuredSessionization")
+      .getOrCreate()
+
+    import spark.implicits._
+
+    // Create DataFrame representing the stream of input lines from connection to host:port
+    val lines = spark.readStream
+      .format("socket")
+      .option("host", host)
+      .option("port", port)
+      .option("includeTimestamp", true)
+      .load()
+
+    // Split the lines into words, treat words as sessionId of events
+    val events = lines
+      .as[(String, Timestamp)]
+      .flatMap { case (line, timestamp) =>
+        line.split(" ").map(word => Event(sessionId = word, timestamp))
+      }
+
+    // Sessionize the events. Track the number of events and the start and end timestamps of
+    // each session, and report session updates.
+    val sessionUpdates = events
+      .groupByKey(event => event.sessionId)
+      .mapGroupsWithState[SessionInfo, SessionUpdate](GroupStateTimeout.ProcessingTimeTimeout) {
+
+        case (sessionId: String, events: Iterator[Event], state: GroupState[SessionInfo]) =>
+
+          // If timed out, then remove session and send final update
+          if (state.hasTimedOut) {
+            val finalUpdate =
+              SessionUpdate(sessionId, state.get.durationMs, state.get.numEvents, expired = true)
+            state.remove()
+            finalUpdate
+          } else {
+            // Update start and end timestamps in session
+            val timestamps = events.map(_.timestamp.getTime).toSeq
+            val updatedSession = if (state.exists) {
+              val oldSession = state.get
+              SessionInfo(
+                oldSession.numEvents + timestamps.size,
+                oldSession.startTimestampMs,
+                math.max(oldSession.endTimestampMs, timestamps.max))
+            } else {
+              SessionInfo(timestamps.size, timestamps.min, timestamps.max)
+            }
+            state.update(updatedSession)
+
+            // Set timeout such that the session will be expired if no data received for 10 seconds
+            state.setTimeoutDuration("10 seconds")
+            SessionUpdate(sessionId, state.get.durationMs, state.get.numEvents, expired = false)
+          }
+      }
+
+    // Start running the query that prints the session updates to the console
+    val query = sessionUpdates
+      .writeStream
+      .outputMode("update")
+      .format("console")
+      .start()
+
+    query.awaitTermination()
+  }
+}
+/** User-defined data type representing the input events */
+case class Event(sessionId: String, timestamp: Timestamp)
+
+/**
+ * User-defined data type for storing a session information as state in mapGroupsWithState.
+ *
+ * @param numEvents        total number of events received in the session
+ * @param startTimestampMs timestamp of first event received in the session when it started
+ * @param endTimestampMs   timestamp of last event received in the session before it expired
+ */
+case class SessionInfo(
+    numEvents: Int,
+    startTimestampMs: Long,
+    endTimestampMs: Long) {
+
+  /** Duration of the session, between the first and last events */
+  def durationMs: Long = endTimestampMs - startTimestampMs
+}
+
+/**
+ * User-defined data type representing the update information returned by mapGroupsWithState.
+ *
+ * @param id          Id of the session
+ * @param durationMs  Duration the session was active, that is, from first event to its expiry
+ * @param numEvents   Number of events received by the session while it was active
+ * @param expired     Is the session active or expired
+ */
+case class SessionUpdate(
+    id: String,
+    durationMs: Long,
+    numEvents: Int,
+    expired: Boolean)
+
+// scalastyle:on println
+
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
new file mode 100644
index 0000000000000000000000000000000000000000..25c7bf28719729ba40d1f4f3548a12804eb61e4d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import java.io.{BufferedReader, InputStreamReader}
+import java.net.Socket
+import java.nio.charset.StandardCharsets
+
+import org.apache.spark.SparkConf
+import org.apache.spark.internal.Logging
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+import org.apache.spark.streaming.receiver.Receiver
+
+/**
+ * Custom Receiver that receives data over a socket. Received bytes are interpreted as
+ * text and \n delimited lines are considered as records. They are then counted and printed.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example org.apache.spark.examples.streaming.CustomReceiver localhost 9999`
+ */
+object CustomReceiver {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      System.err.println("Usage: CustomReceiver <hostname> <port>")
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+
+    // Create the context with a 1 second batch size
+    val sparkConf = new SparkConf().setAppName("CustomReceiver")
+    val ssc = new StreamingContext(sparkConf, Seconds(1))
+
+    // Create an input stream with the custom receiver on target ip:port and count the
+    // words in input stream of \n delimited text (eg. generated by 'nc')
+    val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt))
+    val words = lines.flatMap(_.split(" "))
+    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
+    wordCounts.print()
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+
+
+class CustomReceiver(host: String, port: Int)
+  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging {
+
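+  // store() and restart() used below are inherited from Receiver: store() hands a received
+  // record to Spark, and restart() asynchronously stops and then restarts the receiver.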
+  def onStart() {
+    // Start the thread that receives data over a connection
+    new Thread("Socket Receiver") {
+      override def run() { receive() }
+    }.start()
+  }
+
+  def onStop() {
+   // There is nothing much to do as the thread calling receive()
+   // is designed to stop by itself once isStopped() returns true
+  }
+
+  /** Create a socket connection and receive data until receiver is stopped */
+  private def receive() {
+   var socket: Socket = null
+   var userInput: String = null
+   try {
+     logInfo(s"Connecting to $host : $port")
+     socket = new Socket(host, port)
+     logInfo(s"Connected to $host : $port")
+     val reader = new BufferedReader(
+       new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
+     userInput = reader.readLine()
+     while(!isStopped && userInput != null) {
+       store(userInput)
+       userInput = reader.readLine()
+     }
+     reader.close()
+     socket.close()
+     logInfo("Stopped receiving")
+     restart("Trying to connect again")
+   } catch {
+     case e: java.net.ConnectException =>
+       restart(s"Error connecting to $host : $port", e)
+     case t: Throwable =>
+       restart("Error receiving data", t)
+   }
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2082fb71afdf1b2f352809ff8acb9e1ad6f8f4c1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.kafka.clients.consumer.ConsumerConfig
+import org.apache.kafka.common.serialization.StringDeserializer
+
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.kafka010._
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: DirectKafkaWordCount <brokers> <groupId> <topics>
+ *   <brokers> is a list of one or more Kafka brokers
+ *   <groupId> is a consumer group name to consume from topics
+ *   <topics> is a list of one or more Kafka topics to consume from
+ *
+ * Example:
+ *    $ bin/run-example streaming.DirectKafkaWordCount broker1-host:port,broker2-host:port \
+ *    consumer-group topic1,topic2
+ */
+object DirectKafkaWordCount {
+  def main(args: Array[String]) {
+    if (args.length < 3) {
+      System.err.println(s"""
+        |Usage: DirectKafkaWordCount <brokers> <groupId> <topics>
+        |  <brokers> is a list of one or more Kafka brokers
+        |  <groupId> is a consumer group name to consume from topics
+        |  <topics> is a list of one or more Kafka topics to consume from
+        |
+        """.stripMargin)
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+
+    val Array(brokers, groupId, topics) = args
+
+    // Create context with 2 second batch interval
+    val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount")
+    val ssc = new StreamingContext(sparkConf, Seconds(2))
+
+    // Create direct kafka stream with brokers and topics
+    val topicsSet = topics.split(",").toSet
+    val kafkaParams = Map[String, Object](
+      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
+      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
+      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
+      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
+    val messages = KafkaUtils.createDirectStream[String, String](
+      ssc,
+      LocationStrategies.PreferConsistent,
+      ConsumerStrategies.Subscribe[String, String](topicsSet, kafkaParams))
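+    // For reference (not part of this example), processed offsets can be committed back to
+    // Kafka from the driver, e.g.:
+    //   messages.foreachRDD { rdd =>
+    //     val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
+    //     messages.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
+    //   }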
+
+    // Get the lines, split them into words, count the words and print
+    val lines = messages.map(_.value)
+    val words = lines.flatMap(_.split(" "))
+    val wordCounts = words.map(x => (x, 1L)).reduceByKey(_ + _)
+    wordCounts.print()
+
+    // Start the computation
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/HdfsWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/HdfsWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..1f282d437dc38b156f9a90c571b6031a64cbb8bf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/HdfsWordCount.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+/**
+ * Counts words in new text files created in the given directory
+ * Usage: HdfsWordCount <directory>
+ *   <directory> is the directory that Spark Streaming will use to find and read new text files.
+ *
+ * To run this on your local machine on directory `localdir`, run this example
+ *    $ bin/run-example \
+ *       org.apache.spark.examples.streaming.HdfsWordCount localdir
+ *
+ * Then create a text file in `localdir` and the words in the file will get counted.
+ */
+object HdfsWordCount {
+  def main(args: Array[String]) {
+    if (args.length < 1) {
+      System.err.println("Usage: HdfsWordCount <directory>")
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+    val sparkConf = new SparkConf().setAppName("HdfsWordCount")
+    // Create the context
+    val ssc = new StreamingContext(sparkConf, Seconds(2))
+
+    // Create the FileInputDStream on the directory and use the
+    // stream to count words in new files created
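+    // Note: textFileStream only picks up files that appear in the directory after the stream
+    // has started (typically by being moved in atomically); pre-existing files are ignored.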
+    val lines = ssc.textFileStream(args(0))
+    val words = lines.flatMap(_.split(" "))
+    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
+    wordCounts.print()
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..15b57fccb4076c9f88e2c73fd7000d71119d0688
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.spark.SparkConf
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+/**
+ * Counts words in UTF8 encoded, '\n' delimited text received from the network every second.
+ *
+ * Usage: NetworkWordCount <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Spark Streaming would connect to
+ * in order to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example org.apache.spark.examples.streaming.NetworkWordCount localhost 9999`
+ */
+object NetworkWordCount {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      System.err.println("Usage: NetworkWordCount <hostname> <port>")
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+
+    // Create the context with a 1 second batch size
+    val sparkConf = new SparkConf().setAppName("NetworkWordCount")
+    val ssc = new StreamingContext(sparkConf, Seconds(1))
+
+    // Create a socket stream on target ip:port and count the
+    // words in input stream of \n delimited text (eg. generated by 'nc')
+    // Note that a storage level with no replication is used only because this example runs
+    // locally; in a distributed setting, replication is necessary for fault tolerance.
+    val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER)
+    val words = lines.flatMap(_.split(" "))
+    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
+    wordCounts.print()
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
new file mode 100644
index 0000000000000000000000000000000000000000..19bacd449787bbeb16f70271a760aaf762084d8f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming
+
+import scala.collection.mutable.Queue
+
+import org.apache.spark.SparkConf
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+object QueueStream {
+
+  def main(args: Array[String]) {
+
+    StreamingExamples.setStreamingLogLevels()
+    val sparkConf = new SparkConf().setAppName("QueueStream")
+    // Create the context
+    val ssc = new StreamingContext(sparkConf, Seconds(1))
+
+    // Create the queue through which RDDs can be pushed to
+    // a QueueInputDStream
+    val rddQueue = new Queue[RDD[Int]]()
+
+    // Create the QueueInputDStream and use it to do some processing
+    val inputStream = ssc.queueStream(rddQueue)
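+    // By default queueStream dequeues and processes one RDD from the queue per batch interval
+    // (oneAtATime = true)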
+    val mappedStream = inputStream.map(x => (x % 10, 1))
+    val reducedStream = mappedStream.reduceByKey(_ + _)
+    reducedStream.print()
+    ssc.start()
+
+    // Create and push some RDDs into rddQueue
+    for (i <- 1 to 30) {
+      rddQueue.synchronized {
+        rddQueue += ssc.sparkContext.makeRDD(1 to 1000, 10)
+      }
+      Thread.sleep(1000)
+    }
+    ssc.stop()
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/RawNetworkGrep.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/RawNetworkGrep.scala
new file mode 100644
index 0000000000000000000000000000000000000000..437ccf0898d7c7141310a0a6238f599ebf5653e3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/RawNetworkGrep.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.spark.SparkConf
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming._
+import org.apache.spark.util.IntParam
+
+/**
+ * Receives text from multiple rawNetworkStreams and counts how many '\n' delimited
+ * lines have the word 'the' in them. This is useful for benchmarking purposes. This
+ * will only work with spark.streaming.util.RawTextSender running on all worker nodes
+ * and with Spark using Kryo serialization (set the Spark property "spark.serializer" to
+ * "org.apache.spark.serializer.KryoSerializer").
+ * Usage: RawNetworkGrep <numStreams> <host> <port> <batchMillis>
+ *   <numStreams> is the number of rawNetworkStreams, which should be the same as the
+ *                number of worker nodes in the cluster
+ *   <host> is "localhost".
+ *   <port> is the port on which RawTextSender is running on the worker nodes.
+ *   <batchMillis> is the Spark Streaming batch duration in milliseconds.
+ */
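+// For example (a sketch, assuming manual submission rather than run-example), Kryo can be
+// enabled at submit time with:
+//   --conf spark.serializer=org.apache.spark.serializer.KryoSerializer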
+object RawNetworkGrep {
+  def main(args: Array[String]) {
+    if (args.length != 4) {
+      System.err.println("Usage: RawNetworkGrep <numStreams> <host> <port> <batchMillis>")
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+
+    val Array(IntParam(numStreams), host, IntParam(port), IntParam(batchMillis)) = args
+    val sparkConf = new SparkConf().setAppName("RawNetworkGrep")
+    // Create the context
+    val ssc = new StreamingContext(sparkConf, Duration(batchMillis))
+
+    val rawStreams = (1 to numStreams).map(_ =>
+      ssc.rawSocketStream[String](host, port, StorageLevel.MEMORY_ONLY_SER_2)).toArray
+    val union = ssc.union(rawStreams)
+    union.filter(_.contains("the")).count().foreachRDD(r =>
+      println(s"Grep count: ${r.collect().mkString}"))
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..f018f3a26d2e900c310bfac2aac4990d424fb960
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import java.io.File
+import java.nio.charset.Charset
+
+import com.google.common.io.Files
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.rdd.RDD
+import org.apache.spark.streaming.{Seconds, StreamingContext, Time}
+import org.apache.spark.util.{IntParam, LongAccumulator}
+
+/**
+ * Use this singleton to get or register a Broadcast variable.
+ */
+object WordBlacklist {
+
+  @volatile private var instance: Broadcast[Seq[String]] = null
+
+  def getInstance(sc: SparkContext): Broadcast[Seq[String]] = {
+    if (instance == null) {
+      synchronized {
+        if (instance == null) {
+          val wordBlacklist = Seq("a", "b", "c")
+          instance = sc.broadcast(wordBlacklist)
+        }
+      }
+    }
+    instance
+  }
+}
+
+/**
+ * Use this singleton to get or register an Accumulator.
+ */
+object DroppedWordsCounter {
+
+  @volatile private var instance: LongAccumulator = null
+
+  def getInstance(sc: SparkContext): LongAccumulator = {
+    if (instance == null) {
+      synchronized {
+        if (instance == null) {
+          instance = sc.longAccumulator("WordsInBlacklistCounter")
+        }
+      }
+    }
+    instance
+  }
+}
+
+/**
+ * Counts words in text encoded with UTF8 received from the network every second. This example also
+ * shows how to use lazily instantiated singleton instances for Accumulator and Broadcast so that
+ * they can be re-registered when the driver recovers from a failure.
+ *
+ * Usage: RecoverableNetworkWordCount <hostname> <port> <checkpoint-directory> <output-file>
+ *   <hostname> and <port> describe the TCP server that Spark Streaming would connect to in
+ *   order to receive data. <checkpoint-directory> is a directory in an HDFS-compatible file
+ *   system to which the checkpoint data will be written. <output-file> is the file to which
+ *   the word counts will be appended.
+ *
+ * <checkpoint-directory> and <output-file> must be absolute paths
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *
+ *      `$ nc -lk 9999`
+ *
+ * and run the example as
+ *
+ *      `$ ./bin/run-example org.apache.spark.examples.streaming.RecoverableNetworkWordCount \
+ *              localhost 9999 ~/checkpoint/ ~/out`
+ *
+ * If the directory ~/checkpoint/ does not exist (e.g. running for the first time), it will create
+ * a new StreamingContext (will print "Creating new context" to the console). Otherwise, if
+ * checkpoint data exists in ~/checkpoint/, then it will create StreamingContext from
+ * the checkpoint data.
+ *
+ * Refer to the online documentation for more details.
+ */
+object RecoverableNetworkWordCount {
+
+  def createContext(ip: String, port: Int, outputPath: String, checkpointDirectory: String)
+    : StreamingContext = {
+
+    // If you do not see this printed, it means the StreamingContext has been loaded
+    // from the existing checkpoint
+    println("Creating new context")
+    val outputFile = new File(outputPath)
+    if (outputFile.exists()) outputFile.delete()
+    val sparkConf = new SparkConf().setAppName("RecoverableNetworkWordCount")
+    // Create the context with a 1 second batch size
+    val ssc = new StreamingContext(sparkConf, Seconds(1))
+    ssc.checkpoint(checkpointDirectory)
+
+    // Create a socket stream on target ip:port and count the
+    // words in input stream of \n delimited text (eg. generated by 'nc')
+    val lines = ssc.socketTextStream(ip, port)
+    val words = lines.flatMap(_.split(" "))
+    val wordCounts = words.map((_, 1)).reduceByKey(_ + _)
+    wordCounts.foreachRDD { (rdd: RDD[(String, Int)], time: Time) =>
+      // Get or register the blacklist Broadcast
+      val blacklist = WordBlacklist.getInstance(rdd.sparkContext)
+      // Get or register the droppedWordsCounter Accumulator
+      val droppedWordsCounter = DroppedWordsCounter.getInstance(rdd.sparkContext)
+      // Use blacklist to drop words and use droppedWordsCounter to count them
+      val counts = rdd.filter { case (word, count) =>
+        if (blacklist.value.contains(word)) {
+          droppedWordsCounter.add(count)
+          false
+        } else {
+          true
+        }
+      }.collect().mkString("[", ", ", "]")
+      val output = s"Counts at time $time $counts"
+      println(output)
+      println(s"Dropped ${droppedWordsCounter.value} word(s) in total")
+      println(s"Appending to ${outputFile.getAbsolutePath}")
+      Files.append(output + "\n", outputFile, Charset.defaultCharset())
+    }
+    ssc
+  }
+
+  def main(args: Array[String]) {
+    if (args.length != 4) {
+      System.err.println(s"Your arguments were ${args.mkString("[", ", ", "]")}")
+      System.err.println(
+        """
+          |Usage: RecoverableNetworkWordCount <hostname> <port> <checkpoint-directory>
+          |     <output-file>. <hostname> and <port> describe the TCP server that Spark
+          |     Streaming would connect to in order to receive data. <checkpoint-directory> is
+          |     a directory in an HDFS-compatible file system to which the checkpoint data will
+          |     be written. <output-file> is the file to which the word counts will be appended.
+          |
+          |In local mode, <master> should be 'local[n]' with n > 1
+          |Both <checkpoint-directory> and <output-file> must be absolute paths
+        """.stripMargin
+      )
+      System.exit(1)
+    }
+    val Array(ip, IntParam(port), checkpointDirectory, outputPath) = args
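+    // StreamingContext.getOrCreate only invokes createContext when no checkpoint exists in
+    // <checkpoint-directory>; otherwise the context, including the word-count computation,
+    // is rebuilt from the checkpoint data.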
+    val ssc = StreamingContext.getOrCreate(checkpointDirectory,
+      () => createContext(ip, port, outputPath, checkpointDirectory))
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..787bbec73b28f4481b26370bdb46ae7eed06d522
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.spark.SparkConf
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.streaming.{Seconds, StreamingContext, Time}
+
+/**
+ * Use DataFrames and SQL to count words in UTF8 encoded, '\n' delimited text received from the
+ * network every second.
+ *
+ * Usage: SqlNetworkWordCount <hostname> <port>
+ * <hostname> and <port> describe the TCP server that Spark Streaming would connect to
+ * in order to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example org.apache.spark.examples.streaming.SqlNetworkWordCount localhost 9999`
+ */
+
+object SqlNetworkWordCount {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      System.err.println("Usage: SqlNetworkWordCount <hostname> <port>")
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+
+    // Create the context with a 2 second batch size
+    val sparkConf = new SparkConf().setAppName("SqlNetworkWordCount")
+    val ssc = new StreamingContext(sparkConf, Seconds(2))
+
+    // Create a socket stream on target ip:port and count the
+    // words in input stream of \n delimited text (eg. generated by 'nc')
+    // Note that a storage level with no replication is used only because this example runs
+    // locally; in a distributed setting, replication is necessary for fault tolerance.
+    val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER)
+    val words = lines.flatMap(_.split(" "))
+
+    // Convert RDDs of the words DStream to DataFrame and run SQL query
+    words.foreachRDD { (rdd: RDD[String], time: Time) =>
+      // Get the singleton instance of SparkSession
+      val spark = SparkSessionSingleton.getInstance(rdd.sparkContext.getConf)
+      import spark.implicits._
+
+      // Convert RDD[String] to RDD[case class] to DataFrame
+      val wordsDataFrame = rdd.map(w => Record(w)).toDF()
+
+      // Creates a temporary view using the DataFrame
+      wordsDataFrame.createOrReplaceTempView("words")
+
+      // Do word count on table using SQL and print it
+      val wordCountsDataFrame =
+        spark.sql("select word, count(*) as total from words group by word")
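+      // For illustration: an equivalent DataFrame aggregation (modulo the column alias) would be
+      //   wordsDataFrame.groupBy("word").count()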
+      println(s"========= $time =========")
+      wordCountsDataFrame.show()
+    }
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+
+
+/** Case class for converting RDD to DataFrame */
+case class Record(word: String)
+
+
+/** Lazily instantiated singleton instance of SparkSession */
+object SparkSessionSingleton {
+
+  @transient private var instance: SparkSession = _
+
+  def getInstance(sparkConf: SparkConf): SparkSession = {
+    if (instance == null) {
+      instance = SparkSession
+        .builder
+        .config(sparkConf)
+        .getOrCreate()
+    }
+    instance
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2811e67009fb0572d83f11b0430311e3cf0176ae
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming._
+
+/**
+ * Counts words cumulatively in UTF8 encoded, '\n' delimited text received from the network every
+ * second, starting from an initial set of word counts.
+ * Usage: StatefulNetworkWordCount <hostname> <port>
+ *   <hostname> and <port> describe the TCP server that Spark Streaming would connect to in
+ *   order to receive data.
+ *
+ * To run this on your local machine, you need to first run a Netcat server
+ *    `$ nc -lk 9999`
+ * and then run the example
+ *    `$ bin/run-example
+ *      org.apache.spark.examples.streaming.StatefulNetworkWordCount localhost 9999`
+ */
+object StatefulNetworkWordCount {
+  def main(args: Array[String]) {
+    if (args.length < 2) {
+      System.err.println("Usage: StatefulNetworkWordCount <hostname> <port>")
+      System.exit(1)
+    }
+
+    StreamingExamples.setStreamingLogLevels()
+
+    val sparkConf = new SparkConf().setAppName("StatefulNetworkWordCount")
+    // Create the context with a 1 second batch size
+    val ssc = new StreamingContext(sparkConf, Seconds(1))
+    ssc.checkpoint(".")
+
+    // Initial state RDD for mapWithState operation
+    val initialRDD = ssc.sparkContext.parallelize(List(("hello", 1), ("world", 1)))
+
+    // Create a ReceiverInputDStream on target ip:port and count the
+    // words in input stream of \n delimited text (eg. generated by 'nc')
+    val lines = ssc.socketTextStream(args(0), args(1).toInt)
+    val words = lines.flatMap(_.split(" "))
+    val wordDstream = words.map(x => (x, 1))
+
+    // Update the cumulative count using mapWithState
+    // This will give a DStream made of state (which is the cumulative count of the words)
+    val mappingFunc = (word: String, one: Option[Int], state: State[Int]) => {
+      val sum = one.getOrElse(0) + state.getOption.getOrElse(0)
+      val output = (word, sum)
+      state.update(sum)
+      output
+    }
+
+    val stateDstream = wordDstream.mapWithState(
+      StateSpec.function(mappingFunc).initialState(initialRDD))
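+    // Sketch (not used here): idle keys could additionally be expired by appending
+    // .timeout(Seconds(30)) to the StateSpec above; the mapping function is then also invoked
+    // for timed-out keys, with state.isTimingOut() returning true.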
+    stateDstream.print()
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b00f32fb25243ba450d6761c4d43270b1ff98430
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming
+
+import org.apache.log4j.{Level, Logger}
+
+import org.apache.spark.internal.Logging
+
+/** Utility functions for Spark Streaming examples. */
+object StreamingExamples extends Logging {
+
+  /** Set reasonable logging levels for streaming if the user has not configured log4j. */
+  def setStreamingLogLevels() {
+    val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
+    if (!log4jInitialized) {
+      // We first log something to initialize Spark's default logging, then we override the
+      // logging level.
+      logInfo("Setting log level to [WARN] for streaming example." +
+        " To override add a custom log4j.properties to the classpath.")
+      Logger.getRootLogger.setLevel(Level.WARN)
+    }
+  }
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala
new file mode 100644
index 0000000000000000000000000000000000000000..2108bc63edea2d1734dcf6ea44ec1b8690acb6ae
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming.clickstream
+
+import java.io.PrintWriter
+import java.net.ServerSocket
+import java.util.Random
+
+/** Represents a page view on a website with associated dimension data. */
+class PageView(val url: String, val status: Int, val zipCode: Int, val userID: Int)
+    extends Serializable {
+  override def toString(): String = {
+    "%s\t%s\t%s\t%s\n".format(url, status, zipCode, userID)
+  }
+}
+
+object PageView extends Serializable {
+  def fromString(in: String): PageView = {
+    val parts = in.split("\t")
+    new PageView(parts(0), parts(1).toInt, parts(2).toInt, parts(3).toInt)
+  }
+}
+
+// scalastyle:off
+/**
+ * Generates streaming events to simulate page views on a website.
+ *
+ * This should be used in tandem with PageViewStream.scala. Example:
+ *
+ * To run the generator
+ * `$ bin/run-example org.apache.spark.examples.streaming.clickstream.PageViewGenerator 44444 10`
+ * To process the generated stream
+ * `$ bin/run-example \
+ *    org.apache.spark.examples.streaming.clickstream.PageViewStream errorRatePerZipCode localhost 44444`
+ *
+ */
+// scalastyle:on
+object PageViewGenerator {
+  val pages = Map("http://foo.com/" -> .7,
+                  "http://foo.com/news" -> 0.2,
+                  "http://foo.com/contact" -> .1)
+  val httpStatus = Map(200 -> .95,
+                       404 -> .05)
+  val userZipCode = Map(94709 -> .5,
+                        94117 -> .5)
+  val userID = Map((1 to 100).map(_ -> .01): _*)
+
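+  // Picks a key from a discrete probability distribution by accumulating probabilities in
+  // map order until the running total exceeds a uniform random draw.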
+  def pickFromDistribution[T](inputMap: Map[T, Double]): T = {
+    val rand = new Random().nextDouble()
+    var total = 0.0
+    for ((item, prob) <- inputMap) {
+      total = total + prob
+      if (total > rand) {
+        return item
+      }
+    }
+    inputMap.take(1).head._1 // Shouldn't get here if probabilities add up to 1.0
+  }
+
+  def getNextClickEvent(): String = {
+    val id = pickFromDistribution(userID)
+    val page = pickFromDistribution(pages)
+    val status = pickFromDistribution(httpStatus)
+    val zipCode = pickFromDistribution(userZipCode)
+    new PageView(page, status, zipCode, id).toString()
+  }
+
+  def main(args: Array[String]) {
+    if (args.length != 2) {
+      System.err.println("Usage: PageViewGenerator <port> <viewsPerSecond>")
+      System.exit(1)
+    }
+    val port = args(0).toInt
+    val viewsPerSecond = args(1).toFloat
+    val sleepDelayMs = (1000.0 / viewsPerSecond).toInt
+    val listener = new ServerSocket(port)
+    println(s"Listening on port: $port")
+
+    while (true) {
+      val socket = listener.accept()
+      new Thread() {
+        override def run(): Unit = {
+          println(s"Got client connected from: ${socket.getInetAddress}")
+          val out = new PrintWriter(socket.getOutputStream(), true)
+
+          while (true) {
+            Thread.sleep(sleepDelayMs)
+            out.write(getNextClickEvent())
+            out.flush()
+          }
+          socket.close()
+        }
+      }.start()
+    }
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
new file mode 100644
index 0000000000000000000000000000000000000000..b8e7c7e9e9152ee69562e2d2f7c296f05367c6f7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming.clickstream
+
+import org.apache.spark.examples.streaming.StreamingExamples
+import org.apache.spark.streaming.{Seconds, StreamingContext}
+
+// scalastyle:off
+/**
+ * Analyses a streaming dataset of web page views. This class demonstrates several types of
+ * operators available in Spark streaming.
+ *
+ * This should be used in tandem with PageViewGenerator.scala. Example:
+ * To run the generator
+ * `$ bin/run-example org.apache.spark.examples.streaming.clickstream.PageViewGenerator 44444 10`
+ * To process the generated stream
+ * `$ bin/run-example \
+ *    org.apache.spark.examples.streaming.clickstream.PageViewStream errorRatePerZipCode localhost 44444`
+ */
+// scalastyle:on
+object PageViewStream {
+  def main(args: Array[String]) {
+    if (args.length != 3) {
+      System.err.println("Usage: PageViewStream <metric> <host> <port>")
+      System.err.println("<metric> must be one of pageCounts, slidingPageCounts," +
+                         " errorRatePerZipCode, activeUserCount, popularUsersSeen")
+      System.exit(1)
+    }
+    StreamingExamples.setStreamingLogLevels()
+    val metric = args(0)
+    val host = args(1)
+    val port = args(2).toInt
+
+    // Create the context
+    val ssc = new StreamingContext("local[2]", "PageViewStream", Seconds(1),
+      System.getenv("SPARK_HOME"), StreamingContext.jarOfClass(this.getClass).toSeq)
+
+    // Create a ReceiverInputDStream on target host:port and convert each line to a PageView
+    val pageViews = ssc.socketTextStream(host, port)
+                       .flatMap(_.split("\n"))
+                       .map(PageView.fromString(_))
+
+    // Return a count of views per URL seen in each batch
+    val pageCounts = pageViews.map(view => view.url).countByValue()
+
+    // Return a sliding window of page views per URL in the last ten seconds
+    val slidingPageCounts = pageViews.map(view => view.url)
+                                     .countByValueAndWindow(Seconds(10), Seconds(2))
+
+
+    // Return the rate of error pages (a non-200 status) in each zip code over the last 30 seconds
+    val statusesPerZipCode = pageViews.window(Seconds(30), Seconds(2))
+                                      .map(view => (view.zipCode, view.status))
+                                      .groupByKey()
+    val errorRatePerZipCode = statusesPerZipCode.map{
+      case(zip, statuses) =>
+        val normalCount = statuses.count(_ == 200)
+        val errorCount = statuses.size - normalCount
+        val errorRatio = errorCount.toFloat / statuses.size
+        if (errorRatio > 0.05) {
+          "%s: **%s**".format(zip, errorRatio)
+        } else {
+          "%s: %s".format(zip, errorRatio)
+        }
+    }
+
+    // Return the number of unique users seen in the last 15 seconds
+    val activeUserCount = pageViews.window(Seconds(15), Seconds(2))
+                                   .map(view => (view.userID, 1))
+                                   .groupByKey()
+                                   .count()
+                                   .map("Unique active users: " + _)
+
+    // An external dataset we want to join to this stream
+    val userList = ssc.sparkContext.parallelize(Seq(
+      1 -> "Patrick Wendell",
+      2 -> "Reynold Xin",
+      3 -> "Matei Zaharia"))
+
+    metric match {
+      case "pageCounts" => pageCounts.print()
+      case "slidingPageCounts" => slidingPageCounts.print()
+      case "errorRatePerZipCode" => errorRatePerZipCode.print()
+      case "activeUserCount" => activeUserCount.print()
+      case "popularUsersSeen" =>
+        // Look for users in our existing dataset and print out the name if we have a match
+        pageViews.map(view => (view.userID, 1))
+          .foreachRDD((rdd, time) => rdd.join(userList)
+            .map(_._2._2)
+            .take(10)
+            .foreach(u => println(s"Saw user $u at time $time")))
+      case _ => println(s"Invalid metric entered: $metric")
+    }
+
+    ssc.start()
+    ssc.awaitTermination()
+  }
+}
+// scalastyle:on println
diff --git a/spark-2.4.0-bin-hadoop2.7/extra-packages/numpy-1.14.2-cp36-cp36m-manylinux1_x86_64.whl b/spark-2.4.0-bin-hadoop2.7/extra-packages/numpy-1.14.2-cp36-cp36m-manylinux1_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..6167d2cd572250bd335d0cd688344031e7da99e9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/extra-packages/numpy-1.14.2-cp36-cp36m-manylinux1_x86_64.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/extra-packages/nxcals_data_access_python3-0.1.164-py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/extra-packages/nxcals_data_access_python3-0.1.164-py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..fdfec10f40155104b351bc8071ef626805fd3f50
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/extra-packages/nxcals_data_access_python3-0.1.164-py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/JavaEWAH-0.3.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/JavaEWAH-0.3.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..36ec6e176b810258d05270e03af711e44e2ab8a0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/JavaEWAH-0.3.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/RoaringBitmap-0.5.11.jar b/spark-2.4.0-bin-hadoop2.7/jars/RoaringBitmap-0.5.11.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cbeb7a12703e24a7725b4f95d0575cda7caeaf5d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/RoaringBitmap-0.5.11.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/ST4-4.0.4.jar b/spark-2.4.0-bin-hadoop2.7/jars/ST4-4.0.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0ed9db8472ff457fbdc59bd095b19d9bd8ba076c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/ST4-4.0.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/activation-1.1.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/activation-1.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1b703ab283e0cddabf9c1b5e28658f9198c0def4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/activation-1.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/aircompressor-0.10.jar b/spark-2.4.0-bin-hadoop2.7/jars/aircompressor-0.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e5859afa9ad927ab973d9efac375f97e07eb3be8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/aircompressor-0.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/antlr-2.7.7.jar b/spark-2.4.0-bin-hadoop2.7/jars/antlr-2.7.7.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5e5f14b35584eac2a9f0f888769f0ab93ca6d849
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/antlr-2.7.7.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/antlr-runtime-3.4.jar b/spark-2.4.0-bin-hadoop2.7/jars/antlr-runtime-3.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..865a537b0c5e4d617b8c45adc43fa210df1a8129
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/antlr-runtime-3.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/antlr4-runtime-4.7.jar b/spark-2.4.0-bin-hadoop2.7/jars/antlr4-runtime-4.7.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5c3bf7d351dfc1989a026f94bf5b1c3c241493f4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/antlr4-runtime-4.7.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/aopalliance-1.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/aopalliance-1.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..578b1a0c359ef88a84461bdb91d9d0041afd54de
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/aopalliance-1.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/aopalliance-repackaged-2.4.0-b34.jar b/spark-2.4.0-bin-hadoop2.7/jars/aopalliance-repackaged-2.4.0-b34.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a22cbb836511c6cb0e8901434489e53f44a24158
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/aopalliance-repackaged-2.4.0-b34.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/apache-log4j-extras-1.2.17.jar b/spark-2.4.0-bin-hadoop2.7/jars/apache-log4j-extras-1.2.17.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9212a96be41f27b2dca1c6a80d2733518f24cd04
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/apache-log4j-extras-1.2.17.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/apacheds-i18n-2.0.0-M15.jar b/spark-2.4.0-bin-hadoop2.7/jars/apacheds-i18n-2.0.0-M15.jar
new file mode 100644
index 0000000000000000000000000000000000000000..11dccb3e2c3261ddfc7f03593555311d7762b6b7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/apacheds-i18n-2.0.0-M15.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/apacheds-kerberos-codec-2.0.0-M15.jar b/spark-2.4.0-bin-hadoop2.7/jars/apacheds-kerberos-codec-2.0.0-M15.jar
new file mode 100644
index 0000000000000000000000000000000000000000..82e564e1a6526785dcba2d9983f25f453ad9f664
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/apacheds-kerberos-codec-2.0.0-M15.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/api-asn1-api-1.0.0-M20.jar b/spark-2.4.0-bin-hadoop2.7/jars/api-asn1-api-1.0.0-M20.jar
new file mode 100644
index 0000000000000000000000000000000000000000..68dee3a02f5f62ce35cc363cb3a2c0a3b0d81acf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/api-asn1-api-1.0.0-M20.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/api-util-1.0.0-M20.jar b/spark-2.4.0-bin-hadoop2.7/jars/api-util-1.0.0-M20.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ff9780eac9219a461f5f62515a83bdad345c85f7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/api-util-1.0.0-M20.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/arpack_combined_all-0.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/arpack_combined_all-0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6beb2172a8ab7a57288f1c1a2bbb58f5467b1abe
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/arpack_combined_all-0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/arrow-format-0.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/arrow-format-0.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ed169160538324fe1f6f527ffb878f2ca2b1935a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/arrow-format-0.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/arrow-memory-0.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/arrow-memory-0.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..678c59be010fce29b41f78dda262d8a2f7181143
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/arrow-memory-0.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/arrow-vector-0.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/arrow-vector-0.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0e5bf3001263b80ca44a48d3888eef81ad293218
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/arrow-vector-0.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/automaton-1.11-8.jar b/spark-2.4.0-bin-hadoop2.7/jars/automaton-1.11-8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..60f6ce01e14fcabf4c59a4845c7483be47becf5f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/automaton-1.11-8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/avro-1.8.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/avro-1.8.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..23b3958b4e05279805c6a60723034a0bf421c4aa
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/avro-1.8.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/avro-ipc-1.8.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/avro-ipc-1.8.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..015579e10c07840c440565005e99f44b7491fea7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/avro-ipc-1.8.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/avro-mapred-1.8.2-hadoop2.jar b/spark-2.4.0-bin-hadoop2.7/jars/avro-mapred-1.8.2-hadoop2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..89307c14b1101b9656dff59c3a0ad1a4ff6b4712
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/avro-mapred-1.8.2-hadoop2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/bonecp-0.8.0.RELEASE.jar b/spark-2.4.0-bin-hadoop2.7/jars/bonecp-0.8.0.RELEASE.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d47104f28cbe4716bb95fea9db9801a0221bdf9d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/bonecp-0.8.0.RELEASE.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/breeze-macros_2.11-0.13.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/breeze-macros_2.11-0.13.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1e87c65ed163e2c8ad61cd347cfd5a7b779518c2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/breeze-macros_2.11-0.13.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/breeze_2.11-0.13.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/breeze_2.11-0.13.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4464e944201bca9902161106db2af9a5ff5167b4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/breeze_2.11-0.13.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/calcite-avatica-1.2.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/jars/calcite-avatica-1.2.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..04891f4778e791bbd4ebde28b3c946900bf8a1d6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/calcite-avatica-1.2.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/calcite-core-1.2.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/jars/calcite-core-1.2.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2ab1f0c5ea212c6c23d7d9e36929931d06277c6d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/calcite-core-1.2.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/calcite-linq4j-1.2.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/jars/calcite-linq4j-1.2.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..59f1232125cbd483467b1e978e610357671b2525
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/calcite-linq4j-1.2.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/chill-java-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/chill-java-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f9bda36f5f443c9738f419f23f8f241d02d3c568
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/chill-java-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/chill_2.11-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/chill_2.11-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6b321596d48b799b7a982acbc27733e8d9d651d7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/chill_2.11-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-beanutils-1.7.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-beanutils-1.7.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b1b89c9c921f16af22a88db3ff28975a8e40d886
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-beanutils-1.7.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-beanutils-core-1.8.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-beanutils-core-1.8.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..87c15f456597d71315ecc0b306af2b8e99c532b9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-beanutils-core-1.8.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-cli-1.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-cli-1.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ce4b9fffe40c41669797cd806ac989471b2acd84
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-cli-1.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-codec-1.10.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-codec-1.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1d7417c4031324cc6577a306cde57ae220f55523
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-codec-1.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-collections-3.2.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-collections-3.2.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..fa5df82a63062c040eefcef947827c4ea4ae9e6b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-collections-3.2.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-compiler-3.0.9.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-compiler-3.0.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2866a666252c82ae59e9d355610702aad2344dbb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-compiler-3.0.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-compress-1.8.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-compress-1.8.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..66b0a56bd8b24478ddeb2d6eb2824280de1c1a9d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-compress-1.8.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-configuration-1.6.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-configuration-1.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2d4689a1b8c498e765c5be31fbc96d97b774a55f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-configuration-1.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-crypto-1.0.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-crypto-1.0.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..847507d2d71441776a409968ac842af71c8c1fd0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-crypto-1.0.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-dbcp-1.4.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-dbcp-1.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c4c1c4f286a464227a55bcd3577d01d107e7b192
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-dbcp-1.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-digester-1.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-digester-1.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1110f0aadeba873dea8b7c11914c6959c73ac2d3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-digester-1.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-httpclient-3.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-httpclient-3.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7c59774aed4f5dd08778489aaad565690ff7c132
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-httpclient-3.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-io-2.4.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-io-2.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..90035a4fe068e65d42edf621a0ffc78b4b020cdc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-io-2.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-lang-2.6.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-lang-2.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..98467d3a653ebad776ffa3542efeb9732fe0b482
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-lang-2.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-lang3-3.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-lang3-3.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6328c8de419b8424a27705fb940d2cfeccafddd9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-lang3-3.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-logging-1.1.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-logging-1.1.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ab5125407a6a77767330e29a028b97c3ac64ce18
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-logging-1.1.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-math3-3.4.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-math3-3.4.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cf9f519fb1780ffbb04ac4902abc22a5e609f3c4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-math3-3.4.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-net-3.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-net-3.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b75f1a51cc60108be7dd7e9d0e8a6fbbd3856203
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-net-3.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/commons-pool-1.5.4.jar b/spark-2.4.0-bin-hadoop2.7/jars/commons-pool-1.5.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..43edf996359706eccd90cc01311ce852c87cb6fb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/commons-pool-1.5.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/compress-lzf-1.0.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/compress-lzf-1.0.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a121eb487b24067ef5acb11f52447ca4f8ae2d7a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/compress-lzf-1.0.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/core-1.1.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/core-1.1.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cf4584fdb9cce29c1c9a24ffcd75f06c58cf394a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/core-1.1.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/curator-client-2.7.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/curator-client-2.7.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2d480565c9c797c92d46b9e12e512021a3c55453
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/curator-client-2.7.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/curator-framework-2.7.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/curator-framework-2.7.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2e9eca662c034b3a912bb72545e1e3a6277301d1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/curator-framework-2.7.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/curator-recipes-2.7.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/curator-recipes-2.7.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9fa08568adb9259d5bad422d1c3a94609bbdb7b0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/curator-recipes-2.7.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-api-jdo-3.2.6.jar b/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-api-jdo-3.2.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8c98aca1ed6dd68246e4833018127a5751cdc5f8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-api-jdo-3.2.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-core-3.2.10.jar b/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-core-3.2.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7078bce0682f3234dd8a80aae8c71d48d7178eb0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-core-3.2.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-rdbms-3.2.9.jar b/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-rdbms-3.2.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a23c982f501731f272f39a5d3f806c2d1b3442ff
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/datanucleus-rdbms-3.2.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/derby-10.12.1.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/derby-10.12.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5841555f8cde25c26923b226881816a70475d998
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/derby-10.12.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/eigenbase-properties-1.1.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/eigenbase-properties-1.1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..786c6c6a2bdddf3819a40cd97f4a819091d7ccf6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/eigenbase-properties-1.1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/flatbuffers-1.2.0-3f79e055.jar b/spark-2.4.0-bin-hadoop2.7/jars/flatbuffers-1.2.0-3f79e055.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e0d2db46405e58561cf67e0b30b7f5abb19f3016
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/flatbuffers-1.2.0-3f79e055.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/generex-1.0.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/generex-1.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..982bd1e2d07cb4a9b2f2adea0e27d11040300d00
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/generex-1.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/gson-2.2.4.jar b/spark-2.4.0-bin-hadoop2.7/jars/gson-2.2.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..75fe27c54714ca75a087b84da03161b756c97633
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/gson-2.2.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/guava-14.0.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/guava-14.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3a3d9258e3b9529a65a9c115a719a7d5d66a5261
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/guava-14.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/guice-3.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/guice-3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f313e2b089d9e1b9d2c6e578ccbc4d8b6eabfd73
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/guice-3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/guice-servlet-3.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/guice-servlet-3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bdc66142e7c5554f83aaf265a7f30f2a0173b82a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/guice-servlet-3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-annotations-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-annotations-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..87ccddc35269600e53d5b523a3e44aed63b8a6e2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-annotations-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-auth-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-auth-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..569b88c2e3329b6fed921789583fa2c1e635086d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-auth-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-client-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-client-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..20049c089d33e826f46f62436a209060d7beb655
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-client-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c8a37566fda49f863596e708be66a171296bb9f2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-hdfs-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-hdfs-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e6fdb444cd1efa77a4a3895f1154aa28c0b4d6f1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-hdfs-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-app-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-app-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3aa28d6d213c6d492bfa83b648f045273eaf9bd7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-app-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..38df87459bac5516f58329caabcce3ff38b9f5a4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-core-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-core-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b99f85fbdeeea23a6068ef5d18b85f336fabb0c1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-core-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-jobclient-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-jobclient-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5c60ecbcbe7e441cb6ae04d6d2ca98c1a71ee611
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-jobclient-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-shuffle-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-shuffle-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..755f7ff74eccc751a81216f9cad6892782fb8b07
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-mapreduce-client-shuffle-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-api-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-api-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3fbbba509e1f50326b5fad510e4b6364e24cf8a0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-api-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-client-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-client-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7c7ad86c9ef481f7b813a1f570c0bdf16812de8c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-client-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ff0d84c79a4e3864b232b7b3aac2c6784253dabd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e686b71f401922890a4b0ca8d517de6d7fed199c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-nodemanager-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-nodemanager-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0f6539a6d02050a749faef6c68982089a6986cbe
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-nodemanager-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-web-proxy-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-web-proxy-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a54d91174b2ff022779427e005a7a43c3d77a972
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hadoop-yarn-server-web-proxy-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hive-beeline-1.2.1.spark2.jar b/spark-2.4.0-bin-hadoop2.7/jars/hive-beeline-1.2.1.spark2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3c9f93adbcd41ac0ac429a8e8afe8e2114f83c3a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hive-beeline-1.2.1.spark2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hive-cli-1.2.1.spark2.jar b/spark-2.4.0-bin-hadoop2.7/jars/hive-cli-1.2.1.spark2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c57a601e764fda6b1bc8e05ce8f27a5486a86086
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hive-cli-1.2.1.spark2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hive-exec-1.2.1.spark2.jar b/spark-2.4.0-bin-hadoop2.7/jars/hive-exec-1.2.1.spark2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4e98a64d44d92492726750033fc3f210c842116e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hive-exec-1.2.1.spark2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hive-jdbc-1.2.1.spark2.jar b/spark-2.4.0-bin-hadoop2.7/jars/hive-jdbc-1.2.1.spark2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d29b9a8b42e8bc34504c7ed3c3b9b84b73979a3b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hive-jdbc-1.2.1.spark2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hive-metastore-1.2.1.spark2.jar b/spark-2.4.0-bin-hadoop2.7/jars/hive-metastore-1.2.1.spark2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9ef0392c1211021f08fc4615a1cff8df9b6e0dea
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hive-metastore-1.2.1.spark2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hk2-api-2.4.0-b34.jar b/spark-2.4.0-bin-hadoop2.7/jars/hk2-api-2.4.0-b34.jar
new file mode 100644
index 0000000000000000000000000000000000000000..dcf2c6c96f5b763deed4fbe8197c8fd27aa221f3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hk2-api-2.4.0-b34.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hk2-locator-2.4.0-b34.jar b/spark-2.4.0-bin-hadoop2.7/jars/hk2-locator-2.4.0-b34.jar
new file mode 100644
index 0000000000000000000000000000000000000000..190f29205b792a47cf2a9623b24020397164940c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hk2-locator-2.4.0-b34.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hk2-utils-2.4.0-b34.jar b/spark-2.4.0-bin-hadoop2.7/jars/hk2-utils-2.4.0-b34.jar
new file mode 100644
index 0000000000000000000000000000000000000000..91930d9b6614c4e2841cde08a95b972351d03e6e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hk2-utils-2.4.0-b34.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/hppc-0.7.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/hppc-0.7.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..54dea63ae1382d49d67c7f0333dc6f3d0ca1f0fc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/hppc-0.7.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/htrace-core-3.1.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/jars/htrace-core-3.1.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6f03925d534ad56b775dc2af8791d5878141d3f5
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/htrace-core-3.1.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/httpclient-4.5.6.jar b/spark-2.4.0-bin-hadoop2.7/jars/httpclient-4.5.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..56231de0cb6daa281a4ee34a3f28dc90a667cfd2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/httpclient-4.5.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/httpcore-4.4.10.jar b/spark-2.4.0-bin-hadoop2.7/jars/httpcore-4.4.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..dc510f81cb7a832c08e63ad1f0298c8484096ded
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/httpcore-4.4.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/ivy-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/ivy-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..14ff88e260188705f55608e6386ce3a1408beaeb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/ivy-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-annotations-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-annotations-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4d9f42153781c16f2b5abc6ecd1495c4955966d9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-annotations-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-core-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-core-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..362f1f393e6fb9feba0fe1fa171811a80f8f83d6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-core-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-core-asl-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-core-asl-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bb4fe1da1186505a254611ad5a7c49e9f64a3ee1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-core-asl-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-databind-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-databind-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2d8687b5d79cca6ea54cda3471dce3b14384794b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-databind-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-dataformat-yaml-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-dataformat-yaml-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6c403e99a43effe786bf3091c87c513c549db417
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-dataformat-yaml-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-datatype-jdk8-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-datatype-jdk8-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6367d1f4efaf6d13e66aa2df84e795333bd33d6c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-datatype-jdk8-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-datatype-jsr310-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-datatype-jsr310-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..09bc9d59863ae678e5d95b830c2756af80c0f59b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-datatype-jsr310-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-jaxrs-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-jaxrs-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b85f45cf407a3727f2b9767f226d27a79a46462c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-jaxrs-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-mapper-asl-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-mapper-asl-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0f2073fc7eaf3ac5ffd02928cf9272b558e42e81
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-mapper-asl-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-parameter-names-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-parameter-names-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..341de941f508832107f382648e9d4b74362edbbe
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-parameter-names-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-paranamer-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-paranamer-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..63debc93f1874446714d073c42b5aeac746eccd7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-paranamer-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-scala_2.11-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-scala_2.11-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6b3484303cd38207c9d2bd42632093170ca06185
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-module-scala_2.11-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jackson-xc-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/jars/jackson-xc-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d5842e9c3af4cf231adc4b56108b3867e1920111
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jackson-xc-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/janino-3.0.9.jar b/spark-2.4.0-bin-hadoop2.7/jars/janino-3.0.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..761e0c2a0a7b3a7999fecc99df79b8e321c6869a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/janino-3.0.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/javassist-3.18.1-GA.jar b/spark-2.4.0-bin-hadoop2.7/jars/javassist-3.18.1-GA.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d5f19ac596ae64347a4fb159c81273143a25bdf4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/javassist-3.18.1-GA.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/javax.annotation-api-1.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/javax.annotation-api-1.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9ab39ffa4b57f9d20bd37acfbf6011902596a078
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/javax.annotation-api-1.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/javax.inject-1.jar b/spark-2.4.0-bin-hadoop2.7/jars/javax.inject-1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b2a9d0bf7bd463a6361898b36f010edd05c0cf6e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/javax.inject-1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/javax.inject-2.4.0-b34.jar b/spark-2.4.0-bin-hadoop2.7/jars/javax.inject-2.4.0-b34.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9a06dd38156c7b57c32ed910f5f5c9422aaeef1d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/javax.inject-2.4.0-b34.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/javax.servlet-api-3.1.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/javax.servlet-api-3.1.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6b14c3d267867e76c04948bb31b3de18e01412ee
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/javax.servlet-api-3.1.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/javax.ws.rs-api-2.0.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/javax.ws.rs-api-2.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7eb68b4a0abb804e1d30b68ae4ba74a1eb14d38b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/javax.ws.rs-api-2.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/javolution-5.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/javolution-5.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7ac80ea9ea923b198712b8c00bb7b691ea2d7aee
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/javolution-5.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jaxb-api-2.2.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jaxb-api-2.2.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..31e5fa062f64ee9867760c8b00f57245b0a32017
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jaxb-api-2.2.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jcl-over-slf4j-1.7.16.jar b/spark-2.4.0-bin-hadoop2.7/jars/jcl-over-slf4j-1.7.16.jar
new file mode 100644
index 0000000000000000000000000000000000000000..63ac422d00ff29c7cf337b978603b2724cb90d1a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jcl-over-slf4j-1.7.16.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jdo-api-3.0.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/jdo-api-3.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6318d329ca9fc71037af1c59d1c8b4723f9c2795
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jdo-api-3.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jersey-client-2.22.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jersey-client-2.22.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..73adaf976bb70a34df05f9bd2564ed4cf9c8ce6e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jersey-client-2.22.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jersey-common-2.22.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jersey-common-2.22.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..40270efb50d180f969937fec22101eae228d441b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jersey-common-2.22.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jersey-container-servlet-2.22.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jersey-container-servlet-2.22.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ebeffe3b84274e23d75fac28e1b2fc00c7f0c083
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jersey-container-servlet-2.22.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jersey-container-servlet-core-2.22.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jersey-container-servlet-core-2.22.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bb77236e13cfc02500741ce3667dc77f8fcc408c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jersey-container-servlet-core-2.22.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jersey-guava-2.22.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jersey-guava-2.22.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..30f6ad8eca9ce04dd1cf58dd9c8f18272d0ddc7e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jersey-guava-2.22.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jersey-media-jaxb-2.22.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jersey-media-jaxb-2.22.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9ea2cc11c78736e6b2350e000174a53d187c479b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jersey-media-jaxb-2.22.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jersey-server-2.22.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jersey-server-2.22.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..893e2dc8648c4437adce565b969265496a69c97c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jersey-server-2.22.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jetty-6.1.26.jar b/spark-2.4.0-bin-hadoop2.7/jars/jetty-6.1.26.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2cbe07aeefa47ad6321addf0e75e010858f72fba
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jetty-6.1.26.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jetty-util-6.1.26.jar b/spark-2.4.0-bin-hadoop2.7/jars/jetty-util-6.1.26.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cd237528add68b792f2e6e030344f27d9d07ec31
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jetty-util-6.1.26.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jline-2.14.6.jar b/spark-2.4.0-bin-hadoop2.7/jars/jline-2.14.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..fe11ed545bd565e64101d605fde8b9d5aeb5e00d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jline-2.14.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/joda-time-2.9.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/joda-time-2.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4d54b2628f029db84e7965e8175102f65c4df87e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/joda-time-2.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jodd-core-3.5.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/jodd-core-3.5.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b71c139730400f0d6e746ec9cfba9a78ea2125b3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jodd-core-3.5.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jpam-1.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/jpam-1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6ccbfbebb4e60d7de866fc0e308fef66c9ed8a1d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jpam-1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/json4s-ast_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/json4s-ast_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..056bc424a153ff8084e70e76441fde5eb0208345
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/json4s-ast_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/json4s-core_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/json4s-core_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4896f16c46853fa8a50b1fbad40f3d0be95a79d8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/json4s-core_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/json4s-jackson_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/json4s-jackson_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f1d76561126ac61acc6466c8312340f61b14fb6d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/json4s-jackson_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/json4s-scalap_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/json4s-scalap_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a0c0804ba60b1ca64ab665fc3bf0dca0cd2ee0cc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/json4s-scalap_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jsp-api-2.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/jsp-api-2.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c0195afa5006e13cb9edc71ef64f13f4f70ff6bc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jsp-api-2.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jsr305-1.3.9.jar b/spark-2.4.0-bin-hadoop2.7/jars/jsr305-1.3.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a9afc6619b4b9369f0501d06ceb1cf82a1e47cdd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jsr305-1.3.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jta-1.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/jta-1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7736ec9f08c2177f0068923020138bcd57d14f41
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jta-1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jtransforms-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/jtransforms-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9bc843fa603eb68523f0b94a0c140f84904fc1bf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jtransforms-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/jul-to-slf4j-1.7.16.jar b/spark-2.4.0-bin-hadoop2.7/jars/jul-to-slf4j-1.7.16.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d22a014f22e5ac5d2c1c22d63e823db1c8f3d285
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/jul-to-slf4j-1.7.16.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/kryo-shaded-4.0.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/kryo-shaded-4.0.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a25fbdc405a5de5327ec927bd380cedffbec9046
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/kryo-shaded-4.0.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/kubernetes-client-3.0.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/kubernetes-client-3.0.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f7dddd01e1750e6b994ec8f5747171983774b022
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/kubernetes-client-3.0.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/kubernetes-model-2.0.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/kubernetes-model-2.0.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6453d8fe49aafea5bab725016473465e4f8f9ad3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/kubernetes-model-2.0.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/leveldbjni-all-1.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/leveldbjni-all-1.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cbda6e40a261f717f2a34470a9564f0bd39e5de8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/leveldbjni-all-1.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/libfb303-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/libfb303-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..07c711b738e2292420a7a244ef089d1ab7fbb7cc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/libfb303-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/libthrift-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/libthrift-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f9221a9f958f2ac140e33a71abaf1d31adb6a3f7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/libthrift-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/log4j-1.2.17.jar b/spark-2.4.0-bin-hadoop2.7/jars/log4j-1.2.17.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1d425cf7d7e25f81be64d32c406ff66cfb6c4766
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/log4j-1.2.17.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/logging-interceptor-3.8.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/logging-interceptor-3.8.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6d19bf47bb9a680c921b78979acf8a3bb9648b1e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/logging-interceptor-3.8.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/lz4-java-1.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/lz4-java-1.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..301908be2c3a97e882ef553f0442c2e0854eb84d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/lz4-java-1.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/machinist_2.11-0.6.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/machinist_2.11-0.6.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8c70859aa1eed74df3f520d3b81af51c94434b9c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/machinist_2.11-0.6.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/macro-compat_2.11-1.1.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/macro-compat_2.11-1.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7932094740e954219157434510d3999ce4cf6648
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/macro-compat_2.11-1.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/mesos-1.4.0-shaded-protobuf.jar b/spark-2.4.0-bin-hadoop2.7/jars/mesos-1.4.0-shaded-protobuf.jar
new file mode 100644
index 0000000000000000000000000000000000000000..07ed59ec399ea3068a19ced592ad5b396f756dc4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/mesos-1.4.0-shaded-protobuf.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/metrics-core-3.1.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/metrics-core-3.1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5e6aed8dda24220f02995bbd4522c3924a41212a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/metrics-core-3.1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/metrics-graphite-3.1.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/metrics-graphite-3.1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e44d6ff093108fb9079afea54d17e673a9961211
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/metrics-graphite-3.1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/metrics-json-3.1.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/metrics-json-3.1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0ab9ab45a0af1b8e90dbbf9456bb9a2e4c5a2e86
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/metrics-json-3.1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/metrics-jvm-3.1.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/metrics-jvm-3.1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9fa80302d6f0166b478b4e50cb1d2dd3f426a048
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/metrics-jvm-3.1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/minlog-1.3.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/minlog-1.3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ec67b63e569bf974378f9a45149882dd9ba16f53
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/minlog-1.3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/netty-3.9.9.Final.jar b/spark-2.4.0-bin-hadoop2.7/jars/netty-3.9.9.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..25b1849660473f66f332298232485e9b11dbd1e8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/netty-3.9.9.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/netty-all-4.1.17.Final.jar b/spark-2.4.0-bin-hadoop2.7/jars/netty-all-4.1.17.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d5cd7c2138aba770e95b8122c0192ec62c7f3c4a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/netty-all-4.1.17.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/nxcals-hadoop-pro-config-0.1.164.jar b/spark-2.4.0-bin-hadoop2.7/jars/nxcals-hadoop-pro-config-0.1.164.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8c0f0935004ac9703afb2b09a3c0f5e1539a9695
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/nxcals-hadoop-pro-config-0.1.164.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/objenesis-2.5.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/objenesis-2.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..31b245b99c47556de776978c44e0de8ad1553aa7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/objenesis-2.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/okhttp-3.8.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/okhttp-3.8.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..97b1fd4c5275f4cc228f911e0f793c43e2c06f2f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/okhttp-3.8.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/okio-1.13.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/okio-1.13.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..02c302f82e2aa3c73614418b206503e5df1cc25d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/okio-1.13.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/opencsv-2.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/opencsv-2.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..01f82ca733e3dd9974d5afcf4e368b01941a7710
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/opencsv-2.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/orc-core-1.5.2-nohive.jar b/spark-2.4.0-bin-hadoop2.7/jars/orc-core-1.5.2-nohive.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a704608327c61d52221c1e7ecff06f45af8cce65
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/orc-core-1.5.2-nohive.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/orc-mapreduce-1.5.2-nohive.jar b/spark-2.4.0-bin-hadoop2.7/jars/orc-mapreduce-1.5.2-nohive.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4bb15263efde782b01486d635230826cd19b0856
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/orc-mapreduce-1.5.2-nohive.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/orc-shims-1.5.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/orc-shims-1.5.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3685a1a95c14caa39d4b24b364545fdd4d99e324
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/orc-shims-1.5.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/oro-2.0.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/oro-2.0.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..23488d2600f5f4784c0ba2be5baa4c41f396f616
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/oro-2.0.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/osgi-resource-locator-1.0.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/osgi-resource-locator-1.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bd6aa17ebbcbb8c940c92d8f945919f5d13c1f86
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/osgi-resource-locator-1.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/paranamer-2.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/paranamer-2.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0bf659b93e74bb53003bc521f59c855d0ab8e692
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/paranamer-2.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/parquet-column-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/parquet-column-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ac6864a3d0b8f58a343fb73d866ad257da021258
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/parquet-column-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/parquet-common-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/parquet-common-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ddfbad6cd2ca790b0fa9a93d7e34831ea162238e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/parquet-common-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/parquet-encoding-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/parquet-encoding-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..19d2b6487c8c6c7586b4ccdee170bea28672a936
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/parquet-encoding-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/parquet-format-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/parquet-format-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5ee2192f025170f7435a40a0154490f56a3da12c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/parquet-format-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/parquet-hadoop-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/parquet-hadoop-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..99a7d71995642cb6b62841c1fc6ef807a0b55bea
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/parquet-hadoop-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/parquet-hadoop-bundle-1.6.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/parquet-hadoop-bundle-1.6.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8ec829df64b7ecf149e19e02d31d8d19d198ef12
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/parquet-hadoop-bundle-1.6.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/parquet-jackson-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/parquet-jackson-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..11f465a4674848c9b5a45258cad6a16e7d444699
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/parquet-jackson-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/protobuf-java-2.5.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/protobuf-java-2.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4c4e686a497c4d4944caf39d830c1271db5d6fee
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/protobuf-java-2.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/py4j-0.10.7.jar b/spark-2.4.0-bin-hadoop2.7/jars/py4j-0.10.7.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b7327d70a0d382463c7291d032f15b8ea8331475
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/py4j-0.10.7.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/pyrolite-4.13.jar b/spark-2.4.0-bin-hadoop2.7/jars/pyrolite-4.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..af7bd6cc9b6ec754289112ef1f8b6e1877feacdf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/pyrolite-4.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/scala-compiler-2.11.12.jar b/spark-2.4.0-bin-hadoop2.7/jars/scala-compiler-2.11.12.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d4803b52ca97a6e95f0a8b5bfe4dfde4b0e5a333
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/scala-compiler-2.11.12.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/scala-library-2.11.12.jar b/spark-2.4.0-bin-hadoop2.7/jars/scala-library-2.11.12.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a8bcc617a78ca27a92a0f8eb195d4d8631b17d24
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/scala-library-2.11.12.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/scala-parser-combinators_2.11-1.1.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/scala-parser-combinators_2.11-1.1.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f76440dddead49b2135e0c353c03c4f816fe7749
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/scala-parser-combinators_2.11-1.1.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/scala-reflect-2.11.12.jar b/spark-2.4.0-bin-hadoop2.7/jars/scala-reflect-2.11.12.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f987cf84d0aceed288cb30afd15fc69ef8506059
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/scala-reflect-2.11.12.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/scala-xml_2.11-1.0.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/scala-xml_2.11-1.0.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..269bba9b0f7224b30206cedc65b16007d5565dbf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/scala-xml_2.11-1.0.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/shapeless_2.11-2.3.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/shapeless_2.11-2.3.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c8ed6bcf321e583510c9b9199d281b730f2575d6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/shapeless_2.11-2.3.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/slf4j-api-1.7.16.jar b/spark-2.4.0-bin-hadoop2.7/jars/slf4j-api-1.7.16.jar
new file mode 100644
index 0000000000000000000000000000000000000000..68285952b61697b4fe22207ac8273bd2f8dae019
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/slf4j-api-1.7.16.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/slf4j-log4j12-1.7.16.jar b/spark-2.4.0-bin-hadoop2.7/jars/slf4j-log4j12-1.7.16.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a6aef80e9eed61f717d2f7c2655bebd2b3290b68
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/slf4j-log4j12-1.7.16.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/snakeyaml-1.15.jar b/spark-2.4.0-bin-hadoop2.7/jars/snakeyaml-1.15.jar
new file mode 100644
index 0000000000000000000000000000000000000000..34084e33257e0d04f66ca703ae34eb747a9f20b4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/snakeyaml-1.15.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/snappy-0.2.jar b/spark-2.4.0-bin-hadoop2.7/jars/snappy-0.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..38cc82527f8f6a19b5d31b92829e775e69301f29
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/snappy-0.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/snappy-java-1.1.7.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/snappy-java-1.1.7.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e817ab2efd25ea851dfa396215006d62de06c030
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/snappy-java-1.1.7.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-catalyst_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-catalyst_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1d84886157fcb9a2a59d42adc0d887d6a3a32d47
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-catalyst_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-core_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-core_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..025db49cd237a6cb9a282e825f28ef1a3c9d1903
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-core_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-graphx_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-graphx_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..08408502ce2030237bf41b61b438555a17b62e3b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-graphx_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-hive-thriftserver_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-hive-thriftserver_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4f8424065930faa9bccbd554dc3f7ed70426a021
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-hive-thriftserver_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-hive_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-hive_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..517c2f1a1b6d56de4282972d4b2b28c2296e92d5
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-hive_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-kubernetes_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-kubernetes_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d0a0f1971132bfd5e138a5633114b1bb6909f022
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-kubernetes_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-kvstore_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-kvstore_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a6fb419ed42052a1b415d74cdf79394d10f10e1d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-kvstore_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-launcher_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-launcher_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d04f1547db6bb1c0cafe90c7f5fe2ba44fbd3819
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-launcher_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-mesos_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-mesos_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..904ada84e05059c6e1effe793dd107ddb87d20a4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-mesos_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-mllib-local_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-mllib-local_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bd67ade9e9b06f5f7f475ab1fa51255ddafc09f4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-mllib-local_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-mllib_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-mllib_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b682be6efbb6e62bd60e09941482d2847b59ccb9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-mllib_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-network-common_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-network-common_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bdfd5296be0dde6da12e3def3e290e4e6198706f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-network-common_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-network-shuffle_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-network-shuffle_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2e484d14ca70867474228554d61e71241fb40c5a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-network-shuffle_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-repl_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-repl_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8ee702a70e6b02f7861adfb49720fba4e2c5e8f8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-repl_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-sketch_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-sketch_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bb50d972b38c3aa3c735a3e170c05e2f00f2e2bb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-sketch_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-sql_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-sql_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d854fd80e4c064c050758b4b74adbbfccf734e01
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-sql_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-streaming_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-streaming_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3dba9171d6ed4506e5dad8913d7019fbed9af8ca
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-streaming_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-tags_2.11-2.4.0-tests.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-tags_2.11-2.4.0-tests.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1a2c4ee6b2650502b40e7308f9937c15e834159a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-tags_2.11-2.4.0-tests.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-tags_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-tags_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..aa40bcc215559a38fbad6c71177440b43944bd0d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-tags_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-unsafe_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-unsafe_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e2810ed588edcd7b6f2b66b0c60b860effe45757
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-unsafe_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spark-yarn_2.11-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spark-yarn_2.11-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7bfc5fbb23795502a975836b80deea394b362925
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spark-yarn_2.11-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spire-macros_2.11-0.13.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spire-macros_2.11-0.13.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5fe397c5756f1887525dc364f1ab63e70d6bfda7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spire-macros_2.11-0.13.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/spire_2.11-0.13.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/spire_2.11-0.13.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7e8769eef8faee12701e8b31edde139c9b8a401d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/spire_2.11-0.13.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/stax-api-1.0-2.jar b/spark-2.4.0-bin-hadoop2.7/jars/stax-api-1.0-2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..015169dc71f06832714d21b67970ff8a7970d5b4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/stax-api-1.0-2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/stax-api-1.0.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/stax-api-1.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d9a1665151cf5ba827d15f9bccc35dc77b57cf98
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/stax-api-1.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/stream-2.7.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/stream-2.7.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..979ed3eee214a637bd6fe7ea4730547b71f28ae4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/stream-2.7.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/stringtemplate-3.2.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/stringtemplate-3.2.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d0e11b7193734c5b2784127951a1a91372f2ab94
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/stringtemplate-3.2.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/super-csv-2.2.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/super-csv-2.2.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0b2f0b182e18e0c897f4d8a046632537d6cd76c8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/super-csv-2.2.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/univocity-parsers-2.7.3.jar b/spark-2.4.0-bin-hadoop2.7/jars/univocity-parsers-2.7.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..894facbdec98395aa1e509479cbb388f8461fa5e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/univocity-parsers-2.7.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/validation-api-1.1.0.Final.jar b/spark-2.4.0-bin-hadoop2.7/jars/validation-api-1.1.0.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..de85403868d90e62cd40c6e7fb64662ed4908594
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/validation-api-1.1.0.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/xbean-asm6-shaded-4.8.jar b/spark-2.4.0-bin-hadoop2.7/jars/xbean-asm6-shaded-4.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..792fe6c24c722f1fc6f2c9d0587f817bf3a8238e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/xbean-asm6-shaded-4.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/xercesImpl-2.9.1.jar b/spark-2.4.0-bin-hadoop2.7/jars/xercesImpl-2.9.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..547f56300d93fe36587910739e095f03e287d47e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/xercesImpl-2.9.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/xmlenc-0.52.jar b/spark-2.4.0-bin-hadoop2.7/jars/xmlenc-0.52.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ec568b4c9e05327edd26e76ad24a56023fa7ea08
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/xmlenc-0.52.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/xz-1.5.jar b/spark-2.4.0-bin-hadoop2.7/jars/xz-1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2e9599ecfa6eef94200d2ca4275b9bafda773d4a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/xz-1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/zjsonpatch-0.3.0.jar b/spark-2.4.0-bin-hadoop2.7/jars/zjsonpatch-0.3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..fc1829c48abea0d4058585bfeb9e9238039b3a73
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/zjsonpatch-0.3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/zookeeper-3.4.6.jar b/spark-2.4.0-bin-hadoop2.7/jars/zookeeper-3.4.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7c340be9f5d25d753508297c0b35f2b5a9d6ab85
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/zookeeper-3.4.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/jars/zstd-jni-1.3.2-2.jar b/spark-2.4.0-bin-hadoop2.7/jars/zstd-jni-1.3.2-2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..84311f75027a269d97c719d887540c960c54eda6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/jars/zstd-jni-1.3.2-2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/Dockerfile b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..1c4dcd54768721e774ce26d72cadac850b936a56
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/Dockerfile
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM openjdk:8-alpine
+
+ARG spark_jars=jars
+ARG img_path=kubernetes/dockerfiles
+ARG k8s_tests=kubernetes/tests
+
+# Before building the Docker image, first build and package a Spark distribution
+# following the instructions at
+# http://spark.apache.org/docs/latest/building-spark.html.
+# When this Dockerfile is used to build images from a Spark distribution, invoke
+# the docker build command from the top-level directory of the distribution. E.g.:
+# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
+
+RUN set -ex && \
+    apk upgrade --no-cache && \
+    apk add --no-cache bash tini libc6-compat linux-pam && \
+    mkdir -p /opt/spark && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd
+
+COPY ${spark_jars} /opt/spark/jars
+COPY bin /opt/spark/bin
+COPY sbin /opt/spark/sbin
+COPY ${img_path}/spark/entrypoint.sh /opt/
+COPY examples /opt/spark/examples
+COPY ${k8s_tests} /opt/spark/tests
+COPY data /opt/spark/data
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/bindings/R/Dockerfile b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/bindings/R/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..9f67422efeb3c62c9e6b6ee15faf43be8a5accc7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/bindings/R/Dockerfile
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ARG base_img
+FROM $base_img
+WORKDIR /
+RUN mkdir ${SPARK_HOME}/R
+
+RUN apk add --no-cache R R-dev
+
+COPY R ${SPARK_HOME}/R
+ENV R_HOME /usr/lib/R
+
+WORKDIR /opt/spark/work-dir
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/bindings/python/Dockerfile b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/bindings/python/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..69b6efa6149a0bf156662fcb8081399361c396b0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/bindings/python/Dockerfile
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ARG base_img
+FROM $base_img
+WORKDIR /
+RUN mkdir ${SPARK_HOME}/python
+# TODO: Investigate running both pip and pip3 via virtualenvs
+RUN apk add --no-cache python && \
+    apk add --no-cache python3 && \
+    python -m ensurepip && \
+    python3 -m ensurepip && \
+    # Remove ensurepip: pip is already installed on the image, so it adds no
+    # functionality and only takes up about 1.6MB
+    rm -r /usr/lib/python*/ensurepip && \
+    pip install --upgrade pip setuptools && \
+    # Python 3 packages may be installed with pip3
+    # Remove the pip cache to save space
+    rm -r /root/.cache
+
+COPY python/lib ${SPARK_HOME}/python/lib
+ENV PYTHONPATH ${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-*.zip
+
+WORKDIR /opt/spark/work-dir
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
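The R and Python bindings Dockerfiles above layer on top of a base image supplied through the base_img build argument. A minimal build sequence, run from the top of this distribution and using illustrative tag names (the tags are assumptions, not part of the distribution), might look like:

    # Build the base JVM image, then the Python and R images on top of it.
    docker build -t spark:2.4.0 \
        -f kubernetes/dockerfiles/spark/Dockerfile .
    docker build -t spark-py:2.4.0 --build-arg base_img=spark:2.4.0 \
        -f kubernetes/dockerfiles/spark/bindings/python/Dockerfile .
    docker build -t spark-r:2.4.0 --build-arg base_img=spark:2.4.0 \
        -f kubernetes/dockerfiles/spark/bindings/R/Dockerfile .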
diff --git a/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/entrypoint.sh b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/entrypoint.sh
new file mode 100755
index 0000000000000000000000000000000000000000..216e8fe31becbeb9d68dcc252f78fd804da7f730
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/kubernetes/dockerfiles/spark/entrypoint.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# echo commands to the terminal output
+set -ex
+
+# Check whether there is a passwd entry for the container UID
+myuid=$(id -u)
+mygid=$(id -g)
+# turn off -e for getent because it will return error code in anonymous uid case
+set +e
+uidentry=$(getent passwd $myuid)
+set -e
+
+# If there is no passwd entry for the container UID, attempt to create one
+if [ -z "$uidentry" ] ; then
+    if [ -w /etc/passwd ] ; then
+        echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd
+    else
+        echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
+    fi
+fi
+
+SPARK_K8S_CMD="$1"
+case "$SPARK_K8S_CMD" in
+    driver | driver-py | driver-r | executor)
+      shift 1
+      ;;
+    "")
+      ;;
+    *)
+      echo "Non-spark-on-k8s command provided, proceeding in pass-through mode..."
+      exec /sbin/tini -s -- "$@"
+      ;;
+esac
+
+SPARK_CLASSPATH="$SPARK_CLASSPATH:${SPARK_HOME}/jars/*"
+env | grep SPARK_JAVA_OPT_ | sort -t_ -k4 -n | sed 's/[^=]*=\(.*\)/\1/g' > /tmp/java_opts.txt
+readarray -t SPARK_EXECUTOR_JAVA_OPTS < /tmp/java_opts.txt
+
+if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then
+  SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH"
+fi
+
+if [ -n "$PYSPARK_FILES" ]; then
+    PYTHONPATH="$PYTHONPATH:$PYSPARK_FILES"
+fi
+
+PYSPARK_ARGS=""
+if [ -n "$PYSPARK_APP_ARGS" ]; then
+    PYSPARK_ARGS="$PYSPARK_APP_ARGS"
+fi
+
+R_ARGS=""
+if [ -n "$R_APP_ARGS" ]; then
+    R_ARGS="$R_APP_ARGS"
+fi
+
+if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then
+    pyv="$(python -V 2>&1)"
+    export PYTHON_VERSION="${pyv:7}"
+    export PYSPARK_PYTHON="python"
+    export PYSPARK_DRIVER_PYTHON="python"
+elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then
+    pyv3="$(python3 -V 2>&1)"
+    export PYTHON_VERSION="${pyv3:7}"
+    export PYSPARK_PYTHON="python3"
+    export PYSPARK_DRIVER_PYTHON="python3"
+fi
+
+case "$SPARK_K8S_CMD" in
+  driver)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@"
+    )
+    ;;
+  driver-py)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@" $PYSPARK_PRIMARY $PYSPARK_ARGS
+    )
+    ;;
+  driver-r)
+    CMD=(
+      "$SPARK_HOME/bin/spark-submit"
+      --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
+      --deploy-mode client
+      "$@" $R_PRIMARY $R_ARGS
+    )
+    ;;
+  executor)
+    CMD=(
+      ${JAVA_HOME}/bin/java
+      "${SPARK_EXECUTOR_JAVA_OPTS[@]}"
+      -Xms$SPARK_EXECUTOR_MEMORY
+      -Xmx$SPARK_EXECUTOR_MEMORY
+      -cp "$SPARK_CLASSPATH"
+      org.apache.spark.executor.CoarseGrainedExecutorBackend
+      --driver-url $SPARK_DRIVER_URL
+      --executor-id $SPARK_EXECUTOR_ID
+      --cores $SPARK_EXECUTOR_CORES
+      --app-id $SPARK_APPLICATION_ID
+      --hostname $SPARK_EXECUTOR_POD_IP
+    )
+    ;;
+
+  *)
+    echo "Unknown command: $SPARK_K8S_CMD" 1>&2
+    exit 1
+esac
+
+# Execute the container CMD under tini for better hygiene
+exec /sbin/tini -s -- "${CMD[@]}"
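As a sketch of how the executor branch of entrypoint.sh above is exercised, the container can be started with "executor" as its first argument plus the environment variables the script reads; every value below is an illustrative placeholder (in a real deployment the Kubernetes scheduler backend injects these when it creates executor pods):

    # Hypothetical invocation; host names, ports and IDs are placeholders.
    docker run --rm \
        -e SPARK_DRIVER_URL="spark://CoarseGrainedScheduler@driver-host:7078" \
        -e SPARK_EXECUTOR_ID="1" \
        -e SPARK_EXECUTOR_CORES="1" \
        -e SPARK_EXECUTOR_MEMORY="1g" \
        -e SPARK_APPLICATION_ID="spark-application-0001" \
        -e SPARK_EXECUTOR_POD_IP="10.0.0.5" \
        spark:2.4.0 executor

Any other first argument (for example /opt/spark/bin/spark-submit --version) falls through to the case statement's default branch and is exec'd under tini in pass-through mode.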
diff --git a/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/py_container_checks.py b/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/py_container_checks.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6b3be2806c826d2085d003829e2708696486377
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/py_container_checks.py
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+
+
+def version_check(python_env, major_python_version):
+    """
+        Checks that the Python major version and the PYSPARK_PYTHON environment
+        variable match the expected values. Distributed via --py-files in the e2e tests.
+    """
+    env_version = os.environ.get('PYSPARK_PYTHON')
+    print("Python runtime version check is: " +
+          str(sys.version_info[0] == major_python_version))
+
+    print("Python environment version check is: " +
+          str(env_version == python_env))
diff --git a/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/pyfiles.py b/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/pyfiles.py
new file mode 100644
index 0000000000000000000000000000000000000000..4193654b49a12b436523149e7d48d78209df12a8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/pyfiles.py
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import sys
+
+from pyspark.sql import SparkSession
+
+
+if __name__ == "__main__":
+    """
+        Usage: pyfiles [major_python_version]
+    """
+    spark = SparkSession \
+        .builder \
+        .appName("PyFilesTest") \
+        .getOrCreate()
+
+    from py_container_checks import version_check
+    # Run the Python container checks
+    version_check(sys.argv[1], 2 if sys.argv[1] == "python" else 3)
+
+    spark.stop()
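pyfiles.py imports version_check from py_container_checks.py, so the helper module has to be shipped alongside it; the base Dockerfile above copies both test files into /opt/spark/tests. A hypothetical cluster-mode submission against the Python image sketched earlier (the API server address and image tag are placeholders for your environment):

    # Placeholder API server address and image tag; adjust for your cluster.
    $SPARK_HOME/bin/spark-submit \
        --master k8s://https://<apiserver-host>:<port> \
        --deploy-mode cluster \
        --conf spark.kubernetes.container.image=spark-py:2.4.0 \
        --py-files local:///opt/spark/tests/py_container_checks.py \
        local:///opt/spark/tests/pyfiles.py python3

The trailing "python3" argument is the expected PYSPARK_PYTHON value that version_check compares against, matching what entrypoint.sh exports when PYSPARK_MAJOR_PYTHON_VERSION is 3.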
diff --git a/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/worker_memory_check.py b/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/worker_memory_check.py
new file mode 100644
index 0000000000000000000000000000000000000000..d312a29f388e4e8c69820b70307ccf6ec8ab07ba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/kubernetes/tests/worker_memory_check.py
@@ -0,0 +1,47 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import resource
+import sys
+
+from pyspark.sql import SparkSession
+
+
+if __name__ == "__main__":
+    """
+        Usage: worker_memory_check [Memory_in_Mi]
+    """
+    spark = SparkSession \
+        .builder \
+        .appName("PyMemoryTest") \
+        .getOrCreate()
+    sc = spark.sparkContext
+    if len(sys.argv) < 2:
+        print("Usage: worker_memory_check [Memory_in_Mi]", file=sys.stderr)
+        sys.exit(-1)
+
+    def f(x):
+        rLimit = resource.getrlimit(resource.RLIMIT_AS)
+        print("RLimit is " + str(rLimit))
+        return rLimit
+    resourceValue = sc.parallelize([1]).map(f).collect()[0][0]
+    print("Resource Value is " + str(resourceValue))
+    truthCheck = (resourceValue == int(sys.argv[1]))
+    print("PySpark Worker Memory Check is: " + str(truthCheck))
+    spark.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-AnchorJS.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-AnchorJS.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2bf24b9b9f848d6f4ba9e31b7bae34b573e54cba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-AnchorJS.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) <year> <copyright holders>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-CC0.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-CC0.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1625c1793607996fcfc46420e8aa2f3d2b7efd1e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-CC0.txt
@@ -0,0 +1,121 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+    HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work").
+
+Certain owners wish to permanently relinquish those rights to a Work for
+the purpose of contributing to a commons of creative, cultural and
+scientific works ("Commons") that the public can reliably and without fear
+of later claims of infringement build upon, modify, incorporate in other
+works, reuse and redistribute as freely as possible in any form whatsoever
+and for any purposes, including without limitation commercial purposes.
+These owners may contribute to the Commons to promote the ideal of a free
+culture and the further production of creative, cultural and scientific
+works, or to gain reputation or greater distribution for their Work in
+part through the use and efforts of others.
+
+For these and/or other purposes and motivations, and without any
+expectation of additional consideration or compensation, the person
+associating CC0 with a Work (the "Affirmer"), to the extent that he or she
+is an owner of Copyright and Related Rights in the Work, voluntarily
+elects to apply CC0 to the Work and publicly distribute the Work under its
+terms, with knowledge of his or her Copyright and Related Rights in the
+Work and the meaning and intended legal effect of CC0 on those rights.
+
+1. Copyright and Related Rights. A Work made available under CC0 may be
+protected by copyright and related or neighboring rights ("Copyright and
+Related Rights"). Copyright and Related Rights include, but are not
+limited to, the following:
+
+  i. the right to reproduce, adapt, distribute, perform, display,
+     communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+iii. publicity and privacy rights pertaining to a person's image or
+     likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+     subject to the limitations in paragraph 4(a), below;
+  v. rights protecting the extraction, dissemination, use and reuse of data
+     in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+     European Parliament and of the Council of 11 March 1996 on the legal
+     protection of databases, and under any national implementation
+     thereof, including any amended or successor version of such
+     directive); and
+vii. other similar, equivalent or corresponding rights throughout the
+     world based on applicable law or treaty, and any national
+     implementations thereof.
+
+2. Waiver. To the greatest extent permitted by, but not in contravention
+of, applicable law, Affirmer hereby overtly, fully, permanently,
+irrevocably and unconditionally waives, abandons, and surrenders all of
+Affirmer's Copyright and Related Rights and associated claims and causes
+of action, whether now known or unknown (including existing as well as
+future claims and causes of action), in the Work (i) in all territories
+worldwide, (ii) for the maximum duration provided by applicable law or
+treaty (including future time extensions), (iii) in any current or future
+medium and for any number of copies, and (iv) for any purpose whatsoever,
+including without limitation commercial, advertising or promotional
+purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
+member of the public at large and to the detriment of Affirmer's heirs and
+successors, fully intending that such Waiver shall not be subject to
+revocation, rescission, cancellation, termination, or any other legal or
+equitable action to disrupt the quiet enjoyment of the Work by the public
+as contemplated by Affirmer's express Statement of Purpose.
+
+3. Public License Fallback. Should any part of the Waiver for any reason
+be judged legally invalid or ineffective under applicable law, then the
+Waiver shall be preserved to the maximum extent permitted taking into
+account Affirmer's express Statement of Purpose. In addition, to the
+extent the Waiver is so judged Affirmer hereby grants to each affected
+person a royalty-free, non transferable, non sublicensable, non exclusive,
+irrevocable and unconditional license to exercise Affirmer's Copyright and
+Related Rights in the Work (i) in all territories worldwide, (ii) for the
+maximum duration provided by applicable law or treaty (including future
+time extensions), (iii) in any current or future medium and for any number
+of copies, and (iv) for any purpose whatsoever, including without
+limitation commercial, advertising or promotional purposes (the
+"License"). The License shall be deemed effective as of the date CC0 was
+applied by Affirmer to the Work. Should any part of the License for any
+reason be judged legally invalid or ineffective under applicable law, such
+partial invalidity or ineffectiveness shall not invalidate the remainder
+of the License, and in such case Affirmer hereby affirms that he or she
+will not (i) exercise any of his or her remaining Copyright and Related
+Rights in the Work or (ii) assert any associated claims and causes of
+action with respect to the Work, in either case contrary to Affirmer's
+express Statement of Purpose.
+
+4. Limitations and Disclaimers.
+
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+    surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+    warranties of any kind concerning the Work, express, implied,
+    statutory or otherwise, including without limitation warranties of
+    title, merchantability, fitness for a particular purpose, non
+    infringement, or the absence of latent or other defects, accuracy, or
+    the present or absence of errors, whether or not discoverable, all to
+    the greatest extent permissible under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+    that may apply to the Work or any use thereof, including without
+    limitation any person's Copyright and Related Rights in the Work.
+    Further, Affirmer disclaims responsibility for obtaining any necessary
+    consents, permissions or other rights required for any use of the
+    Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+    party to this document and has no duty or obligation with respect to
+    this CC0 or use of the Work.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-antlr.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-antlr.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3021ea04332ed4d3f713bd19c7504798ab32a510
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-antlr.txt
@@ -0,0 +1,8 @@
+[The BSD License]
+Copyright (c) 2012 Terence Parr and Sam Harwell
+All rights reserved.
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+Neither the name of the author nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-arpack.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-arpack.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a3ad80087bb6308dc19842d5e4184d6e13ae34da
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-arpack.txt
@@ -0,0 +1,8 @@
+Copyright © 2018 The University of Tennessee. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+· Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+· Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution.
+· Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. in no event shall the copyright owner or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-automaton.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-automaton.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2fc6e8c3432f0c7cc6d6cdeeef294c6e9e5791d1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-automaton.txt
@@ -0,0 +1,24 @@
+Copyright (c) 2001-2017 Anders Moeller
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. The name of the author may not be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-bootstrap.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-bootstrap.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6c711832fbc85466e9256dcfcc8de5f049a3a033
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-bootstrap.txt
@@ -0,0 +1,13 @@
+Copyright 2013 Twitter, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-cloudpickle.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-cloudpickle.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b1e20fa1eda8864f536a5188848910851ae06b89
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-cloudpickle.txt
@@ -0,0 +1,28 @@
+Copyright (c) 2012, Regents of the University of California.
+Copyright (c) 2009 `PiCloud, Inc. <http://www.picloud.com>`_.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the University of California, Berkeley nor the
+      names of its contributors may be used to endorse or promote
+      products derived from this software without specific prior written
+      permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-d3.min.js.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-d3.min.js.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c71e3f254c0689d0f15003e0480496fa962a1188
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-d3.min.js.txt
@@ -0,0 +1,26 @@
+Copyright (c) 2010-2015, Michael Bostock
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* The name Michael Bostock may not be used to endorse or promote products
+  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-dagre-d3.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-dagre-d3.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4864fe05e9803123e68940e7bda357407462d8f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-dagre-d3.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2013 Chris Pettitt
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-datatables.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-datatables.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bb7708b5b5a497e2530ab655f279443d2c190482
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-datatables.txt
@@ -0,0 +1,7 @@
+Copyright (C) 2008-2018, SpryMedia Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-f2j.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-f2j.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e28fd3ccdfa69309503067ea6681575581bff102
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-f2j.txt
@@ -0,0 +1,8 @@
+Copyright © 2015 The University of Tennessee. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+· Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+· Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer listed in this license in the documentation and/or other materials provided with the distribution.
+· Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. in no event shall the copyright owner or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-graphlib-dot.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-graphlib-dot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4864fe05e9803123e68940e7bda357407462d8f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-graphlib-dot.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2013 Chris Pettitt
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-heapq.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-heapq.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0c4c4b954bea4c5b0382f34ebad7141335ac4813
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-heapq.txt
@@ -0,0 +1,280 @@
+
+# A. HISTORY OF THE SOFTWARE
+# ==========================
+#
+# Python was created in the early 1990s by Guido van Rossum at Stichting
+# Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
+# as a successor of a language called ABC.  Guido remains Python's
+# principal author, although it includes many contributions from others.
+#
+# In 1995, Guido continued his work on Python at the Corporation for
+#     National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
+# in Reston, Virginia where he released several versions of the
+# software.
+#
+# In May 2000, Guido and the Python core development team moved to
+# BeOpen.com to form the BeOpen PythonLabs team.  In October of the same
+# year, the PythonLabs team moved to Digital Creations (now Zope
+# Corporation, see http://www.zope.com).  In 2001, the Python Software
+# Foundation (PSF, see http://www.python.org/psf/) was formed, a
+# non-profit organization created specifically to own Python-related
+# Intellectual Property.  Zope Corporation is a sponsoring member of
+# the PSF.
+#
+# All Python releases are Open Source (see http://www.opensource.org for
+# the Open Source Definition).  Historically, most, but not all, Python
+# releases have also been GPL-compatible; the table below summarizes
+# the various releases.
+#
+# Release         Derived     Year        Owner       GPL-
+# from                                compatible? (1)
+#
+# 0.9.0 thru 1.2              1991-1995   CWI         yes
+# 1.3 thru 1.5.2  1.2         1995-1999   CNRI        yes
+# 1.6             1.5.2       2000        CNRI        no
+# 2.0             1.6         2000        BeOpen.com  no
+# 1.6.1           1.6         2001        CNRI        yes (2)
+# 2.1             2.0+1.6.1   2001        PSF         no
+# 2.0.1           2.0+1.6.1   2001        PSF         yes
+# 2.1.1           2.1+2.0.1   2001        PSF         yes
+# 2.2             2.1.1       2001        PSF         yes
+# 2.1.2           2.1.1       2002        PSF         yes
+# 2.1.3           2.1.2       2002        PSF         yes
+# 2.2.1           2.2         2002        PSF         yes
+# 2.2.2           2.2.1       2002        PSF         yes
+# 2.2.3           2.2.2       2003        PSF         yes
+# 2.3             2.2.2       2002-2003   PSF         yes
+# 2.3.1           2.3         2002-2003   PSF         yes
+# 2.3.2           2.3.1       2002-2003   PSF         yes
+# 2.3.3           2.3.2       2002-2003   PSF         yes
+# 2.3.4           2.3.3       2004        PSF         yes
+# 2.3.5           2.3.4       2005        PSF         yes
+# 2.4             2.3         2004        PSF         yes
+# 2.4.1           2.4         2005        PSF         yes
+# 2.4.2           2.4.1       2005        PSF         yes
+# 2.4.3           2.4.2       2006        PSF         yes
+# 2.4.4           2.4.3       2006        PSF         yes
+# 2.5             2.4         2006        PSF         yes
+# 2.5.1           2.5         2007        PSF         yes
+# 2.5.2           2.5.1       2008        PSF         yes
+# 2.5.3           2.5.2       2008        PSF         yes
+# 2.6             2.5         2008        PSF         yes
+# 2.6.1           2.6         2008        PSF         yes
+# 2.6.2           2.6.1       2009        PSF         yes
+# 2.6.3           2.6.2       2009        PSF         yes
+# 2.6.4           2.6.3       2009        PSF         yes
+# 2.6.5           2.6.4       2010        PSF         yes
+# 2.7             2.6         2010        PSF         yes
+#
+# Footnotes:
+#
+# (1) GPL-compatible doesn't mean that we're distributing Python under
+# the GPL.  All Python licenses, unlike the GPL, let you distribute
+# a modified version without making your changes open source.  The
+# GPL-compatible licenses make it possible to combine Python with
+#     other software that is released under the GPL; the others don't.
+#
+# (2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
+# because its license has a choice of law clause.  According to
+# CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
+# is "not incompatible" with the GPL.
+#
+# Thanks to the many outside volunteers who have worked under Guido's
+# direction to make these releases possible.
+#
+#
+# B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
+# ===============================================================
+#
+# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+# --------------------------------------------
+#
+# 1. This LICENSE AGREEMENT is between the Python Software Foundation
+# ("PSF"), and the Individual or Organization ("Licensee") accessing and
+# otherwise using this software ("Python") in source or binary form and
+# its associated documentation.
+#
+# 2. Subject to the terms and conditions of this License Agreement, PSF hereby
+# grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
+# analyze, test, perform and/or display publicly, prepare derivative works,
+# distribute, and otherwise use Python alone or in any derivative version,
+# provided, however, that PSF's License Agreement and PSF's notice of copyright,
+# i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+# 2011, 2012, 2013 Python Software Foundation; All Rights Reserved" are retained
+# in Python alone or in any derivative version prepared by Licensee.
+#
+# 3. In the event Licensee prepares a derivative work that is based on
+# or incorporates Python or any part thereof, and wants to make
+# the derivative work available to others as provided herein, then
+# Licensee hereby agrees to include in any such work a brief summary of
+# the changes made to Python.
+#
+# 4. PSF is making Python available to Licensee on an "AS IS"
+# basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 6. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 7. Nothing in this License Agreement shall be deemed to create any
+# relationship of agency, partnership, or joint venture between PSF and
+# Licensee.  This License Agreement does not grant permission to use PSF
+# trademarks or trade name in a trademark sense to endorse or promote
+# products or services of Licensee, or any third party.
+#
+# 8. By copying, installing or otherwise using Python, Licensee
+# agrees to be bound by the terms and conditions of this License
+# Agreement.
+#
+#
+# BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
+# -------------------------------------------
+#
+# BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
+#
+# 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
+# office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
+# Individual or Organization ("Licensee") accessing and otherwise using
+# this software in source or binary form and its associated
+# documentation ("the Software").
+#
+# 2. Subject to the terms and conditions of this BeOpen Python License
+# Agreement, BeOpen hereby grants Licensee a non-exclusive,
+# royalty-free, world-wide license to reproduce, analyze, test, perform
+# and/or display publicly, prepare derivative works, distribute, and
+# otherwise use the Software alone or in any derivative version,
+# provided, however, that the BeOpen Python License is retained in the
+# Software, alone or in any derivative version prepared by Licensee.
+#
+# 3. BeOpen is making the Software available to Licensee on an "AS IS"
+# basis.  BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
+# SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
+# AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
+# DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 5. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 6. This License Agreement shall be governed by and interpreted in all
+# respects by the law of the State of California, excluding conflict of
+# law provisions.  Nothing in this License Agreement shall be deemed to
+# create any relationship of agency, partnership, or joint venture
+# between BeOpen and Licensee.  This License Agreement does not grant
+# permission to use BeOpen trademarks or trade names in a trademark
+# sense to endorse or promote products or services of Licensee, or any
+# third party.  As an exception, the "BeOpen Python" logos available at
+# http://www.pythonlabs.com/logos.html may be used according to the
+# permissions granted on that web page.
+#
+# 7. By copying, installing or otherwise using the software, Licensee
+# agrees to be bound by the terms and conditions of this License
+# Agreement.
+#
+#
+# CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
+# ---------------------------------------
+#
+# 1. This LICENSE AGREEMENT is between the Corporation for National
+#     Research Initiatives, having an office at 1895 Preston White Drive,
+# Reston, VA 20191 ("CNRI"), and the Individual or Organization
+# ("Licensee") accessing and otherwise using Python 1.6.1 software in
+# source or binary form and its associated documentation.
+#
+# 2. Subject to the terms and conditions of this License Agreement, CNRI
+# hereby grants Licensee a nonexclusive, royalty-free, world-wide
+# license to reproduce, analyze, test, perform and/or display publicly,
+# prepare derivative works, distribute, and otherwise use Python 1.6.1
+# alone or in any derivative version, provided, however, that CNRI's
+# License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
+# 1995-2001 Corporation for National Research Initiatives; All Rights
+# Reserved" are retained in Python 1.6.1 alone or in any derivative
+# version prepared by Licensee.  Alternately, in lieu of CNRI's License
+# Agreement, Licensee may substitute the following text (omitting the
+# quotes): "Python 1.6.1 is made available subject to the terms and
+# conditions in CNRI's License Agreement.  This Agreement together with
+# Python 1.6.1 may be located on the Internet using the following
+# unique, persistent identifier (known as a handle): 1895.22/1013.  This
+# Agreement may also be obtained from a proxy server on the Internet
+# using the following URL: http://hdl.handle.net/1895.22/1013".
+#
+# 3. In the event Licensee prepares a derivative work that is based on
+# or incorporates Python 1.6.1 or any part thereof, and wants to make
+# the derivative work available to others as provided herein, then
+# Licensee hereby agrees to include in any such work a brief summary of
+# the changes made to Python 1.6.1.
+#
+# 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
+# basis.  CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+# 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
+# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 6. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 7. This License Agreement shall be governed by the federal
+# intellectual property law of the United States, including without
+# limitation the federal copyright law, and, to the extent such
+# U.S. federal law does not apply, by the law of the Commonwealth of
+# Virginia, excluding Virginia's conflict of law provisions.
+# Notwithstanding the foregoing, with regard to derivative works based
+# on Python 1.6.1 that incorporate non-separable material that was
+# previously distributed under the GNU General Public License (GPL), the
+# law of the Commonwealth of Virginia shall govern this License
+# Agreement only as to issues arising under or with respect to
+# Paragraphs 4, 5, and 7 of this License Agreement.  Nothing in this
+# License Agreement shall be deemed to create any relationship of
+# agency, partnership, or joint venture between CNRI and Licensee.  This
+# License Agreement does not grant permission to use CNRI trademarks or
+# trade name in a trademark sense to endorse or promote products or
+# services of Licensee, or any third party.
+#
+# 8. By clicking on the "ACCEPT" button where indicated, or by copying,
+# installing or otherwise using Python 1.6.1, Licensee agrees to be
+# bound by the terms and conditions of this License Agreement.
+#
+# ACCEPT
+#
+#
+# CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
+# --------------------------------------------------
+#
+# Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
+# The Netherlands.  All rights reserved.
+#
+# Permission to use, copy, modify, and distribute this software and its
+# documentation for any purpose and without fee is hereby granted,
+# provided that the above copyright notice appear in all copies and that
+# both that copyright notice and this permission notice appear in
+# supporting documentation, and that the name of Stichting Mathematisch
+# Centrum or CWI not be used in advertising or publicity pertaining to
+# distribution of the software without specific, written prior
+# permission.
+#
+# STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
+# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+# FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
+# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-janino.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-janino.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d1e1f237c4641c77b9c4de8c331e8e77e2ab7aa6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-janino.txt
@@ -0,0 +1,31 @@
+Janino - An embedded Java[TM] compiler
+
+Copyright (c) 2001-2016, Arno Unkrig
+Copyright (c) 2015-2016  TIBCO Software Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+   1. Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+   2. Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials
+      provided with the distribution.
+   3. Neither the name of JANINO nor the names of its contributors
+      may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-javassist.html b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-javassist.html
new file mode 100644
index 0000000000000000000000000000000000000000..5abd563a0c4d9cb1b11ea070bc0e0343ece72902
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-javassist.html
@@ -0,0 +1,373 @@
+<HTML>
+<HEAD>
+  <TITLE>Javassist License</TITLE>
+  <META http-equiv=Content-Type content="text/html; charset=iso-8859-1">
+  <META content="MSHTML 5.50.4916.2300" name=GENERATOR></HEAD>
+
+<BODY text=#000000 vLink=#551a8b aLink=#ff0000 link=#0000ee bgColor=#ffffff>
+<CENTER><B><FONT size=+2>MOZILLA PUBLIC LICENSE</FONT></B> <BR><B>Version
+  1.1</B>
+  <P>
+  <HR width="20%">
+</CENTER>
+<P><B>1. Definitions.</B>
+  <UL><B>1.0.1. "Commercial Use" </B>means distribution or otherwise making the
+    Covered Code available to a third party.
+<P><B>1.1. ''Contributor''</B> means each entity that creates or contributes
+  to the creation of Modifications.
+<P><B>1.2. ''Contributor Version''</B> means the combination of the Original
+  Code, prior Modifications used by a Contributor, and the Modifications made by
+  that particular Contributor.
+<P><B>1.3. ''Covered Code''</B> means the Original Code or Modifications or
+  the combination of the Original Code and Modifications, in each case including
+  portions thereof<B>.</B>
+<P><B>1.4. ''Electronic Distribution Mechanism''</B> means a mechanism
+  generally accepted in the software development community for the electronic
+  transfer of data.
+<P><B>1.5. ''Executable''</B> means Covered Code in any form other than Source
+  Code.
+<P><B>1.6. ''Initial Developer''</B> means the individual or entity identified
+  as the Initial Developer in the Source Code notice required by <B>Exhibit
+    A</B>.
+<P><B>1.7. ''Larger Work''</B> means a work which combines Covered Code or
+  portions thereof with code not governed by the terms of this License.
+<P><B>1.8. ''License''</B> means this document.
+<P><B>1.8.1. "Licensable"</B> means having the right to grant, to the maximum
+  extent possible, whether at the time of the initial grant or subsequently
+  acquired, any and all of the rights conveyed herein.
+<P><B>1.9. ''Modifications''</B> means any addition to or deletion from the
+  substance or structure of either the Original Code or any previous
+  Modifications. When Covered Code is released as a series of files, a
+  Modification is:
+  <UL><B>A.</B> Any addition to or deletion from the contents of a file
+    containing Original Code or previous Modifications.
+<P><B>B.</B> Any new file that contains any part of the Original Code or
+  previous Modifications. <BR>&nbsp;</P></UL><B>1.10. ''Original Code''</B>
+means Source Code of computer software code which is described in the Source
+Code notice required by <B>Exhibit A</B> as Original Code, and which, at the
+time of its release under this License is not already Covered Code governed by
+this License.
+<P><B>1.10.1. "Patent Claims"</B> means any patent claim(s), now owned or
+  hereafter acquired, including without limitation,&nbsp; method, process, and
+  apparatus claims, in any patent Licensable by grantor.
+<P><B>1.11. ''Source Code''</B> means the preferred form of the Covered Code
+  for making modifications to it, including all modules it contains, plus any
+  associated interface definition files, scripts used to control compilation and
+  installation of an Executable, or source code differential comparisons against
+  either the Original Code or another well known, available Covered Code of the
+  Contributor's choice. The Source Code can be in a compressed or archival form,
+  provided the appropriate decompression or de-archiving software is widely
+  available for no charge.
+<P><B>1.12. "You'' (or "Your")&nbsp;</B> means an individual or a legal entity
+  exercising rights under, and complying with all of the terms of, this License
+  or a future version of this License issued under Section 6.1. For legal
+  entities, "You'' includes any entity which controls, is controlled by, or is
+  under common control with You. For purposes of this definition, "control''
+  means (a) the power, direct or indirect, to cause the direction or management
+  of such entity, whether by contract or otherwise, or (b) ownership of more
+  than fifty percent (50%) of the outstanding shares or beneficial ownership of
+  such entity.</P></UL><B>2. Source Code License.</B>
+<UL><B>2.1. The Initial Developer Grant.</B> <BR>The Initial Developer hereby
+  grants You a world-wide, royalty-free, non-exclusive license, subject to third
+  party intellectual property claims:
+  <UL><B>(a)</B>&nbsp;<B> </B>under intellectual property rights (other than
+    patent or trademark) Licensable by Initial Developer to use, reproduce,
+    modify, display, perform, sublicense and distribute the Original Code (or
+    portions thereof) with or without Modifications, and/or as part of a Larger
+    Work; and
+    <P><B>(b)</B> under Patents Claims infringed by the making, using or selling
+      of Original Code, to make, have made, use, practice, sell, and offer for
+      sale, and/or otherwise dispose of the Original Code (or portions thereof).
+    <UL>
+      <UL></UL></UL><B>(c) </B>the licenses granted in this Section 2.1(a) and (b)
+    are effective on the date Initial Developer first distributes Original Code
+    under the terms of this License.
+    <P><B>(d) </B>Notwithstanding Section 2.1(b) above, no patent license is
+      granted: 1) for code that You delete from the Original Code; 2) separate
+      from the Original Code;&nbsp; or 3) for infringements caused by: i) the
+      modification of the Original Code or ii) the combination of the Original
+      Code with other software or devices. <BR>&nbsp;</P></UL><B>2.2. Contributor
+    Grant.</B> <BR>Subject to third party intellectual property claims, each
+  Contributor hereby grants You a world-wide, royalty-free, non-exclusive
+  license
+  <UL> <BR><B>(a)</B>&nbsp;<B> </B>under intellectual property rights (other
+    than patent or trademark) Licensable by Contributor, to use, reproduce,
+    modify, display, perform, sublicense and distribute the Modifications
+    created by such Contributor (or portions thereof) either on an unmodified
+    basis, with other Modifications, as Covered Code and/or as part of a Larger
+    Work; and
+    <P><B>(b)</B> under Patent Claims infringed by the making, using, or selling
+      of&nbsp; Modifications made by that Contributor either alone and/or in<FONT
+          color=#000000> combination with its Contributor Version (or portions of such
+        combination), to make, use, sell, offer for sale, have made, and/or
+        otherwise dispose of: 1) Modifications made by that Contributor (or portions
+        thereof); and 2) the combination of&nbsp; Modifications made by that
+        Contributor with its Contributor Version (or portions of such
+        combination).</FONT>
+    <P><B>(c) </B>the licenses granted in Sections 2.2(a) and 2.2(b) are
+      effective on the date Contributor first makes Commercial Use of the Covered
+      Code.
+    <P><B>(d)&nbsp;</B>&nbsp;&nbsp; Notwithstanding Section 2.2(b) above, no
+      patent license is granted: 1) for any code that Contributor has deleted from
+      the Contributor Version; 2)&nbsp; separate from the Contributor
+      Version;&nbsp; 3)&nbsp; for infringements caused by: i) third party
+      modifications of Contributor Version or ii)&nbsp; the combination of
+      Modifications made by that Contributor with other software&nbsp; (except as
+      part of the Contributor Version) or other devices; or 4) under Patent Claims
+      infringed by Covered Code in the absence of Modifications made by that
+      Contributor.</P></UL></UL>
+<P><BR><B>3. Distribution Obligations.</B>
+  <UL><B>3.1. Application of License.</B> <BR>The Modifications which You create
+    or to which You contribute are governed by the terms of this License,
+    including without limitation Section <B>2.2</B>. The Source Code version of
+    Covered Code may be distributed only under the terms of this License or a
+    future version of this License released under Section <B>6.1</B>, and You must
+    include a copy of this License with every copy of the Source Code You
+    distribute. You may not offer or impose any terms on any Source Code version
+    that alters or restricts the applicable version of this License or the
+    recipients' rights hereunder. However, You may include an additional document
+    offering the additional rights described in Section <B>3.5</B>.
+<P><B>3.2. Availability of Source Code.</B> <BR>Any Modification which You
+  create or to which You contribute must be made available in Source Code form
+  under the terms of this License either on the same media as an Executable
+  version or via an accepted Electronic Distribution Mechanism to anyone to whom
+  you made an Executable version available; and if made available via Electronic
+  Distribution Mechanism, must remain available for at least twelve (12) months
+  after the date it initially became available, or at least six (6) months after
+  a subsequent version of that particular Modification has been made available
+  to such recipients. You are responsible for ensuring that the Source Code
+  version remains available even if the Electronic Distribution Mechanism is
+  maintained by a third party.
+<P><B>3.3. Description of Modifications.</B> <BR>You must cause all Covered
+  Code to which You contribute to contain a file documenting the changes You
+  made to create that Covered Code and the date of any change. You must include
+  a prominent statement that the Modification is derived, directly or
+  indirectly, from Original Code provided by the Initial Developer and including
+  the name of the Initial Developer in (a) the Source Code, and (b) in any
+  notice in an Executable version or related documentation in which You describe
+  the origin or ownership of the Covered Code.
+<P><B>3.4. Intellectual Property Matters</B>
+  <UL><B>(a) Third Party Claims</B>. <BR>If Contributor has knowledge that a
+    license under a third party's intellectual property rights is required to
+    exercise the rights granted by such Contributor under Sections 2.1 or 2.2,
+    Contributor must include a text file with the Source Code distribution
+    titled "LEGAL'' which describes the claim and the party making the claim in
+    sufficient detail that a recipient will know whom to contact. If Contributor
+    obtains such knowledge after the Modification is made available as described
+    in Section 3.2, Contributor shall promptly modify the LEGAL file in all
+    copies Contributor makes available thereafter and shall take other steps
+    (such as notifying appropriate mailing lists or newsgroups) reasonably
+    calculated to inform those who received the Covered Code that new knowledge
+    has been obtained.
+<P><B>(b) Contributor APIs</B>. <BR>If Contributor's Modifications include
+  an application programming interface and Contributor has knowledge of patent
+  licenses which are reasonably necessary to implement that API, Contributor
+  must also include this information in the LEGAL file.
+  <BR>&nbsp;</P></UL>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+<B>(c)&nbsp;&nbsp;&nbsp; Representations.</B>
+<UL>Contributor represents that, except as disclosed pursuant to Section
+  3.4(a) above, Contributor believes that Contributor's Modifications are
+  Contributor's original creation(s) and/or Contributor has sufficient rights
+  to grant the rights conveyed by this License.</UL>
+<P><BR><B>3.5. Required Notices.</B> <BR>You must duplicate the notice in
+  <B>Exhibit A</B> in each file of the Source Code.&nbsp; If it is not possible
+  to put such notice in a particular Source Code file due to its structure, then
+  You must include such notice in a location (such as a relevant directory)
+  where a user would be likely to look for such a notice.&nbsp; If You created
+  one or more Modification(s) You may add your name as a Contributor to the
+  notice described in <B>Exhibit A</B>.&nbsp; You must also duplicate this
+  License in any documentation for the Source Code where You describe
+  recipients' rights or ownership rights relating to Covered Code.&nbsp; You may
+  choose to offer, and to charge a fee for, warranty, support, indemnity or
+  liability obligations to one or more recipients of Covered Code. However, You
+  may do so only on Your own behalf, and not on behalf of the Initial Developer
+  or any Contributor. You must make it absolutely clear than any such warranty,
+  support, indemnity or liability obligation is offered by You alone, and You
+  hereby agree to indemnify the Initial Developer and every Contributor for any
+  liability incurred by the Initial Developer or such Contributor as a result of
+  warranty, support, indemnity or liability terms You offer.
+<P><B>3.6. Distribution of Executable Versions.</B> <BR>You may distribute
+  Covered Code in Executable form only if the requirements of Section
+  <B>3.1-3.5</B> have been met for that Covered Code, and if You include a
+  notice stating that the Source Code version of the Covered Code is available
+  under the terms of this License, including a description of how and where You
+  have fulfilled the obligations of Section <B>3.2</B>. The notice must be
+  conspicuously included in any notice in an Executable version, related
+  documentation or collateral in which You describe recipients' rights relating
+  to the Covered Code. You may distribute the Executable version of Covered Code
+  or ownership rights under a license of Your choice, which may contain terms
+  different from this License, provided that You are in compliance with the
+  terms of this License and that the license for the Executable version does not
+  attempt to limit or alter the recipient's rights in the Source Code version
+  from the rights set forth in this License. If You distribute the Executable
+  version under a different license You must make it absolutely clear that any
+  terms which differ from this License are offered by You alone, not by the
+  Initial Developer or any Contributor. You hereby agree to indemnify the
+  Initial Developer and every Contributor for any liability incurred by the
+  Initial Developer or such Contributor as a result of any such terms You offer.
+
+<P><B>3.7. Larger Works.</B> <BR>You may create a Larger Work by combining
+  Covered Code with other code not governed by the terms of this License and
+  distribute the Larger Work as a single product. In such a case, You must make
+  sure the requirements of this License are fulfilled for the Covered
+  Code.</P></UL><B>4. Inability to Comply Due to Statute or Regulation.</B>
+<UL>If it is impossible for You to comply with any of the terms of this
+  License with respect to some or all of the Covered Code due to statute,
+  judicial order, or regulation then You must: (a) comply with the terms of this
+  License to the maximum extent possible; and (b) describe the limitations and
+  the code they affect. Such description must be included in the LEGAL file
+  described in Section <B>3.4</B> and must be included with all distributions of
+  the Source Code. Except to the extent prohibited by statute or regulation,
+  such description must be sufficiently detailed for a recipient of ordinary
+  skill to be able to understand it.</UL><B>5. Application of this License.</B>
+<UL>This License applies to code to which the Initial Developer has attached
+  the notice in <B>Exhibit A</B> and to related Covered Code.</UL><B>6. Versions
+  of the License.</B>
+<UL><B>6.1. New Versions</B>. <BR>Netscape Communications Corporation
+  (''Netscape'') may publish revised and/or new versions of the License from
+  time to time. Each version will be given a distinguishing version number.
+  <P><B>6.2. Effect of New Versions</B>. <BR>Once Covered Code has been
+    published under a particular version of the License, You may always continue
+    to use it under the terms of that version. You may also choose to use such
+    Covered Code under the terms of any subsequent version of the License
+    published by Netscape. No one other than Netscape has the right to modify the
+    terms applicable to Covered Code created under this License.
+  <P><B>6.3. Derivative Works</B>. <BR>If You create or use a modified version
+    of this License (which you may only do in order to apply it to code which is
+    not already Covered Code governed by this License), You must (a) rename Your
+    license so that the phrases ''Mozilla'', ''MOZILLAPL'', ''MOZPL'',
+    ''Netscape'', "MPL", ''NPL'' or any confusingly similar phrase do not appear
+    in your license (except to note that your license differs from this License)
+    and (b) otherwise make it clear that Your version of the license contains
+    terms which differ from the Mozilla Public License and Netscape Public
+    License. (Filling in the name of the Initial Developer, Original Code or
+    Contributor in the notice described in <B>Exhibit A</B> shall not of
+    themselves be deemed to be modifications of this License.)</P></UL><B>7.
+  DISCLAIMER OF WARRANTY.</B>
+<UL>COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS'' BASIS, WITHOUT
+  WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT
+  LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF DEFECTS, MERCHANTABLE,
+  FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE
+  QUALITY AND PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED
+  CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY
+  OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR
+  CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+  LICENSE. NO USE OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS
+  DISCLAIMER.</UL><B>8. TERMINATION.</B>
+<UL><B>8.1.&nbsp; </B>This License and the rights granted hereunder will
+  terminate automatically if You fail to comply with terms herein and fail to
+  cure such breach within 30 days of becoming aware of the breach. All
+  sublicenses to the Covered Code which are properly granted shall survive any
+  termination of this License. Provisions which, by their nature, must remain in
+  effect beyond the termination of this License shall survive.
+  <P><B>8.2.&nbsp; </B>If You initiate litigation by asserting a patent
+    infringement claim (excluding declatory judgment actions) against Initial
+    Developer or a Contributor (the Initial Developer or Contributor against whom
+    You file such action is referred to as "Participant")&nbsp; alleging that:
+  <P><B>(a)&nbsp; </B>such Participant's Contributor Version directly or
+    indirectly infringes any patent, then any and all rights granted by such
+    Participant to You under Sections 2.1 and/or 2.2 of this License shall, upon
+    60 days notice from Participant terminate prospectively, unless if within 60
+    days after receipt of notice You either: (i)&nbsp; agree in writing to pay
+    Participant a mutually agreeable reasonable royalty for Your past and future
+    use of Modifications made by such Participant, or (ii) withdraw Your
+    litigation claim with respect to the Contributor Version against such
+    Participant.&nbsp; If within 60 days of notice, a reasonable royalty and
+    payment arrangement are not mutually agreed upon in writing by the parties or
+    the litigation claim is not withdrawn, the rights granted by Participant to
+    You under Sections 2.1 and/or 2.2 automatically terminate at the expiration of
+    the 60 day notice period specified above.
+  <P><B>(b)</B>&nbsp; any software, hardware, or device, other than such
+    Participant's Contributor Version, directly or indirectly infringes any
+    patent, then any rights granted to You by such Participant under Sections
+    2.1(b) and 2.2(b) are revoked effective as of the date You first made, used,
+    sold, distributed, or had made, Modifications made by that Participant.
+  <P><B>8.3.&nbsp; </B>If You assert a patent infringement claim against
+    Participant alleging that such Participant's Contributor Version directly or
+    indirectly infringes any patent where such claim is resolved (such as by
+    license or settlement) prior to the initiation of patent infringement
+    litigation, then the reasonable value of the licenses granted by such
+    Participant under Sections 2.1 or 2.2 shall be taken into account in
+    determining the amount or value of any payment or license.
+  <P><B>8.4.</B>&nbsp; In the event of termination under Sections 8.1 or 8.2
+    above,&nbsp; all end user license agreements (excluding distributors and
+    resellers) which have been validly granted by You or any distributor hereunder
+    prior to termination shall survive termination.</P></UL><B>9. LIMITATION OF
+  LIABILITY.</B>
+<UL>UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING
+  NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY
+  OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, OR ANY SUPPLIER OF ANY
+  OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL,
+  INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT
+  LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR
+  MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH
+  PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS
+  LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL
+  INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW
+  PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR
+  LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND
+  LIMITATION MAY NOT APPLY TO YOU.</UL><B>10. U.S. GOVERNMENT END USERS.</B>
+<UL>The Covered Code is a ''commercial item,'' as that term is defined in 48
+  C.F.R. 2.101 (Oct. 1995), consisting of ''commercial computer software'' and
+  ''commercial computer software documentation,'' as such terms are used in 48
+  C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R.
+  227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users
+  acquire Covered Code with only those rights set forth herein.</UL><B>11.
+  MISCELLANEOUS.</B>
+<UL>This License represents the complete agreement concerning subject matter
+  hereof. If any provision of this License is held to be unenforceable, such
+  provision shall be reformed only to the extent necessary to make it
+  enforceable. This License shall be governed by California law provisions
+  (except to the extent applicable law, if any, provides otherwise), excluding
+  its conflict-of-law provisions. With respect to disputes in which at least one
+  party is a citizen of, or an entity chartered or registered to do business in
+  the United States of America, any litigation relating to this License shall be
+  subject to the jurisdiction of the Federal Courts of the Northern District of
+  California, with venue lying in Santa Clara County, California, with the
+  losing party responsible for costs, including without limitation, court costs
+  and reasonable attorneys' fees and expenses. The application of the United
+  Nations Convention on Contracts for the International Sale of Goods is
+  expressly excluded. Any law or regulation which provides that the language of
+  a contract shall be construed against the drafter shall not apply to this
+  License.</UL><B>12. RESPONSIBILITY FOR CLAIMS.</B>
+<UL>As between Initial Developer and the Contributors, each party is
+  responsible for claims and damages arising, directly or indirectly, out of its
+  utilization of rights under this License and You agree to work with Initial
+  Developer and Contributors to distribute such responsibility on an equitable
+  basis. Nothing herein is intended or shall be deemed to constitute any
+  admission of liability.</UL><B>13. MULTIPLE-LICENSED CODE.</B>
+<UL>Initial Developer may designate portions of the Covered Code as
+  "Multiple-Licensed".&nbsp; "Multiple-Licensed" means that the Initial
+  Developer permits you to utilize portions of the Covered Code under Your
+  choice of the MPL or the alternative licenses, if any, specified by the
+  Initial Developer in the file described in Exhibit A.</UL>
+<P><BR><B>EXHIBIT A -Mozilla Public License.</B>
+  <UL>The contents of this file are subject to the Mozilla Public License
+    Version 1.1 (the "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+    <BR>http://www.mozilla.org/MPL/
+<P>Software distributed under the License is distributed on an "AS IS" basis,
+  WITHOUT WARRANTY OF <BR>ANY KIND, either express or implied. See the License
+  for the specific language governing rights and <BR>limitations under the
+  License.
+<P>The Original Code is Javassist.
+<P>The Initial Developer of the Original Code is Shigeru Chiba.
+  Portions created by the Initial Developer are<BR>&nbsp;
+  Copyright (C) 1999- Shigeru Chiba. All Rights Reserved.
+<P>Contributor(s):  __Bill Burke, Jason T. Greene______________.
+
+<p>Alternatively, the contents of this software may be used under the
+  terms of the GNU Lesser General Public License Version 2.1 or later
+  (the "LGPL"), or the Apache License Version 2.0 (the "AL"),
+  in which case the provisions of the LGPL or the AL are applicable
+  instead of those above. If you wish to allow use of your version of
+  this software only under the terms of either the LGPL or the AL, and not to allow others to
+  use your version of this software under the terms of the MPL, indicate
+  your decision by deleting the provisions above and replace them with
+  the notice and other provisions required by the LGPL or the AL. If you do not
+  delete the provisions above, a recipient may use your version of this
+  software under the terms of any one of the MPL, the LGPL or the AL.
+
+<P></P></UL>
+</BODY>
+</HTML>
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-javolution.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-javolution.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b64af4d8298aaa6d24483a3db16415ed384318aa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-javolution.txt
@@ -0,0 +1,27 @@
+/*
+ * Javolution - Java(tm) Solution for Real-Time and Embedded Systems
+ * Copyright (c) 2012, Javolution (http://javolution.org/)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jline.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jline.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2ec539d10ac547a81aefc7c5ff515cda73add8fd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jline.txt
@@ -0,0 +1,32 @@
+Copyright (c) 2002-2006, Marc Prud'hommeaux <mwp1@cornell.edu>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or
+without modification, are permitted provided that the following
+conditions are met:
+
+Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with
+the distribution.
+
+Neither the name of JLine nor the names of its contributors
+may be used to endorse or promote products derived from this
+software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jodd.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jodd.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cc6b458adb386d4cda70728777fbf7a32927d376
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jodd.txt
@@ -0,0 +1,24 @@
+Copyright (c) 2003-present, Jodd Team (https://jodd.org)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-join.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-join.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1d916090e4ea0338ad45ba26ce12af0122244d06
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-join.txt
@@ -0,0 +1,30 @@
+Copyright (c) 2011, Douban Inc. <http://www.douban.com/>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+    * Neither the name of the Douban Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jquery.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jquery.txt
new file mode 100644
index 0000000000000000000000000000000000000000..45930542204fbb37b3115ea11646dfae95ac2119
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jquery.txt
@@ -0,0 +1,20 @@
+Copyright JS Foundation and other contributors, https://js.foundation/
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-json-formatter.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-json-formatter.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5193348fce1260ea96581d61f11aaf04ea90084c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-json-formatter.txt
@@ -0,0 +1,6 @@
+Copyright 2014 Mohsen Azimi
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jtransforms.html b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jtransforms.html
new file mode 100644
index 0000000000000000000000000000000000000000..351c17412357b47a65ed7206654795381a3bafe5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-jtransforms.html
@@ -0,0 +1,388 @@
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en"><head><title>Mozilla Public License version 1.1</title>
+
+
+
+  <style type="text/css">
+    .very-strong{
+      text-transform:uppercase;
+    }
+    dt{
+      font-weight:bold;
+    }
+    dd p{
+      margin:0;
+    }
+  </style></head><body>
+<small> </small><h1><small>Mozilla Public License Version 1.1</small></h1>
+<small> </small><h2 id="section-1"><small>1. Definitions.</small></h2>
+<small> </small><dl>
+  <small>  <dt id="section-1.0.1">1.0.1. "Commercial Use"
+  </dt><dd>means distribution or otherwise making the Covered Code available to a third party.
+  </dd><dt id="section-1.1">1.1. "Contributor"
+  </dt><dd>means each entity that creates or contributes to the creation of Modifications.
+  </dd><dt id="section-1.2">1.2. "Contributor Version"
+  </dt><dd>means the combination of the Original Code, prior Modifications used by a Contributor,
+    and the Modifications made by that particular Contributor.
+  </dd><dt id="section-1.3">1.3. "Covered Code"
+  </dt><dd>means the Original Code or Modifications or the combination of the Original Code and
+    Modifications, in each case including portions thereof.
+  </dd><dt id="section-1.4">1.4. "Electronic Distribution Mechanism"
+  </dt><dd>means a mechanism generally accepted in the software development community for the
+    electronic transfer of data.
+  </dd><dt id="section-1.5">1.5. "Executable"
+  </dt><dd>means Covered Code in any form other than Source Code.
+  </dd><dt id="section-1.6">1.6. "Initial Developer"
+  </dt><dd>means the individual or entity identified as the Initial Developer in the Source Code
+    notice required by <a href="#exhibit-a">Exhibit A</a>.
+  </dd><dt id="section-1.7">1.7. "Larger Work"
+  </dt><dd>means a work which combines Covered Code or portions thereof with code not governed
+    by the terms of this License.
+  </dd><dt id="section-1.8">1.8. "License"
+  </dt><dd>means this document.
+  </dd><dt id="section-1.8.1">1.8.1. "Licensable"
+  </dt><dd>means having the right to grant, to the maximum extent possible, whether at the
+    time of the initial grant or subsequently acquired, any and all of the rights
+    conveyed herein.
+  </dd><dt id="section-1.9">1.9. "Modifications"
+  </dt><dd>
+    <p>means any addition to or deletion from the substance or structure of either the
+      Original Code or any previous Modifications. When Covered Code is released as a
+      series of files, a Modification is:
+    </p><ol type="a">
+    <li id="section-1.9-a">Any addition to or deletion from the contents of a file
+      containing Original Code or previous Modifications.
+    </li><li id="section-1.9-b">Any new file that contains any part of the Original Code or
+    previous Modifications.
+  </li></ol>
+  </dd><dt id="section-1.10">1.10. "Original Code"
+  </dt><dd>means Source Code of computer software code which is described in the Source Code
+    notice required by <a href="#exhibit-a">Exhibit A</a> as Original Code, and which,
+    at the time of its release under this License is not already Covered Code governed
+    by this License.
+  </dd><dt id="section-1.10.1">1.10.1. "Patent Claims"
+  </dt><dd>means any patent claim(s), now owned or hereafter acquired, including without
+    limitation, method, process, and apparatus claims, in any patent Licensable by
+    grantor.
+  </dd><dt id="section-1.11">1.11. "Source Code"
+  </dt><dd>means the preferred form of the Covered Code for making modifications to it,
+    including all modules it contains, plus any associated interface definition files,
+    scripts used to control compilation and installation of an Executable, or source
+    code differential comparisons against either the Original Code or another well known,
+    available Covered Code of the Contributor's choice. The Source Code can be in a
+    compressed or archival form, provided the appropriate decompression or de-archiving
+    software is widely available for no charge.
+  </dd><dt id="section-1.12">1.12. "You" (or "Your")
+  </dt><dd>means an individual or a legal entity exercising rights under, and complying with
+    all of the terms of, this License or a future version of this License issued under
+    <a href="#section-6.1">Section 6.1.</a> For legal entities, "You" includes any entity
+    which controls, is controlled by, or is under common control with You. For purposes of
+    this definition, "control" means (a) the power, direct or indirect, to cause the
+    direction or management of such entity, whether by contract or otherwise, or (b)
+    ownership of more than fifty percent (50%) of the outstanding shares or beneficial
+    ownership of such entity.
+  </dd></small></dl>
+<small> </small><h2 id="section-2"><small>2. Source Code License.</small></h2>
+<small> </small><h3 id="section-2.1"><small>2.1. The Initial Developer Grant.</small></h3>
+<small> </small><p><small>The Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive
+  license, subject to third party intellectual property claims:
+</small></p><ol type="a">
+  <small>  </small><li id="section-2.1-a"><small>under intellectual property rights (other than patent or
+  trademark) Licensable by Initial Developer to use, reproduce, modify, display, perform,
+  sublicense and distribute the Original Code (or portions thereof) with or without
+  Modifications, and/or as part of a Larger Work; and
+</small></li><li id="section-2.1-b"><small>under Patents Claims infringed by the making, using or selling
+  of Original Code, to make, have made, use, practice, sell, and offer for sale, and/or
+  otherwise dispose of the Original Code (or portions thereof).
+</small></li><li id="section-2.1-c"><small>the licenses granted in this Section 2.1
+  (<a href="#section-2.1-a">a</a>) and (<a href="#section-2.1-b">b</a>) are effective on
+  the date Initial Developer first distributes Original Code under the terms of this
+  License.
+</small></li><li id="section-2.1-d"><small>Notwithstanding Section 2.1 (<a href="#section-2.1-b">b</a>)
+  above, no patent license is granted: 1) for code that You delete from the Original Code;
+  2) separate from the Original Code; or 3) for infringements caused by: i) the
+  modification of the Original Code or ii) the combination of the Original Code with other
+  software or devices.
+</small></li></ol>
+<small> </small><h3 id="section-2.2"><small>2.2. Contributor Grant.</small></h3>
+<small> </small><p><small>Subject to third party intellectual property claims, each Contributor hereby grants You
+  a world-wide, royalty-free, non-exclusive license
+</small></p><ol type="a">
+  <small>  </small><li id="section-2.2-a"><small>under intellectual property rights (other than patent or trademark)
+  Licensable by Contributor, to use, reproduce, modify, display, perform, sublicense and
+  distribute the Modifications created by such Contributor (or portions thereof) either on
+  an unmodified basis, with other Modifications, as Covered Code and/or as part of a Larger
+  Work; and
+</small></li><li id="section-2.2-b"><small>under Patent Claims infringed by the making, using, or selling of
+  Modifications made by that Contributor either alone and/or in combination with its
+  Contributor Version (or portions of such combination), to make, use, sell, offer for
+  sale, have made, and/or otherwise dispose of: 1) Modifications made by that Contributor
+  (or portions thereof); and 2) the combination of Modifications made by that Contributor
+  with its Contributor Version (or portions of such combination).
+</small></li><li id="section-2.2-c"><small>the licenses granted in Sections 2.2
+  (<a href="#section-2.2-a">a</a>) and 2.2 (<a href="#section-2.2-b">b</a>) are effective
+  on the date Contributor first makes Commercial Use of the Covered Code.
+</small></li><li id="section-2.2-d"><small>Notwithstanding Section 2.2 (<a href="#section-2.2-b">b</a>)
+  above, no patent license is granted: 1) for any code that Contributor has deleted from
+  the Contributor Version; 2) separate from the Contributor Version; 3) for infringements
+  caused by: i) third party modifications of Contributor Version or ii) the combination of
+  Modifications made by that Contributor with other software (except as part of the
+  Contributor Version) or other devices; or 4) under Patent Claims infringed by Covered Code
+  in the absence of Modifications made by that Contributor.
+</small></li></ol>
+<small> </small><h2 id="section-3"><small>3. Distribution Obligations.</small></h2>
+<small> </small><h3 id="section-3.1"><small>3.1. Application of License.</small></h3>
+<small> </small><p><small>The Modifications which You create or to which You contribute are governed by the terms
+  of this License, including without limitation Section <a href="#section-2.2">2.2</a>. The
+  Source Code version of Covered Code may be distributed only under the terms of this License
+  or a future version of this License released under Section <a href="#section-6.1">6.1</a>,
+  and You must include a copy of this License with every copy of the Source Code You
+  distribute. You may not offer or impose any terms on any Source Code version that alters or
+  restricts the applicable version of this License or the recipients' rights hereunder.
+  However, You may include an additional document offering the additional rights described in
+  Section <a href="#section-3.5">3.5</a>.
+</small></p><h3 id="section-3.2"><small>3.2. Availability of Source Code.</small></h3>
+<small> </small><p><small>Any Modification which You create or to which You contribute must be made available in
+  Source Code form under the terms of this License either on the same media as an Executable
+  version or via an accepted Electronic Distribution Mechanism to anyone to whom you made an
+  Executable version available; and if made available via Electronic Distribution Mechanism,
+  must remain available for at least twelve (12) months after the date it initially became
+  available, or at least six (6) months after a subsequent version of that particular
+  Modification has been made available to such recipients. You are responsible for ensuring
+  that the Source Code version remains available even if the Electronic Distribution
+  Mechanism is maintained by a third party.
+</small></p><h3 id="section-3.3"><small>3.3. Description of Modifications.</small></h3>
+<small> </small><p><small>You must cause all Covered Code to which You contribute to contain a file documenting the
+  changes You made to create that Covered Code and the date of any change. You must include a
+  prominent statement that the Modification is derived, directly or indirectly, from Original
+  Code provided by the Initial Developer and including the name of the Initial Developer in
+  (a) the Source Code, and (b) in any notice in an Executable version or related documentation
+  in which You describe the origin or ownership of the Covered Code.
+</small></p><h3 id="section-3.4"><small>3.4. Intellectual Property Matters</small></h3>
+<small> </small><h4 id="section-3.4-a"><small>(a) Third Party Claims</small></h4>
+<small> </small><p><small>If Contributor has knowledge that a license under a third party's intellectual property
+  rights is required to exercise the rights granted by such Contributor under Sections
+  <a href="#section-2.1">2.1</a> or <a href="#section-2.2">2.2</a>, Contributor must include a
+  text file with the Source Code distribution titled "LEGAL" which describes the claim and the
+  party making the claim in sufficient detail that a recipient will know whom to contact. If
+  Contributor obtains such knowledge after the Modification is made available as described in
+  Section <a href="#section-3.2">3.2</a>, Contributor shall promptly modify the LEGAL file in
+  all copies Contributor makes available thereafter and shall take other steps (such as
+  notifying appropriate mailing lists or newsgroups) reasonably calculated to inform those who
+  received the Covered Code that new knowledge has been obtained.
+</small></p><h4 id="section-3.4-b"><small>(b) Contributor APIs</small></h4>
+<small> </small><p><small>If Contributor's Modifications include an application programming interface and Contributor
+  has knowledge of patent licenses which are reasonably necessary to implement that
+  <abbr>API</abbr>, Contributor must also include this information in the
+  <strong class="very-strong">legal</strong> file.
+</small></p><h4 id="section-3.4-c"><small>(c) Representations.</small></h4>
+<small> </small><p><small>Contributor represents that, except as disclosed pursuant to Section 3.4
+  (<a href="#section-3.4-a">a</a>) above, Contributor believes that Contributor's Modifications
+  are Contributor's original creation(s) and/or Contributor has sufficient rights to grant the
+  rights conveyed by this License.
+</small></p><h3 id="section-3.5"><small>3.5. Required Notices.</small></h3>
+<small> </small><p><small>You must duplicate the notice in <a href="#exhibit-a">Exhibit A</a> in each file of the
+  Source Code. If it is not possible to put such notice in a particular Source Code file due to
+  its structure, then You must include such notice in a location (such as a relevant directory)
+  where a user would be likely to look for such a notice. If You created one or more
+  Modification(s) You may add your name as a Contributor to the notice described in
+  <a href="#exhibit-a">Exhibit A</a>. You must also duplicate this License in any documentation
+  for the Source Code where You describe recipients' rights or ownership rights relating to
+  Covered Code. You may choose to offer, and to charge a fee for, warranty, support, indemnity
+  or liability obligations to one or more recipients of Covered Code. However, You may do so
+  only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You
+  must make it absolutely clear than any such warranty, support, indemnity or liability
+  obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer
+  and every Contributor for any liability incurred by the Initial Developer or such Contributor
+  as a result of warranty, support, indemnity or liability terms You offer.
+</small></p><h3 id="section-3.6"><small>3.6. Distribution of Executable Versions.</small></h3>
+<small> </small><p><small>You may distribute Covered Code in Executable form only if the requirements of Sections
+  <a href="#section-3.1">3.1</a>, <a href="#section-3.2">3.2</a>,
+  <a href="#section-3.3">3.3</a>, <a href="#section-3.4">3.4</a> and
+  <a href="#section-3.5">3.5</a> have been met for that Covered Code, and if You include a
+  notice stating that the Source Code version of the Covered Code is available under the terms
+  of this License, including a description of how and where You have fulfilled the obligations
+  of Section <a href="#section-3.2">3.2</a>. The notice must be conspicuously included in any
+  notice in an Executable version, related documentation or collateral in which You describe
+  recipients' rights relating to the Covered Code. You may distribute the Executable version of
+  Covered Code or ownership rights under a license of Your choice, which may contain terms
+  different from this License, provided that You are in compliance with the terms of this
+  License and that the license for the Executable version does not attempt to limit or alter the
+  recipient's rights in the Source Code version from the rights set forth in this License. If
+  You distribute the Executable version under a different license You must make it absolutely
+  clear that any terms which differ from this License are offered by You alone, not by the
+  Initial Developer or any Contributor. You hereby agree to indemnify the Initial Developer and
+  every Contributor for any liability incurred by the Initial Developer or such Contributor as
+  a result of any such terms You offer.
+</small></p><h3 id="section-3.7"><small>3.7. Larger Works.</small></h3>
+<small> </small><p><small>You may create a Larger Work by combining Covered Code with other code not governed by the
+  terms of this License and distribute the Larger Work as a single product. In such a case,
+  You must make sure the requirements of this License are fulfilled for the Covered Code.
+</small></p><h2 id="section-4"><small>4. Inability to Comply Due to Statute or Regulation.</small></h2>
+<small> </small><p><small>If it is impossible for You to comply with any of the terms of this License with respect to
+  some or all of the Covered Code due to statute, judicial order, or regulation then You must:
+  (a) comply with the terms of this License to the maximum extent possible; and (b) describe
+  the limitations and the code they affect. Such description must be included in the
+  <strong class="very-strong">legal</strong> file described in Section
+  <a href="#section-3.4">3.4</a> and must be included with all distributions of the Source Code.
+  Except to the extent prohibited by statute or regulation, such description must be
+  sufficiently detailed for a recipient of ordinary skill to be able to understand it.
+</small></p><h2 id="section-5"><small>5. Application of this License.</small></h2>
+<small> </small><p><small>This License applies to code to which the Initial Developer has attached the notice in
+  <a href="#exhibit-a">Exhibit A</a> and to related Covered Code.
+</small></p><h2 id="section-6"><small>6. Versions of the License.</small></h2>
+<small> </small><h3 id="section-6.1"><small>6.1. New Versions</small></h3>
+<small> </small><p><small>Netscape Communications Corporation ("Netscape") may publish revised and/or new versions
+  of the License from time to time. Each version will be given a distinguishing version number.
+</small></p><h3 id="section-6.2"><small>6.2. Effect of New Versions</small></h3>
+<small> </small><p><small>Once Covered Code has been published under a particular version of the License, You may
+  always continue to use it under the terms of that version. You may also choose to use such
+  Covered Code under the terms of any subsequent version of the License published by Netscape.
+  No one other than Netscape has the right to modify the terms applicable to Covered Code
+  created under this License.
+</small></p><h3 id="section-6.3"><small>6.3. Derivative Works</small></h3>
+<small> </small><p><small>If You create or use a modified version of this License (which you may only do in order to
+  apply it to code which is not already Covered Code governed by this License), You must (a)
+  rename Your license so that the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape", "MPL",
+  "NPL" or any confusingly similar phrase do not appear in your license (except to note that
+  your license differs from this License) and (b) otherwise make it clear that Your version of
+  the license contains terms which differ from the Mozilla Public License and Netscape Public
+  License. (Filling in the name of the Initial Developer, Original Code or Contributor in the
+  notice described in <a href="#exhibit-a">Exhibit A</a> shall not of themselves be deemed to
+  be modifications of this License.)
+</small></p><h2 id="section-7"><small>7. <strong class="very-strong">Disclaimer of warranty</strong></small></h2>
+<small> </small><p><small><strong class="very-strong">Covered code is provided under this license on an "as is"
+  basis, without warranty of any kind, either expressed or implied, including, without
+  limitation, warranties that the covered code is free of defects, merchantable, fit for a
+  particular purpose or non-infringing. The entire risk as to the quality and performance of
+  the covered code is with you. Should any covered code prove defective in any respect, you
+  (not the initial developer or any other contributor) assume the cost of any necessary
+  servicing, repair or correction. This disclaimer of warranty constitutes an essential part
+  of this license. No use of any covered code is authorized hereunder except under this
+  disclaimer.</strong>
+</small></p><h2 id="section-8"><small>8. Termination</small></h2>
+<small> </small><p id="section-8.1"><small>8.1. This License and the rights granted hereunder will terminate
+  automatically if You fail to comply with terms herein and fail to cure such breach
+  within 30 days of becoming aware of the breach. All sublicenses to the Covered Code which
+  are properly granted shall survive any termination of this License. Provisions which, by
+  their nature, must remain in effect beyond the termination of this License shall survive.
+</small></p><p id="section-8.2"><small>8.2. If You initiate litigation by asserting a patent infringement
+  claim (excluding declatory judgment actions) against Initial Developer or a Contributor
+  (the Initial Developer or Contributor against whom You file such action is referred to
+  as "Participant") alleging that:
+</small></p><ol type="a">
+  <small>  </small><li id="section-8.2-a"><small>such Participant's Contributor Version directly or indirectly
+  infringes any patent, then any and all rights granted by such Participant to You under
+  Sections <a href="#section-2.1">2.1</a> and/or <a href="#section-2.2">2.2</a> of this
+  License shall, upon 60 days notice from Participant terminate prospectively, unless if
+  within 60 days after receipt of notice You either: (i) agree in writing to pay
+  Participant a mutually agreeable reasonable royalty for Your past and future use of
+  Modifications made by such Participant, or (ii) withdraw Your litigation claim with
+  respect to the Contributor Version against such Participant. If within 60 days of
+  notice, a reasonable royalty and payment arrangement are not mutually agreed upon in
+  writing by the parties or the litigation claim is not withdrawn, the rights granted by
+  Participant to You under Sections <a href="#section-2.1">2.1</a> and/or
+  <a href="#section-2.2">2.2</a> automatically terminate at the expiration of the 60 day
+  notice period specified above.
+</small></li><li id="section-8.2-b"><small>any software, hardware, or device, other than such Participant's
+  Contributor Version, directly or indirectly infringes any patent, then any rights
+  granted to You by such Participant under Sections 2.1(<a href="#section-2.1-b">b</a>)
+  and 2.2(<a href="#section-2.2-b">b</a>) are revoked effective as of the date You first
+  made, used, sold, distributed, or had made, Modifications made by that Participant.
+</small></li></ol>
+<small> </small><p id="section-8.3"><small>8.3. If You assert a patent infringement claim against Participant
+  alleging that such Participant's Contributor Version directly or indirectly infringes
+  any patent where such claim is resolved (such as by license or settlement) prior to the
+  initiation of patent infringement litigation, then the reasonable value of the licenses
+  granted by such Participant under Sections <a href="#section-2.1">2.1</a> or
+  <a href="#section-2.2">2.2</a> shall be taken into account in determining the amount or
+  value of any payment or license.
+</small></p><p id="section-8.4"><small>8.4. In the event of termination under Sections
+  <a href="#section-8.1">8.1</a> or <a href="#section-8.2">8.2</a> above, all end user
+  license agreements (excluding distributors and resellers) which have been validly
+  granted by You or any distributor hereunder prior to termination shall survive
+  termination.
+</small></p><h2 id="section-9"><small>9. <strong class="very-strong">Limitation of liability</strong></small></h2>
+<small> </small><p><small><strong class="very-strong">Under no circumstances and under no legal theory, whether
+  tort (including negligence), contract, or otherwise, shall you, the initial developer,
+  any other contributor, or any distributor of covered code, or any supplier of any of
+  such parties, be liable to any person for any indirect, special, incidental, or
+  consequential damages of any character including, without limitation, damages for loss
+  of goodwill, work stoppage, computer failure or malfunction, or any and all other
+  commercial damages or losses, even if such party shall have been informed of the
+  possibility of such damages. This limitation of liability shall not apply to liability
+  for death or personal injury resulting from such party's negligence to the extent
+  applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion
+  or limitation of incidental or consequential damages, so this exclusion and limitation
+  may not apply to you.</strong>
+</small></p><h2 id="section-10"><small>10. <abbr title="United States">U.S.</abbr> government end users</small></h2>
+<small> </small><p><small>The Covered Code is a "commercial item," as that term is defined in 48
+  <abbr>C.F.R.</abbr> 2.101 (<abbr title="October">Oct.</abbr> 1995), consisting of
+  "commercial computer software" and "commercial computer software documentation," as such
+  terms are used in 48 <abbr>C.F.R.</abbr> 12.212 (<abbr title="September">Sept.</abbr>
+  1995). Consistent with 48 <abbr>C.F.R.</abbr> 12.212 and 48 <abbr>C.F.R.</abbr>
+  227.7202-1 through 227.7202-4 (June 1995), all <abbr>U.S.</abbr> Government End Users
+  acquire Covered Code with only those rights set forth herein.
+</small></p><h2 id="section-11"><small>11. Miscellaneous</small></h2>
+<small> </small><p><small>This License represents the complete agreement concerning subject matter hereof. If
+  any provision of this License is held to be unenforceable, such provision shall be
+  reformed only to the extent necessary to make it enforceable. This License shall be
+  governed by California law provisions (except to the extent applicable law, if any,
+  provides otherwise), excluding its conflict-of-law provisions. With respect to
+  disputes in which at least one party is a citizen of, or an entity chartered or
+  registered to do business in the United States of America, any litigation relating to
+  this License shall be subject to the jurisdiction of the Federal Courts of the
+  Northern District of California, with venue lying in Santa Clara County, California,
+  with the losing party responsible for costs, including without limitation, court
+  costs and reasonable attorneys' fees and expenses. The application of the United
+  Nations Convention on Contracts for the International Sale of Goods is expressly
+  excluded. Any law or regulation which provides that the language of a contract
+  shall be construed against the drafter shall not apply to this License.
+</small></p><h2 id="section-12"><small>12. Responsibility for claims</small></h2>
+<small> </small><p><small>As between Initial Developer and the Contributors, each party is responsible for
+  claims and damages arising, directly or indirectly, out of its utilization of rights
+  under this License and You agree to work with Initial Developer and Contributors to
+  distribute such responsibility on an equitable basis. Nothing herein is intended or
+  shall be deemed to constitute any admission of liability.
+</small></p><h2 id="section-13"><small>13. Multiple-licensed code</small></h2>
+<small> </small><p><small>Initial Developer may designate portions of the Covered Code as
+  "Multiple-Licensed". "Multiple-Licensed" means that the Initial Developer permits
+  you to utilize portions of the Covered Code under Your choice of the <abbr>MPL</abbr>
+  or the alternative licenses, if any, specified by the Initial Developer in the file
+  described in <a href="#exhibit-a">Exhibit A</a>.
+</small></p><h2 id="exhibit-a"><small>Exhibit A - Mozilla Public License.</small></h2>
+<small> </small><pre><small>"The contents of this file are subject to the Mozilla Public License
+Version 1.1 (the "License"); you may not use this file except in
+compliance with the License. You may obtain a copy of the License at
+http://www.mozilla.org/MPL/
+
+Software distributed under the License is distributed on an "AS IS"
+basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
+License for the specific language governing rights and limitations
+under the License.
+
+The Original Code is JTransforms.
+
+The Initial Developer of the Original Code is
+Piotr Wendykier, Emory University.
+Portions created by the Initial Developer are Copyright (C) 2007-2009
+the Initial Developer. All Rights Reserved.
+
+Alternatively, the contents of this file may be used under the terms of
+either the GNU General Public License Version 2 or later (the "GPL"), or
+the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+in which case the provisions of the GPL or the LGPL are applicable instead
+of those above. If you wish to allow use of your version of this file only
+under the terms of either the GPL or the LGPL, and not to allow others to
+use your version of this file under the terms of the MPL, indicate your
+decision by deleting the provisions above and replace them with the notice
+and other provisions required by the GPL or the LGPL. If you do not delete
+the provisions above, a recipient may use your version of this file under
+the terms of any one of the MPL, the GPL or the LGPL.</small></pre>
+<small> </small><p><small>NOTE: The text of this Exhibit A may differ slightly from the text of
+  the notices in the Source Code files of the Original Code. You should
+  use the text of this Exhibit A rather than the text found in the
+  Original Code Source Code for Your Modifications.
+
+</small></p></body></html>
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-kryo.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-kryo.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f6a160c238e542756c50193a25e819204bdc3af
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-kryo.txt
@@ -0,0 +1,10 @@
+Copyright (c) 2008, Nathan Sweet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-leveldbjni.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-leveldbjni.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b4dabb9174c6d4f8508e3c4ff6db38ac9b60a45a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-leveldbjni.txt
@@ -0,0 +1,27 @@
+Copyright (c) 2011 FuseSource Corp. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of FuseSource Corp. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-machinist.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-machinist.txt
new file mode 100644
index 0000000000000000000000000000000000000000..68cc3a3e3a9c44c090ca81916a3cdbf3db8aa4b3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-machinist.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2011-2014 Erik Osheim, Tom Switzer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-matchMedia-polyfill.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-matchMedia-polyfill.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2fd0bc2b37448d28d1176daf56db9479129141bf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-matchMedia-polyfill.txt
@@ -0,0 +1 @@
+matchMedia() polyfill - Test a CSS media type/query in JS. Authors & copyright (c) 2012: Scott Jehl, Paul Irish, Nicholas Zakas. Dual MIT/BSD license
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-minlog.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-minlog.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f6a160c238e542756c50193a25e819204bdc3af
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-minlog.txt
@@ -0,0 +1,10 @@
+Copyright (c) 2008, Nathan Sweet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-modernizr.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-modernizr.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2bf24b9b9f848d6f4ba9e31b7bae34b573e54cba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-modernizr.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) <year> <copyright holders>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-netlib.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-netlib.txt
new file mode 100644
index 0000000000000000000000000000000000000000..75783ed6bc357302f64b0b4c4e1c38cbb8fda74f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-netlib.txt
@@ -0,0 +1,49 @@
+Copyright (c) 2013 Samuel Halliday
+Copyright (c) 1992-2011 The University of Tennessee and The University
+                        of Tennessee Research Foundation.  All rights
+                        reserved.
+Copyright (c) 2000-2011 The University of California Berkeley. All
+                        rights reserved.
+Copyright (c) 2006-2011 The University of Colorado Denver.  All rights
+                        reserved.
+
+$COPYRIGHT$
+
+Additional copyrights may follow
+
+$HEADER$
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer listed
+  in this license in the documentation and/or other materials
+  provided with the distribution.
+
+- Neither the name of the copyright holders nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+The copyright holders provide no reassurances that the source code
+provided does not infringe any patent, copyright, or any other
+intellectual property rights of third parties.  The copyright holders
+disclaim any liability to any recipient for claims brought against
+recipient by any third party for infringement of that parties
+intellectual property rights.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-paranamer.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-paranamer.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fca18473ba03f2082422d6a92871b9e29700250a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-paranamer.txt
@@ -0,0 +1,28 @@
+[ ParaNamer used to be 'Public Domain', but since it includes a small piece of ASM it is now the same license as that: BSD ]
+
+ Copyright (c) 2006 Paul Hammant & ThoughtWorks Inc
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. Neither the name of the copyright holders nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-pmml-model.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-pmml-model.txt
new file mode 100644
index 0000000000000000000000000000000000000000..69411d1c6e9a8dcc09e7ea8d2c1a120760cbdc4a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-pmml-model.txt
@@ -0,0 +1,10 @@
+Copyright (c) 2009, University of Tartu
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-protobuf.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-protobuf.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b4350ec83c758f33a9eda70045052a872c7896e3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-protobuf.txt
@@ -0,0 +1,42 @@
+This license applies to all parts of Protocol Buffers except the following:
+
+  - Atomicops support for generic gcc, located in
+    src/google/protobuf/stubs/atomicops_internals_generic_gcc.h.
+    This file is copyrighted by Red Hat Inc.
+
+  - Atomicops support for AIX/POWER, located in
+    src/google/protobuf/stubs/atomicops_internals_aix.h.
+    This file is copyrighted by Bloomberg Finance LP.
+
+Copyright 2014, Google Inc.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it.  This code is not
+standalone and requires a support library to be linked with it.  This
+support library is itself covered by the above license.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-py4j.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-py4j.txt
new file mode 100644
index 0000000000000000000000000000000000000000..70af3e69ed67a377148def9d4e9b3c2a9bf00017
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-py4j.txt
@@ -0,0 +1,27 @@
+Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+- The name of the author may not be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-pyrolite.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-pyrolite.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9457c7aa66140cfc1c576cec1f04205e126a55c5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-pyrolite.txt
@@ -0,0 +1,28 @@
+
+Pyro - Python Remote Objects
+Software License, copyright, and disclaimer
+
+  Pyro is Copyright (c) by Irmen de Jong (irmen@razorvine.net).
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to deal
+  in the Software without restriction, including without limitation the rights
+  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+  copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+
+
+This is the "MIT Software License" which is OSI-certified, and GPL-compatible.
+See http://www.opensource.org/licenses/mit-license.php
+
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-reflectasm.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-reflectasm.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f6a160c238e542756c50193a25e819204bdc3af
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-reflectasm.txt
@@ -0,0 +1,10 @@
+Copyright (c) 2008, Nathan Sweet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-respond.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-respond.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dea4ff9e5b2eadc49c911753dc29e790b5b424df
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-respond.txt
@@ -0,0 +1,22 @@
+Copyright (c) 2012 Scott Jehl
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-sbt-launch-lib.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-sbt-launch-lib.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3b9156baaab78d25c9667f69a9c5711a3d21bd68
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-sbt-launch-lib.txt
@@ -0,0 +1,26 @@
+// Generated from http://www.opensource.org/licenses/bsd-license.php
+Copyright (c) 2011, Paul Phillips.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the name of the author nor the names of its contributors may be
+      used to endorse or promote products derived from this software without
+      specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-scala.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-scala.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4846076aba24650f56048f1672d7862009adebb1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-scala.txt
@@ -0,0 +1,30 @@
+Copyright (c) 2002-2013 EPFL
+Copyright (c) 2011-2013 Typesafe, Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+- Neither the name of the EPFL nor the names of its contributors may be
+  used to endorse or promote products derived from this software without
+  specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-scopt.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-scopt.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e92e9b592fba0b426f5eb58b5506e26414d75a8c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-scopt.txt
@@ -0,0 +1,9 @@
+This project is licensed under the MIT license.
+
+Copyright (c) scopt contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-slf4j.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-slf4j.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6548cd3af4322470423ef726ef059a44bb94c9f0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-slf4j.txt
@@ -0,0 +1,21 @@
+Copyright (c) 2004-2013 QOS.ch
+ All rights reserved.
+
+ Permission is hereby granted, free  of charge, to any person obtaining
+ a  copy  of this  software  and  associated  documentation files  (the
+ "Software"), to  deal in  the Software without  restriction, including
+ without limitation  the rights to  use, copy, modify,  merge, publish,
+ distribute,  sublicense, and/or sell  copies of  the Software,  and to
+ permit persons to whom the Software  is furnished to do so, subject to
+ the following conditions:
+
+ The  above  copyright  notice  and  this permission  notice  shall  be
+ included in all copies or substantial portions of the Software.
+
+ THE  SOFTWARE IS  PROVIDED  "AS  IS", WITHOUT  WARRANTY  OF ANY  KIND,
+ EXPRESS OR  IMPLIED, INCLUDING  BUT NOT LIMITED  TO THE  WARRANTIES OF
+ MERCHANTABILITY,    FITNESS    FOR    A   PARTICULAR    PURPOSE    AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE,  ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-sorttable.js.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-sorttable.js.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b31a5b206bf408167e26df5e8864358caff2570b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-sorttable.js.txt
@@ -0,0 +1,16 @@
+Copyright (c) 1997-2007 Stuart Langridge
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-spire.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-spire.txt
new file mode 100644
index 0000000000000000000000000000000000000000..40af7746b93156ef8df8f26bb1390e2a7ceeea62
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-spire.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2011-2012 Erik Osheim, Tom Switzer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-vis.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-vis.txt
new file mode 100644
index 0000000000000000000000000000000000000000..18b7323059a4153ee378ed6ee845e400beac7da8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-vis.txt
@@ -0,0 +1,22 @@
+vis.js
+https://github.com/almende/vis
+
+A dynamic, browser-based visualization library.
+
+@version 4.16.1
+@date    2016-04-18
+
+@license
+Copyright (C) 2011-2016 Almende B.V, http://almende.com
+
+Vis.js is dual licensed under both
+
+* The Apache 2.0 License
+  http://www.apache.org/licenses/LICENSE-2.0
+
+and
+
+* The MIT License
+  http://opensource.org/licenses/MIT
+
+Vis.js may be distributed under either license.
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-xmlenc.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-xmlenc.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3a70c9bfcdadd4572d8cbf51facf3f15e98246fb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-xmlenc.txt
@@ -0,0 +1,27 @@
+Copyright 2003-2005, Ernst de Haan <wfe.dehaan@gmail.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-zstd-jni.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-zstd-jni.txt
new file mode 100644
index 0000000000000000000000000000000000000000..32c6bbdd980d69d1eee90da4485e8288c2dabc82
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-zstd-jni.txt
@@ -0,0 +1,26 @@
+Zstd-jni: JNI bindings to Zstd Library
+
+Copyright (c) 2015-2016, Luben Karavelov. All rights reserved.
+
+BSD License
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-zstd.txt b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-zstd.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a793a802892567f17d464a831e2e531dc8833f55
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/licenses/LICENSE-zstd.txt
@@ -0,0 +1,30 @@
+BSD License
+
+For Zstandard software
+
+Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name Facebook nor the names of its contributors may be used to
+   endorse or promote products derived from this software without specific
+   prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/JavaEWAH-0.3.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/JavaEWAH-0.3.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..36ec6e176b810258d05270e03af711e44e2ab8a0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/JavaEWAH-0.3.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/RoaringBitmap-0.5.11.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/RoaringBitmap-0.5.11.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cbeb7a12703e24a7725b4f95d0575cda7caeaf5d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/RoaringBitmap-0.5.11.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ST4-4.0.4.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ST4-4.0.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0ed9db8472ff457fbdc59bd095b19d9bd8ba076c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ST4-4.0.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/activation-1.1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/activation-1.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1b703ab283e0cddabf9c1b5e28658f9198c0def4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/activation-1.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aircompressor-0.10.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aircompressor-0.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e5859afa9ad927ab973d9efac375f97e07eb3be8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aircompressor-0.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/amqp-client-4.4.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/amqp-client-4.4.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e450ac8b45af2fa5fe6b12a827846a40f088746b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/amqp-client-4.4.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/animal-sniffer-annotation-1.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/animal-sniffer-annotation-1.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..124edb8582a7ba854911f6b5ed3460846a013a3e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/animal-sniffer-annotation-1.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/animal-sniffer-annotations-1.17.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/animal-sniffer-annotations-1.17.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6ec7a603e7345b68df0fb276034e87f65a55c540
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/animal-sniffer-annotations-1.17.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/annotations-2.0.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/annotations-2.0.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d4030ba2e6418f322538cf312767a14af9d45ff5
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/annotations-2.0.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr-2.7.7.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr-2.7.7.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5e5f14b35584eac2a9f0f888769f0ab93ca6d849
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr-2.7.7.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr-runtime-3.4.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr-runtime-3.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..865a537b0c5e4d617b8c45adc43fa210df1a8129
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr-runtime-3.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr4-runtime-4.7.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr4-runtime-4.7.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5c3bf7d351dfc1989a026f94bf5b1c3c241493f4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/antlr4-runtime-4.7.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aopalliance-1.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aopalliance-1.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..578b1a0c359ef88a84461bdb91d9d0041afd54de
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aopalliance-1.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aopalliance-repackaged-2.4.0-b31.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aopalliance-repackaged-2.4.0-b31.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7f59718c8158ef208f6c2dc7de62ff52e5f1c218
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aopalliance-repackaged-2.4.0-b31.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/apacheds-i18n-2.0.0-M15.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/apacheds-i18n-2.0.0-M15.jar
new file mode 100644
index 0000000000000000000000000000000000000000..11dccb3e2c3261ddfc7f03593555311d7762b6b7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/apacheds-i18n-2.0.0-M15.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/apacheds-kerberos-codec-2.0.0-M15.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/apacheds-kerberos-codec-2.0.0-M15.jar
new file mode 100644
index 0000000000000000000000000000000000000000..82e564e1a6526785dcba2d9983f25f453ad9f664
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/apacheds-kerberos-codec-2.0.0-M15.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/api-asn1-api-1.0.0-M20.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/api-asn1-api-1.0.0-M20.jar
new file mode 100644
index 0000000000000000000000000000000000000000..68dee3a02f5f62ce35cc363cb3a2c0a3b0d81acf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/api-asn1-api-1.0.0-M20.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/api-util-1.0.0-M20.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/api-util-1.0.0-M20.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ff9780eac9219a461f5f62515a83bdad345c85f7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/api-util-1.0.0-M20.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/archaius-core-0.6.6.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/archaius-core-0.6.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..37e1f3ed55a5a4d057b50d7f2aed169cee9320ec
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/archaius-core-0.6.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-format-0.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-format-0.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ed169160538324fe1f6f527ffb878f2ca2b1935a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-format-0.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-memory-0.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-memory-0.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..678c59be010fce29b41f78dda262d8a2f7181143
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-memory-0.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-vector-0.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-vector-0.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0e5bf3001263b80ca44a48d3888eef81ad293218
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/arrow-vector-0.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/asm-3.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/asm-3.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8217cae0a1bc977b241e0c8517cc2e3e7cede276
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/asm-3.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aspectjrt-1.8.10.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aspectjrt-1.8.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b5e8ce534723e3166d643296d0c6db0b7cb2e6b9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aspectjrt-1.8.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aspectjweaver-1.8.10.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aspectjweaver-1.8.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4dbcaaf5d5b3081a7c26f162b7d88bed95b655c4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/aspectjweaver-1.8.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-1.8.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-1.8.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..23b3958b4e05279805c6a60723034a0bf421c4aa
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-1.8.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-ipc-1.8.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-ipc-1.8.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..015579e10c07840c440565005e99f44b7491fea7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-ipc-1.8.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-mapred-1.8.2-hadoop2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-mapred-1.8.2-hadoop2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..89307c14b1101b9656dff59c3a0ad1a4ff6b4712
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/avro-mapred-1.8.2-hadoop2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/bonecp-0.8.0.RELEASE.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/bonecp-0.8.0.RELEASE.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d47104f28cbe4716bb95fea9db9801a0221bdf9d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/bonecp-0.8.0.RELEASE.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/caffeine-2.6.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/caffeine-2.6.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f2f724bf7dc2822866aac2a118bbaa2f7d3ef659
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/caffeine-2.6.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-avatica-1.2.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-avatica-1.2.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..04891f4778e791bbd4ebde28b3c946900bf8a1d6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-avatica-1.2.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-core-1.2.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-core-1.2.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2ab1f0c5ea212c6c23d7d9e36929931d06277c6d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-core-1.2.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-linq4j-1.2.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-linq4j-1.2.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..59f1232125cbd483467b1e978e610357671b2525
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/calcite-linq4j-1.2.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cglib-2.2.1-v20090111.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cglib-2.2.1-v20090111.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cfd5ae7273729999ca32ba6bfa761f522cce6dbc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cglib-2.2.1-v20090111.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/checker-qual-2.5.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/checker-qual-2.5.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ae4e7f1f9e6974625307843800c95c447f2f53a4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/checker-qual-2.5.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/chill-java-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/chill-java-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f9bda36f5f443c9738f419f23f8f241d02d3c568
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/chill-java-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/chill_2.11-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/chill_2.11-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6b321596d48b799b7a982acbc27733e8d9d651d7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/chill_2.11-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/classmate-1.3.4.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/classmate-1.3.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5be6d9961f53680ce6753a22b061cc373320d020
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/classmate-1.3.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cmw-data-2.0.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cmw-data-2.0.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..daaba5ffe9363eb8e421cdf6970f15609c7aaf77
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cmw-data-2.0.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cmw-datax-1.0.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cmw-datax-1.0.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c3bb093d35177456c3496ffac625c13e686f3516
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/cmw-datax-1.0.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-cli-1.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-cli-1.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ce4b9fffe40c41669797cd806ac989471b2acd84
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-cli-1.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-codec-1.11.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-codec-1.11.jar
new file mode 100644
index 0000000000000000000000000000000000000000..22451206dd591944cca1f25a4be90fa47f67c585
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-codec-1.11.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-collections-3.2.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-collections-3.2.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..fa5df82a63062c040eefcef947827c4ea4ae9e6b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-collections-3.2.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-compiler-3.0.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-compiler-3.0.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2866a666252c82ae59e9d355610702aad2344dbb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-compiler-3.0.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-compress-1.8.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-compress-1.8.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..66b0a56bd8b24478ddeb2d6eb2824280de1c1a9d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-compress-1.8.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-configuration-1.10.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-configuration-1.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..792243660467436164d597b0f6730f78e5b045fb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-configuration-1.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-crypto-1.0.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-crypto-1.0.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..847507d2d71441776a409968ac842af71c8c1fd0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-crypto-1.0.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-dbcp-1.4.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-dbcp-1.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c4c1c4f286a464227a55bcd3577d01d107e7b192
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-dbcp-1.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-el-1.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-el-1.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..608ed796ca34c5acc14cb3cf3cc4deee4ea86180
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-el-1.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-httpclient-3.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-httpclient-3.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7c59774aed4f5dd08778489aaad565690ff7c132
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-httpclient-3.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-io-2.6.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-io-2.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..00556b119d45dd85a3c3073b1826916c3c60b9c4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-io-2.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-lang-2.6.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-lang-2.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..98467d3a653ebad776ffa3542efeb9732fe0b482
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-lang-2.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-lang3-3.8.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-lang3-3.8.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2c65ce67d5c2b746e0583e4879c35ed0751b505e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-lang3-3.8.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-logging-1.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-logging-1.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..93a3b9f6db406c84e270e19b9a5e70f2e27ca513
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-logging-1.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-math-2.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-math-2.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b29a39c54e1e85e11ebfba3ef71390dd7aae875f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-math-2.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-math3-3.6.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-math3-3.6.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0ff582cfcb687e5c9608457628f78e9a782be932
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-math3-3.6.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-net-3.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-net-3.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b75f1a51cc60108be7dd7e9d0e8a6fbbd3856203
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-net-3.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-pool-1.5.4.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-pool-1.5.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..43edf996359706eccd90cc01311ce852c87cb6fb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/commons-pool-1.5.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/compress-lzf-1.0.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/compress-lzf-1.0.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a121eb487b24067ef5acb11f52447ca4f8ae2d7a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/compress-lzf-1.0.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/config-1.3.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/config-1.3.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a21f42bbf02fbdbe0f733a25d63ff1656cda7ae4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/config-1.3.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-client-2.13.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-client-2.13.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7c01fbe81e30a7b01bc7e287f2b23e9c2cf27f01
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-client-2.13.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-framework-2.13.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-framework-2.13.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..46cada0777e09937b01d1ac29261b1085de79a62
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-framework-2.13.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-recipes-2.13.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-recipes-2.13.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..caf7b87491c74408c69a2ecf61e99d73e58af3f4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/curator-recipes-2.13.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-api-jdo-3.2.6.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-api-jdo-3.2.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8c98aca1ed6dd68246e4833018127a5751cdc5f8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-api-jdo-3.2.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-core-3.2.10.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-core-3.2.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7078bce0682f3234dd8a80aae8c71d48d7178eb0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-core-3.2.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-rdbms-3.2.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-rdbms-3.2.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a23c982f501731f272f39a5d3f806c2d1b3442ff
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/datanucleus-rdbms-3.2.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/derby-10.12.1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/derby-10.12.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5841555f8cde25c26923b226881816a70475d998
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/derby-10.12.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/disruptor-3.3.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/disruptor-3.3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6f88adacd6d195ce984f160e738cf2e017d87b31
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/disruptor-3.3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/eigenbase-properties-1.1.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/eigenbase-properties-1.1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..786c6c6a2bdddf3819a40cd97f4a819091d7ccf6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/eigenbase-properties-1.1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/error_prone_annotations-2.2.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/error_prone_annotations-2.2.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c8e27b53b00b11e50116c283e8aab440a13a1f9a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/error_prone_annotations-2.2.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/failureaccess-1.0.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/failureaccess-1.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9b56dc751c1cc7dff75ed80ccbb45f027058e8ce
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/failureaccess-1.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-core-9.5.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-core-9.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8e7da41bc0ad721e8d538147be63e36d9cee634b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-core-9.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-httpclient-9.5.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-httpclient-9.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c67f8150e1ba2bc72b02d2d2307390e6a05d3d5b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-httpclient-9.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-jackson-9.5.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-jackson-9.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d3c00018785402e06bb5c3c6983c866e7fc418a5
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-jackson-9.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-ribbon-9.5.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-ribbon-9.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4ca2672a3430ce8f0e3881eda4c3e8291a21b945
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-ribbon-9.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-slf4j-9.5.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-slf4j-9.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5cd7e69ac29bed766a9c0c0065f950da86d0a58f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/feign-slf4j-9.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/findbugs-annotations-1.3.9-1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/findbugs-annotations-1.3.9-1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f8551a276a1de26605d4f9538baa06d140f83c9a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/findbugs-annotations-1.3.9-1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/flatbuffers-1.2.0-3f79e055.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/flatbuffers-1.2.0-3f79e055.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e0d2db46405e58561cf67e0b30b7f5abb19f3016
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/flatbuffers-1.2.0-3f79e055.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/gson-2.8.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/gson-2.8.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0d5baf3fa775220b6c8dfdc5376d3a28f8e7f725
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/gson-2.8.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guava-27.0.1-jre.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guava-27.0.1-jre.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f164ce722607ae699598668312663c62e5ee4803
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guava-27.0.1-jre.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guice-3.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guice-3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f313e2b089d9e1b9d2c6e578ccbc4d8b6eabfd73
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guice-3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guice-servlet-3.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guice-servlet-3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bdc66142e7c5554f83aaf265a7f30f2a0173b82a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/guice-servlet-3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-annotations-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-annotations-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..87ccddc35269600e53d5b523a3e44aed63b8a6e2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-annotations-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-auth-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-auth-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..569b88c2e3329b6fed921789583fa2c1e635086d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-auth-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-client-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-client-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..20049c089d33e826f46f62436a209060d7beb655
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-client-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c8a37566fda49f863596e708be66a171296bb9f2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-hdfs-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-hdfs-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e6fdb444cd1efa77a4a3895f1154aa28c0b4d6f1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-hdfs-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-app-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-app-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3aa28d6d213c6d492bfa83b648f045273eaf9bd7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-app-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..38df87459bac5516f58329caabcce3ff38b9f5a4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-core-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-core-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b99f85fbdeeea23a6068ef5d18b85f336fabb0c1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-core-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-jobclient-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-jobclient-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5c60ecbcbe7e441cb6ae04d6d2ca98c1a71ee611
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-jobclient-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-shuffle-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-shuffle-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..755f7ff74eccc751a81216f9cad6892782fb8b07
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-mapreduce-client-shuffle-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-api-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-api-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3fbbba509e1f50326b5fad510e4b6364e24cf8a0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-api-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-client-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-client-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7c7ad86c9ef481f7b813a1f570c0bdf16812de8c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-client-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ff0d84c79a4e3864b232b7b3aac2c6784253dabd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-common-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-common-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e686b71f401922890a4b0ca8d517de6d7fed199c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-common-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-nodemanager-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-nodemanager-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0f6539a6d02050a749faef6c68982089a6986cbe
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-nodemanager-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-web-proxy-2.7.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-web-proxy-2.7.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a54d91174b2ff022779427e005a7a43c3d77a972
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hadoop-yarn-server-web-proxy-2.7.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hamcrest-core-1.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hamcrest-core-1.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9d5fe16e3dd37ebe79a36f61f5d0e1a69a653a8a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hamcrest-core-1.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-annotations-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-annotations-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5eb8bf9212a03b457b1a5ef1bb4cefdfa3fa9685
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-annotations-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-client-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-client-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b01a7e930daeb872db418b21d1b9c272c5fe273b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-client-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-common-1.4.9-tests.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-common-1.4.9-tests.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b09d12cd57a96f5304ec022db33291bf2a7b18fd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-common-1.4.9-tests.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-common-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-common-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f61497cfd8e03119d718d6ad0d931a272a35413a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-common-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-hadoop-compat-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-hadoop-compat-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4a13f3b947d607986a838970076174d5f7274e50
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-hadoop-compat-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-hadoop2-compat-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-hadoop2-compat-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..46a68cfd0fd748eda1e32139b49639d6f02828bf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-hadoop2-compat-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-metrics-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-metrics-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..088c12885796ac0395009a7665595c305fe1a102
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-metrics-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-metrics-api-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-metrics-api-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c74f9c3e904fa6d2e07c1ad22bf612b6808c2db8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-metrics-api-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-prefix-tree-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-prefix-tree-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..fcaa4386c601efdfc160e85895e55399a3e3ffcd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-prefix-tree-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-procedure-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-procedure-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c5d4028ba306e04156054b9b9711937af6e2c443
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-procedure-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-protocol-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-protocol-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..724143db3fc12f566fd192cd07c7c943727c7cd7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-protocol-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-server-1.4.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-server-1.4.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..df8d44c27bd6001c71e88486b0f80354ffa65bc8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hbase-server-1.4.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hibernate-validator-6.0.15.Final.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hibernate-validator-6.0.15.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d4c09fe9487f23a4f1f2d262a4475ce648e03f88
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hibernate-validator-6.0.15.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hive-exec-1.2.1.spark2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hive-exec-1.2.1.spark2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4e98a64d44d92492726750033fc3f210c842116e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hive-exec-1.2.1.spark2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hive-metastore-1.2.1.spark2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hive-metastore-1.2.1.spark2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9ef0392c1211021f08fc4615a1cff8df9b6e0dea
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hive-metastore-1.2.1.spark2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-api-2.4.0-b31.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-api-2.4.0-b31.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6a70ef3c18d7401c13ce49938f65b14e40e2f923
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-api-2.4.0-b31.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-locator-2.4.0-b31.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-locator-2.4.0-b31.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1f38d250d149c46ecfc92b43ed01760e4a384b49
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-locator-2.4.0-b31.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-utils-2.4.0-b31.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-utils-2.4.0-b31.jar
new file mode 100644
index 0000000000000000000000000000000000000000..211ce37b11b9341e99d8e8d28a725cb67beed67f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hk2-utils-2.4.0-b31.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hppc-0.7.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hppc-0.7.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..54dea63ae1382d49d67c7f0333dc6f3d0ca1f0fc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/hppc-0.7.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/htrace-core-3.1.0-incubating.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/htrace-core-3.1.0-incubating.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6f03925d534ad56b775dc2af8791d5878141d3f5
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/htrace-core-3.1.0-incubating.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/httpclient-4.5.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/httpclient-4.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8af45610819f7afb6c72f37ca3cc20c029b84810
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/httpclient-4.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/httpcore-4.4.6.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/httpcore-4.4.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..16ed0d160d7deed898d2f65f7c970bc2ccf6d97f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/httpcore-4.4.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ivy-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ivy-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..14ff88e260188705f55608e6386ce3a1408beaeb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ivy-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/j2objc-annotations-1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/j2objc-annotations-1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d9b2599317893e07bd7db53bfb2501d4a53c454d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/j2objc-annotations-1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-annotations-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-annotations-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4d9f42153781c16f2b5abc6ecd1495c4955966d9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-annotations-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-core-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-core-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..362f1f393e6fb9feba0fe1fa171811a80f8f83d6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-core-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-core-asl-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-core-asl-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bb4fe1da1186505a254611ad5a7c49e9f64a3ee1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-core-asl-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-databind-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-databind-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2d8687b5d79cca6ea54cda3471dce3b14384794b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-databind-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-dataformat-yaml-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-dataformat-yaml-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6c403e99a43effe786bf3091c87c513c549db417
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-dataformat-yaml-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-datatype-jdk8-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-datatype-jdk8-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6367d1f4efaf6d13e66aa2df84e795333bd33d6c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-datatype-jdk8-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-datatype-jsr310-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-datatype-jsr310-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..09bc9d59863ae678e5d95b830c2756af80c0f59b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-datatype-jsr310-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-jaxrs-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-jaxrs-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b85f45cf407a3727f2b9767f226d27a79a46462c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-jaxrs-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-mapper-asl-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-mapper-asl-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0f2073fc7eaf3ac5ffd02928cf9272b558e42e81
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-mapper-asl-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-parameter-names-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-parameter-names-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..341de941f508832107f382648e9d4b74362edbbe
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-parameter-names-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-paranamer-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-paranamer-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..63debc93f1874446714d073c42b5aeac746eccd7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-paranamer-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-scala_2.11-2.9.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-scala_2.11-2.9.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6b3484303cd38207c9d2bd42632093170ca06185
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-module-scala_2.11-2.9.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-xc-1.9.13.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-xc-1.9.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d5842e9c3af4cf231adc4b56108b3867e1920111
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jackson-xc-1.9.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jamon-runtime-2.4.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jamon-runtime-2.4.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..219f88d33ebe4c4f008521f4ed0f19a5e8a14675
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jamon-runtime-2.4.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/janino-3.0.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/janino-3.0.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..761e0c2a0a7b3a7999fecc99df79b8e321c6869a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/janino-3.0.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jasper-compiler-5.5.23.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jasper-compiler-5.5.23.jar
new file mode 100644
index 0000000000000000000000000000000000000000..170efa0281fb1a18084e1a805b20d002c92ee9ed
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jasper-compiler-5.5.23.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jasper-runtime-5.5.23.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jasper-runtime-5.5.23.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a3208c9a4e54e5e8682dcbe771ca0c282c9341a7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jasper-runtime-5.5.23.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/java-xmlbuilder-0.4.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/java-xmlbuilder-0.4.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e5acf928186e7dff73a85d88cf5f268f02d34ca4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/java-xmlbuilder-0.4.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javassist-3.18.1-GA.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javassist-3.18.1-GA.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d5f19ac596ae64347a4fb159c81273143a25bdf4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javassist-3.18.1-GA.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.annotation-api-1.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.annotation-api-1.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9ab39ffa4b57f9d20bd37acfbf6011902596a078
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.annotation-api-1.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.inject-1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.inject-1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b2a9d0bf7bd463a6361898b36f010edd05c0cf6e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.inject-1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.inject-2.4.0-b31.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.inject-2.4.0-b31.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1f60d98a76f69b67cd9031fe0a4d97a7b5a85832
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.inject-2.4.0-b31.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.servlet-api-4.0.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.servlet-api-4.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..844ec7f17a6c8469282e2ad66952593e120b7805
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.servlet-api-4.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.ws.rs-api-2.0.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.ws.rs-api-2.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7eb68b4a0abb804e1d30b68ae4ba74a1eb14d38b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javax.ws.rs-api-2.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javolution-5.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javolution-5.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7ac80ea9ea923b198712b8c00bb7b691ea2d7aee
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/javolution-5.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jaxb-api-2.2.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jaxb-api-2.2.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..31e5fa062f64ee9867760c8b00f57245b0a32017
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jaxb-api-2.2.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jaxb-impl-2.2.3-1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jaxb-impl-2.2.3-1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..eeaf6604c407b6391ba1184dd7d5e2f6b6bc76f9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jaxb-impl-2.2.3-1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jboss-logging-3.3.2.Final.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jboss-logging-3.3.2.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..67cde710fc8f91eb3deb28ed2bdb025753761ca0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jboss-logging-3.3.2.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jcl-over-slf4j-1.7.16.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jcl-over-slf4j-1.7.16.jar
new file mode 100644
index 0000000000000000000000000000000000000000..63ac422d00ff29c7cf337b978603b2724cb90d1a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jcl-over-slf4j-1.7.16.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jcodings-1.0.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jcodings-1.0.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1c377c6ec5bb21b02e55483f304fd1f38b5bff53
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jcodings-1.0.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jdo-api-3.0.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jdo-api-3.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6318d329ca9fc71037af1c59d1c8b4723f9c2795
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jdo-api-3.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-client-1.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-client-1.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d811f13e8e0843c402f40fd7ca272c47a4e250f9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-client-1.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-client-2.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-client-2.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2211483dbc75d0512be53785129c34c39877a2a1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-client-2.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-common-2.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-common-2.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..16d2ab7cdb07f18f28221d941e2204c5cf582906
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-common-2.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-container-servlet-2.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-container-servlet-2.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..079e1eeb61b77e1d9ed11317352aaad08a91d40f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-container-servlet-2.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-container-servlet-core-2.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-container-servlet-core-2.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4aee044954d11a1a400677b6f9d069228334e892
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-container-servlet-core-2.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-core-1.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-core-1.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..548dd8882149dde4eed767a5bdc409070a804da4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-core-1.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-guava-2.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-guava-2.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3c36eb15854040bf6c1b7ec929c06e815ec266e1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-guava-2.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-guice-1.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-guice-1.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cb46c94cdd7ae8ab8a8d02cac2cb98a513407f20
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-guice-1.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-json-1.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-json-1.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b1a4ce5eac1b4ff4aa7777718de9798068396c40
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-json-1.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-media-jaxb-2.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-media-jaxb-2.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a0118b4714e5490e371d424ce3db1b017816efc6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-media-jaxb-2.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-server-1.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-server-1.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ae0117c54d2aa3f0f12d218c58462c7352da4db1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-server-1.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-server-2.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-server-2.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..26ff3e221f962d527f8c650f6d9b0afe1855805b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jersey-server-2.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jets3t-0.9.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jets3t-0.9.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..6870900edf06a75df62b7f0565d4e8f8f0d9ad77
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jets3t-0.9.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jettison-1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jettison-1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e4e9c8c3c414313a77b7c04463372c68af5a7a1d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jettison-1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-6.1.26.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-6.1.26.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2cbe07aeefa47ad6321addf0e75e010858f72fba
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-6.1.26.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-sslengine-6.1.26.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-sslengine-6.1.26.jar
new file mode 100644
index 0000000000000000000000000000000000000000..051efb59d6b215888b9630336bb1fa4f705d3b4f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-sslengine-6.1.26.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-util-6.1.26.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-util-6.1.26.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cd237528add68b792f2e6e030344f27d9d07ec31
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jetty-util-6.1.26.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jline-0.9.94.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jline-0.9.94.jar
new file mode 100644
index 0000000000000000000000000000000000000000..dede3721dee71225c942ebcfa4e38250e1e893c2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jline-0.9.94.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/joda-time-2.9.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/joda-time-2.9.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b3080c4353fd54b8d0477003452d8782a95272be
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/joda-time-2.9.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jodd-core-3.5.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jodd-core-3.5.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b71c139730400f0d6e746ec9cfba9a78ea2125b3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jodd-core-3.5.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/joni-2.1.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/joni-2.1.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e953719ee62f21e9dbb1a1b1691c591d5e20d456
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/joni-2.1.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsch-0.1.54.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsch-0.1.54.jar
new file mode 100644
index 0000000000000000000000000000000000000000..426332e73f75c8b982cc8d410a21684f48a17a1c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsch-0.1.54.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-ast_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-ast_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..056bc424a153ff8084e70e76441fde5eb0208345
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-ast_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-core_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-core_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4896f16c46853fa8a50b1fbad40f3d0be95a79d8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-core_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-jackson_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-jackson_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f1d76561126ac61acc6466c8312340f61b14fb6d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-jackson_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-native_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-native_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ed52b338a2f9a9b3c2bf75cd6a277750649d22a1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-native_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-scalap_2.11-3.5.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-scalap_2.11-3.5.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a0c0804ba60b1ca64ab665fc3bf0dca0cd2ee0cc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/json4s-scalap_2.11-3.5.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsp-api-2.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsp-api-2.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c0195afa5006e13cb9edc71ef64f13f4f70ff6bc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsp-api-2.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsr305-3.0.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsr305-3.0.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..59222d9ca5e5654f5dcf6680783d0e265baa848f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jsr305-3.0.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jta-1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jta-1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..7736ec9f08c2177f0068923020138bcd57d14f41
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jta-1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jul-to-slf4j-1.7.16.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jul-to-slf4j-1.7.16.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d22a014f22e5ac5d2c1c22d63e823db1c8f3d285
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/jul-to-slf4j-1.7.16.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/junit-4.12.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/junit-4.12.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3a7fc266c3e32283a2b21fe12166ebdcc33a1da1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/junit-4.12.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/kryo-shaded-4.0.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/kryo-shaded-4.0.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a25fbdc405a5de5327ec927bd380cedffbec9046
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/kryo-shaded-4.0.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/leveldbjni-all-1.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/leveldbjni-all-1.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..cbda6e40a261f717f2a34470a9564f0bd39e5de8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/leveldbjni-all-1.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/libfb303-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/libfb303-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..07c711b738e2292420a7a244ef089d1ab7fbb7cc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/libfb303-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/libthrift-0.9.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/libthrift-0.9.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f9221a9f958f2ac140e33a71abaf1d31adb6a3f7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/libthrift-0.9.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar
new file mode 100644
index 0000000000000000000000000000000000000000..45832c052a10428a5b18613470458e1a2e0e941a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/lz4-java-1.4.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/lz4-java-1.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..301908be2c3a97e882ef553f0442c2e0854eb84d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/lz4-java-1.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-core-2.2.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-core-2.2.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0f6d1cb0ecbd37f673bc87e109cacc535e23350a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-core-2.2.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-core-4.0.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-core-4.0.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..51a23f4a5c9d7528e91972cecf1dca36984763f1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-core-4.0.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-graphite-4.0.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-graphite-4.0.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a153ce07912c72b7c3d46531d19c6f170334e562
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-graphite-4.0.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-json-4.0.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-json-4.0.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..83d256e27e2fd414b85b692f37e745c3442567da
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-json-4.0.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-jvm-4.0.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-jvm-4.0.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bb296705f7b6d13a270b953f44a4eacec485efbd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/metrics-jvm-4.0.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/minlog-1.3.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/minlog-1.3.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ec67b63e569bf974378f9a45149882dd9ba16f53
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/minlog-1.3.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netflix-commons-util-0.1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netflix-commons-util-0.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..257ac836e7da91e2c97ced6c7fc8f3ac05c8ef29
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netflix-commons-util-0.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netflix-statistics-0.1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netflix-statistics-0.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..e8f6d9531b047f0ec995173138d8b9be2a635ffe
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netflix-statistics-0.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netty-3.9.9.Final.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netty-3.9.9.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..25b1849660473f66f332298232485e9b11dbd1e8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netty-3.9.9.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netty-all-4.1.30.Final.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netty-all-4.1.30.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..eb725425dfda20bca4bd5529f1f00eab3ec0c76b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/netty-all-4.1.30.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-common-0.1.164.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-common-0.1.164.jar
new file mode 100644
index 0000000000000000000000000000000000000000..179c5cb11747e204fc109c2f1e88c88652e18a9b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-common-0.1.164.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-data-access-0.1.164.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-data-access-0.1.164.jar
new file mode 100644
index 0000000000000000000000000000000000000000..39323f8a43b26ffee67564a1d951578738a010a8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-data-access-0.1.164.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-extraction-starter-0.1.164.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-extraction-starter-0.1.164.jar
new file mode 100644
index 0000000000000000000000000000000000000000..9cf026e06886d47620dab6452d74706886d81bdd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-extraction-starter-0.1.164.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-service-client-0.1.164.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-service-client-0.1.164.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c39ae8de225bf1301c27bcd3c03bde0eb98c83f2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/nxcals-service-client-0.1.164.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/objenesis-2.5.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/objenesis-2.5.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..31b245b99c47556de776978c44e0de8ad1553aa7
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/objenesis-2.5.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/opencsv-2.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/opencsv-2.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..01f82ca733e3dd9974d5afcf4e368b01941a7710
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/opencsv-2.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-core-1.5.2-nohive.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-core-1.5.2-nohive.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a704608327c61d52221c1e7ecff06f45af8cce65
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-core-1.5.2-nohive.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-mapreduce-1.5.2-nohive.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-mapreduce-1.5.2-nohive.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4bb15263efde782b01486d635230826cd19b0856
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-mapreduce-1.5.2-nohive.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-shims-1.5.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-shims-1.5.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3685a1a95c14caa39d4b24b364545fdd4d99e324
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/orc-shims-1.5.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/oro-2.0.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/oro-2.0.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..23488d2600f5f4784c0ba2be5baa4c41f396f616
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/oro-2.0.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/osgi-resource-locator-1.0.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/osgi-resource-locator-1.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..bd6aa17ebbcbb8c940c92d8f945919f5d13c1f86
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/osgi-resource-locator-1.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/paranamer-2.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/paranamer-2.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0bf659b93e74bb53003bc521f59c855d0ab8e692
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/paranamer-2.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-column-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-column-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ac6864a3d0b8f58a343fb73d866ad257da021258
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-column-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-common-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-common-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ddfbad6cd2ca790b0fa9a93d7e34831ea162238e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-common-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-encoding-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-encoding-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..19d2b6487c8c6c7586b4ccdee170bea28672a936
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-encoding-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-format-2.4.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-format-2.4.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..5ee2192f025170f7435a40a0154490f56a3da12c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-format-2.4.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-hadoop-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-hadoop-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..99a7d71995642cb6b62841c1fc6ef807a0b55bea
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-hadoop-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-hadoop-bundle-1.6.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-hadoop-bundle-1.6.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8ec829df64b7ecf149e19e02d31d8d19d198ef12
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-hadoop-bundle-1.6.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-jackson-1.10.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-jackson-1.10.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..11f465a4674848c9b5a45258cad6a16e7d444699
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/parquet-jackson-1.10.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/protobuf-java-2.5.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/protobuf-java-2.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..4c4e686a497c4d4944caf39d830c1271db5d6fee
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/protobuf-java-2.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/py4j-0.10.7.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/py4j-0.10.7.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b7327d70a0d382463c7291d032f15b8ea8331475
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/py4j-0.10.7.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/pyrolite-4.13.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/pyrolite-4.13.jar
new file mode 100644
index 0000000000000000000000000000000000000000..af7bd6cc9b6ec754289112ef1f8b6e1877feacdf
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/pyrolite-4.13.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ribbon-core-2.1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ribbon-core-2.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ccc04d127676dc2679ad8e93aad97e43050e4bf2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ribbon-core-2.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ribbon-loadbalancer-2.1.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ribbon-loadbalancer-2.1.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..31a1ddf104055c231f8cdbfad4133c0ef3589280
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/ribbon-loadbalancer-2.1.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/rxjava-1.0.9.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/rxjava-1.0.9.jar
new file mode 100644
index 0000000000000000000000000000000000000000..67acbc4d32e60f172ea097e37097ea0ec603767f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/rxjava-1.0.9.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scaffeine_2.11-2.5.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scaffeine_2.11-2.5.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..96e94e6a5f7de01b07ca24b19cfec35caf3bf9de
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scaffeine_2.11-2.5.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-java8-compat_2.11-0.8.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-java8-compat_2.11-0.8.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..1f18fd4102ce75c292b4e249ce909423563ead7d
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-java8-compat_2.11-0.8.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-library-2.11.12.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-library-2.11.12.jar
new file mode 100644
index 0000000000000000000000000000000000000000..a8bcc617a78ca27a92a0f8eb195d4d8631b17d24
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-library-2.11.12.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-parser-combinators_2.11-1.1.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-parser-combinators_2.11-1.1.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f76440dddead49b2135e0c353c03c4f816fe7749
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-parser-combinators_2.11-1.1.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-reflect-2.11.12.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-reflect-2.11.12.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f987cf84d0aceed288cb30afd15fc69ef8506059
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-reflect-2.11.12.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-xml_2.11-1.0.6.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-xml_2.11-1.0.6.jar
new file mode 100644
index 0000000000000000000000000000000000000000..928c254c7cd9c37949f9724e022b020fc787c943
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/scala-xml_2.11-1.0.6.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servlet-api-2.5-20081211.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servlet-api-2.5-20081211.jar
new file mode 100644
index 0000000000000000000000000000000000000000..b0537c4dbdfbaeaf91078d79a32d01110f8f1de4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servlet-api-2.5-20081211.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servlet-api-2.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servlet-api-2.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..fb5249346847105c26d30da1048d8e2c364e7d6f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servlet-api-2.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servo-core-0.9.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servo-core-0.9.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d3b38b23fd4b152bfe059179f00e1b9bb074b028
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servo-core-0.9.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servo-internal-0.9.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servo-internal-0.9.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f51fba4ac7c8e464d17ba8422dd464e1ad753268
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/servo-internal-0.9.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/shc-core-1.1.1-2.1-s_2.11-NXCALS.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/shc-core-1.1.1-2.1-s_2.11-NXCALS.jar
new file mode 100644
index 0000000000000000000000000000000000000000..030886fedf4849e00e54873c29c87c05c008361b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/shc-core-1.1.1-2.1-s_2.11-NXCALS.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/slf4j-api-1.7.22.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/slf4j-api-1.7.22.jar
new file mode 100644
index 0000000000000000000000000000000000000000..3c3d95313096239507ee4c5acd56fb7b9fa42a38
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/slf4j-api-1.7.22.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snakeyaml-1.23.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snakeyaml-1.23.jar
new file mode 100644
index 0000000000000000000000000000000000000000..adcef4f72ccc619a9815a55c9fc8a9f0f14a5815
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snakeyaml-1.23.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snappy-0.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snappy-0.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..38cc82527f8f6a19b5d31b92829e775e69301f29
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snappy-0.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snappy-java-1.1.7.2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snappy-java-1.1.7.2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..da8dd434abb6d9774b4d00c238af4d10061028d8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/snappy-java-1.1.7.2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stax-api-1.0-2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stax-api-1.0-2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..015169dc71f06832714d21b67970ff8a7970d5b4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stax-api-1.0-2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stax-api-1.0.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stax-api-1.0.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d9a1665151cf5ba827d15f9bccc35dc77b57cf98
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stax-api-1.0.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stream-2.7.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stream-2.7.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..979ed3eee214a637bd6fe7ea4730547b71f28ae4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stream-2.7.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stringtemplate-3.2.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stringtemplate-3.2.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..d0e11b7193734c5b2784127951a1a91372f2ab94
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/stringtemplate-3.2.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/univocity-parsers-2.7.3.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/univocity-parsers-2.7.3.jar
new file mode 100644
index 0000000000000000000000000000000000000000..894facbdec98395aa1e509479cbb388f8461fa5e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/univocity-parsers-2.7.3.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/unused-1.0.0.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/unused-1.0.0.jar
new file mode 100644
index 0000000000000000000000000000000000000000..30aff2fbd75b0dd1994a4edb39e0031778450a2f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/unused-1.0.0.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/validation-api-2.0.1.Final.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/validation-api-2.0.1.Final.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2368e10a5323ad6bae79f6cfac89d61e66b15f37
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/validation-api-2.0.1.Final.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xbean-asm6-shaded-4.8.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xbean-asm6-shaded-4.8.jar
new file mode 100644
index 0000000000000000000000000000000000000000..792fe6c24c722f1fc6f2c9d0587f817bf3a8238e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xbean-asm6-shaded-4.8.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xercesImpl-2.9.1.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xercesImpl-2.9.1.jar
new file mode 100644
index 0000000000000000000000000000000000000000..8f762e1a85fc0ac9d83c24b51f520a8765e7c461
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xercesImpl-2.9.1.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xmlenc-0.52.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xmlenc-0.52.jar
new file mode 100644
index 0000000000000000000000000000000000000000..ec568b4c9e05327edd26e76ad24a56023fa7ea08
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xmlenc-0.52.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xz-1.5.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xz-1.5.jar
new file mode 100644
index 0000000000000000000000000000000000000000..2e9599ecfa6eef94200d2ca4275b9bafda773d4a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/xz-1.5.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/zookeeper-3.4.10.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/zookeeper-3.4.10.jar
new file mode 100644
index 0000000000000000000000000000000000000000..0d8b0d31245ae6ca464e6cf043428035d99b027c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/zookeeper-3.4.10.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-jars/zstd-jni-1.3.2-2.jar b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/zstd-jni-1.3.2-2.jar
new file mode 100644
index 0000000000000000000000000000000000000000..84311f75027a269d97c719d887540c960c54eda6
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-jars/zstd-jni-1.3.2-2.jar differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate
new file mode 100644
index 0000000000000000000000000000000000000000..bc4ae33aa6ebfd785b8acb21446d04dc7ea54fcf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate
@@ -0,0 +1,76 @@
+# This file must be used with "source bin/activate" *from bash*
+# you cannot run it directly
+
+deactivate () {
+    # reset old environment variables
+    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
+        PATH="${_OLD_VIRTUAL_PATH:-}"
+        export PATH
+        unset _OLD_VIRTUAL_PATH
+    fi
+    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
+        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
+        export PYTHONHOME
+        unset _OLD_VIRTUAL_PYTHONHOME
+    fi
+
+    # This should detect bash and zsh, which have a hash command that must
+    # be called to get it to forget past commands.  Without forgetting
+    # past commands the $PATH changes we made may not be respected
+    if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
+        hash -r
+    fi
+
+    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
+        PS1="${_OLD_VIRTUAL_PS1:-}"
+        export PS1
+        unset _OLD_VIRTUAL_PS1
+    fi
+
+    unset VIRTUAL_ENV
+    if [ ! "$1" = "nondestructive" ] ; then
+    # Self destruct!
+        unset -f deactivate
+    fi
+}
+
+# unset irrelevant variables
+deactivate nondestructive
+
+VIRTUAL_ENV="/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env"
+export VIRTUAL_ENV
+
+_OLD_VIRTUAL_PATH="$PATH"
+PATH="$VIRTUAL_ENV/bin:$PATH"
+export PATH
+
+# unset PYTHONHOME if set
+# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
+# could use `if (set -u; : $PYTHONHOME) ;` in bash
+if [ -n "${PYTHONHOME:-}" ] ; then
+    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
+    unset PYTHONHOME
+fi
+
+if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
+    _OLD_VIRTUAL_PS1="${PS1:-}"
+    if [ "x(nxcals-python3-env) " != x ] ; then
+	PS1="(nxcals-python3-env) ${PS1:-}"
+    else
+    if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then
+        # special case for Aspen magic directories
+        # see http://www.zetadev.com/software/aspen/
+        PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1"
+    else
+        PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1"
+    fi
+    fi
+    export PS1
+fi
+
+# This should detect bash and zsh, which have a hash command that must
+# be called to get it to forget past commands.  Without forgetting
+# past commands the $PATH changes we made may not be respected
+if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
+    hash -r
+fi
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate.csh b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate.csh
new file mode 100644
index 0000000000000000000000000000000000000000..3c473d53cc20539b8248827a9b0e2bbd4dd37511
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate.csh
@@ -0,0 +1,37 @@
+# This file must be used with "source bin/activate.csh" *from csh*.
+# You cannot run it directly.
+# Created by Davide Di Blasi <davidedb@gmail.com>.
+# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
+
+alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate'
+
+# Unset irrelevant variables.
+deactivate nondestructive
+
+setenv VIRTUAL_ENV "/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env"
+
+set _OLD_VIRTUAL_PATH="$PATH"
+setenv PATH "$VIRTUAL_ENV/bin:$PATH"
+
+
+set _OLD_VIRTUAL_PROMPT="$prompt"
+
+if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
+    if ("nxcals-python3-env" != "") then
+        set env_name = "nxcals-python3-env"
+    else
+        if (`basename "VIRTUAL_ENV"` == "__") then
+            # special case for Aspen magic directories
+            # see http://www.zetadev.com/software/aspen/
+            set env_name = `basename \`dirname "$VIRTUAL_ENV"\``
+        else
+            set env_name = `basename "$VIRTUAL_ENV"`
+        endif
+    endif
+    set prompt = "[$env_name] $prompt"
+    unset env_name
+endif
+
+alias pydoc python -m pydoc
+
+rehash
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate.fish b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate.fish
new file mode 100644
index 0000000000000000000000000000000000000000..b069a6b4c75cfe297e6d6c6212d7bb43046109db
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/activate.fish
@@ -0,0 +1,75 @@
+# This file must be used with ". bin/activate.fish" *from fish* (http://fishshell.org)
+# you cannot run it directly
+
+function deactivate  -d "Exit virtualenv and return to normal shell environment"
+    # reset old environment variables
+    if test -n "$_OLD_VIRTUAL_PATH"
+        set -gx PATH $_OLD_VIRTUAL_PATH
+        set -e _OLD_VIRTUAL_PATH
+    end
+    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
+        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
+        set -e _OLD_VIRTUAL_PYTHONHOME
+    end
+
+    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
+        functions -e fish_prompt
+        set -e _OLD_FISH_PROMPT_OVERRIDE
+        functions -c _old_fish_prompt fish_prompt
+        functions -e _old_fish_prompt
+    end
+
+    set -e VIRTUAL_ENV
+    if test "$argv[1]" != "nondestructive"
+        # Self destruct!
+        functions -e deactivate
+    end
+end
+
+# unset irrelevant variables
+deactivate nondestructive
+
+set -gx VIRTUAL_ENV "/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env"
+
+set -gx _OLD_VIRTUAL_PATH $PATH
+set -gx PATH "$VIRTUAL_ENV/bin" $PATH
+
+# unset PYTHONHOME if set
+if set -q PYTHONHOME
+    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
+    set -e PYTHONHOME
+end
+
+if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
+    # fish uses a function instead of an env var to generate the prompt.
+
+    # save the current fish_prompt function as the function _old_fish_prompt
+    functions -c fish_prompt _old_fish_prompt
+
+    # with the original prompt function renamed, we can override with our own.
+    function fish_prompt
+        # Save the return status of the last command
+        set -l old_status $status
+
+        # Prompt override?
+        if test -n "(nxcals-python3-env) "            
+            printf "%s%s" "(nxcals-python3-env) " (set_color normal)
+        else
+            # ...Otherwise, prepend env
+            set -l _checkbase (basename "$VIRTUAL_ENV")
+            if test $_checkbase = "__"
+                # special case for Aspen magic directories
+                # see http://www.zetadev.com/software/aspen/
+                printf "%s[%s]%s " (set_color -b blue white) (basename (dirname "$VIRTUAL_ENV")) (set_color normal)
+            else
+                printf "%s(%s)%s" (set_color -b blue white) (basename "$VIRTUAL_ENV") (set_color normal)
+            end
+        end
+
+        # Restore the return status of the previous command.
+        echo "exit $old_status" | .
+        _old_fish_prompt
+    end
+
+    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
+end
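Note on the two activate scripts above (csh and fish): they only export VIRTUAL_ENV, prepend "$VIRTUAL_ENV/bin" to PATH and adjust the prompt; the interpreter itself is unchanged. A minimal Python sketch, under the assumption that one of these scripts has been sourced in the current shell, for checking that a script really runs inside this environment:

    import os
    import sys

    def inside_nxcals_env():
        # VIRTUAL_ENV is exported by activate/activate.csh/activate.fish;
        # sys.prefix is checked as a fallback when the bundled bin/python3 is used.
        venv = os.environ.get("VIRTUAL_ENV", "")
        return venv.endswith("nxcals-python3-env") or "nxcals-python3-env" in sys.prefix

    if __name__ == "__main__":
        print("inside nxcals-python3-env:", inside_nxcals_env())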
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/easy_install b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/easy_install
new file mode 100755
index 0000000000000000000000000000000000000000..4485d2c51f5758dcf1486e8fd62edca314819fea
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/easy_install
@@ -0,0 +1,11 @@
+#!/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3
+
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from setuptools.command.easy_install import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/easy_install-3.6 b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/easy_install-3.6
new file mode 100755
index 0000000000000000000000000000000000000000..4485d2c51f5758dcf1486e8fd62edca314819fea
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/easy_install-3.6
@@ -0,0 +1,11 @@
+#!/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3
+
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from setuptools.command.easy_install import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip
new file mode 100755
index 0000000000000000000000000000000000000000..8fdb7e5f0d2d8f00bfd892bd7e90f27111dfe606
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip
@@ -0,0 +1,11 @@
+#!/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3
+
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from pip import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip3 b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip3
new file mode 100755
index 0000000000000000000000000000000000000000..8fdb7e5f0d2d8f00bfd892bd7e90f27111dfe606
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip3
@@ -0,0 +1,11 @@
+#!/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3
+
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from pip import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip3.6 b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip3.6
new file mode 100755
index 0000000000000000000000000000000000000000..8fdb7e5f0d2d8f00bfd892bd7e90f27111dfe606
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/pip3.6
@@ -0,0 +1,11 @@
+#!/root/PycharmProjects/nxdblmbackendpython/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3
+
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from pip import main
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())
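All of the easy_install and pip wrappers above follow the same stub pattern: normalise sys.argv[0] and delegate to the package's main(). The re.sub() only strips an optional Windows-style "-script.py"/"-script.pyw" or ".exe" suffix so that usage messages show the plain command name. A small standalone illustration (the sample names below are made up for the demo, not taken from this diff):

    import re

    def normalize_argv0(argv0):
        # Same expression as in the generated wrapper scripts above.
        return re.sub(r'(-script\.pyw?|\.exe)?$', '', argv0)

    print(normalize_argv0('pip-script.py'))  # -> pip
    print(normalize_argv0('pip.exe'))        # -> pip
    print(normalize_argv0('pip3.6'))         # -> pip3.6 (unchanged)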
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python
new file mode 120000
index 0000000000000000000000000000000000000000..b8a0adbbb97ea11f36eb0c6b2a3c2881e96f8e26
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python
@@ -0,0 +1 @@
+python3
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3 b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3
new file mode 120000
index 0000000000000000000000000000000000000000..ae65fdaa12936b0d7525b090d198249fa7623e66
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/bin/python3
@@ -0,0 +1 @@
+/usr/bin/python3
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/easy_install.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/easy_install.py
new file mode 100644
index 0000000000000000000000000000000000000000..d87e984034b6e6e9eb456ebcb2b3f420c07a48bc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/easy_install.py
@@ -0,0 +1,5 @@
+"""Run the EasyInstall command"""
+
+if __name__ == '__main__':
+    from setuptools.command.easy_install import main
+    main()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/DESCRIPTION.rst b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/DESCRIPTION.rst
new file mode 100644
index 0000000000000000000000000000000000000000..8ef94c438212615ceb0b177da8850d54e67deac0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/DESCRIPTION.rst
@@ -0,0 +1,39 @@
+pip
+===
+
+The `PyPA recommended
+<https://packaging.python.org/en/latest/current/>`_
+tool for installing Python packages.
+
+* `Installation <https://pip.pypa.io/en/stable/installing.html>`_
+* `Documentation <https://pip.pypa.io/>`_
+* `Changelog <https://pip.pypa.io/en/stable/news.html>`_
+* `Github Page <https://github.com/pypa/pip>`_
+* `Issue Tracking <https://github.com/pypa/pip/issues>`_
+* `User mailing list <http://groups.google.com/group/python-virtualenv>`_
+* `Dev mailing list <http://groups.google.com/group/pypa-dev>`_
+* User IRC: #pypa on Freenode.
+* Dev IRC: #pypa-dev on Freenode.
+
+
+.. image:: https://img.shields.io/pypi/v/pip.svg
+   :target: https://pypi.python.org/pypi/pip
+
+.. image:: https://img.shields.io/travis/pypa/pip/master.svg
+   :target: http://travis-ci.org/pypa/pip
+
+.. image:: https://img.shields.io/appveyor/ci/pypa/pip.svg
+   :target: https://ci.appveyor.com/project/pypa/pip/history
+
+.. image:: https://readthedocs.org/projects/pip/badge/?version=stable
+   :target: https://pip.pypa.io/en/stable
+
+Code of Conduct
+---------------
+
+Everyone interacting in the pip project's codebases, issue trackers, chat
+rooms, and mailing lists is expected to follow the `PyPA Code of Conduct`_.
+
+.. _PyPA Code of Conduct: https://www.pypa.io/en/latest/code-of-conduct/
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/INSTALLER b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/INSTALLER
new file mode 100644
index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/METADATA b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/METADATA
new file mode 100644
index 0000000000000000000000000000000000000000..291a4a0e41a12a06b4abe30bc26a343757b425f0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/METADATA
@@ -0,0 +1,70 @@
+Metadata-Version: 2.0
+Name: pip
+Version: 9.0.1
+Summary: The PyPA recommended tool for installing Python packages.
+Home-page: https://pip.pypa.io/
+Author: The pip developers
+Author-email: python-virtualenv@groups.google.com
+License: MIT
+Keywords: easy_install distutils setuptools egg virtualenv
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Topic :: Software Development :: Build Tools
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Requires-Python: >=2.6,!=3.0.*,!=3.1.*,!=3.2.*
+Provides-Extra: testing
+Requires-Dist: mock; extra == 'testing'
+Requires-Dist: pretend; extra == 'testing'
+Requires-Dist: pytest; extra == 'testing'
+Requires-Dist: scripttest (>=1.3); extra == 'testing'
+Requires-Dist: virtualenv (>=1.10); extra == 'testing'
+
+pip
+===
+
+The `PyPA recommended
+<https://packaging.python.org/en/latest/current/>`_
+tool for installing Python packages.
+
+* `Installation <https://pip.pypa.io/en/stable/installing.html>`_
+* `Documentation <https://pip.pypa.io/>`_
+* `Changelog <https://pip.pypa.io/en/stable/news.html>`_
+* `Github Page <https://github.com/pypa/pip>`_
+* `Issue Tracking <https://github.com/pypa/pip/issues>`_
+* `User mailing list <http://groups.google.com/group/python-virtualenv>`_
+* `Dev mailing list <http://groups.google.com/group/pypa-dev>`_
+* User IRC: #pypa on Freenode.
+* Dev IRC: #pypa-dev on Freenode.
+
+
+.. image:: https://img.shields.io/pypi/v/pip.svg
+   :target: https://pypi.python.org/pypi/pip
+
+.. image:: https://img.shields.io/travis/pypa/pip/master.svg
+   :target: http://travis-ci.org/pypa/pip
+
+.. image:: https://img.shields.io/appveyor/ci/pypa/pip.svg
+   :target: https://ci.appveyor.com/project/pypa/pip/history
+
+.. image:: https://readthedocs.org/projects/pip/badge/?version=stable
+   :target: https://pip.pypa.io/en/stable
+
+Code of Conduct
+---------------
+
+Everyone interacting in the pip project's codebases, issue trackers, chat
+rooms, and mailing lists is expected to follow the `PyPA Code of Conduct`_.
+
+.. _PyPA Code of Conduct: https://www.pypa.io/en/latest/code-of-conduct/
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/RECORD b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/RECORD
new file mode 100644
index 0000000000000000000000000000000000000000..6b348c5f3f5eeeb107f21c5c0fefe914b854c2cd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/RECORD
@@ -0,0 +1,123 @@
+pip/__init__.py,sha256=ds3YAAeZwhX6e8yfxjLCxPlOb37Bh42ew09XEPJjQGE,11537
+pip/__main__.py,sha256=V6Kh-IEDEFpt1cahRE6MajUF_14qJR_Qsvn4MjWZXzE,584
+pip/basecommand.py,sha256=TTlmZesQ4Vuxcto2KqwZGmgmN5ioHEl_DeFev9ie_SA,11910
+pip/baseparser.py,sha256=AKMOeF3fTrRroiv0DmTQbdiLW0DQux2KqGC_dJJB9d0,10465
+pip/cmdoptions.py,sha256=pRptFz05iFEfSW4Flg3x1_P92sYlFvq7elhnwujikNY,16473
+pip/download.py,sha256=rA0wbmqC2n9ejX481YJSidmKgQqQDjdaxkHkHlAN68k,32171
+pip/exceptions.py,sha256=BvqH-Jw3tP2b-2IJ2kjrQemOAPMqKrQMLRIZHZQpJXk,8121
+pip/index.py,sha256=oVFx3dW7KTFL3Ecc1t1Kz04C6ZCSk4T1gnzB7j332hg,39952
+pip/locations.py,sha256=9rJRlgonC6QC2zGDIn_7mXaoZ9_tF_IHM2BQhWVRgbo,5626
+pip/pep425tags.py,sha256=q3kec4f6NHszuGYIhGIbVvs896D06uJAnKFgJ_wce44,10980
+pip/status_codes.py,sha256=F6uDG6Gj7RNKQJUDnd87QKqI16Us-t-B0wPF_4QMpWc,156
+pip/wheel.py,sha256=QSWmGs2ui-n4UMWm0JUY6aMCcwNKungVzbWsxI9KlJQ,32010
+pip/_vendor/__init__.py,sha256=L-0x9jj0HSZen1Fm2U0GUbxfjfwQPIXc4XJ4IAxy8D8,4804
+pip/commands/__init__.py,sha256=2Uq3HCdjchJD9FL1LB7rd5v6UySVAVizX0W3EX3hIoE,2244
+pip/commands/check.py,sha256=-A7GI1-WZBh9a4P6UoH_aR-J7I8Lz8ly7m3wnCjmevs,1382
+pip/commands/completion.py,sha256=kkPgVX7SUcJ_8Juw5GkgWaxHN9_45wmAr9mGs1zXEEs,2453
+pip/commands/download.py,sha256=8RuuPmSYgAq3iEDTqZY_1PDXRqREdUULHNjWJeAv7Mo,7810
+pip/commands/freeze.py,sha256=h6-yFMpjCjbNj8-gOm5UuoF6cg14N5rPV4TCi3_CeuI,2835
+pip/commands/hash.py,sha256=MCt4jEFyfoce0lVeNEz1x49uaTY-VDkKiBvvxrVcHkw,1597
+pip/commands/help.py,sha256=84HWkEdnGP_AEBHnn8gJP2Te0XTXRKFoXqXopbOZTNo,982
+pip/commands/install.py,sha256=o-CR1TKf-b1qaFv47nNlawqsIfDjXyIzv_iJUw1Trag,18069
+pip/commands/list.py,sha256=93bCiFyt2Qut_YHkYHJMZHpXladmxsjS-yOtZeb3uqI,11369
+pip/commands/search.py,sha256=oTs9QNdefnrmCV_JeftG0PGiMuYVmiEDF1OUaYsmDao,4502
+pip/commands/show.py,sha256=ZYM57_7U8KP9MQIIyHKQdZxmiEZByy-DRzB697VFoTY,5891
+pip/commands/uninstall.py,sha256=tz8cXz4WdpUdnt3RvpdQwH6_SNMB50egBIZWa1dwfcc,2884
+pip/commands/wheel.py,sha256=z5SEhws2YRMb0Ml1IEkg6jFZMLRpLl86bHCrQbYt5zo,7729
+pip/compat/__init__.py,sha256=2Xs_IpsmdRgHbQgQO0c8_lPvHJnQXHyGWxPbLbYJL4c,4672
+pip/compat/dictconfig.py,sha256=dRrelPDWrceDSzFT51RTEVY2GuM7UDyc5Igh_tn4Fvk,23096
+pip/models/__init__.py,sha256=0Rs7_RA4DxeOkWT5Cq4CQzDrSEhvYcN3TH2cazr72PE,71
+pip/models/index.py,sha256=pUfbO__v3mD9j-2n_ClwPS8pVyx4l2wIwyvWt8GMCRA,487
+pip/operations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pip/operations/check.py,sha256=uwUN9cs1sPo7c0Sj6pRrSv7b22Pk29SXUImTelVchMQ,1590
+pip/operations/freeze.py,sha256=k-7w7LsM-RpPv7ERBzHiPpYkH-GuYfHLyR-Cp_1VPL0,5194
+pip/req/__init__.py,sha256=vFwZY8_Vc1WU1zFAespg1My_r_AT3n7cN0W9eX0EFqk,276
+pip/req/req_file.py,sha256=fG9MDsXUNPhmGwxUiwrIXEynyD8Q7s3L47-hLZPDXq0,11926
+pip/req/req_install.py,sha256=gYrH-lwQMmt55VVbav_EtRIPu94cQbHFHm_Kq6AeHbg,46487
+pip/req/req_set.py,sha256=jHspXqcA2FxcF05dgUIAZ5huYPv6bn0wRUX0Z7PKmaA,34462
+pip/req/req_uninstall.py,sha256=fdH2VgCjEC8NRYDS7fRu3ZJaBBUEy-N5muwxDX5MBNM,6897
+pip/utils/__init__.py,sha256=zk1vF2EzHZX1ZKPwgeC9I6yKvs8IJ6NZEfXgp2IP8hI,27912
+pip/utils/appdirs.py,sha256=kj2LK-I2fC5QnEh_A_v-ev_IQMcXaWWF5DE39sNvCLQ,8811
+pip/utils/build.py,sha256=4smLRrfSCmXmjEnVnMFh2tBEpNcSLRe6J0ejZJ-wWJE,1312
+pip/utils/deprecation.py,sha256=X_FMjtDbMJqfqEkdRrki-mYyIdPB6I6DHUTCA_ChY6M,2232
+pip/utils/encoding.py,sha256=NQxGiFS5GbeAveLZTnx92t5r0PYqvt0iRnP2u9SGG1w,971
+pip/utils/filesystem.py,sha256=ZEVBuYM3fqr2_lgOESh4Y7fPFszGD474zVm_M3Mb5Tk,899
+pip/utils/glibc.py,sha256=jcQYjt_oJLPKVZB28Kauy4Sw70zS-wawxoU1HHX36_0,2939
+pip/utils/hashes.py,sha256=oMk7cd3PbJgzpSQyXq1MytMud5f6H5Oa2YY5hYuCq6I,2866
+pip/utils/logging.py,sha256=7yWu4gZw-Qclj7X80QVdpGWkdTWGKT4LiUVKcE04pro,3327
+pip/utils/outdated.py,sha256=fNwOCL5r2EftPGhgCYGMKu032HC8cV-JAr9lp0HmToM,5455
+pip/utils/packaging.py,sha256=qhmli14odw6DIhWJgQYS2Q0RrSbr8nXNcG48f5yTRms,2080
+pip/utils/setuptools_build.py,sha256=0blfscmNJW_iZ5DcswJeDB_PbtTEjfK9RL1R1WEDW2E,278
+pip/utils/ui.py,sha256=pbDkSAeumZ6jdZcOJ2yAbx8iBgeP2zfpqNnLJK1gskQ,11597
+pip/vcs/__init__.py,sha256=WafFliUTHMmsSISV8PHp1M5EXDNSWyJr78zKaQmPLdY,12374
+pip/vcs/bazaar.py,sha256=tYTwc4b4off8mr0O2o8SiGejqBDJxcbDBMSMd9-ISYc,3803
+pip/vcs/git.py,sha256=5LfWryi78A-2ULjEZJvCTarJ_3l8venwXASlwm8hiug,11197
+pip/vcs/mercurial.py,sha256=xG6rDiwHCRytJEs23SIHBXl_SwQo2jkkdD_6rVVP5h4,3472
+pip/vcs/subversion.py,sha256=GAuX2Sk7IZvJyEzENKcVld_wGBrQ3fpXDlXjapZEYdI,9350
+pip-9.0.1.dist-info/DESCRIPTION.rst,sha256=Va8Wj1XBpTbVQ2Z41mZRJdALEeziiS_ZewWn1H2ecY4,1287
+pip-9.0.1.dist-info/METADATA,sha256=LZLdUBpPmFB4Of_9wIHegCXhbmiByzOv6WCGs3rixt0,2553
+pip-9.0.1.dist-info/RECORD,,
+pip-9.0.1.dist-info/WHEEL,sha256=kdsN-5OJAZIiHN-iO4Rhl82KyS0bDWf4uBwMbkNafr8,110
+pip-9.0.1.dist-info/entry_points.txt,sha256=Q-fR2tcp9DRdeXoGn1wR67Xecy32o5EyQEnzDghwqqk,68
+pip-9.0.1.dist-info/metadata.json,sha256=eAfMY0s5HjwtLLjIZ9LYDxWocl2her-knzH7qTJ38CU,1565
+pip-9.0.1.dist-info/top_level.txt,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+../../../bin/pip,sha256=mhCRRh69vnCfOAgWkbBGSIzgOQmDNMWQoeZJ_F7C23g,288
+../../../bin/pip3,sha256=mhCRRh69vnCfOAgWkbBGSIzgOQmDNMWQoeZJ_F7C23g,288
+../../../bin/pip3.6,sha256=mhCRRh69vnCfOAgWkbBGSIzgOQmDNMWQoeZJ_F7C23g,288
+pip-9.0.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+pip/_vendor/__pycache__/__init__.cpython-36.pyc,,
+pip/commands/__pycache__/uninstall.cpython-36.pyc,,
+pip/commands/__pycache__/completion.cpython-36.pyc,,
+pip/commands/__pycache__/download.cpython-36.pyc,,
+pip/commands/__pycache__/hash.cpython-36.pyc,,
+pip/commands/__pycache__/list.cpython-36.pyc,,
+pip/commands/__pycache__/search.cpython-36.pyc,,
+pip/commands/__pycache__/check.cpython-36.pyc,,
+pip/commands/__pycache__/freeze.cpython-36.pyc,,
+pip/commands/__pycache__/show.cpython-36.pyc,,
+pip/commands/__pycache__/install.cpython-36.pyc,,
+pip/commands/__pycache__/__init__.cpython-36.pyc,,
+pip/commands/__pycache__/wheel.cpython-36.pyc,,
+pip/commands/__pycache__/help.cpython-36.pyc,,
+pip/operations/__pycache__/check.cpython-36.pyc,,
+pip/operations/__pycache__/freeze.cpython-36.pyc,,
+pip/operations/__pycache__/__init__.cpython-36.pyc,,
+pip/__pycache__/baseparser.cpython-36.pyc,,
+pip/__pycache__/locations.cpython-36.pyc,,
+pip/__pycache__/download.cpython-36.pyc,,
+pip/__pycache__/index.cpython-36.pyc,,
+pip/__pycache__/cmdoptions.cpython-36.pyc,,
+pip/__pycache__/basecommand.cpython-36.pyc,,
+pip/__pycache__/__main__.cpython-36.pyc,,
+pip/__pycache__/status_codes.cpython-36.pyc,,
+pip/__pycache__/__init__.cpython-36.pyc,,
+pip/__pycache__/pep425tags.cpython-36.pyc,,
+pip/__pycache__/wheel.cpython-36.pyc,,
+pip/__pycache__/exceptions.cpython-36.pyc,,
+pip/models/__pycache__/index.cpython-36.pyc,,
+pip/models/__pycache__/__init__.cpython-36.pyc,,
+pip/utils/__pycache__/deprecation.cpython-36.pyc,,
+pip/utils/__pycache__/glibc.cpython-36.pyc,,
+pip/utils/__pycache__/setuptools_build.cpython-36.pyc,,
+pip/utils/__pycache__/build.cpython-36.pyc,,
+pip/utils/__pycache__/appdirs.cpython-36.pyc,,
+pip/utils/__pycache__/logging.cpython-36.pyc,,
+pip/utils/__pycache__/ui.cpython-36.pyc,,
+pip/utils/__pycache__/hashes.cpython-36.pyc,,
+pip/utils/__pycache__/encoding.cpython-36.pyc,,
+pip/utils/__pycache__/__init__.cpython-36.pyc,,
+pip/utils/__pycache__/packaging.cpython-36.pyc,,
+pip/utils/__pycache__/outdated.cpython-36.pyc,,
+pip/utils/__pycache__/filesystem.cpython-36.pyc,,
+pip/req/__pycache__/req_set.cpython-36.pyc,,
+pip/req/__pycache__/req_file.cpython-36.pyc,,
+pip/req/__pycache__/__init__.cpython-36.pyc,,
+pip/req/__pycache__/req_install.cpython-36.pyc,,
+pip/req/__pycache__/req_uninstall.cpython-36.pyc,,
+pip/compat/__pycache__/dictconfig.cpython-36.pyc,,
+pip/compat/__pycache__/__init__.cpython-36.pyc,,
+pip/vcs/__pycache__/mercurial.cpython-36.pyc,,
+pip/vcs/__pycache__/subversion.cpython-36.pyc,,
+pip/vcs/__pycache__/bazaar.cpython-36.pyc,,
+pip/vcs/__pycache__/git.cpython-36.pyc,,
+pip/vcs/__pycache__/__init__.cpython-36.pyc,,
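For reference, each populated hash field in the RECORD above is "sha256=" followed by the URL-safe, unpadded base64 encoding of the file's SHA-256 digest (the standard wheel RECORD convention). A small sketch for spot-checking an installed file against its RECORD entry, assuming it is run from the site-packages directory listed in this diff:

    import base64
    import hashlib

    def record_digest(path):
        """Return the RECORD-style hash string for a file."""
        with open(path, "rb") as f:
            digest = hashlib.sha256(f.read()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

    # Expected value copied from the pip/__init__.py line of the RECORD above.
    print(record_digest("pip/__init__.py") ==
          "sha256=ds3YAAeZwhX6e8yfxjLCxPlOb37Bh42ew09XEPJjQGE")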
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/WHEEL b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/WHEEL
new file mode 100644
index 0000000000000000000000000000000000000000..7332a419cda6903b61439f3bac93492b0747e6e7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.30.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/entry_points.txt b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/entry_points.txt
new file mode 100644
index 0000000000000000000000000000000000000000..879fd89648478d3b98551de10b0157007182e7cd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/entry_points.txt
@@ -0,0 +1,5 @@
+[console_scripts]
+pip = pip:main
+pip3 = pip:main
+pip3.6 = pip:main
+
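The console_scripts entries above are what generated the bin/pip, bin/pip3 and bin/pip3.6 wrappers earlier in this diff: each line maps a command name to a "module:callable" target. A hedged sketch of how a project of your own would declare an equivalent entry point in its setup.py (the package and function names here are placeholders, not part of this distribution):

    from setuptools import setup

    setup(
        name="example-tool",          # placeholder project name
        version="0.1",
        py_modules=["example_tool"],  # placeholder module that defines main()
        entry_points={
            "console_scripts": [
                # same syntax as entry_points.txt: <command> = <module>:<callable>
                "example-tool = example_tool:main",
            ],
        },
    )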
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/metadata.json b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..15c01e9caa51d1a24265dcf84bfc9d5185da2d91
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/metadata.json
@@ -0,0 +1 @@
+{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Topic :: Software Development :: Build Tools", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: Implementation :: PyPy"], "extensions": {"python.commands": {"wrap_console": {"pip": "pip:main", "pip3": "pip:main", "pip3.6": "pip:main"}}, "python.details": {"contacts": [{"email": "python-virtualenv@groups.google.com", "name": "The pip developers", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "https://pip.pypa.io/"}}, "python.exports": {"console_scripts": {"pip": "pip:main", "pip3": "pip:main", "pip3.6": "pip:main"}}}, "extras": ["testing"], "generator": "bdist_wheel (0.30.0)", "keywords": ["easy_install", "distutils", "setuptools", "egg", "virtualenv"], "license": "MIT", "metadata_version": "2.0", "name": "pip", "requires_python": ">=2.6,!=3.0.*,!=3.1.*,!=3.2.*", "run_requires": [{"extra": "testing", "requires": ["mock", "pretend", "pytest", "scripttest (>=1.3)", "virtualenv (>=1.10)"]}], "summary": "The PyPA recommended tool for installing Python packages.", "test_requires": [{"requires": ["mock", "pretend", "pytest", "scripttest (>=1.3)", "virtualenv (>=1.10)"]}], "version": "9.0.1"}
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/top_level.txt b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip-9.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+pip
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b197d6eafc871413e352d8dfd3249e205f8bddb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/__init__.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python
+from __future__ import absolute_import
+
+import locale
+import logging
+import os
+import optparse
+import warnings
+
+import sys
+import re
+
+# 2016-06-17 barry@debian.org: urllib3 1.14 added optional support for socks,
+# but if invoked (i.e. imported), it will issue a warning to stderr if socks
+# isn't available.  requests unconditionally imports urllib3's socks contrib
+# module, triggering this warning.  The warning breaks DEP-8 tests (because of
+# the stderr output) and is just plain annoying in normal usage.  I don't want
+# to add socks as yet another dependency for pip, nor do I want to allow-stderr
+# in the DEP-8 tests, so just suppress the warning.  pdb tells me this has to
+# be done before the import of pip.vcs.
+try:
+    from pip._vendor.requests.packages.urllib3.exceptions import DependencyWarning
+except ImportError:
+    from urllib3.exceptions import DependencyWarning
+warnings.filterwarnings("ignore", category=DependencyWarning)  # noqa
+
+
+from pip.exceptions import InstallationError, CommandError, PipError
+from pip.utils import get_installed_distributions, get_prog
+from pip.utils import deprecation, dist_is_editable
+from pip.vcs import git, mercurial, subversion, bazaar  # noqa
+from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
+from pip.commands import get_summaries, get_similar_commands
+from pip.commands import commands_dict
+try:
+    from pip._vendor.requests.packages.urllib3.exceptions import (
+        InsecureRequestWarning,
+    )
+except ImportError:
+    from urllib3.exceptions import (
+        InsecureRequestWarning,
+    )
+
+# assignment for flake8 to be happy
+
+# This fixes a peculiarity when importing via __import__ - as we are
+# initialising the pip module, "from pip import cmdoptions" is recursive
+# and appears not to work properly in that situation.
+import pip.cmdoptions
+cmdoptions = pip.cmdoptions
+
+# The version as used in the setup.py and the docs conf.py
+__version__ = "9.0.1"
+
+
+logger = logging.getLogger(__name__)
+
+# Hide the InsecureRequestWarning from urllib3
+warnings.filterwarnings("ignore", category=InsecureRequestWarning)
+
+
+def autocomplete():
+    """Command and option completion for the main option parser (and options)
+    and its subcommands (and options).
+
+    Enable by sourcing one of the completion shell scripts (bash, zsh or fish).
+    """
+    # Don't complete if user hasn't sourced bash_completion file.
+    if 'PIP_AUTO_COMPLETE' not in os.environ:
+        return
+    cwords = os.environ['COMP_WORDS'].split()[1:]
+    cword = int(os.environ['COMP_CWORD'])
+    try:
+        current = cwords[cword - 1]
+    except IndexError:
+        current = ''
+
+    subcommands = [cmd for cmd, summary in get_summaries()]
+    options = []
+    # subcommand
+    try:
+        subcommand_name = [w for w in cwords if w in subcommands][0]
+    except IndexError:
+        subcommand_name = None
+
+    parser = create_main_parser()
+    # subcommand options
+    if subcommand_name:
+        # special case: 'help' subcommand has no options
+        if subcommand_name == 'help':
+            sys.exit(1)
+        # special case: list locally installed dists for uninstall command
+        if subcommand_name == 'uninstall' and not current.startswith('-'):
+            installed = []
+            lc = current.lower()
+            for dist in get_installed_distributions(local_only=True):
+                if dist.key.startswith(lc) and dist.key not in cwords[1:]:
+                    installed.append(dist.key)
+            # if there are no dists installed, fall back to option completion
+            if installed:
+                for dist in installed:
+                    print(dist)
+                sys.exit(1)
+
+        subcommand = commands_dict[subcommand_name]()
+        options += [(opt.get_opt_string(), opt.nargs)
+                    for opt in subcommand.parser.option_list_all
+                    if opt.help != optparse.SUPPRESS_HELP]
+
+        # filter out previously specified options from available options
+        prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]]
+        options = [(x, v) for (x, v) in options if x not in prev_opts]
+        # filter options by current input
+        options = [(k, v) for k, v in options if k.startswith(current)]
+        for option in options:
+            opt_label = option[0]
+            # append '=' to options which require args
+            if option[1]:
+                opt_label += '='
+            print(opt_label)
+    else:
+        # show main parser options only when necessary
+        if current.startswith('-') or current.startswith('--'):
+            opts = [i.option_list for i in parser.option_groups]
+            opts.append(parser.option_list)
+            opts = (o for it in opts for o in it)
+
+            subcommands += [i.get_opt_string() for i in opts
+                            if i.help != optparse.SUPPRESS_HELP]
+
+        print(' '.join([x for x in subcommands if x.startswith(current)]))
+    sys.exit(1)
+
+
+def create_main_parser():
+    parser_kw = {
+        'usage': '\n%prog <command> [options]',
+        'add_help_option': False,
+        'formatter': UpdatingDefaultsHelpFormatter(),
+        'name': 'global',
+        'prog': get_prog(),
+    }
+
+    parser = ConfigOptionParser(**parser_kw)
+    parser.disable_interspersed_args()
+
+    pip_pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    parser.version = 'pip %s from %s (python %s)' % (
+        __version__, pip_pkg_dir, sys.version[:3])
+
+    # add the general options
+    gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser)
+    parser.add_option_group(gen_opts)
+
+    parser.main = True  # so the help formatter knows
+
+    # create command listing for description
+    command_summaries = get_summaries()
+    description = [''] + ['%-27s %s' % (i, j) for i, j in command_summaries]
+    parser.description = '\n'.join(description)
+
+    return parser
+
+
+def parseopts(args):
+    parser = create_main_parser()
+
+    # Note: parser calls disable_interspersed_args(), so the result of this
+    # call is to split the initial args into the general options before the
+    # subcommand and everything else.
+    # For example:
+    #  args: ['--timeout=5', 'install', '--user', 'INITools']
+    #  general_options: ['--timeout=5']
+    #  args_else: ['install', '--user', 'INITools']
+    general_options, args_else = parser.parse_args(args)
+
+    # --version
+    if general_options.version:
+        sys.stdout.write(parser.version)
+        sys.stdout.write(os.linesep)
+        sys.exit()
+
+    # pip || pip help -> print_help()
+    if not args_else or (args_else[0] == 'help' and len(args_else) == 1):
+        parser.print_help()
+        sys.exit()
+
+    # the subcommand name
+    cmd_name = args_else[0]
+
+    if cmd_name not in commands_dict:
+        guess = get_similar_commands(cmd_name)
+
+        msg = ['unknown command "%s"' % cmd_name]
+        if guess:
+            msg.append('maybe you meant "%s"' % guess)
+
+        raise CommandError(' - '.join(msg))
+
+    # all the args without the subcommand
+    cmd_args = args[:]
+    cmd_args.remove(cmd_name)
+
+    return cmd_name, cmd_args
+
+
+def check_isolated(args):
+    isolated = False
+
+    if "--isolated" in args:
+        isolated = True
+
+    return isolated
+
+
+def main(args=None):
+    if args is None:
+        args = sys.argv[1:]
+
+    # Configure our deprecation warnings to be sent through loggers
+    deprecation.install_warning_logger()
+
+    autocomplete()
+
+    try:
+        cmd_name, cmd_args = parseopts(args)
+    except PipError as exc:
+        sys.stderr.write("ERROR: %s" % exc)
+        sys.stderr.write(os.linesep)
+        sys.exit(1)
+
+    # Needed for locale.getpreferredencoding(False) to work
+    # in pip.utils.encoding.auto_decode
+    try:
+        locale.setlocale(locale.LC_ALL, '')
+    except locale.Error as e:
+        # setlocale can apparently crash if locales are uninitialized
+        logger.debug("Ignoring error %s when setting locale", e)
+    command = commands_dict[cmd_name](isolated=check_isolated(cmd_args))
+    return command.main(cmd_args)
+
+
+# ###########################################################
+# # Writing freeze files
+
+class FrozenRequirement(object):
+
+    def __init__(self, name, req, editable, comments=()):
+        self.name = name
+        self.req = req
+        self.editable = editable
+        self.comments = comments
+
+    _rev_re = re.compile(r'-r(\d+)$')
+    _date_re = re.compile(r'-(20\d\d\d\d\d\d)$')
+
+    @classmethod
+    def from_dist(cls, dist, dependency_links):
+        location = os.path.normcase(os.path.abspath(dist.location))
+        comments = []
+        from pip.vcs import vcs, get_src_requirement
+        if dist_is_editable(dist) and vcs.get_backend_name(location):
+            editable = True
+            try:
+                req = get_src_requirement(dist, location)
+            except InstallationError as exc:
+                logger.warning(
+                    "Error when trying to get requirement for VCS system %s, "
+                    "falling back to uneditable format", exc
+                )
+                req = None
+            if req is None:
+                logger.warning(
+                    'Could not determine repository location of %s', location
+                )
+                comments.append(
+                    '## !! Could not determine repository location'
+                )
+                req = dist.as_requirement()
+                editable = False
+        else:
+            editable = False
+            req = dist.as_requirement()
+            specs = req.specs
+            assert len(specs) == 1 and specs[0][0] in ["==", "==="], \
+                'Expected 1 spec with == or ===; specs = %r; dist = %r' % \
+                (specs, dist)
+            version = specs[0][1]
+            ver_match = cls._rev_re.search(version)
+            date_match = cls._date_re.search(version)
+            if ver_match or date_match:
+                svn_backend = vcs.get_backend('svn')
+                if svn_backend:
+                    svn_location = svn_backend().get_location(
+                        dist,
+                        dependency_links,
+                    )
+                if not svn_location:
+                    logger.warning(
+                        'Warning: cannot find svn location for %s', req)
+                    comments.append(
+                        '## FIXME: could not find svn URL in dependency_links '
+                        'for this package:'
+                    )
+                else:
+                    comments.append(
+                        '# Installing as editable to satisfy requirement %s:' %
+                        req
+                    )
+                    if ver_match:
+                        rev = ver_match.group(1)
+                    else:
+                        rev = '{%s}' % date_match.group(1)
+                    editable = True
+                    req = '%s@%s#egg=%s' % (
+                        svn_location,
+                        rev,
+                        cls.egg_name(dist)
+                    )
+        return cls(dist.project_name, req, editable, comments)
+
+    @staticmethod
+    def egg_name(dist):
+        name = dist.egg_name()
+        match = re.search(r'-py\d\.\d$', name)
+        if match:
+            name = name[:match.start()]
+        return name
+
+    def __str__(self):
+        req = self.req
+        if self.editable:
+            req = '-e %s' % req
+        return '\n'.join(list(self.comments) + [str(req)]) + '\n'
+
+
+if __name__ == '__main__':
+    sys.exit(main())
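Since this vendored copy is pip 9.0.1, the module-level main() defined above can still be invoked in-process (later pip releases removed this public entry point, so the pattern is specific to the version shipped here). A minimal sketch:

    import pip  # the pip 9.0.1 package added in this diff

    # Equivalent to running "pip list": main() calls parseopts() and then
    # dispatches to the matching class in commands_dict.
    exit_code = pip.main(["list"])
    print("pip exited with status", exit_code)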
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/__main__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/__main__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5556539cb714e8ef5e7a969cc1a74f67a5a63436
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/__main__.py
@@ -0,0 +1,19 @@
+from __future__ import absolute_import
+
+import os
+import sys
+
+# If we are running from a wheel, add the wheel to sys.path
+# This allows the usage python pip-*.whl/pip install pip-*.whl
+if __package__ == '':
+    # __file__ is pip-*.whl/pip/__main__.py
+    # first dirname call strips off '/__main__.py', second strips off '/pip'
+    # Resulting path is the name of the wheel itself
+    # Add that to sys.path so we can import pip
+    path = os.path.dirname(os.path.dirname(__file__))
+    sys.path.insert(0, path)
+
+import pip  # noqa
+
+if __name__ == '__main__':
+    sys.exit(pip.main())
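__main__.py is what makes "python -m pip" work (and, as its comment notes, lets the wheel file itself be executed). Driving pip through a subprocess rather than importing it in-process is usually the safer pattern; a short sketch:

    import subprocess
    import sys

    # Runs the __main__.py above via the current interpreter.
    subprocess.check_call([sys.executable, "-m", "pip", "--version"])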
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/_vendor/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/_vendor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e76ab8254456a71b45ac108686425167021788b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/_vendor/__init__.py
@@ -0,0 +1,111 @@
+"""
+pip._vendor is for vendoring dependencies of pip to prevent needing pip to
+depend on something external.
+
+Files inside of pip._vendor should be considered immutable and should only be
+updated to versions from upstream.
+"""
+from __future__ import absolute_import
+
+import glob
+import os.path
+import sys
+
+# Downstream redistributors which have debundled our dependencies should also
+# patch this value to be true. This will trigger the additional patching
+# to cause things like "six" to be available as pip._vendor.six.
+DEBUNDLED = True
+
+# By default, look in this directory for a bunch of .whl files which we will
+# add to the beginning of sys.path before attempting to import anything. This
+# is done to support downstream re-distributors like Debian and Fedora who
+# wish to create their own Wheels for our dependencies to aid in debundling.
+WHEEL_DIR = os.path.abspath(os.path.join(sys.prefix, 'share', 'python-wheels'))
+
+
+# Define a small helper function to alias our vendored modules to the real ones
+# if the vendored ones do not exist. The idea for this was taken from
+# https://github.com/kennethreitz/requests/pull/2567.
+def vendored(modulename):
+    vendored_name = "{0}.{1}".format(__name__, modulename)
+
+    try:
+        __import__(vendored_name, globals(), locals(), level=0)
+    except ImportError:
+        try:
+            __import__(modulename, globals(), locals(), level=0)
+        except ImportError:
+            # We can just silently allow import failures to pass here. If we
+            # got to this point it means that ``import pip._vendor.whatever``
+            # failed and so did ``import whatever``. Since we're importing this
+            # upfront in an attempt to alias imports, not erroring here will
+            # just mean we get a regular import error whenever pip *actually*
+            # tries to import one of these modules to use it, which actually
+            # gives us a better error message than we would have otherwise
+            # gotten.
+            pass
+        else:
+            sys.modules[vendored_name] = sys.modules[modulename]
+            base, head = vendored_name.rsplit(".", 1)
+            setattr(sys.modules[base], head, sys.modules[modulename])
+
+
+# If we're operating in a debundled setup, then we want to go ahead and trigger
+# the aliasing of our vendored libraries as well as looking for wheels to add
+# to our sys.path. This will cause all of this code to be a no-op typically
+# however downstream redistributors can enable it in a consistent way across
+# all platforms.
+if DEBUNDLED:
+    # Actually look inside of WHEEL_DIR to find .whl files and add them to the
+    # front of our sys.path.
+    sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path
+
+    # Actually alias all of our vendored dependencies.
+    vendored("cachecontrol")
+    vendored("colorama")
+    vendored("distlib")
+    vendored("distro")
+    vendored("html5lib")
+    vendored("lockfile")
+    vendored("six")
+    vendored("six.moves")
+    vendored("six.moves.urllib")
+    vendored("packaging")
+    vendored("packaging.version")
+    vendored("packaging.specifiers")
+    vendored("pkg_resources")
+    vendored("progress")
+    vendored("retrying")
+    vendored("requests")
+    vendored("requests.packages")
+    vendored("requests.packages.urllib3")
+    vendored("requests.packages.urllib3._collections")
+    vendored("requests.packages.urllib3.connection")
+    vendored("requests.packages.urllib3.connectionpool")
+    vendored("requests.packages.urllib3.contrib")
+    vendored("requests.packages.urllib3.contrib.ntlmpool")
+    vendored("requests.packages.urllib3.contrib.pyopenssl")
+    vendored("requests.packages.urllib3.exceptions")
+    vendored("requests.packages.urllib3.fields")
+    vendored("requests.packages.urllib3.filepost")
+    vendored("requests.packages.urllib3.packages")
+    try:
+        vendored("requests.packages.urllib3.packages.ordered_dict")
+        vendored("requests.packages.urllib3.packages.six")
+    except ImportError:
+        # Debian already unbundles these from requests.
+        pass
+    vendored("requests.packages.urllib3.packages.ssl_match_hostname")
+    vendored("requests.packages.urllib3.packages.ssl_match_hostname."
+             "_implementation")
+    vendored("requests.packages.urllib3.poolmanager")
+    vendored("requests.packages.urllib3.request")
+    vendored("requests.packages.urllib3.response")
+    vendored("requests.packages.urllib3.util")
+    vendored("requests.packages.urllib3.util.connection")
+    vendored("requests.packages.urllib3.util.request")
+    vendored("requests.packages.urllib3.util.response")
+    vendored("requests.packages.urllib3.util.retry")
+    vendored("requests.packages.urllib3.util.ssl_")
+    vendored("requests.packages.urllib3.util.timeout")
+    vendored("requests.packages.urllib3.util.url")
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/basecommand.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/basecommand.py
new file mode 100644
index 0000000000000000000000000000000000000000..54c67067204e72af095e29c8b44c0c56fd591142
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/basecommand.py
@@ -0,0 +1,337 @@
+"""Base Command class, and related routines"""
+from __future__ import absolute_import
+
+import logging
+import os
+import sys
+import optparse
+import warnings
+
+from pip import cmdoptions
+from pip.index import PackageFinder
+from pip.locations import running_under_virtualenv
+from pip.download import PipSession
+from pip.exceptions import (BadCommand, InstallationError, UninstallationError,
+                            CommandError, PreviousBuildDirError)
+
+from pip.compat import logging_dictConfig
+from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
+from pip.req import InstallRequirement, parse_requirements
+from pip.status_codes import (
+    SUCCESS, ERROR, UNKNOWN_ERROR, VIRTUALENV_NOT_FOUND,
+    PREVIOUS_BUILD_DIR_ERROR,
+)
+from pip.utils import deprecation, get_prog, normalize_path
+from pip.utils.logging import IndentingFormatter
+from pip.utils.outdated import pip_version_check
+
+
+__all__ = ['Command']
+
+
+logger = logging.getLogger(__name__)
+
+
+class Command(object):
+    name = None
+    usage = None
+    hidden = False
+    log_streams = ("ext://sys.stdout", "ext://sys.stderr")
+
+    def __init__(self, isolated=False):
+        parser_kw = {
+            'usage': self.usage,
+            'prog': '%s %s' % (get_prog(), self.name),
+            'formatter': UpdatingDefaultsHelpFormatter(),
+            'add_help_option': False,
+            'name': self.name,
+            'description': self.__doc__,
+            'isolated': isolated,
+        }
+
+        self.parser = ConfigOptionParser(**parser_kw)
+
+        # Commands should add options to this option group
+        optgroup_name = '%s Options' % self.name.capitalize()
+        self.cmd_opts = optparse.OptionGroup(self.parser, optgroup_name)
+
+        # Add the general options
+        gen_opts = cmdoptions.make_option_group(
+            cmdoptions.general_group,
+            self.parser,
+        )
+        self.parser.add_option_group(gen_opts)
+
+    def _build_session(self, options, retries=None, timeout=None):
+        session = PipSession(
+            cache=(
+                normalize_path(os.path.join(options.cache_dir, "http"))
+                if options.cache_dir else None
+            ),
+            retries=retries if retries is not None else options.retries,
+            insecure_hosts=options.trusted_hosts,
+        )
+
+        # Handle custom ca-bundles from the user
+        if options.cert:
+            session.verify = options.cert
+
+        # Handle SSL client certificate
+        if options.client_cert:
+            session.cert = options.client_cert
+
+        # Handle timeouts
+        if options.timeout or timeout:
+            session.timeout = (
+                timeout if timeout is not None else options.timeout
+            )
+
+        # Handle configured proxies
+        if options.proxy:
+            session.proxies = {
+                "http": options.proxy,
+                "https": options.proxy,
+            }
+
+        # Determine if we can prompt the user for authentication or not
+        session.auth.prompting = not options.no_input
+
+        return session
+
+    def parse_args(self, args):
+        # factored out for testability
+        return self.parser.parse_args(args)
+
+    def main(self, args):
+        options, args = self.parse_args(args)
+
+        if options.quiet:
+            if options.quiet == 1:
+                level = "WARNING"
+            elif options.quiet == 2:
+                level = "ERROR"
+            else:
+                level = "CRITICAL"
+        elif options.verbose:
+            level = "DEBUG"
+        else:
+            level = "INFO"
+
+        # The root logger should match the "console" level *unless* we
+        # specified "--log" to send debug logs to a file.
+        root_level = level
+        if options.log:
+            root_level = "DEBUG"
+
+        logging_dictConfig({
+            "version": 1,
+            "disable_existing_loggers": False,
+            "filters": {
+                "exclude_warnings": {
+                    "()": "pip.utils.logging.MaxLevelFilter",
+                    "level": logging.WARNING,
+                },
+            },
+            "formatters": {
+                "indent": {
+                    "()": IndentingFormatter,
+                    "format": "%(message)s",
+                },
+            },
+            "handlers": {
+                "console": {
+                    "level": level,
+                    "class": "pip.utils.logging.ColorizedStreamHandler",
+                    "stream": self.log_streams[0],
+                    "filters": ["exclude_warnings"],
+                    "formatter": "indent",
+                },
+                "console_errors": {
+                    "level": "WARNING",
+                    "class": "pip.utils.logging.ColorizedStreamHandler",
+                    "stream": self.log_streams[1],
+                    "formatter": "indent",
+                },
+                "user_log": {
+                    "level": "DEBUG",
+                    "class": "pip.utils.logging.BetterRotatingFileHandler",
+                    "filename": options.log or "/dev/null",
+                    "delay": True,
+                    "formatter": "indent",
+                },
+            },
+            "root": {
+                "level": root_level,
+                "handlers": list(filter(None, [
+                    "console",
+                    "console_errors",
+                    "user_log" if options.log else None,
+                ])),
+            },
+            # Disable any logging besides WARNING unless we have DEBUG level
+            # logging enabled. These use both pip._vendor and the bare names
+            # for the case where someone unbundles our libraries.
+            "loggers": dict(
+                (
+                    name,
+                    {
+                        "level": (
+                            "WARNING"
+                            if level in ["INFO", "ERROR"]
+                            else "DEBUG"
+                        ),
+                    },
+                )
+                for name in ["pip._vendor", "distlib", "requests", "urllib3"]
+            ),
+        })
+
+        if sys.version_info[:2] == (2, 6):
+            warnings.warn(
+                "Python 2.6 is no longer supported by the Python core team, "
+                "please upgrade your Python. A future version of pip will "
+                "drop support for Python 2.6",
+                deprecation.Python26DeprecationWarning
+            )
+
+        # TODO: try to get these passing down from the command?
+        #      without resorting to os.environ to hold these.
+
+        if options.no_input:
+            os.environ['PIP_NO_INPUT'] = '1'
+
+        if options.exists_action:
+            os.environ['PIP_EXISTS_ACTION'] = ' '.join(options.exists_action)
+
+        if options.require_venv:
+            # If a venv is required check if it can really be found
+            if not running_under_virtualenv():
+                logger.critical(
+                    'Could not find an activated virtualenv (required).'
+                )
+                sys.exit(VIRTUALENV_NOT_FOUND)
+
+        try:
+            status = self.run(options, args)
+            # FIXME: all commands should return an exit status
+            # and when it is done, isinstance is not needed anymore
+            if isinstance(status, int):
+                return status
+        except PreviousBuildDirError as exc:
+            logger.critical(str(exc))
+            logger.debug('Exception information:', exc_info=True)
+
+            return PREVIOUS_BUILD_DIR_ERROR
+        except (InstallationError, UninstallationError, BadCommand) as exc:
+            logger.critical(str(exc))
+            logger.debug('Exception information:', exc_info=True)
+
+            return ERROR
+        except CommandError as exc:
+            logger.critical('ERROR: %s', exc)
+            logger.debug('Exception information:', exc_info=True)
+
+            return ERROR
+        except KeyboardInterrupt:
+            logger.critical('Operation cancelled by user')
+            logger.debug('Exception information:', exc_info=True)
+
+            return ERROR
+        except:
+            logger.critical('Exception:', exc_info=True)
+
+            return UNKNOWN_ERROR
+        finally:
+            # Check if we're using the latest version of pip available
+            if (not options.disable_pip_version_check and not
+                    getattr(options, "no_index", False)):
+                with self._build_session(
+                        options,
+                        retries=0,
+                        timeout=min(5, options.timeout)) as session:
+                    pip_version_check(session)
+
+        return SUCCESS
+
+
+class RequirementCommand(Command):
+
+    @staticmethod
+    def populate_requirement_set(requirement_set, args, options, finder,
+                                 session, name, wheel_cache):
+        """
+        Marshal cmd line args into a requirement set.
+        """
+        for filename in options.constraints:
+            for req in parse_requirements(
+                    filename,
+                    constraint=True, finder=finder, options=options,
+                    session=session, wheel_cache=wheel_cache):
+                requirement_set.add_requirement(req)
+
+        for req in args:
+            requirement_set.add_requirement(
+                InstallRequirement.from_line(
+                    req, None, isolated=options.isolated_mode,
+                    wheel_cache=wheel_cache
+                )
+            )
+
+        for req in options.editables:
+            requirement_set.add_requirement(
+                InstallRequirement.from_editable(
+                    req,
+                    default_vcs=options.default_vcs,
+                    isolated=options.isolated_mode,
+                    wheel_cache=wheel_cache
+                )
+            )
+
+        found_req_in_file = False
+        for filename in options.requirements:
+            for req in parse_requirements(
+                    filename,
+                    finder=finder, options=options, session=session,
+                    wheel_cache=wheel_cache):
+                found_req_in_file = True
+                requirement_set.add_requirement(req)
+        # If --require-hashes was a line in a requirements file, tell
+        # RequirementSet about it:
+        requirement_set.require_hashes = options.require_hashes
+
+        if not (args or options.editables or found_req_in_file):
+            opts = {'name': name}
+            if options.find_links:
+                msg = ('You must give at least one requirement to '
+                       '%(name)s (maybe you meant "pip %(name)s '
+                       '%(links)s"?)' %
+                       dict(opts, links=' '.join(options.find_links)))
+            else:
+                msg = ('You must give at least one requirement '
+                       'to %(name)s (see "pip help %(name)s")' % opts)
+            logger.warning(msg)
+
+    def _build_package_finder(self, options, session,
+                              platform=None, python_versions=None,
+                              abi=None, implementation=None):
+        """
+        Create a package finder appropriate to this requirement command.
+        """
+        index_urls = [options.index_url] + options.extra_index_urls
+        if options.no_index:
+            logger.debug('Ignoring indexes: %s', ','.join(index_urls))
+            index_urls = []
+
+        return PackageFinder(
+            find_links=options.find_links,
+            format_control=options.format_control,
+            index_urls=index_urls,
+            trusted_hosts=options.trusted_hosts,
+            allow_all_prereleases=options.pre,
+            process_dependency_links=options.process_dependency_links,
+            session=session,
+            platform=platform,
+            versions=python_versions,
+            abi=abi,
+            implementation=implementation,
+        )
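basecommand.py is the scaffolding every pip subcommand builds on: a subclass sets name/usage/summary, registers its options on cmd_opts, and implements run(options, args); Command.main() then configures logging and maps exceptions to the status codes imported at the top of the file. A hedged, minimal sketch of such a subclass (illustrative only, not an actual pip command; registering it would additionally require an entry in pip.commands.commands_dict):

    from pip.basecommand import Command
    from pip.status_codes import SUCCESS


    class HelloCommand(Command):
        """Print a greeting (illustrative placeholder command)."""
        name = "hello"
        usage = "%prog [options]"
        summary = "Print a greeting"

        def __init__(self, *args, **kw):
            super(HelloCommand, self).__init__(*args, **kw)
            self.cmd_opts.add_option(
                "--name", dest="name", default="world",
                help="Who to greet.")
            self.parser.insert_option_group(0, self.cmd_opts)

        def run(self, options, args):
            print("hello, %s" % options.name)
            return SUCCESS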
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/baseparser.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/baseparser.py
new file mode 100644
index 0000000000000000000000000000000000000000..2dd4533016b22becf6501be3061e515e582be93e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/baseparser.py
@@ -0,0 +1,293 @@
+"""Base option parser setup"""
+from __future__ import absolute_import
+
+import sys
+import optparse
+import os
+import re
+import textwrap
+from distutils.util import strtobool
+
+from pip._vendor.six import string_types
+from pip._vendor.six.moves import configparser
+from pip.locations import (
+    legacy_config_file, config_basename, running_under_virtualenv,
+    site_config_files
+)
+from pip.utils import appdirs, get_terminal_size
+
+
+_environ_prefix_re = re.compile(r"^PIP_", re.I)
+
+
+class PrettyHelpFormatter(optparse.IndentedHelpFormatter):
+    """A prettier/less verbose help formatter for optparse."""
+
+    def __init__(self, *args, **kwargs):
+        # help position must be aligned with __init__.parseopts.description
+        kwargs['max_help_position'] = 30
+        kwargs['indent_increment'] = 1
+        kwargs['width'] = get_terminal_size()[0] - 2
+        optparse.IndentedHelpFormatter.__init__(self, *args, **kwargs)
+
+    def format_option_strings(self, option):
+        return self._format_option_strings(option, ' <%s>', ', ')
+
+    def _format_option_strings(self, option, mvarfmt=' <%s>', optsep=', '):
+        """
+        Return a comma-separated list of option strings and metavars.
+
+        :param option:  tuple of (short opt, long opt), e.g: ('-f', '--format')
+        :param mvarfmt: metavar format string - evaluated as mvarfmt % metavar
+        :param optsep:  separator
+        """
+        opts = []
+
+        if option._short_opts:
+            opts.append(option._short_opts[0])
+        if option._long_opts:
+            opts.append(option._long_opts[0])
+        if len(opts) > 1:
+            opts.insert(1, optsep)
+
+        if option.takes_value():
+            metavar = option.metavar or option.dest.lower()
+            opts.append(mvarfmt % metavar.lower())
+
+        return ''.join(opts)
+
+    def format_heading(self, heading):
+        if heading == 'Options':
+            return ''
+        return heading + ':\n'
+
+    def format_usage(self, usage):
+        """
+        Ensure there is only one newline between usage and the first heading
+        if there is no description.
+        """
+        msg = '\nUsage: %s\n' % self.indent_lines(textwrap.dedent(usage), "  ")
+        return msg
+
+    def format_description(self, description):
+        # leave full control over description to us
+        if description:
+            if hasattr(self.parser, 'main'):
+                label = 'Commands'
+            else:
+                label = 'Description'
+            # some doc strings have initial newlines, some don't
+            description = description.lstrip('\n')
+            # some doc strings have final newlines and spaces, some don't
+            description = description.rstrip()
+            # dedent, then reindent
+            description = self.indent_lines(textwrap.dedent(description), "  ")
+            description = '%s:\n%s\n' % (label, description)
+            return description
+        else:
+            return ''
+
+    def format_epilog(self, epilog):
+        # leave full control over epilog to us
+        if epilog:
+            return epilog
+        else:
+            return ''
+
+    def indent_lines(self, text, indent):
+        new_lines = [indent + line for line in text.split('\n')]
+        return "\n".join(new_lines)
+
+
+class UpdatingDefaultsHelpFormatter(PrettyHelpFormatter):
+    """Custom help formatter for use in ConfigOptionParser.
+
+    This updates the defaults before expanding them, allowing
+    them to show up correctly in the help listing.
+    """
+
+    def expand_default(self, option):
+        if self.parser is not None:
+            self.parser._update_defaults(self.parser.defaults)
+        return optparse.IndentedHelpFormatter.expand_default(self, option)
+
+
+class CustomOptionParser(optparse.OptionParser):
+
+    def insert_option_group(self, idx, *args, **kwargs):
+        """Insert an OptionGroup at a given position."""
+        group = self.add_option_group(*args, **kwargs)
+
+        self.option_groups.pop()
+        self.option_groups.insert(idx, group)
+
+        return group
+
+    @property
+    def option_list_all(self):
+        """Get a list of all options, including those in option groups."""
+        res = self.option_list[:]
+        for i in self.option_groups:
+            res.extend(i.option_list)
+
+        return res
+
+
+class ConfigOptionParser(CustomOptionParser):
+    """Custom option parser which updates its defaults by checking the
+    configuration files and environmental variables"""
+
+    isolated = False
+
+    def __init__(self, *args, **kwargs):
+        self.config = configparser.RawConfigParser()
+        self.name = kwargs.pop('name')
+        self.isolated = kwargs.pop("isolated", False)
+        self.files = self.get_config_files()
+        if self.files:
+            self.config.read(self.files)
+        assert self.name
+        optparse.OptionParser.__init__(self, *args, **kwargs)
+
+    def get_config_files(self):
+        # the files returned by this method will be parsed in order with the
+        # first files listed being overridden by later files in standard
+        # ConfigParser fashion
+        config_file = os.environ.get('PIP_CONFIG_FILE', False)
+        if config_file == os.devnull:
+            return []
+
+        # at the base we have any site-wide configuration
+        files = list(site_config_files)
+
+        # per-user configuration next
+        if not self.isolated:
+            if config_file and os.path.exists(config_file):
+                files.append(config_file)
+            else:
+                # This is the legacy config file, we consider it to be a lower
+                # priority than the new file location.
+                files.append(legacy_config_file)
+
+                # This is the new config file, we consider it to be a higher
+                # priority than the legacy file.
+                files.append(
+                    os.path.join(
+                        appdirs.user_config_dir("pip"),
+                        config_basename,
+                    )
+                )
+
+        # finally, the virtualenv configuration file, which trumps the others
+        if running_under_virtualenv():
+            venv_config_file = os.path.join(
+                sys.prefix,
+                config_basename,
+            )
+            if os.path.exists(venv_config_file):
+                files.append(venv_config_file)
+
+        return files
+
+    def check_default(self, option, key, val):
+        try:
+            return option.check_value(key, val)
+        except optparse.OptionValueError as exc:
+            print("An error occurred during configuration: %s" % exc)
+            sys.exit(3)
+
+    def _update_defaults(self, defaults):
+        """Updates the given defaults with values from the config files and
+        the environ. Does a little special handling for certain types of
+        options (lists)."""
+        # Then go and look for the other sources of configuration:
+        config = {}
+        # 1. config files
+        for section in ('global', self.name):
+            config.update(
+                self.normalize_keys(self.get_config_section(section))
+            )
+        # 2. environmental variables
+        if not self.isolated:
+            config.update(self.normalize_keys(self.get_environ_vars()))
+        # Accumulate complex default state.
+        self.values = optparse.Values(self.defaults)
+        late_eval = set()
+        # Then set the options with those values
+        for key, val in config.items():
+            # ignore empty values
+            if not val:
+                continue
+
+            option = self.get_option(key)
+            # Ignore options not present in this parser. E.g. non-globals put
+            # in [global] by users that want them to apply to all applicable
+            # commands.
+            if option is None:
+                continue
+
+            if option.action in ('store_true', 'store_false', 'count'):
+                val = strtobool(val)
+            elif option.action == 'append':
+                val = val.split()
+                val = [self.check_default(option, key, v) for v in val]
+            elif option.action == 'callback':
+                late_eval.add(option.dest)
+                opt_str = option.get_opt_string()
+                val = option.convert_value(opt_str, val)
+                # From take_action
+                args = option.callback_args or ()
+                kwargs = option.callback_kwargs or {}
+                option.callback(option, opt_str, val, self, *args, **kwargs)
+            else:
+                val = self.check_default(option, key, val)
+
+            defaults[option.dest] = val
+
+        for key in late_eval:
+            defaults[key] = getattr(self.values, key)
+        self.values = None
+        return defaults
+
+    def normalize_keys(self, items):
+        """Return a config dictionary with normalized keys regardless of
+        whether the keys were specified in environment variables or in config
+        files"""
+        normalized = {}
+        for key, val in items:
+            key = key.replace('_', '-')
+            if not key.startswith('--'):
+                key = '--%s' % key  # only prefer long opts
+            normalized[key] = val
+        return normalized
+
+    def get_config_section(self, name):
+        """Get a section of a configuration"""
+        if self.config.has_section(name):
+            return self.config.items(name)
+        return []
+
+    def get_environ_vars(self):
+        """Returns a generator with all environmental vars with prefix PIP_"""
+        for key, val in os.environ.items():
+            if _environ_prefix_re.search(key):
+                yield (_environ_prefix_re.sub("", key).lower(), val)
+
+    def get_default_values(self):
+        """Overriding to make updating the defaults after instantiation of
+        the option parser possible, _update_defaults() does the dirty work."""
+        if not self.process_default_values:
+            # Old, pre-Optik 1.5 behaviour.
+            return optparse.Values(self.defaults)
+
+        defaults = self._update_defaults(self.defaults.copy())  # ours
+        for option in self._get_all_options():
+            default = defaults.get(option.dest)
+            if isinstance(default, string_types):
+                opt_str = option.get_opt_string()
+                defaults[option.dest] = option.check_value(opt_str, default)
+        return optparse.Values(defaults)
+
+    def error(self, msg):
+        self.print_usage(sys.stderr)
+        self.exit(2, "%s\n" % msg)
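ConfigOptionParser above layers its option defaults from site-wide, user, and virtualenv configuration files and then from PIP_* environment variables. The following standard-library sketch mirrors that precedence; the file paths and the "install" section name are illustrative assumptions rather than pip's exact lookup, and key normalization is simplified.

import os
try:
    import configparser                     # Python 3
except ImportError:
    import ConfigParser as configparser     # Python 2 fallback

def layered_defaults(files=("/etc/pip.conf",
                            os.path.expanduser("~/.config/pip/pip.conf")),
                     section="install"):
    # Later files override earlier ones, mirroring get_config_files() ordering.
    config = configparser.RawConfigParser()
    config.read(files)
    defaults = {}
    if config.has_section(section):
        defaults.update(dict(config.items(section)))
    # PIP_* environment variables trump the config files, as in _update_defaults().
    for key, val in os.environ.items():
        if key.startswith("PIP_"):
            defaults[key[len("PIP_"):].lower().replace("_", "-")] = val
    return defaults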
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/cmdoptions.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/cmdoptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..f75c0930d79d99eabcfe3fa157e17baf70c4fd78
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/cmdoptions.py
@@ -0,0 +1,633 @@
+"""
+shared options and groups
+
+The principle here is to define options once, but *not* instantiate them
+globally. One reason being that options with action='append' can carry state
+between parses. pip parses general options twice internally, and shouldn't
+pass on state. To be consistent, all options will follow this design.
+
+"""
+from __future__ import absolute_import
+
+from functools import partial
+from optparse import OptionGroup, SUPPRESS_HELP, Option
+import warnings
+
+from pip.index import (
+    FormatControl, fmt_ctl_handle_mutual_exclude, fmt_ctl_no_binary,
+    fmt_ctl_no_use_wheel)
+from pip.models import PyPI
+from pip.locations import USER_CACHE_DIR, src_prefix
+from pip.utils.hashes import STRONG_HASHES
+
+
+def make_option_group(group, parser):
+    """
+    Return an OptionGroup object
+    group  -- assumed to be dict with 'name' and 'options' keys
+    parser -- an optparse Parser
+    """
+    option_group = OptionGroup(parser, group['name'])
+    for option in group['options']:
+        option_group.add_option(option())
+    return option_group
+
+
+def resolve_wheel_no_use_binary(options):
+    if not options.use_wheel:
+        control = options.format_control
+        fmt_ctl_no_use_wheel(control)
+
+
+def check_install_build_global(options, check_options=None):
+    """Disable wheels if per-setup.py call options are set.
+
+    :param options: The OptionParser options to update.
+    :param check_options: The options to check; if not supplied, defaults to
+        options.
+    """
+    if check_options is None:
+        check_options = options
+
+    def getname(n):
+        return getattr(check_options, n, None)
+    names = ["build_options", "global_options", "install_options"]
+    if any(map(getname, names)):
+        control = options.format_control
+        fmt_ctl_no_binary(control)
+        warnings.warn(
+            'Disabling all use of wheels due to the use of --build-options '
+            '/ --global-options / --install-options.', stacklevel=2)
+
+
+###########
+# options #
+###########
+
+help_ = partial(
+    Option,
+    '-h', '--help',
+    dest='help',
+    action='help',
+    help='Show help.')
+
+isolated_mode = partial(
+    Option,
+    "--isolated",
+    dest="isolated_mode",
+    action="store_true",
+    default=False,
+    help=(
+        "Run pip in an isolated mode, ignoring environment variables and user "
+        "configuration."
+    ),
+)
+
+require_virtualenv = partial(
+    Option,
+    # Run only if inside a virtualenv, bail if not.
+    '--require-virtualenv', '--require-venv',
+    dest='require_venv',
+    action='store_true',
+    default=False,
+    help=SUPPRESS_HELP)
+
+verbose = partial(
+    Option,
+    '-v', '--verbose',
+    dest='verbose',
+    action='count',
+    default=0,
+    help='Give more output. Option is additive, and can be used up to 3 times.'
+)
+
+version = partial(
+    Option,
+    '-V', '--version',
+    dest='version',
+    action='store_true',
+    help='Show version and exit.')
+
+quiet = partial(
+    Option,
+    '-q', '--quiet',
+    dest='quiet',
+    action='count',
+    default=0,
+    help=('Give less output. Option is additive, and can be used up to 3'
+          ' times (corresponding to WARNING, ERROR, and CRITICAL logging'
+          ' levels).')
+)
+
+log = partial(
+    Option,
+    "--log", "--log-file", "--local-log",
+    dest="log",
+    metavar="path",
+    help="Path to a verbose appending log."
+)
+
+no_input = partial(
+    Option,
+    # Don't ask for input
+    '--no-input',
+    dest='no_input',
+    action='store_true',
+    default=False,
+    help=SUPPRESS_HELP)
+
+proxy = partial(
+    Option,
+    '--proxy',
+    dest='proxy',
+    type='str',
+    default='',
+    help="Specify a proxy in the form [user:passwd@]proxy.server:port.")
+
+retries = partial(
+    Option,
+    '--retries',
+    dest='retries',
+    type='int',
+    default=5,
+    help="Maximum number of retries each connection should attempt "
+         "(default %default times).")
+
+timeout = partial(
+    Option,
+    '--timeout', '--default-timeout',
+    metavar='sec',
+    dest='timeout',
+    type='float',
+    default=15,
+    help='Set the socket timeout (default %default seconds).')
+
+default_vcs = partial(
+    Option,
+    # The default version control system for editables, e.g. 'svn'
+    '--default-vcs',
+    dest='default_vcs',
+    type='str',
+    default='',
+    help=SUPPRESS_HELP)
+
+skip_requirements_regex = partial(
+    Option,
+    # A regex to be used to skip requirements
+    '--skip-requirements-regex',
+    dest='skip_requirements_regex',
+    type='str',
+    default='',
+    help=SUPPRESS_HELP)
+
+
+def exists_action():
+    return Option(
+        # Option when path already exist
+        '--exists-action',
+        dest='exists_action',
+        type='choice',
+        choices=['s', 'i', 'w', 'b', 'a'],
+        default=[],
+        action='append',
+        metavar='action',
+        help="Default action when a path already exists: "
+        "(s)witch, (i)gnore, (w)ipe, (b)ackup, (a)bort.")
+
+
+cert = partial(
+    Option,
+    '--cert',
+    dest='cert',
+    type='str',
+    metavar='path',
+    help="Path to alternate CA bundle.")
+
+client_cert = partial(
+    Option,
+    '--client-cert',
+    dest='client_cert',
+    type='str',
+    default=None,
+    metavar='path',
+    help="Path to SSL client certificate, a single file containing the "
+         "private key and the certificate in PEM format.")
+
+index_url = partial(
+    Option,
+    '-i', '--index-url', '--pypi-url',
+    dest='index_url',
+    metavar='URL',
+    default=PyPI.simple_url,
+    help="Base URL of Python Package Index (default %default). "
+         "This should point to a repository compliant with PEP 503 "
+         "(the simple repository API) or a local directory laid out "
+         "in the same format.")
+
+
+def extra_index_url():
+    return Option(
+        '--extra-index-url',
+        dest='extra_index_urls',
+        metavar='URL',
+        action='append',
+        default=[],
+        help="Extra URLs of package indexes to use in addition to "
+             "--index-url. Should follow the same rules as "
+             "--index-url."
+    )
+
+
+no_index = partial(
+    Option,
+    '--no-index',
+    dest='no_index',
+    action='store_true',
+    default=False,
+    help='Ignore package index (only looking at --find-links URLs instead).')
+
+
+def find_links():
+    return Option(
+        '-f', '--find-links',
+        dest='find_links',
+        action='append',
+        default=[],
+        metavar='url',
+        help="If a url or path to an html file, then parse for links to "
+             "archives. If a local path or file:// url that's a directory, "
+             "then look for archives in the directory listing.")
+
+
+def allow_external():
+    return Option(
+        "--allow-external",
+        dest="allow_external",
+        action="append",
+        default=[],
+        metavar="PACKAGE",
+        help=SUPPRESS_HELP,
+    )
+
+
+allow_all_external = partial(
+    Option,
+    "--allow-all-external",
+    dest="allow_all_external",
+    action="store_true",
+    default=False,
+    help=SUPPRESS_HELP,
+)
+
+
+def trusted_host():
+    return Option(
+        "--trusted-host",
+        dest="trusted_hosts",
+        action="append",
+        metavar="HOSTNAME",
+        default=[],
+        help="Mark this host as trusted, even though it does not have valid "
+             "or any HTTPS.",
+    )
+
+
+# Remove after 7.0
+no_allow_external = partial(
+    Option,
+    "--no-allow-external",
+    dest="allow_all_external",
+    action="store_false",
+    default=False,
+    help=SUPPRESS_HELP,
+)
+
+
+# Remove --allow-insecure after 7.0
+def allow_unsafe():
+    return Option(
+        "--allow-unverified", "--allow-insecure",
+        dest="allow_unverified",
+        action="append",
+        default=[],
+        metavar="PACKAGE",
+        help=SUPPRESS_HELP,
+    )
+
+# Remove after 7.0
+no_allow_unsafe = partial(
+    Option,
+    "--no-allow-insecure",
+    dest="allow_all_insecure",
+    action="store_false",
+    default=False,
+    help=SUPPRESS_HELP
+)
+
+# Remove after 1.5
+process_dependency_links = partial(
+    Option,
+    "--process-dependency-links",
+    dest="process_dependency_links",
+    action="store_true",
+    default=False,
+    help="Enable the processing of dependency links.",
+)
+
+
+def constraints():
+    return Option(
+        '-c', '--constraint',
+        dest='constraints',
+        action='append',
+        default=[],
+        metavar='file',
+        help='Constrain versions using the given constraints file. '
+        'This option can be used multiple times.')
+
+
+def requirements():
+    return Option(
+        '-r', '--requirement',
+        dest='requirements',
+        action='append',
+        default=[],
+        metavar='file',
+        help='Install from the given requirements file. '
+        'This option can be used multiple times.')
+
+
+def editable():
+    return Option(
+        '-e', '--editable',
+        dest='editables',
+        action='append',
+        default=[],
+        metavar='path/url',
+        help=('Install a project in editable mode (i.e. setuptools '
+              '"develop mode") from a local project path or a VCS url.'),
+    )
+
+src = partial(
+    Option,
+    '--src', '--source', '--source-dir', '--source-directory',
+    dest='src_dir',
+    metavar='dir',
+    default=src_prefix,
+    help='Directory to check out editable projects into. '
+    'The default in a virtualenv is "<venv path>/src". '
+    'The default for global installs is "<current dir>/src".'
+)
+
+# XXX: deprecated, remove in 9.0
+use_wheel = partial(
+    Option,
+    '--use-wheel',
+    dest='use_wheel',
+    action='store_true',
+    default=True,
+    help=SUPPRESS_HELP,
+)
+
+# XXX: deprecated, remove in 9.0
+no_use_wheel = partial(
+    Option,
+    '--no-use-wheel',
+    dest='use_wheel',
+    action='store_false',
+    default=True,
+    help=('Do not find and prefer wheel archives when searching indexes and '
+          'find-links locations. DEPRECATED in favour of --no-binary.'),
+)
+
+
+def _get_format_control(values, option):
+    """Get a format_control object."""
+    return getattr(values, option.dest)
+
+
+def _handle_no_binary(option, opt_str, value, parser):
+    existing = getattr(parser.values, option.dest)
+    fmt_ctl_handle_mutual_exclude(
+        value, existing.no_binary, existing.only_binary)
+
+
+def _handle_only_binary(option, opt_str, value, parser):
+    existing = getattr(parser.values, option.dest)
+    fmt_ctl_handle_mutual_exclude(
+        value, existing.only_binary, existing.no_binary)
+
+
+def no_binary():
+    return Option(
+        "--no-binary", dest="format_control", action="callback",
+        callback=_handle_no_binary, type="str",
+        default=FormatControl(set(), set()),
+        help="Do not use binary packages. Can be supplied multiple times, and "
+             "each time adds to the existing value. Accepts either :all: to "
+             "disable all binary packages, :none: to empty the set, or one or "
+             "more package names with commas between them. Note that some "
+             "packages are tricky to compile and may fail to install when "
+             "this option is used on them.")
+
+
+def only_binary():
+    return Option(
+        "--only-binary", dest="format_control", action="callback",
+        callback=_handle_only_binary, type="str",
+        default=FormatControl(set(), set()),
+        help="Do not use source packages. Can be supplied multiple times, and "
+             "each time adds to the existing value. Accepts either :all: to "
+             "disable all source packages, :none: to empty the set, or one or "
+             "more package names with commas between them. Packages without "
+             "binary distributions will fail to install when this option is "
+             "used on them.")
+
+
+cache_dir = partial(
+    Option,
+    "--cache-dir",
+    dest="cache_dir",
+    default=USER_CACHE_DIR,
+    metavar="dir",
+    help="Store the cache data in <dir>."
+)
+
+no_cache = partial(
+    Option,
+    "--no-cache-dir",
+    dest="cache_dir",
+    action="store_false",
+    help="Disable the cache.",
+)
+
+no_deps = partial(
+    Option,
+    '--no-deps', '--no-dependencies',
+    dest='ignore_dependencies',
+    action='store_true',
+    default=False,
+    help="Don't install package dependencies.")
+
+build_dir = partial(
+    Option,
+    '-b', '--build', '--build-dir', '--build-directory',
+    dest='build_dir',
+    metavar='dir',
+    help='Directory to unpack packages into and build in.'
+)
+
+ignore_requires_python = partial(
+    Option,
+    '--ignore-requires-python',
+    dest='ignore_requires_python',
+    action='store_true',
+    help='Ignore the Requires-Python information.')
+
+install_options = partial(
+    Option,
+    '--install-option',
+    dest='install_options',
+    action='append',
+    metavar='options',
+    help="Extra arguments to be supplied to the setup.py install "
+         "command (use like --install-option=\"--install-scripts=/usr/local/"
+         "bin\"). Use multiple --install-option options to pass multiple "
+         "options to setup.py install. If you are using an option with a "
+         "directory path, be sure to use absolute path.")
+
+global_options = partial(
+    Option,
+    '--global-option',
+    dest='global_options',
+    action='append',
+    metavar='options',
+    help="Extra global options to be supplied to the setup.py "
+         "call before the install command.")
+
+no_clean = partial(
+    Option,
+    '--no-clean',
+    action='store_true',
+    default=False,
+    help="Don't clean up build directories.")
+
+pre = partial(
+    Option,
+    '--pre',
+    action='store_true',
+    default=False,
+    help="Include pre-release and development versions. By default, "
+         "pip only finds stable versions.")
+
+disable_pip_version_check = partial(
+    Option,
+    "--disable-pip-version-check",
+    dest="disable_pip_version_check",
+    action="store_true",
+    default=True,
+    help="Don't periodically check PyPI to determine whether a new version "
+         "of pip is available for download. Implied with --no-index.")
+
+# Deprecated, Remove later
+always_unzip = partial(
+    Option,
+    '-Z', '--always-unzip',
+    dest='always_unzip',
+    action='store_true',
+    help=SUPPRESS_HELP,
+)
+
+
+def _merge_hash(option, opt_str, value, parser):
+    """Given a value spelled "algo:digest", append the digest to a list
+    pointed to in a dict by the algo name."""
+    if not parser.values.hashes:
+        parser.values.hashes = {}
+    try:
+        algo, digest = value.split(':', 1)
+    except ValueError:
+        parser.error('Arguments to %s must be a hash name '
+                     'followed by a value, like --hash=sha256:abcde...' %
+                     opt_str)
+    if algo not in STRONG_HASHES:
+        parser.error('Allowed hash algorithms for %s are %s.' %
+                     (opt_str, ', '.join(STRONG_HASHES)))
+    parser.values.hashes.setdefault(algo, []).append(digest)
+
+
+hash = partial(
+    Option,
+    '--hash',
+    # Hash values eventually end up in InstallRequirement.hashes due to
+    # __dict__ copying in process_line().
+    dest='hashes',
+    action='callback',
+    callback=_merge_hash,
+    type='string',
+    help="Verify that the package's archive matches this "
+         'hash before installing. Example: --hash=sha256:abcdef...')
+
+
+require_hashes = partial(
+    Option,
+    '--require-hashes',
+    dest='require_hashes',
+    action='store_true',
+    default=False,
+    help='Require a hash to check each requirement against, for '
+         'repeatable installs. This option is implied when any package in a '
+         'requirements file has a --hash option.')
+
+
+##########
+# groups #
+##########
+
+general_group = {
+    'name': 'General Options',
+    'options': [
+        help_,
+        isolated_mode,
+        require_virtualenv,
+        verbose,
+        version,
+        quiet,
+        log,
+        no_input,
+        proxy,
+        retries,
+        timeout,
+        default_vcs,
+        skip_requirements_regex,
+        exists_action,
+        trusted_host,
+        cert,
+        client_cert,
+        cache_dir,
+        no_cache,
+        disable_pip_version_check,
+    ]
+}
+
+non_deprecated_index_group = {
+    'name': 'Package Index Options',
+    'options': [
+        index_url,
+        extra_index_url,
+        no_index,
+        find_links,
+        process_dependency_links,
+    ]
+}
+
+index_group = {
+    'name': 'Package Index Options (including deprecated options)',
+    'options': non_deprecated_index_group['options'] + [
+        allow_external,
+        allow_all_external,
+        no_allow_external,
+        allow_unsafe,
+        no_allow_unsafe,
+    ]
+}
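The module docstring explains why every option here is a factory (a functools.partial or a plain function) rather than a shared Option instance: options with action='append' mutate their default list, so each parser must get a fresh object. A small self-contained sketch of the same pattern, with an invented --tag option and group name:

from functools import partial
from optparse import Option, OptionGroup, OptionParser

# Defined once as a factory; instantiated per parser, never globally.
tag = partial(
    Option,
    '--tag',
    dest='tags',
    action='append',
    default=[],
    help='May be given multiple times.')

def build_group(parser, name, option_factories):
    group = OptionGroup(parser, name)
    for factory in option_factories:
        group.add_option(factory())     # fresh Option object each time
    return group

parser = OptionParser()
parser.add_option_group(build_group(parser, 'Example Options', [tag]))
options, args = parser.parse_args(['--tag', 'a', '--tag', 'b'])
print(options.tags)                     # ['a', 'b']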
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..62c64ebed2704a2322d3c6458fd8d7d8ff2f7feb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/__init__.py
@@ -0,0 +1,86 @@
+"""
+Package containing all pip commands
+"""
+from __future__ import absolute_import
+
+from pip.commands.completion import CompletionCommand
+from pip.commands.download import DownloadCommand
+from pip.commands.freeze import FreezeCommand
+from pip.commands.hash import HashCommand
+from pip.commands.help import HelpCommand
+from pip.commands.list import ListCommand
+from pip.commands.check import CheckCommand
+from pip.commands.search import SearchCommand
+from pip.commands.show import ShowCommand
+from pip.commands.install import InstallCommand
+from pip.commands.uninstall import UninstallCommand
+from pip.commands.wheel import WheelCommand
+
+
+commands_dict = {
+    CompletionCommand.name: CompletionCommand,
+    FreezeCommand.name: FreezeCommand,
+    HashCommand.name: HashCommand,
+    HelpCommand.name: HelpCommand,
+    SearchCommand.name: SearchCommand,
+    ShowCommand.name: ShowCommand,
+    InstallCommand.name: InstallCommand,
+    UninstallCommand.name: UninstallCommand,
+    DownloadCommand.name: DownloadCommand,
+    ListCommand.name: ListCommand,
+    CheckCommand.name: CheckCommand,
+    WheelCommand.name: WheelCommand,
+}
+
+
+commands_order = [
+    InstallCommand,
+    DownloadCommand,
+    UninstallCommand,
+    FreezeCommand,
+    ListCommand,
+    ShowCommand,
+    CheckCommand,
+    SearchCommand,
+    WheelCommand,
+    HashCommand,
+    CompletionCommand,
+    HelpCommand,
+]
+
+
+def get_summaries(ordered=True):
+    """Yields sorted (command name, command summary) tuples."""
+
+    if ordered:
+        cmditems = _sort_commands(commands_dict, commands_order)
+    else:
+        cmditems = commands_dict.items()
+
+    for name, command_class in cmditems:
+        yield (name, command_class.summary)
+
+
+def get_similar_commands(name):
+    """Command name auto-correct."""
+    from difflib import get_close_matches
+
+    name = name.lower()
+
+    close_commands = get_close_matches(name, commands_dict.keys())
+
+    if close_commands:
+        return close_commands[0]
+    else:
+        return False
+
+
+def _sort_commands(cmddict, order):
+    def keyfn(key):
+        try:
+            return order.index(key[1])
+        except ValueError:
+            # unordered items should come last
+            return 0xff
+
+    return sorted(cmddict.items(), key=keyfn)
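get_similar_commands() is a thin wrapper over difflib. The sketch below shows the same "did you mean" lookup with a hard-coded command list standing in for commands_dict:

from difflib import get_close_matches

COMMANDS = ['install', 'download', 'uninstall', 'freeze', 'list', 'show', 'check']

def suggest(name):
    # Return the closest known command name, or None if nothing is similar enough.
    matches = get_close_matches(name.lower(), COMMANDS)
    return matches[0] if matches else None

print(suggest('instal'))   # -> 'install'
print(suggest('xyzzy'))    # -> None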
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/check.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/check.py
new file mode 100644
index 0000000000000000000000000000000000000000..70458adf47d73a88ba9bef862e0546177eb4de55
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/check.py
@@ -0,0 +1,39 @@
+import logging
+
+from pip.basecommand import Command
+from pip.operations.check import check_requirements
+from pip.utils import get_installed_distributions
+
+
+logger = logging.getLogger(__name__)
+
+
+class CheckCommand(Command):
+    """Verify installed packages have compatible dependencies."""
+    name = 'check'
+    usage = """
+      %prog [options]"""
+    summary = 'Verify installed packages have compatible dependencies.'
+
+    def run(self, options, args):
+        dists = get_installed_distributions(local_only=False, skip=())
+        missing_reqs_dict, incompatible_reqs_dict = check_requirements(dists)
+
+        for dist in dists:
+            key = '%s==%s' % (dist.project_name, dist.version)
+
+            for requirement in missing_reqs_dict.get(key, []):
+                logger.info(
+                    "%s %s requires %s, which is not installed.",
+                    dist.project_name, dist.version, requirement.project_name)
+
+            for requirement, actual in incompatible_reqs_dict.get(key, []):
+                logger.info(
+                    "%s %s has requirement %s, but you have %s %s.",
+                    dist.project_name, dist.version, requirement,
+                    actual.project_name, actual.version)
+
+        if missing_reqs_dict or incompatible_reqs_dict:
+            return 1
+        else:
+            logger.info("No broken requirements found.")
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/completion.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/completion.py
new file mode 100644
index 0000000000000000000000000000000000000000..66e41a679123201415370edf9e1c7bc6416f5daf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/completion.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import
+
+import sys
+from pip.basecommand import Command
+
+BASE_COMPLETION = """
+# pip %(shell)s completion start%(script)s# pip %(shell)s completion end
+"""
+
+COMPLETION_SCRIPTS = {
+    'bash': """
+_pip_completion()
+{
+    COMPREPLY=( $( COMP_WORDS="${COMP_WORDS[*]}" \\
+                   COMP_CWORD=$COMP_CWORD \\
+                   PIP_AUTO_COMPLETE=1 $1 ) )
+}
+complete -o default -F _pip_completion pip
+""", 'zsh': """
+function _pip_completion {
+  local words cword
+  read -Ac words
+  read -cn cword
+  reply=( $( COMP_WORDS="$words[*]" \\
+             COMP_CWORD=$(( cword-1 )) \\
+             PIP_AUTO_COMPLETE=1 $words[1] ) )
+}
+compctl -K _pip_completion pip
+""", 'fish': """
+function __fish_complete_pip
+    set -lx COMP_WORDS (commandline -o) ""
+    set -lx COMP_CWORD (math (contains -i -- (commandline -t) $COMP_WORDS)-1)
+    set -lx PIP_AUTO_COMPLETE 1
+    string split \  -- (eval $COMP_WORDS[1])
+end
+complete -fa "(__fish_complete_pip)" -c pip
+"""}
+
+
+class CompletionCommand(Command):
+    """A helper command to be used for command completion."""
+    name = 'completion'
+    summary = 'A helper command used for command completion.'
+
+    def __init__(self, *args, **kw):
+        super(CompletionCommand, self).__init__(*args, **kw)
+
+        cmd_opts = self.cmd_opts
+
+        cmd_opts.add_option(
+            '--bash', '-b',
+            action='store_const',
+            const='bash',
+            dest='shell',
+            help='Emit completion code for bash')
+        cmd_opts.add_option(
+            '--zsh', '-z',
+            action='store_const',
+            const='zsh',
+            dest='shell',
+            help='Emit completion code for zsh')
+        cmd_opts.add_option(
+            '--fish', '-f',
+            action='store_const',
+            const='fish',
+            dest='shell',
+            help='Emit completion code for fish')
+
+        self.parser.insert_option_group(0, cmd_opts)
+
+    def run(self, options, args):
+        """Prints the completion code of the given shell"""
+        shells = COMPLETION_SCRIPTS.keys()
+        shell_options = ['--' + shell for shell in sorted(shells)]
+        if options.shell in shells:
+            script = COMPLETION_SCRIPTS.get(options.shell, '')
+            print(BASE_COMPLETION % {'script': script, 'shell': options.shell})
+        else:
+            sys.stderr.write(
+                'ERROR: You must pass %s\n' % ' or '.join(shell_options)
+            )
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/download.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bc06408751da1cb3d1ee9372b2ec00ffcdd0c30
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/download.py
@@ -0,0 +1,212 @@
+from __future__ import absolute_import
+
+import logging
+import os
+
+from pip.exceptions import CommandError
+from pip.index import FormatControl
+from pip.req import RequirementSet
+from pip.basecommand import RequirementCommand
+from pip import cmdoptions
+from pip.utils import ensure_dir, normalize_path
+from pip.utils.build import BuildDirectory
+from pip.utils.filesystem import check_path_owner
+
+
+logger = logging.getLogger(__name__)
+
+
+class DownloadCommand(RequirementCommand):
+    """
+    Download packages from:
+
+    - PyPI (and other indexes) using requirement specifiers.
+    - VCS project urls.
+    - Local project directories.
+    - Local or remote source archives.
+
+    pip also supports downloading from "requirements files", which provide
+    an easy way to specify a whole environment to be downloaded.
+    """
+    name = 'download'
+
+    usage = """
+      %prog [options] <requirement specifier> [package-index-options] ...
+      %prog [options] -r <requirements file> [package-index-options] ...
+      %prog [options] [-e] <vcs project url> ...
+      %prog [options] [-e] <local project path> ...
+      %prog [options] <archive url/path> ..."""
+
+    summary = 'Download packages.'
+
+    def __init__(self, *args, **kw):
+        super(DownloadCommand, self).__init__(*args, **kw)
+
+        cmd_opts = self.cmd_opts
+
+        cmd_opts.add_option(cmdoptions.constraints())
+        cmd_opts.add_option(cmdoptions.editable())
+        cmd_opts.add_option(cmdoptions.requirements())
+        cmd_opts.add_option(cmdoptions.build_dir())
+        cmd_opts.add_option(cmdoptions.no_deps())
+        cmd_opts.add_option(cmdoptions.global_options())
+        cmd_opts.add_option(cmdoptions.no_binary())
+        cmd_opts.add_option(cmdoptions.only_binary())
+        cmd_opts.add_option(cmdoptions.src())
+        cmd_opts.add_option(cmdoptions.pre())
+        cmd_opts.add_option(cmdoptions.no_clean())
+        cmd_opts.add_option(cmdoptions.require_hashes())
+
+        cmd_opts.add_option(
+            '-d', '--dest', '--destination-dir', '--destination-directory',
+            dest='download_dir',
+            metavar='dir',
+            default=os.curdir,
+            help=("Download packages into <dir>."),
+        )
+
+        cmd_opts.add_option(
+            '--platform',
+            dest='platform',
+            metavar='platform',
+            default=None,
+            help=("Only download wheels compatible with <platform>. "
+                  "Defaults to the platform of the running system."),
+        )
+
+        cmd_opts.add_option(
+            '--python-version',
+            dest='python_version',
+            metavar='python_version',
+            default=None,
+            help=("Only download wheels compatible with Python "
+                  "interpreter version <version>. If not specified, then the "
+                  "current system interpreter minor version is used. A major "
+                  "version (e.g. '2') can be specified to match all "
+                  "minor revs of that major version.  A minor version "
+                  "(e.g. '34') can also be specified."),
+        )
+
+        cmd_opts.add_option(
+            '--implementation',
+            dest='implementation',
+            metavar='implementation',
+            default=None,
+            help=("Only download wheels compatible with Python "
+                  "implementation <implementation>, e.g. 'pp', 'jy', 'cp', "
+                  " or 'ip'. If not specified, then the current "
+                  "interpreter implementation is used.  Use 'py' to force "
+                  "implementation-agnostic wheels."),
+        )
+
+        cmd_opts.add_option(
+            '--abi',
+            dest='abi',
+            metavar='abi',
+            default=None,
+            help=("Only download wheels compatible with Python "
+                  "abi <abi>, e.g. 'pypy_41'.  If not specified, then the "
+                  "current interpreter abi tag is used.  Generally "
+                  "you will need to specify --implementation, "
+                  "--platform, and --python-version when using "
+                  "this option."),
+        )
+
+        index_opts = cmdoptions.make_option_group(
+            cmdoptions.non_deprecated_index_group,
+            self.parser,
+        )
+
+        self.parser.insert_option_group(0, index_opts)
+        self.parser.insert_option_group(0, cmd_opts)
+
+    def run(self, options, args):
+        options.ignore_installed = True
+
+        if options.python_version:
+            python_versions = [options.python_version]
+        else:
+            python_versions = None
+
+        dist_restriction_set = any([
+            options.python_version,
+            options.platform,
+            options.abi,
+            options.implementation,
+        ])
+        binary_only = FormatControl(set(), set([':all:']))
+        if dist_restriction_set and options.format_control != binary_only:
+            raise CommandError(
+                "--only-binary=:all: must be set and --no-binary must not "
+                "be set (or must be set to :none:) when restricting platform "
+                "and interpreter constraints using --python-version, "
+                "--platform, --abi, or --implementation."
+            )
+
+        options.src_dir = os.path.abspath(options.src_dir)
+        options.download_dir = normalize_path(options.download_dir)
+
+        ensure_dir(options.download_dir)
+
+        with self._build_session(options) as session:
+            finder = self._build_package_finder(
+                options=options,
+                session=session,
+                platform=options.platform,
+                python_versions=python_versions,
+                abi=options.abi,
+                implementation=options.implementation,
+            )
+            build_delete = (not (options.no_clean or options.build_dir))
+            if options.cache_dir and not check_path_owner(options.cache_dir):
+                logger.warning(
+                    "The directory '%s' or its parent directory is not owned "
+                    "by the current user and caching wheels has been "
+                    "disabled. check the permissions and owner of that "
+                    "directory. If executing pip with sudo, you may want "
+                    "sudo's -H flag.",
+                    options.cache_dir,
+                )
+                options.cache_dir = None
+
+            with BuildDirectory(options.build_dir,
+                                delete=build_delete) as build_dir:
+
+                requirement_set = RequirementSet(
+                    build_dir=build_dir,
+                    src_dir=options.src_dir,
+                    download_dir=options.download_dir,
+                    ignore_installed=True,
+                    ignore_dependencies=options.ignore_dependencies,
+                    session=session,
+                    isolated=options.isolated_mode,
+                    require_hashes=options.require_hashes
+                )
+                self.populate_requirement_set(
+                    requirement_set,
+                    args,
+                    options,
+                    finder,
+                    session,
+                    self.name,
+                    None
+                )
+
+                if not requirement_set.has_requirements:
+                    return
+
+                requirement_set.prepare_files(finder)
+
+                downloaded = ' '.join([
+                    req.name for req in requirement_set.successfully_downloaded
+                ])
+                if downloaded:
+                    logger.info(
+                        'Successfully downloaded %s', downloaded
+                    )
+
+                # Clean up
+                if not options.no_clean:
+                    requirement_set.cleanup_files()
+
+        return requirement_set
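run() above rejects --platform, --python-version, --abi and --implementation unless the format control is exactly "wheels only". A minimal sketch of that guard, with a namedtuple standing in for pip's FormatControl:

from collections import namedtuple

FormatControl = namedtuple('FormatControl', ['no_binary', 'only_binary'])

def check_dist_restriction(format_control, python_version=None,
                           platform=None, abi=None, implementation=None):
    # Any platform/interpreter restriction requires downloading wheels only.
    restricted = any([python_version, platform, abi, implementation])
    binary_only = FormatControl(set(), {':all:'})
    if restricted and format_control != binary_only:
        raise ValueError(
            "--only-binary=:all: must be set (and --no-binary must not) "
            "when restricting the target platform or interpreter.")

# Accepted: wheels only, targeting win32.
check_dist_restriction(FormatControl(set(), {':all:'}), platform='win32')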
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/freeze.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/freeze.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1987961985b4fb4c67d69561c3b301432da0f8d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/freeze.py
@@ -0,0 +1,87 @@
+from __future__ import absolute_import
+
+import sys
+
+import pip
+from pip.compat import stdlib_pkgs
+from pip.basecommand import Command
+from pip.operations.freeze import freeze
+from pip.wheel import WheelCache
+
+
+DEV_PKGS = ('pip', 'setuptools', 'distribute', 'wheel')
+
+
+class FreezeCommand(Command):
+    """
+    Output installed packages in requirements format.
+
+    Packages are listed in a case-insensitive sorted order.
+    """
+    name = 'freeze'
+    usage = """
+      %prog [options]"""
+    summary = 'Output installed packages in requirements format.'
+    log_streams = ("ext://sys.stderr", "ext://sys.stderr")
+
+    def __init__(self, *args, **kw):
+        super(FreezeCommand, self).__init__(*args, **kw)
+
+        self.cmd_opts.add_option(
+            '-r', '--requirement',
+            dest='requirements',
+            action='append',
+            default=[],
+            metavar='file',
+            help="Use the order in the given requirements file and its "
+                 "comments when generating output. This option can be "
+                 "used multiple times.")
+        self.cmd_opts.add_option(
+            '-f', '--find-links',
+            dest='find_links',
+            action='append',
+            default=[],
+            metavar='URL',
+            help='URL for finding packages, which will be added to the '
+                 'output.')
+        self.cmd_opts.add_option(
+            '-l', '--local',
+            dest='local',
+            action='store_true',
+            default=False,
+            help='If in a virtualenv that has global access, do not output '
+                 'globally-installed packages.')
+        self.cmd_opts.add_option(
+            '--user',
+            dest='user',
+            action='store_true',
+            default=False,
+            help='Only output packages installed in user-site.')
+        self.cmd_opts.add_option(
+            '--all',
+            dest='freeze_all',
+            action='store_true',
+            help='Do not skip these packages in the output:'
+                 ' %s' % ', '.join(DEV_PKGS))
+
+        self.parser.insert_option_group(0, self.cmd_opts)
+
+    def run(self, options, args):
+        format_control = pip.index.FormatControl(set(), set())
+        wheel_cache = WheelCache(options.cache_dir, format_control)
+        skip = set(stdlib_pkgs)
+        if not options.freeze_all:
+            skip.update(DEV_PKGS)
+
+        freeze_kwargs = dict(
+            requirement=options.requirements,
+            find_links=options.find_links,
+            local_only=options.local,
+            user_only=options.user,
+            skip_regex=options.skip_requirements_regex,
+            isolated=options.isolated_mode,
+            wheel_cache=wheel_cache,
+            skip=skip)
+
+        for line in freeze(**freeze_kwargs):
+            sys.stdout.write(line + '\n')
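The actual listing is produced by pip.operations.freeze, which is not shown in this hunk. The sketch below yields a comparable name==version listing with pkg_resources alone and skips the same DEV_PKGS, while ignoring editables, VCS URLs, and requirements-file ordering.

import pkg_resources

SKIP = {'pip', 'setuptools', 'distribute', 'wheel'}

def simple_freeze():
    # Case-insensitive sort, matching the docstring of FreezeCommand.
    dists = sorted(pkg_resources.working_set,
                   key=lambda d: d.project_name.lower())
    for dist in dists:
        if dist.project_name.lower() in SKIP:
            continue
        yield '%s==%s' % (dist.project_name, dist.version)

for line in simple_freeze():
    print(line)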
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/hash.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/hash.py
new file mode 100644
index 0000000000000000000000000000000000000000..27cca0bfa409121799aa9260d659727b094a76db
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/hash.py
@@ -0,0 +1,57 @@
+from __future__ import absolute_import
+
+import hashlib
+import logging
+import sys
+
+from pip.basecommand import Command
+from pip.status_codes import ERROR
+from pip.utils import read_chunks
+from pip.utils.hashes import FAVORITE_HASH, STRONG_HASHES
+
+
+logger = logging.getLogger(__name__)
+
+
+class HashCommand(Command):
+    """
+    Compute a hash of a local package archive.
+
+    These can be used with --hash in a requirements file to do repeatable
+    installs.
+
+    """
+    name = 'hash'
+    usage = '%prog [options] <file> ...'
+    summary = 'Compute hashes of package archives.'
+
+    def __init__(self, *args, **kw):
+        super(HashCommand, self).__init__(*args, **kw)
+        self.cmd_opts.add_option(
+            '-a', '--algorithm',
+            dest='algorithm',
+            choices=STRONG_HASHES,
+            action='store',
+            default=FAVORITE_HASH,
+            help='The hash algorithm to use: one of %s' %
+                 ', '.join(STRONG_HASHES))
+        self.parser.insert_option_group(0, self.cmd_opts)
+
+    def run(self, options, args):
+        if not args:
+            self.parser.print_usage(sys.stderr)
+            return ERROR
+
+        algorithm = options.algorithm
+        for path in args:
+            logger.info('%s:\n--hash=%s:%s',
+                        path, algorithm, _hash_of_file(path, algorithm))
+
+
+def _hash_of_file(path, algorithm):
+    """Return the hash digest of a file."""
+    with open(path, 'rb') as archive:
+        hash = hashlib.new(algorithm)
+        for chunk in read_chunks(archive):
+            hash.update(chunk)
+    return hash.hexdigest()
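_hash_of_file() reads the archive in chunks via pip.utils.read_chunks. An equivalent standard-library-only version might look like the following; the 8 KiB chunk size is an assumption rather than pip's default.

import hashlib

def hash_file(path, algorithm='sha256', chunk_size=8192):
    # Stream the file so large archives are not loaded into memory at once.
    digest = hashlib.new(algorithm)
    with open(path, 'rb') as archive:
        for chunk in iter(lambda: archive.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# e.g. print('--hash=sha256:%s' % hash_file('example-1.0.tar.gz'))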
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/help.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/help.py
new file mode 100644
index 0000000000000000000000000000000000000000..11722f1e067fc9b1d41adc44f91b5686fedb7d70
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/help.py
@@ -0,0 +1,35 @@
+from __future__ import absolute_import
+
+from pip.basecommand import Command, SUCCESS
+from pip.exceptions import CommandError
+
+
+class HelpCommand(Command):
+    """Show help for commands"""
+    name = 'help'
+    usage = """
+      %prog <command>"""
+    summary = 'Show help for commands.'
+
+    def run(self, options, args):
+        from pip.commands import commands_dict, get_similar_commands
+
+        try:
+            # 'pip help' with no args is handled by pip.__init__.parseopt()
+            cmd_name = args[0]  # the command we need help for
+        except IndexError:
+            return SUCCESS
+
+        if cmd_name not in commands_dict:
+            guess = get_similar_commands(cmd_name)
+
+            msg = ['unknown command "%s"' % cmd_name]
+            if guess:
+                msg.append('maybe you meant "%s"' % guess)
+
+            raise CommandError(' - '.join(msg))
+
+        command = commands_dict[cmd_name]()
+        command.parser.print_help()
+
+        return SUCCESS
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/install.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/install.py
new file mode 100644
index 0000000000000000000000000000000000000000..39292b1175064d91140fab658f03aaaae1a747c4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/install.py
@@ -0,0 +1,455 @@
+from __future__ import absolute_import
+
+import logging
+import operator
+import os
+import tempfile
+import shutil
+import warnings
+try:
+    import wheel
+except ImportError:
+    wheel = None
+
+from pip.req import RequirementSet
+from pip.basecommand import RequirementCommand
+from pip.locations import virtualenv_no_global, distutils_scheme
+from pip.exceptions import (
+    InstallationError, CommandError, PreviousBuildDirError,
+)
+from pip import cmdoptions
+from pip.utils import ensure_dir, get_installed_version
+from pip.utils.build import BuildDirectory
+from pip.utils.deprecation import RemovedInPip10Warning
+from pip.utils.filesystem import check_path_owner
+from pip.wheel import WheelCache, WheelBuilder
+
+from pip.locations import running_under_virtualenv
+
+logger = logging.getLogger(__name__)
+
+
+class InstallCommand(RequirementCommand):
+    """
+    Install packages from:
+
+    - PyPI (and other indexes) using requirement specifiers.
+    - VCS project urls.
+    - Local project directories.
+    - Local or remote source archives.
+
+    pip also supports installing from "requirements files", which provide
+    an easy way to specify a whole environment to be installed.
+    """
+    name = 'install'
+
+    usage = """
+      %prog [options] <requirement specifier> [package-index-options] ...
+      %prog [options] -r <requirements file> [package-index-options] ...
+      %prog [options] [-e] <vcs project url> ...
+      %prog [options] [-e] <local project path> ...
+      %prog [options] <archive url/path> ..."""
+
+    summary = 'Install packages.'
+
+    def __init__(self, *args, **kw):
+        super(InstallCommand, self).__init__(*args, **kw)
+
+        default_user = True
+        if running_under_virtualenv():
+            default_user = False
+        if os.geteuid() == 0:
+            default_user = False
+
+        cmd_opts = self.cmd_opts
+
+        cmd_opts.add_option(cmdoptions.constraints())
+        cmd_opts.add_option(cmdoptions.editable())
+        cmd_opts.add_option(cmdoptions.requirements())
+        cmd_opts.add_option(cmdoptions.build_dir())
+
+        cmd_opts.add_option(
+            '-t', '--target',
+            dest='target_dir',
+            metavar='dir',
+            default=None,
+            help='Install packages into <dir>. '
+                 'By default this will not replace existing files/folders in '
+                 '<dir>. Use --upgrade to replace existing packages in <dir> '
+                 'with new versions.'
+        )
+
+        cmd_opts.add_option(
+            '-d', '--download', '--download-dir', '--download-directory',
+            dest='download_dir',
+            metavar='dir',
+            default=None,
+            help=("Download packages into <dir> instead of installing them, "
+                  "regardless of what's already installed."),
+        )
+
+        cmd_opts.add_option(cmdoptions.src())
+
+        cmd_opts.add_option(
+            '-U', '--upgrade',
+            dest='upgrade',
+            action='store_true',
+            help='Upgrade all specified packages to the newest available '
+                 'version. The handling of dependencies depends on the '
+                 'upgrade-strategy used.'
+        )
+
+        cmd_opts.add_option(
+            '--upgrade-strategy',
+            dest='upgrade_strategy',
+            default='eager',
+            choices=['only-if-needed', 'eager'],
+            help='Determines how dependency upgrading should be handled. '
+                 '"eager" - dependencies are upgraded regardless of '
+                 'whether the currently installed version satisfies the '
+                 'requirements of the upgraded package(s). '
+                 '"only-if-needed" -  are upgraded only when they do not '
+                 'satisfy the requirements of the upgraded package(s).'
+        )
+
+        cmd_opts.add_option(
+            '--force-reinstall',
+            dest='force_reinstall',
+            action='store_true',
+            help='When upgrading, reinstall all packages even if they are '
+                 'already up-to-date.')
+
+        cmd_opts.add_option(
+            '-I', '--ignore-installed',
+            dest='ignore_installed',
+            action='store_true',
+            default=default_user,
+            help='Ignore the installed packages (reinstalling instead).')
+
+        cmd_opts.add_option(cmdoptions.ignore_requires_python())
+        cmd_opts.add_option(cmdoptions.no_deps())
+
+        cmd_opts.add_option(cmdoptions.install_options())
+        cmd_opts.add_option(cmdoptions.global_options())
+
+        cmd_opts.add_option(
+            '--user',
+            dest='use_user_site',
+            action='store_true',
+            default=default_user,
+            help="Install to the Python user install directory for your "
+                 "platform. Typically ~/.local/, or %APPDATA%\Python on "
+                 "Windows. (See the Python documentation for site.USER_BASE "
+                 "for full details.)  On Debian systems, this is the "
+                 "default when running outside of a virtual environment "
+                 "and not as root.")
+
+        cmd_opts.add_option(
+            '--system',
+            dest='use_user_site',
+            action='store_false',
+            help="Install using the system scheme (overrides --user on "
+                 "Debian systems)")
+
+        cmd_opts.add_option(
+            '--egg',
+            dest='as_egg',
+            action='store_true',
+            help="Install packages as eggs, not 'flat', like pip normally "
+                 "does. This option is not about installing *from* eggs. "
+                 "(WARNING: Because this option overrides pip's normal install"
+                 " logic, requirements files may not behave as expected.)")
+
+        cmd_opts.add_option(
+            '--root',
+            dest='root_path',
+            metavar='dir',
+            default=None,
+            help="Install everything relative to this alternate root "
+                 "directory.")
+
+        cmd_opts.add_option(
+            '--prefix',
+            dest='prefix_path',
+            metavar='dir',
+            default=None,
+            help="Installation prefix where lib, bin and other top-level "
+                 "folders are placed")
+
+        cmd_opts.add_option(
+            "--compile",
+            action="store_true",
+            dest="compile",
+            default=True,
+            help="Compile py files to pyc",
+        )
+
+        cmd_opts.add_option(
+            "--no-compile",
+            action="store_false",
+            dest="compile",
+            help="Do not compile py files to pyc",
+        )
+
+        cmd_opts.add_option(cmdoptions.use_wheel())
+        cmd_opts.add_option(cmdoptions.no_use_wheel())
+        cmd_opts.add_option(cmdoptions.no_binary())
+        cmd_opts.add_option(cmdoptions.only_binary())
+        cmd_opts.add_option(cmdoptions.pre())
+        cmd_opts.add_option(cmdoptions.no_clean())
+        cmd_opts.add_option(cmdoptions.require_hashes())
+
+        index_opts = cmdoptions.make_option_group(
+            cmdoptions.index_group,
+            self.parser,
+        )
+
+        self.parser.insert_option_group(0, index_opts)
+        self.parser.insert_option_group(0, cmd_opts)
+
+    def run(self, options, args):
+        cmdoptions.resolve_wheel_no_use_binary(options)
+        cmdoptions.check_install_build_global(options)
+
+        if options.as_egg:
+            warnings.warn(
+                "--egg has been deprecated and will be removed in the future. "
+                "This flag is mutually exclusive with large parts of pip, and "
+                "actually using it invalidates pip's ability to manage the "
+                "installation process.",
+                RemovedInPip10Warning,
+            )
+
+        if options.allow_external:
+            warnings.warn(
+                "--allow-external has been deprecated and will be removed in "
+                "the future. Due to changes in the repository protocol, it no "
+                "longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.allow_all_external:
+            warnings.warn(
+                "--allow-all-external has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.allow_unverified:
+            warnings.warn(
+                "--allow-unverified has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.download_dir:
+            warnings.warn(
+                "pip install --download has been deprecated and will be "
+                "removed in the future. Pip now has a download command that "
+                "should be used instead.",
+                RemovedInPip10Warning,
+            )
+            options.ignore_installed = True
+
+        if options.build_dir:
+            options.build_dir = os.path.abspath(options.build_dir)
+
+        options.src_dir = os.path.abspath(options.src_dir)
+        install_options = options.install_options or []
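+        # A --user request is translated into the underlying install options
+        # below: it cannot be combined with --prefix, nor used in a
+        # virtualenv that hides the user site-packages; otherwise '--user'
+        # and an empty '--prefix=' are passed through to 'setup.py install'.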
+        if options.use_user_site:
+            if options.prefix_path:
+                raise CommandError(
+                    "Can not combine '--user' and '--prefix' as they imply "
+                    "different installation locations"
+                )
+            if virtualenv_no_global():
+                raise InstallationError(
+                    "Can not perform a '--user' install. User site-packages "
+                    "are not visible in this virtualenv."
+                )
+            install_options.append('--user')
+            install_options.append('--prefix=')
+
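+        # For --target installs, everything is first installed into a
+        # temporary --home directory; the resulting purelib/platlib contents
+        # are moved into the real target directory after the install
+        # finishes (see the end of this method).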
+        temp_target_dir = None
+        if options.target_dir:
+            options.ignore_installed = True
+            temp_target_dir = tempfile.mkdtemp()
+            options.target_dir = os.path.abspath(options.target_dir)
+            if (os.path.exists(options.target_dir) and not
+                    os.path.isdir(options.target_dir)):
+                raise CommandError(
+                    "Target path exists but is not a directory, will not "
+                    "continue."
+                )
+            install_options.append('--home=' + temp_target_dir)
+
+        global_options = options.global_options or []
+
+        with self._build_session(options) as session:
+
+            finder = self._build_package_finder(options, session)
+            build_delete = (not (options.no_clean or options.build_dir))
+            wheel_cache = WheelCache(options.cache_dir, options.format_control)
+            if options.cache_dir and not check_path_owner(options.cache_dir):
+                logger.warning(
+                    "The directory '%s' or its parent directory is not owned "
+                    "by the current user and caching wheels has been "
+                    "disabled. check the permissions and owner of that "
+                    "directory. If executing pip with sudo, you may want "
+                    "sudo's -H flag.",
+                    options.cache_dir,
+                )
+                options.cache_dir = None
+
+            with BuildDirectory(options.build_dir,
+                                delete=build_delete) as build_dir:
+                requirement_set = RequirementSet(
+                    build_dir=build_dir,
+                    src_dir=options.src_dir,
+                    download_dir=options.download_dir,
+                    upgrade=options.upgrade,
+                    upgrade_strategy=options.upgrade_strategy,
+                    as_egg=options.as_egg,
+                    ignore_installed=options.ignore_installed,
+                    ignore_dependencies=options.ignore_dependencies,
+                    ignore_requires_python=options.ignore_requires_python,
+                    force_reinstall=options.force_reinstall,
+                    use_user_site=options.use_user_site,
+                    target_dir=temp_target_dir,
+                    session=session,
+                    pycompile=options.compile,
+                    isolated=options.isolated_mode,
+                    wheel_cache=wheel_cache,
+                    require_hashes=options.require_hashes,
+                )
+
+                self.populate_requirement_set(
+                    requirement_set, args, options, finder, session, self.name,
+                    wheel_cache
+                )
+
+                if not requirement_set.has_requirements:
+                    return
+
+                try:
+                    if (options.download_dir or not wheel or not
+                            options.cache_dir):
+                        # With -d/--download, skip complex steps such as
+                        # building wheels, and don't try to build wheels
+                        # when the 'wheel' package is not installed.
+                        requirement_set.prepare_files(finder)
+                    else:
+                        # build wheels before install.
+                        wb = WheelBuilder(
+                            requirement_set,
+                            finder,
+                            build_options=[],
+                            global_options=[],
+                        )
+                        # Ignore the result: a failed wheel will simply be
+                        # installed from the sdist or VCS source instead.
+                        wb.build(autobuilding=True)
+
+                    if not options.download_dir:
+                        requirement_set.install(
+                            install_options,
+                            global_options,
+                            root=options.root_path,
+                            prefix=options.prefix_path,
+                        )
+
+                        possible_lib_locations = get_lib_location_guesses(
+                            user=options.use_user_site,
+                            home=temp_target_dir,
+                            root=options.root_path,
+                            prefix=options.prefix_path,
+                            isolated=options.isolated_mode,
+                        )
+                        reqs = sorted(
+                            requirement_set.successfully_installed,
+                            key=operator.attrgetter('name'))
+                        items = []
+                        for req in reqs:
+                            item = req.name
+                            try:
+                                installed_version = get_installed_version(
+                                    req.name, possible_lib_locations
+                                )
+                                if installed_version:
+                                    item += '-' + installed_version
+                            except Exception:
+                                pass
+                            items.append(item)
+                        installed = ' '.join(items)
+                        if installed:
+                            logger.info('Successfully installed %s', installed)
+                    else:
+                        downloaded = ' '.join([
+                            req.name
+                            for req in requirement_set.successfully_downloaded
+                        ])
+                        if downloaded:
+                            logger.info(
+                                'Successfully downloaded %s', downloaded
+                            )
+                except PreviousBuildDirError:
+                    options.no_clean = True
+                    raise
+                finally:
+                    # Clean up
+                    if not options.no_clean:
+                        requirement_set.cleanup_files()
+
+        if options.target_dir:
+            ensure_dir(options.target_dir)
+
+            # Check both the purelib and platlib directories for installed
+            # packages that need to be moved to the target directory.
+            lib_dir_list = []
+
+            purelib_dir = distutils_scheme('', home=temp_target_dir)['purelib']
+            platlib_dir = distutils_scheme('', home=temp_target_dir)['platlib']
+
+            if os.path.exists(purelib_dir):
+                lib_dir_list.append(purelib_dir)
+            if os.path.exists(platlib_dir) and platlib_dir != purelib_dir:
+                lib_dir_list.append(platlib_dir)
+
+            for lib_dir in lib_dir_list:
+                for item in os.listdir(lib_dir):
+                    target_item_dir = os.path.join(options.target_dir, item)
+                    if os.path.exists(target_item_dir):
+                        if not options.upgrade:
+                            logger.warning(
+                                'Target directory %s already exists. Specify '
+                                '--upgrade to force replacement.',
+                                target_item_dir
+                            )
+                            continue
+                        if os.path.islink(target_item_dir):
+                            logger.warning(
+                                'Target directory %s already exists and is '
+                                'a link. Pip will not automatically replace '
+                                'links; please remove the link if replacement '
+                                'is desired.',
+                                target_item_dir
+                            )
+                            continue
+                        if os.path.isdir(target_item_dir):
+                            shutil.rmtree(target_item_dir)
+                        else:
+                            os.remove(target_item_dir)
+
+                    shutil.move(
+                        os.path.join(lib_dir, item),
+                        target_item_dir
+                    )
+            shutil.rmtree(temp_target_dir)
+        return requirement_set
+
+
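+# Guess the library directories (purelib and platlib) used by the install
+# above, so the "Successfully installed ..." message can look up the versions
+# that actually ended up on disk.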
+def get_lib_location_guesses(*args, **kwargs):
+    scheme = distutils_scheme('', *args, **kwargs)
+    return [scheme['purelib'], scheme['platlib']]
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/list.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/list.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f6995d70f58764f081a35b0905d78ccd5b7dad6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/list.py
@@ -0,0 +1,337 @@
+from __future__ import absolute_import
+
+import json
+import logging
+import warnings
+try:
+    from itertools import zip_longest
+except ImportError:
+    from itertools import izip_longest as zip_longest
+
+from pip._vendor import six
+
+from pip.basecommand import Command
+from pip.exceptions import CommandError
+from pip.index import PackageFinder
+from pip.utils import (
+    get_installed_distributions, dist_is_editable)
+from pip.utils.deprecation import RemovedInPip10Warning
+from pip.cmdoptions import make_option_group, index_group
+
+logger = logging.getLogger(__name__)
+
+
+class ListCommand(Command):
+    """
+    List installed packages, including editables.
+
+    Packages are listed in a case-insensitive sorted order.
+    """
+    name = 'list'
+    usage = """
+      %prog [options]"""
+    summary = 'List installed packages.'
+
+    def __init__(self, *args, **kw):
+        super(ListCommand, self).__init__(*args, **kw)
+
+        cmd_opts = self.cmd_opts
+
+        cmd_opts.add_option(
+            '-o', '--outdated',
+            action='store_true',
+            default=False,
+            help='List outdated packages')
+        cmd_opts.add_option(
+            '-u', '--uptodate',
+            action='store_true',
+            default=False,
+            help='List up-to-date packages')
+        cmd_opts.add_option(
+            '-e', '--editable',
+            action='store_true',
+            default=False,
+            help='List editable projects.')
+        cmd_opts.add_option(
+            '-l', '--local',
+            action='store_true',
+            default=False,
+            help=('If in a virtualenv that has global access, do not list '
+                  'globally-installed packages.'),
+        )
+        self.cmd_opts.add_option(
+            '--user',
+            dest='user',
+            action='store_true',
+            default=False,
+            help='Only output packages installed in user-site.')
+
+        cmd_opts.add_option(
+            '--pre',
+            action='store_true',
+            default=False,
+            help=("Include pre-release and development versions. By default, "
+                  "pip only finds stable versions."),
+        )
+
+        cmd_opts.add_option(
+            '--format',
+            action='store',
+            dest='list_format',
+            choices=('legacy', 'columns', 'freeze', 'json'),
+            help="Select the output format among: legacy (default), columns, "
+                 "freeze or json.",
+        )
+
+        cmd_opts.add_option(
+            '--not-required',
+            action='store_true',
+            dest='not_required',
+            help="List packages that are not dependencies of "
+                 "installed packages.",
+        )
+
+        index_opts = make_option_group(index_group, self.parser)
+
+        self.parser.insert_option_group(0, index_opts)
+        self.parser.insert_option_group(0, cmd_opts)
+
+    def _build_package_finder(self, options, index_urls, session):
+        """
+        Create a package finder appropriate to this list command.
+        """
+        return PackageFinder(
+            find_links=options.find_links,
+            index_urls=index_urls,
+            allow_all_prereleases=options.pre,
+            trusted_hosts=options.trusted_hosts,
+            process_dependency_links=options.process_dependency_links,
+            session=session,
+        )
+
+    def run(self, options, args):
+        if options.allow_external:
+            warnings.warn(
+                "--allow-external has been deprecated and will be removed in "
+                "the future. Due to changes in the repository protocol, it no "
+                "longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.allow_all_external:
+            warnings.warn(
+                "--allow-all-external has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.allow_unverified:
+            warnings.warn(
+                "--allow-unverified has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.list_format is None:
+            warnings.warn(
+                "The default format will switch to columns in the future. "
+                "You can use --format=(legacy|columns) (or define a "
+                "format=(legacy|columns) in your pip.conf under the [list] "
+                "section) to disable this warning.",
+                RemovedInPip10Warning,
+            )
+
+        if options.outdated and options.uptodate:
+            raise CommandError(
+                "Options --outdated and --uptodate cannot be combined.")
+
+        packages = get_installed_distributions(
+            local_only=options.local,
+            user_only=options.user,
+            editables_only=options.editable,
+        )
+
+        if options.outdated:
+            packages = self.get_outdated(packages, options)
+        elif options.uptodate:
+            packages = self.get_uptodate(packages, options)
+
+        if options.not_required:
+            packages = self.get_not_required(packages, options)
+
+        self.output_package_listing(packages, options)
+
+    def get_outdated(self, packages, options):
+        return [
+            dist for dist in self.iter_packages_latest_infos(packages, options)
+            if dist.latest_version > dist.parsed_version
+        ]
+
+    def get_uptodate(self, packages, options):
+        return [
+            dist for dist in self.iter_packages_latest_infos(packages, options)
+            if dist.latest_version == dist.parsed_version
+        ]
+
+    def get_not_required(self, packages, options):
+        dep_keys = set()
+        for dist in packages:
+            dep_keys.update(requirement.key for requirement in dist.requires())
+        return set(pkg for pkg in packages if pkg.key not in dep_keys)
+
+    def iter_packages_latest_infos(self, packages, options):
+        index_urls = [options.index_url] + options.extra_index_urls
+        if options.no_index:
+            logger.debug('Ignoring indexes: %s', ','.join(index_urls))
+            index_urls = []
+
+        dependency_links = []
+        for dist in packages:
+            if dist.has_metadata('dependency_links.txt'):
+                dependency_links.extend(
+                    dist.get_metadata_lines('dependency_links.txt'),
+                )
+
+        with self._build_session(options) as session:
+            finder = self._build_package_finder(options, index_urls, session)
+            finder.add_dependency_links(dependency_links)
+
+            for dist in packages:
+                typ = 'unknown'
+                all_candidates = finder.find_all_candidates(dist.key)
+                if not options.pre:
+                    # Remove prereleases
+                    all_candidates = [candidate for candidate in all_candidates
+                                      if not candidate.version.is_prerelease]
+
+                if not all_candidates:
+                    continue
+                best_candidate = max(all_candidates,
+                                     key=finder._candidate_sort_key)
+                remote_version = best_candidate.version
+                if best_candidate.location.is_wheel:
+                    typ = 'wheel'
+                else:
+                    typ = 'sdist'
+                # This is dirty but makes the rest of the code much cleaner
+                dist.latest_version = remote_version
+                dist.latest_filetype = typ
+                yield dist
+
+    def output_legacy(self, dist):
+        if dist_is_editable(dist):
+            return '%s (%s, %s)' % (
+                dist.project_name,
+                dist.version,
+                dist.location,
+            )
+        else:
+            return '%s (%s)' % (dist.project_name, dist.version)
+
+    def output_legacy_latest(self, dist):
+        return '%s - Latest: %s [%s]' % (
+            self.output_legacy(dist),
+            dist.latest_version,
+            dist.latest_filetype,
+        )
+
+    def output_package_listing(self, packages, options):
+        packages = sorted(
+            packages,
+            key=lambda dist: dist.project_name.lower(),
+        )
+        if options.list_format == 'columns' and packages:
+            data, header = format_for_columns(packages, options)
+            self.output_package_listing_columns(data, header)
+        elif options.list_format == 'freeze':
+            for dist in packages:
+                logger.info("%s==%s", dist.project_name, dist.version)
+        elif options.list_format == 'json':
+            logger.info(format_for_json(packages, options))
+        else:  # legacy
+            for dist in packages:
+                if options.outdated:
+                    logger.info(self.output_legacy_latest(dist))
+                else:
+                    logger.info(self.output_legacy(dist))
+
+    def output_package_listing_columns(self, data, header):
+        # Insert the header first so tabulate() sizes columns to fit it too
+        if len(data) > 0:
+            data.insert(0, header)
+
+        pkg_strings, sizes = tabulate(data)
+
+        # Create and add a separator.
+        if len(data) > 0:
+            pkg_strings.insert(1, " ".join(map(lambda x: '-' * x, sizes)))
+
+        for val in pkg_strings:
+            logger.info(val)
+
+
+def tabulate(vals):
+    # From pfmoore on GitHub:
+    # https://github.com/pypa/pip/issues/3651#issuecomment-216932564
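+    # Illustrative example:
+    #   tabulate([["Package", "Version"], ["pip", "9.0.1"]])
+    #   -> (['Package Version', 'pip     9.0.1  '], [7, 7])
+    # i.e. each cell is left-justified to the widest entry in its column.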
+    assert len(vals) > 0
+
+    sizes = [0] * max(len(x) for x in vals)
+    for row in vals:
+        sizes = [max(s, len(str(c))) for s, c in zip_longest(sizes, row)]
+
+    result = []
+    for row in vals:
+        display = " ".join([str(c).ljust(s) if c is not None else ''
+                            for s, c in zip_longest(sizes, row)])
+        result.append(display)
+
+    return result, sizes
+
+
+def format_for_columns(pkgs, options):
+    """
+    Convert the package data into something usable
+    by output_package_listing_columns.
+    """
+    running_outdated = options.outdated
+    # Adjust the header for the `pip list --outdated` case.
+    if running_outdated:
+        header = ["Package", "Version", "Latest", "Type"]
+    else:
+        header = ["Package", "Version"]
+
+    data = []
+    if any(dist_is_editable(x) for x in pkgs):
+        header.append("Location")
+
+    for proj in pkgs:
+        # if we're working on the 'outdated' list, separate out the
+        # latest_version and type
+        row = [proj.project_name, proj.version]
+
+        if running_outdated:
+            row.append(proj.latest_version)
+            row.append(proj.latest_filetype)
+
+        if dist_is_editable(proj):
+            row.append(proj.location)
+
+        data.append(row)
+
+    return data, header
+
+
+def format_for_json(packages, options):
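+    # Produces e.g. '[{"name": "pip", "version": "9.0.1"}]'; with --outdated,
+    # each entry also carries "latest_version" and "latest_filetype".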
+    data = []
+    for dist in packages:
+        info = {
+            'name': dist.project_name,
+            'version': six.text_type(dist.version),
+        }
+        if options.outdated:
+            info['latest_version'] = six.text_type(dist.latest_version)
+            info['latest_filetype'] = dist.latest_filetype
+        data.append(info)
+    return json.dumps(data)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/search.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/search.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd2ea8ad3e5dea9baa95aea331241533f1c21a42
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/search.py
@@ -0,0 +1,133 @@
+from __future__ import absolute_import
+
+import logging
+import sys
+import textwrap
+
+from pip.basecommand import Command, SUCCESS
+from pip.compat import OrderedDict
+from pip.download import PipXmlrpcTransport
+from pip.models import PyPI
+from pip.utils import get_terminal_size
+from pip.utils.logging import indent_log
+from pip.exceptions import CommandError
+from pip.status_codes import NO_MATCHES_FOUND
+from pip._vendor.packaging.version import parse as parse_version
+from pip._vendor import pkg_resources
+from pip._vendor.six.moves import xmlrpc_client
+
+
+logger = logging.getLogger(__name__)
+
+
+class SearchCommand(Command):
+    """Search for PyPI packages whose name or summary contains <query>."""
+    name = 'search'
+    usage = """
+      %prog [options] <query>"""
+    summary = 'Search PyPI for packages.'
+
+    def __init__(self, *args, **kw):
+        super(SearchCommand, self).__init__(*args, **kw)
+        self.cmd_opts.add_option(
+            '-i', '--index',
+            dest='index',
+            metavar='URL',
+            default=PyPI.pypi_url,
+            help='Base URL of Python Package Index (default %default)')
+
+        self.parser.insert_option_group(0, self.cmd_opts)
+
+    def run(self, options, args):
+        if not args:
+            raise CommandError('Missing required argument (search query).')
+        query = args
+        pypi_hits = self.search(query, options)
+        hits = transform_hits(pypi_hits)
+
+        terminal_width = None
+        if sys.stdout.isatty():
+            terminal_width = get_terminal_size()[0]
+
+        print_results(hits, terminal_width=terminal_width)
+        if pypi_hits:
+            return SUCCESS
+        return NO_MATCHES_FOUND
+
+    def search(self, query, options):
+        index_url = options.index
+        with self._build_session(options) as session:
+            transport = PipXmlrpcTransport(index_url, session)
+            pypi = xmlrpc_client.ServerProxy(index_url, transport)
+            hits = pypi.search({'name': query, 'summary': query}, 'or')
+            return hits
+
+
+def transform_hits(hits):
+    """
+    The list from PyPI is really a list of versions. We want a list of
+    packages with the list of versions stored inline. This converts the
+    list from PyPI into one we can use.
+    """
+    packages = OrderedDict()
+    for hit in hits:
+        name = hit['name']
+        summary = hit['summary']
+        version = hit['version']
+
+        if name not in packages.keys():
+            packages[name] = {
+                'name': name,
+                'summary': summary,
+                'versions': [version],
+            }
+        else:
+            packages[name]['versions'].append(version)
+
+            # if this is the highest version, use its summary
+            if version == highest_version(packages[name]['versions']):
+                packages[name]['summary'] = summary
+
+    return list(packages.values())
+
+
+def print_results(hits, name_column_width=None, terminal_width=None):
+    if not hits:
+        return
+    if name_column_width is None:
+        name_column_width = max([
+            len(hit['name']) + len(hit.get('versions', ['-'])[-1])
+            for hit in hits
+        ]) + 4
+
+    installed_packages = [p.project_name for p in pkg_resources.working_set]
+    for hit in hits:
+        name = hit['name']
+        summary = hit['summary'] or ''
+        version = hit.get('versions', ['-'])[-1]
+        if terminal_width is not None:
+            target_width = terminal_width - name_column_width - 5
+            if target_width > 10:
+                # wrap and indent summary to fit terminal
+                summary = textwrap.wrap(summary, target_width)
+                summary = ('\n' + ' ' * (name_column_width + 3)).join(summary)
+
+        line = '%-*s - %s' % (name_column_width,
+                              '%s (%s)' % (name, version), summary)
+        try:
+            logger.info(line)
+            if name in installed_packages:
+                dist = pkg_resources.get_distribution(name)
+                with indent_log():
+                    latest = highest_version(hit['versions'])
+                    if dist.version == latest:
+                        logger.info('INSTALLED: %s (latest)', dist.version)
+                    else:
+                        logger.info('INSTALLED: %s', dist.version)
+                        logger.info('LATEST:    %s', latest)
+        except UnicodeEncodeError:
+            pass
+
+
+def highest_version(versions):
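+    # PEP 440 ordering via parse_version, e.g. '1.10' > '1.9' > '1.0';
+    # a plain max() on the strings would incorrectly pick '1.9'.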
+    return max(versions, key=parse_version)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/show.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/show.py
new file mode 100644
index 0000000000000000000000000000000000000000..111c16d1a88dc293a33d3df2343f4d4d7b1384b7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/show.py
@@ -0,0 +1,154 @@
+from __future__ import absolute_import
+
+from email.parser import FeedParser
+import logging
+import os
+
+from pip.basecommand import Command
+from pip.status_codes import SUCCESS, ERROR
+from pip._vendor import pkg_resources
+from pip._vendor.packaging.utils import canonicalize_name
+
+
+logger = logging.getLogger(__name__)
+
+
+class ShowCommand(Command):
+    """Show information about one or more installed packages."""
+    name = 'show'
+    usage = """
+      %prog [options] <package> ..."""
+    summary = 'Show information about installed packages.'
+
+    def __init__(self, *args, **kw):
+        super(ShowCommand, self).__init__(*args, **kw)
+        self.cmd_opts.add_option(
+            '-f', '--files',
+            dest='files',
+            action='store_true',
+            default=False,
+            help='Show the full list of installed files for each package.')
+
+        self.parser.insert_option_group(0, self.cmd_opts)
+
+    def run(self, options, args):
+        if not args:
+            logger.warning('ERROR: Please provide a package name or names.')
+            return ERROR
+        query = args
+
+        results = search_packages_info(query)
+        if not print_results(
+                results, list_files=options.files, verbose=options.verbose):
+            return ERROR
+        return SUCCESS
+
+
+def search_packages_info(query):
+    """
+    Gather details from installed distributions and yield them as dicts of
+    distribution name, version, location, installed files, and related
+    metadata. Listing installed files requires a pip-generated
+    'installed-files.txt' in the distribution's '.egg-info' directory.
+    """
+    installed = {}
+    for p in pkg_resources.working_set:
+        installed[canonicalize_name(p.project_name)] = p
+
+    query_names = [canonicalize_name(name) for name in query]
+
+    for dist in [installed[pkg] for pkg in query_names if pkg in installed]:
+        package = {
+            'name': dist.project_name,
+            'version': dist.version,
+            'location': dist.location,
+            'requires': [dep.project_name for dep in dist.requires()],
+        }
+        file_list = None
+        metadata = None
+        if isinstance(dist, pkg_resources.DistInfoDistribution):
+            # RECORD files should be part of the .dist-info metadata
+            if dist.has_metadata('RECORD'):
+                lines = dist.get_metadata_lines('RECORD')
+                paths = [l.split(',')[0] for l in lines]
+                paths = [os.path.join(dist.location, p) for p in paths]
+                file_list = [os.path.relpath(p, dist.location) for p in paths]
+
+            if dist.has_metadata('METADATA'):
+                metadata = dist.get_metadata('METADATA')
+        else:
+            # Otherwise, fall back to pip's 'installed-files.txt' log for
+            # .egg-info distributions
+            if dist.has_metadata('installed-files.txt'):
+                paths = dist.get_metadata_lines('installed-files.txt')
+                paths = [os.path.join(dist.egg_info, p) for p in paths]
+                file_list = [os.path.relpath(p, dist.location) for p in paths]
+
+            if dist.has_metadata('PKG-INFO'):
+                metadata = dist.get_metadata('PKG-INFO')
+
+        if dist.has_metadata('entry_points.txt'):
+            entry_points = dist.get_metadata_lines('entry_points.txt')
+            package['entry_points'] = entry_points
+
+        if dist.has_metadata('INSTALLER'):
+            for line in dist.get_metadata_lines('INSTALLER'):
+                if line.strip():
+                    package['installer'] = line.strip()
+                    break
+
+        # @todo: Should pkg_resources.Distribution have a
+        # `get_pkg_info` method?
+        feed_parser = FeedParser()
+        feed_parser.feed(metadata)
+        pkg_info_dict = feed_parser.close()
+        for key in ('metadata-version', 'summary',
+                    'home-page', 'author', 'author-email', 'license'):
+            package[key] = pkg_info_dict.get(key)
+
+        # It looks like FeedParser cannot deal with repeated headers
+        classifiers = []
+        for line in metadata.splitlines():
+            if line.startswith('Classifier: '):
+                classifiers.append(line[len('Classifier: '):])
+        package['classifiers'] = classifiers
+
+        if file_list:
+            package['files'] = sorted(file_list)
+        yield package
+
+
+def print_results(distributions, list_files=False, verbose=False):
+    """
+    Print the information from the installed distributions found.
+    """
+    results_printed = False
+    for i, dist in enumerate(distributions):
+        results_printed = True
+        if i > 0:
+            logger.info("---")
+        logger.info("Name: %s", dist.get('name', ''))
+        logger.info("Version: %s", dist.get('version', ''))
+        logger.info("Summary: %s", dist.get('summary', ''))
+        logger.info("Home-page: %s", dist.get('home-page', ''))
+        logger.info("Author: %s", dist.get('author', ''))
+        logger.info("Author-email: %s", dist.get('author-email', ''))
+        logger.info("License: %s", dist.get('license', ''))
+        logger.info("Location: %s", dist.get('location', ''))
+        logger.info("Requires: %s", ', '.join(dist.get('requires', [])))
+        if verbose:
+            logger.info("Metadata-Version: %s",
+                        dist.get('metadata-version', ''))
+            logger.info("Installer: %s", dist.get('installer', ''))
+            logger.info("Classifiers:")
+            for classifier in dist.get('classifiers', []):
+                logger.info("  %s", classifier)
+            logger.info("Entry-points:")
+            for entry in dist.get('entry_points', []):
+                logger.info("  %s", entry.strip())
+        if list_files:
+            logger.info("Files:")
+            for line in dist.get('files', []):
+                logger.info("  %s", line.strip())
+            if "files" not in dist:
+                logger.info("Cannot locate installed-files.txt")
+    return results_printed
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/uninstall.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/uninstall.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ba1a7c65d92c70f65c48783a00ce2d48e2983af
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/uninstall.py
@@ -0,0 +1,76 @@
+from __future__ import absolute_import
+
+import pip
+from pip.wheel import WheelCache
+from pip.req import InstallRequirement, RequirementSet, parse_requirements
+from pip.basecommand import Command
+from pip.exceptions import InstallationError
+
+
+class UninstallCommand(Command):
+    """
+    Uninstall packages.
+
+    pip is able to uninstall most installed packages. Known exceptions are:
+
+    - Pure distutils packages installed with ``python setup.py install``, which
+      leave behind no metadata to determine what files were installed.
+    - Script wrappers installed by ``python setup.py develop``.
+    """
+    name = 'uninstall'
+    usage = """
+      %prog [options] <package> ...
+      %prog [options] -r <requirements file> ..."""
+    summary = 'Uninstall packages.'
+
+    def __init__(self, *args, **kw):
+        super(UninstallCommand, self).__init__(*args, **kw)
+        self.cmd_opts.add_option(
+            '-r', '--requirement',
+            dest='requirements',
+            action='append',
+            default=[],
+            metavar='file',
+            help='Uninstall all the packages listed in the given requirements '
+                 'file.  This option can be used multiple times.',
+        )
+        self.cmd_opts.add_option(
+            '-y', '--yes',
+            dest='yes',
+            action='store_true',
+            help="Don't ask for confirmation of uninstall deletions.")
+
+        self.parser.insert_option_group(0, self.cmd_opts)
+
+    def run(self, options, args):
+        with self._build_session(options) as session:
+            format_control = pip.index.FormatControl(set(), set())
+            wheel_cache = WheelCache(options.cache_dir, format_control)
+            requirement_set = RequirementSet(
+                build_dir=None,
+                src_dir=None,
+                download_dir=None,
+                isolated=options.isolated_mode,
+                session=session,
+                wheel_cache=wheel_cache,
+            )
+            for name in args:
+                requirement_set.add_requirement(
+                    InstallRequirement.from_line(
+                        name, isolated=options.isolated_mode,
+                        wheel_cache=wheel_cache
+                    )
+                )
+            for filename in options.requirements:
+                for req in parse_requirements(
+                        filename,
+                        options=options,
+                        session=session,
+                        wheel_cache=wheel_cache):
+                    requirement_set.add_requirement(req)
+            if not requirement_set.has_requirements:
+                raise InstallationError(
+                    'You must give at least one requirement to %(name)s (see '
+                    '"pip help %(name)s")' % dict(name=self.name)
+                )
+            requirement_set.uninstall(auto_confirm=options.yes)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/wheel.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/wheel.py
new file mode 100644
index 0000000000000000000000000000000000000000..70e95eb8e0bf997b86cb433ef97c9ddf5b00d337
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/commands/wheel.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import logging
+import os
+import warnings
+
+from pip.basecommand import RequirementCommand
+from pip.exceptions import CommandError, PreviousBuildDirError
+from pip.req import RequirementSet
+from pip.utils import import_or_raise
+from pip.utils.build import BuildDirectory
+from pip.utils.deprecation import RemovedInPip10Warning
+from pip.wheel import WheelCache, WheelBuilder
+from pip import cmdoptions
+
+
+logger = logging.getLogger(__name__)
+
+
+class WheelCommand(RequirementCommand):
+    """
+    Build Wheel archives for your requirements and dependencies.
+
+    Wheel is a built-package format, and offers the advantage of not
+    recompiling your software during every install. For more details, see the
+    wheel docs: https://wheel.readthedocs.io/en/latest/
+
+    Requirements: setuptools>=0.8, and wheel.
+
+    'pip wheel' uses the bdist_wheel setuptools extension from the wheel
+    package to build individual wheels.
+
+    """
+
+    name = 'wheel'
+    usage = """
+      %prog [options] <requirement specifier> ...
+      %prog [options] -r <requirements file> ...
+      %prog [options] [-e] <vcs project url> ...
+      %prog [options] [-e] <local project path> ...
+      %prog [options] <archive url/path> ..."""
+
+    summary = 'Build wheels from your requirements.'
+
+    def __init__(self, *args, **kw):
+        super(WheelCommand, self).__init__(*args, **kw)
+
+        cmd_opts = self.cmd_opts
+
+        cmd_opts.add_option(
+            '-w', '--wheel-dir',
+            dest='wheel_dir',
+            metavar='dir',
+            default=os.curdir,
+            help=("Build wheels into <dir>, where the default is the "
+                  "current working directory."),
+        )
+        cmd_opts.add_option(cmdoptions.use_wheel())
+        cmd_opts.add_option(cmdoptions.no_use_wheel())
+        cmd_opts.add_option(cmdoptions.no_binary())
+        cmd_opts.add_option(cmdoptions.only_binary())
+        cmd_opts.add_option(
+            '--build-option',
+            dest='build_options',
+            metavar='options',
+            action='append',
+            help="Extra arguments to be supplied to 'setup.py bdist_wheel'.")
+        cmd_opts.add_option(cmdoptions.constraints())
+        cmd_opts.add_option(cmdoptions.editable())
+        cmd_opts.add_option(cmdoptions.requirements())
+        cmd_opts.add_option(cmdoptions.src())
+        cmd_opts.add_option(cmdoptions.ignore_requires_python())
+        cmd_opts.add_option(cmdoptions.no_deps())
+        cmd_opts.add_option(cmdoptions.build_dir())
+
+        cmd_opts.add_option(
+            '--global-option',
+            dest='global_options',
+            action='append',
+            metavar='options',
+            help="Extra global options to be supplied to the setup.py "
+            "call before the 'bdist_wheel' command.")
+
+        cmd_opts.add_option(
+            '--pre',
+            action='store_true',
+            default=False,
+            help=("Include pre-release and development versions. By default, "
+                  "pip only finds stable versions."),
+        )
+
+        cmd_opts.add_option(cmdoptions.no_clean())
+        cmd_opts.add_option(cmdoptions.require_hashes())
+
+        index_opts = cmdoptions.make_option_group(
+            cmdoptions.index_group,
+            self.parser,
+        )
+
+        self.parser.insert_option_group(0, index_opts)
+        self.parser.insert_option_group(0, cmd_opts)
+
+    def check_required_packages(self):
+        import_or_raise(
+            'wheel.bdist_wheel',
+            CommandError,
+            "'pip wheel' requires the 'wheel' package. To fix this, run: "
+            "pip install wheel"
+        )
+        pkg_resources = import_or_raise(
+            'pkg_resources',
+            CommandError,
+            "'pip wheel' requires setuptools >= 0.8 for dist-info support."
+            " To fix this, run: pip install --upgrade setuptools"
+        )
+        if not hasattr(pkg_resources, 'DistInfoDistribution'):
+            raise CommandError(
+                "'pip wheel' requires setuptools >= 0.8 for dist-info "
+                "support. To fix this, run: pip install --upgrade "
+                "setuptools"
+            )
+
+    def run(self, options, args):
+        self.check_required_packages()
+        cmdoptions.resolve_wheel_no_use_binary(options)
+        cmdoptions.check_install_build_global(options)
+
+        if options.allow_external:
+            warnings.warn(
+                "--allow-external has been deprecated and will be removed in "
+                "the future. Due to changes in the repository protocol, it no "
+                "longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.allow_all_external:
+            warnings.warn(
+                "--allow-all-external has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if options.allow_unverified:
+            warnings.warn(
+                "--allow-unverified has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        index_urls = [options.index_url] + options.extra_index_urls
+        if options.no_index:
+            logger.debug('Ignoring indexes: %s', ','.join(index_urls))
+            index_urls = []
+
+        if options.build_dir:
+            options.build_dir = os.path.abspath(options.build_dir)
+
+        options.src_dir = os.path.abspath(options.src_dir)
+
+        with self._build_session(options) as session:
+            finder = self._build_package_finder(options, session)
+            build_delete = (not (options.no_clean or options.build_dir))
+            wheel_cache = WheelCache(options.cache_dir, options.format_control)
+            with BuildDirectory(options.build_dir,
+                                delete=build_delete) as build_dir:
+                requirement_set = RequirementSet(
+                    build_dir=build_dir,
+                    src_dir=options.src_dir,
+                    download_dir=None,
+                    ignore_dependencies=options.ignore_dependencies,
+                    ignore_installed=True,
+                    ignore_requires_python=options.ignore_requires_python,
+                    isolated=options.isolated_mode,
+                    session=session,
+                    wheel_cache=wheel_cache,
+                    wheel_download_dir=options.wheel_dir,
+                    require_hashes=options.require_hashes
+                )
+
+                self.populate_requirement_set(
+                    requirement_set, args, options, finder, session, self.name,
+                    wheel_cache
+                )
+
+                if not requirement_set.has_requirements:
+                    return
+
+                try:
+                    # build wheels
+                    wb = WheelBuilder(
+                        requirement_set,
+                        finder,
+                        build_options=options.build_options or [],
+                        global_options=options.global_options or [],
+                    )
+                    if not wb.build():
+                        raise CommandError(
+                            "Failed to build one or more wheels"
+                        )
+                except PreviousBuildDirError:
+                    options.no_clean = True
+                    raise
+                finally:
+                    if not options.no_clean:
+                        requirement_set.cleanup_files()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/compat/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/compat/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..099672cd1ad42d6b54c4343a78b88b2d1fcfc85c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/compat/__init__.py
@@ -0,0 +1,164 @@
+"""Stuff that differs in different Python versions and platform
+distributions."""
+from __future__ import absolute_import, division
+
+import os
+import sys
+
+from pip._vendor.six import text_type
+
+try:
+    from logging.config import dictConfig as logging_dictConfig
+except ImportError:
+    from pip.compat.dictconfig import dictConfig as logging_dictConfig
+
+try:
+    from collections import OrderedDict
+except ImportError:
+    from pip._vendor.ordereddict import OrderedDict
+
+try:
+    import ipaddress
+except ImportError:
+    try:
+        from pip._vendor import ipaddress
+    except ImportError:
+        import ipaddr as ipaddress
+        ipaddress.ip_address = ipaddress.IPAddress
+        ipaddress.ip_network = ipaddress.IPNetwork
+
+
+try:
+    import sysconfig
+
+    def get_stdlib():
+        paths = [
+            sysconfig.get_path("stdlib"),
+            sysconfig.get_path("platstdlib"),
+        ]
+        return set(filter(bool, paths))
+except ImportError:
+    from distutils import sysconfig
+
+    def get_stdlib():
+        paths = [
+            sysconfig.get_python_lib(standard_lib=True),
+            sysconfig.get_python_lib(standard_lib=True, plat_specific=True),
+        ]
+        return set(filter(bool, paths))
+
+
+__all__ = [
+    "logging_dictConfig", "ipaddress", "uses_pycache", "console_to_str",
+    "native_str", "get_path_uid", "stdlib_pkgs", "WINDOWS", "samefile",
+    "OrderedDict",
+]
+
+
+if sys.version_info >= (3, 4):
+    uses_pycache = True
+    from importlib.util import cache_from_source
+else:
+    import imp
+    uses_pycache = hasattr(imp, 'cache_from_source')
+    if uses_pycache:
+        cache_from_source = imp.cache_from_source
+    else:
+        cache_from_source = None
+
+
+if sys.version_info >= (3,):
+    def console_to_str(s):
+        try:
+            return s.decode(sys.__stdout__.encoding)
+        except UnicodeDecodeError:
+            return s.decode('utf_8')
+
+    def native_str(s, replace=False):
+        if isinstance(s, bytes):
+            return s.decode('utf-8', 'replace' if replace else 'strict')
+        return s
+
+else:
+    def console_to_str(s):
+        return s
+
+    def native_str(s, replace=False):
+        # Replace is ignored -- unicode to UTF-8 can't fail
+        if isinstance(s, text_type):
+            return s.encode('utf-8')
+        return s
+
+
+def total_seconds(td):
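+    # e.g. total_seconds(timedelta(minutes=1, microseconds=500000)) == 60.5;
+    # the fallback reproduces timedelta.total_seconds() on Python versions
+    # that lack it.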
+    if hasattr(td, "total_seconds"):
+        return td.total_seconds()
+    else:
+        val = td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6
+        return val / 10 ** 6
+
+
+def get_path_uid(path):
+    """
+    Return path's uid.
+
+    Does not follow symlinks:
+        https://github.com/pypa/pip/pull/935#discussion_r5307003
+
+    This function lives in compat because of differences on AIX and
+    Jython that should eventually go away.
+
+    :raises OSError: When path is a symlink or can't be read.
+    """
+    if hasattr(os, 'O_NOFOLLOW'):
+        fd = os.open(path, os.O_RDONLY | os.O_NOFOLLOW)
+        file_uid = os.fstat(fd).st_uid
+        os.close(fd)
+    else:  # AIX and Jython
+        # WARNING: time-of-check/time-of-use race, but it is the best we
+        # can do without O_NOFOLLOW
+        if not os.path.islink(path):
+            # older versions of Jython don't have `os.fstat`
+            file_uid = os.stat(path).st_uid
+        else:
+            # raise OSError for parity with os.O_NOFOLLOW above
+            raise OSError(
+                "%s is a symlink; Will not return uid for symlinks" % path
+            )
+    return file_uid
+
+
+def expanduser(path):
+    """
+    Expand ~ and ~user constructions.
+
+    Includes a workaround for http://bugs.python.org/issue14768
+    """
+    expanded = os.path.expanduser(path)
+    if path.startswith('~/') and expanded.startswith('//'):
+        expanded = expanded[1:]
+    return expanded
+
+
+# Packages in the stdlib that may have installation metadata, but should not
+# be considered 'installed'. This could theoretically be determined from
+# dist.location (py27: `sysconfig.get_paths()['stdlib']`,
+# py26: sysconfig.get_config_vars('LIBDEST')), but platform variation may make
+# this unreliable, so the list is hard-coded.
+stdlib_pkgs = ('python', 'wsgiref')
+if sys.version_info >= (2, 7):
+    stdlib_pkgs += ('argparse',)
+
+
+# windows detection, covers cpython and ironpython
+WINDOWS = (sys.platform.startswith("win") or
+           (sys.platform == 'cli' and os.name == 'nt'))
+
+
+def samefile(file1, file2):
+    """Provide an alternative for os.path.samefile on Windows/Python2"""
+    if hasattr(os.path, 'samefile'):
+        return os.path.samefile(file1, file2)
+    else:
+        path1 = os.path.normcase(os.path.abspath(file1))
+        path2 = os.path.normcase(os.path.abspath(file2))
+        return path1 == path2
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/compat/dictconfig.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/compat/dictconfig.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec684aac2033a5d9897e5a9330d2090372ddf298
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/compat/dictconfig.py
@@ -0,0 +1,565 @@
+# This is a copy of the Python logging.config.dictconfig module,
+# reproduced with permission. It is provided here for backwards
+# compatibility for Python versions prior to 2.7.
+#
+# Copyright 2009-2010 by Vinay Sajip. All Rights Reserved.
+#
+# Permission to use, copy, modify, and distribute this software and its
+# documentation for any purpose and without fee is hereby granted,
+# provided that the above copyright notice appear in all copies and that
+# both that copyright notice and this permission notice appear in
+# supporting documentation, and that the name of Vinay Sajip
+# not be used in advertising or publicity pertaining to distribution
+# of the software without specific, written prior permission.
+# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
+# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+from __future__ import absolute_import
+
+import logging.handlers
+import re
+import sys
+import types
+
+from pip._vendor import six
+
+# flake8: noqa
+
+IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I)
+
+
+def valid_ident(s):
+    m = IDENTIFIER.match(s)
+    if not m:
+        raise ValueError('Not a valid Python identifier: %r' % s)
+    return True
+
+#
+# This function is defined in logging only in recent versions of Python
+#
+try:
+    from logging import _checkLevel
+except ImportError:
+    def _checkLevel(level):
+        if isinstance(level, int):
+            rv = level
+        elif str(level) == level:
+            if level not in logging._levelNames:
+                raise ValueError('Unknown level: %r' % level)
+            rv = logging._levelNames[level]
+        else:
+            raise TypeError('Level not an integer or a '
+                            'valid string: %r' % level)
+        return rv
+
+# The ConvertingXXX classes are wrappers around standard Python containers,
+# and they serve to convert any suitable values in the container. The
+# conversion converts base dicts, lists and tuples to their wrapped
+# equivalents, whereas strings which match a conversion format are converted
+# appropriately.
+#
+# Each wrapper should have a configurator attribute holding the actual
+# configurator to use for conversion.
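+#
+# For example, with a logging.config-style dict a string value such as
+# 'ext://sys.stderr' resolves to the sys.stderr object, while
+# 'cfg://handlers.console' is looked up inside the configuration dict itself
+# (see ext_convert and cfg_convert below).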
+
+
+class ConvertingDict(dict):
+    """A converting dictionary wrapper."""
+
+    def __getitem__(self, key):
+        value = dict.__getitem__(self, key)
+        result = self.configurator.convert(value)
+        # If the converted value is different, save for next time
+        if value is not result:
+            self[key] = result
+            if type(result) in (ConvertingDict, ConvertingList,
+                                ConvertingTuple):
+                result.parent = self
+                result.key = key
+        return result
+
+    def get(self, key, default=None):
+        value = dict.get(self, key, default)
+        result = self.configurator.convert(value)
+        # If the converted value is different, save for next time
+        if value is not result:
+            self[key] = result
+            if type(result) in (ConvertingDict, ConvertingList,
+                                ConvertingTuple):
+                result.parent = self
+                result.key = key
+        return result
+
+    def pop(self, key, default=None):
+        value = dict.pop(self, key, default)
+        result = self.configurator.convert(value)
+        if value is not result:
+            if type(result) in (ConvertingDict, ConvertingList,
+                                ConvertingTuple):
+                result.parent = self
+                result.key = key
+        return result
+
+
+class ConvertingList(list):
+    """A converting list wrapper."""
+    def __getitem__(self, key):
+        value = list.__getitem__(self, key)
+        result = self.configurator.convert(value)
+        # If the converted value is different, save for next time
+        if value is not result:
+            self[key] = result
+            if type(result) in (ConvertingDict, ConvertingList,
+                                ConvertingTuple):
+                result.parent = self
+                result.key = key
+        return result
+
+    def pop(self, idx=-1):
+        value = list.pop(self, idx)
+        result = self.configurator.convert(value)
+        if value is not result:
+            if type(result) in (ConvertingDict, ConvertingList,
+                                ConvertingTuple):
+                result.parent = self
+        return result
+
+
+class ConvertingTuple(tuple):
+    """A converting tuple wrapper."""
+    def __getitem__(self, key):
+        value = tuple.__getitem__(self, key)
+        result = self.configurator.convert(value)
+        if value is not result:
+            if type(result) in (ConvertingDict, ConvertingList,
+                                ConvertingTuple):
+                result.parent = self
+                result.key = key
+        return result
+
+
+class BaseConfigurator(object):
+    """
+    The configurator base class which defines some useful defaults.
+    """
+
+    CONVERT_PATTERN = re.compile(r'^(?P<prefix>[a-z]+)://(?P<suffix>.*)$')
+
+    WORD_PATTERN = re.compile(r'^\s*(\w+)\s*')
+    DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*')
+    INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*')
+    DIGIT_PATTERN = re.compile(r'^\d+$')
+
+    value_converters = {
+        'ext' : 'ext_convert',
+        'cfg' : 'cfg_convert',
+    }
+
+    # We might want to use a different one, e.g. importlib
+    importer = __import__
+
+    def __init__(self, config):
+        self.config = ConvertingDict(config)
+        self.config.configurator = self
+
+    def resolve(self, s):
+        """
+        Resolve strings to objects using standard import and attribute
+        syntax.
+        """
+        name = s.split('.')
+        used = name.pop(0)
+        try:
+            found = self.importer(used)
+            for frag in name:
+                used += '.' + frag
+                try:
+                    found = getattr(found, frag)
+                except AttributeError:
+                    self.importer(used)
+                    found = getattr(found, frag)
+            return found
+        except ImportError:
+            e, tb = sys.exc_info()[1:]
+            v = ValueError('Cannot resolve %r: %s' % (s, e))
+            v.__cause__, v.__traceback__ = e, tb
+            raise v
+
+    def ext_convert(self, value):
+        """Default converter for the ext:// protocol."""
+        return self.resolve(value)
+
+    def cfg_convert(self, value):
+        """Default converter for the cfg:// protocol."""
+        rest = value
+        m = self.WORD_PATTERN.match(rest)
+        if m is None:
+            raise ValueError("Unable to convert %r" % value)
+        else:
+            rest = rest[m.end():]
+            d = self.config[m.groups()[0]]
+            while rest:
+                m = self.DOT_PATTERN.match(rest)
+                if m:
+                    d = d[m.groups()[0]]
+                else:
+                    m = self.INDEX_PATTERN.match(rest)
+                    if m:
+                        idx = m.groups()[0]
+                        if not self.DIGIT_PATTERN.match(idx):
+                            d = d[idx]
+                        else:
+                            try:
+                                n = int(idx)  # try as number first (most likely)
+                                d = d[n]
+                            except TypeError:
+                                d = d[idx]
+                if m:
+                    rest = rest[m.end():]
+                else:
+                    raise ValueError('Unable to convert '
+                                     '%r at %r' % (value, rest))
+        # rest should be empty
+        return d
+
+    def convert(self, value):
+        """
+        Convert values to an appropriate type. dicts, lists and tuples are
+        replaced by their converting alternatives. Strings are checked to
+        see if they have a conversion format and are converted if they do.
+        """
+        if not isinstance(value, ConvertingDict) and isinstance(value, dict):
+            value = ConvertingDict(value)
+            value.configurator = self
+        elif not isinstance(value, ConvertingList) and isinstance(value, list):
+            value = ConvertingList(value)
+            value.configurator = self
+        elif not isinstance(value, ConvertingTuple) and\
+                 isinstance(value, tuple):
+            value = ConvertingTuple(value)
+            value.configurator = self
+        elif isinstance(value, six.string_types):  # str for py3k
+            m = self.CONVERT_PATTERN.match(value)
+            if m:
+                d = m.groupdict()
+                prefix = d['prefix']
+                converter = self.value_converters.get(prefix, None)
+                if converter:
+                    suffix = d['suffix']
+                    converter = getattr(self, converter)
+                    value = converter(suffix)
+        return value
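+
+    # Illustrative sketch (not part of the original module) of the two
+    # conversion formats handled by convert(); the config keys below are
+    # made-up examples:
+    #
+    #   'ext://sys.stdout'       -> self.resolve('sys.stdout'), i.e. sys.stdout
+    #   'cfg://handlers.console' -> self.config['handlers']['console']
+    #   'cfg://root.handlers[0]' -> self.config['root']['handlers'][0]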
+
+    def configure_custom(self, config):
+        """Configure an object with a user-supplied factory."""
+        c = config.pop('()')
+        if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType:
+            c = self.resolve(c)
+        props = config.pop('.', None)
+        # Check for valid identifiers
+        kwargs = dict((k, config[k]) for k in config if valid_ident(k))
+        result = c(**kwargs)
+        if props:
+            for name, value in props.items():
+                setattr(result, name, value)
+        return result
+
+    def as_tuple(self, value):
+        """Utility function which converts lists to tuples."""
+        if isinstance(value, list):
+            value = tuple(value)
+        return value
+
+
+class DictConfigurator(BaseConfigurator):
+    """
+    Configure logging using a dictionary-like object to describe the
+    configuration.
+    """
+
+    def configure(self):
+        """Do the configuration."""
+
+        config = self.config
+        if 'version' not in config:
+            raise ValueError("dictionary doesn't specify a version")
+        if config['version'] != 1:
+            raise ValueError("Unsupported version: %s" % config['version'])
+        incremental = config.pop('incremental', False)
+        EMPTY_DICT = {}
+        logging._acquireLock()
+        try:
+            if incremental:
+                handlers = config.get('handlers', EMPTY_DICT)
+                # incremental handler config only if handler name
+                # ties in to logging._handlers (Python 2.7)
+                if sys.version_info[:2] == (2, 7):
+                    for name in handlers:
+                        if name not in logging._handlers:
+                            raise ValueError('No handler found with '
+                                             'name %r'  % name)
+                        else:
+                            try:
+                                handler = logging._handlers[name]
+                                handler_config = handlers[name]
+                                level = handler_config.get('level', None)
+                                if level:
+                                    handler.setLevel(_checkLevel(level))
+                            except StandardError as e:
+                                raise ValueError('Unable to configure handler '
+                                                 '%r: %s' % (name, e))
+                loggers = config.get('loggers', EMPTY_DICT)
+                for name in loggers:
+                    try:
+                        self.configure_logger(name, loggers[name], True)
+                    except StandardError as e:
+                        raise ValueError('Unable to configure logger '
+                                         '%r: %s' % (name, e))
+                root = config.get('root', None)
+                if root:
+                    try:
+                        self.configure_root(root, True)
+                    except StandardError as e:
+                        raise ValueError('Unable to configure root '
+                                         'logger: %s' % e)
+            else:
+                disable_existing = config.pop('disable_existing_loggers', True)
+
+                logging._handlers.clear()
+                del logging._handlerList[:]
+
+                # Do formatters first - they don't refer to anything else
+                formatters = config.get('formatters', EMPTY_DICT)
+                for name in formatters:
+                    try:
+                        formatters[name] = self.configure_formatter(
+                                                            formatters[name])
+                    except StandardError as e:
+                        raise ValueError('Unable to configure '
+                                         'formatter %r: %s' % (name, e))
+                # Next, do filters - they don't refer to anything else, either
+                filters = config.get('filters', EMPTY_DICT)
+                for name in filters:
+                    try:
+                        filters[name] = self.configure_filter(filters[name])
+                    except StandardError as e:
+                        raise ValueError('Unable to configure '
+                                         'filter %r: %s' % (name, e))
+
+                # Next, do handlers - they refer to formatters and filters
+                # As handlers can refer to other handlers, sort the keys
+                # to allow a deterministic order of configuration
+                handlers = config.get('handlers', EMPTY_DICT)
+                for name in sorted(handlers):
+                    try:
+                        handler = self.configure_handler(handlers[name])
+                        handler.name = name
+                        handlers[name] = handler
+                    except StandardError as e:
+                        raise ValueError('Unable to configure handler '
+                                         '%r: %s' % (name, e))
+                # Next, do loggers - they refer to handlers and filters
+
+                # we don't want to lose the existing loggers,
+                # since other threads may have pointers to them.
+                # existing is set to contain all existing loggers,
+                # and as we go through the new configuration we
+                # remove any which are configured. At the end,
+                # what's left in existing is the set of loggers
+                # which were in the previous configuration but
+                # which are not in the new configuration.
+                root = logging.root
+                existing = list(root.manager.loggerDict)
+                # The list needs to be sorted so that we can
+                # avoid disabling child loggers of explicitly
+                # named loggers. With a sorted list it is easier
+                # to find the child loggers.
+                existing.sort()
+                # We'll keep the list of existing loggers
+                # which are children of named loggers here...
+                child_loggers = []
+                # now set up the new ones...
+                loggers = config.get('loggers', EMPTY_DICT)
+                for name in loggers:
+                    if name in existing:
+                        i = existing.index(name)
+                        prefixed = name + "."
+                        pflen = len(prefixed)
+                        num_existing = len(existing)
+                        i = i + 1  # look at the entry after name
+                        while (i < num_existing) and\
+                              (existing[i][:pflen] == prefixed):
+                            child_loggers.append(existing[i])
+                            i = i + 1
+                        existing.remove(name)
+                    try:
+                        self.configure_logger(name, loggers[name])
+                    except StandardError as e:
+                        raise ValueError('Unable to configure logger '
+                                         '%r: %s' % (name, e))
+
+                # Disable any old loggers. There's no point deleting
+                # them as other threads may continue to hold references
+                # and by disabling them, you stop them doing any logging.
+                # However, don't disable children of named loggers, as that's
+                # probably not what was intended by the user.
+                for log in existing:
+                    logger = root.manager.loggerDict[log]
+                    if log in child_loggers:
+                        logger.level = logging.NOTSET
+                        logger.handlers = []
+                        logger.propagate = True
+                    elif disable_existing:
+                        logger.disabled = True
+
+                # And finally, do the root logger
+                root = config.get('root', None)
+                if root:
+                    try:
+                        self.configure_root(root)
+                    except StandardError as e:
+                        raise ValueError('Unable to configure root '
+                                         'logger: %s' % e)
+        finally:
+            logging._releaseLock()
+
+    def configure_formatter(self, config):
+        """Configure a formatter from a dictionary."""
+        if '()' in config:
+            factory = config['()']  # for use in exception handler
+            try:
+                result = self.configure_custom(config)
+            except TypeError as te:
+                if "'format'" not in str(te):
+                    raise
+                # Name of parameter changed from fmt to format.
+                # Retry with old name.
+                # This is so that code can be used with older Python versions
+                # (e.g. by Django).
+                config['fmt'] = config.pop('format')
+                config['()'] = factory
+                result = self.configure_custom(config)
+        else:
+            fmt = config.get('format', None)
+            dfmt = config.get('datefmt', None)
+            result = logging.Formatter(fmt, dfmt)
+        return result
+
+    def configure_filter(self, config):
+        """Configure a filter from a dictionary."""
+        if '()' in config:
+            result = self.configure_custom(config)
+        else:
+            name = config.get('name', '')
+            result = logging.Filter(name)
+        return result
+
+    def add_filters(self, filterer, filters):
+        """Add filters to a filterer from a list of names."""
+        for f in filters:
+            try:
+                filterer.addFilter(self.config['filters'][f])
+            except StandardError as e:
+                raise ValueError('Unable to add filter %r: %s' % (f, e))
+
+    def configure_handler(self, config):
+        """Configure a handler from a dictionary."""
+        formatter = config.pop('formatter', None)
+        if formatter:
+            try:
+                formatter = self.config['formatters'][formatter]
+            except StandardError as e:
+                raise ValueError('Unable to set formatter '
+                                 '%r: %s' % (formatter, e))
+        level = config.pop('level', None)
+        filters = config.pop('filters', None)
+        if '()' in config:
+            c = config.pop('()')
+            if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType:
+                c = self.resolve(c)
+            factory = c
+        else:
+            klass = self.resolve(config.pop('class'))
+            # Special case for handler which refers to another handler
+            if issubclass(klass, logging.handlers.MemoryHandler) and\
+                'target' in config:
+                try:
+                    config['target'] = self.config['handlers'][config['target']]
+                except StandardError as e:
+                    raise ValueError('Unable to set target handler '
+                                     '%r: %s' % (config['target'], e))
+            elif issubclass(klass, logging.handlers.SMTPHandler) and\
+                'mailhost' in config:
+                config['mailhost'] = self.as_tuple(config['mailhost'])
+            elif issubclass(klass, logging.handlers.SysLogHandler) and\
+                'address' in config:
+                config['address'] = self.as_tuple(config['address'])
+            factory = klass
+        kwargs = dict((k, config[k]) for k in config if valid_ident(k))
+        try:
+            result = factory(**kwargs)
+        except TypeError as te:
+            if "'stream'" not in str(te):
+                raise
+            # The argument name changed from strm to stream
+            # Retry with old name.
+            # This is so that code can be used with older Python versions
+            #(e.g. by Django)
+            kwargs['strm'] = kwargs.pop('stream')
+            result = factory(**kwargs)
+        if formatter:
+            result.setFormatter(formatter)
+        if level is not None:
+            result.setLevel(_checkLevel(level))
+        if filters:
+            self.add_filters(result, filters)
+        return result
+
+    def add_handlers(self, logger, handlers):
+        """Add handlers to a logger from a list of names."""
+        for h in handlers:
+            try:
+                logger.addHandler(self.config['handlers'][h])
+            except StandardError as e:
+                raise ValueError('Unable to add handler %r: %s' % (h, e))
+
+    def common_logger_config(self, logger, config, incremental=False):
+        """
+        Perform configuration which is common to root and non-root loggers.
+        """
+        level = config.get('level', None)
+        if level is not None:
+            logger.setLevel(_checkLevel(level))
+        if not incremental:
+            # Remove any existing handlers
+            for h in logger.handlers[:]:
+                logger.removeHandler(h)
+            handlers = config.get('handlers', None)
+            if handlers:
+                self.add_handlers(logger, handlers)
+            filters = config.get('filters', None)
+            if filters:
+                self.add_filters(logger, filters)
+
+    def configure_logger(self, name, config, incremental=False):
+        """Configure a non-root logger from a dictionary."""
+        logger = logging.getLogger(name)
+        self.common_logger_config(logger, config, incremental)
+        propagate = config.get('propagate', None)
+        if propagate is not None:
+            logger.propagate = propagate
+
+    def configure_root(self, config, incremental=False):
+        """Configure a root logger from a dictionary."""
+        root = logging.getLogger()
+        self.common_logger_config(root, config, incremental)
+
+dictConfigClass = DictConfigurator
+
+
+def dictConfig(config):
+    """Configure logging using a dictionary."""
+    dictConfigClass(config).configure()
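+
+# Illustrative usage sketch (not part of the original module); the formatter
+# and handler names are made-up examples of a minimal version-1 schema
+# accepted by DictConfigurator.configure():
+#
+#   dictConfig({
+#       'version': 1,
+#       'formatters': {'brief': {'format': '%(levelname)s: %(message)s'}},
+#       'handlers': {'console': {'class': 'logging.StreamHandler',
+#                                'formatter': 'brief',
+#                                'level': 'INFO'}},
+#       'root': {'handlers': ['console'], 'level': 'DEBUG'},
+#   })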
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/download.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/download.py
new file mode 100644
index 0000000000000000000000000000000000000000..54d3131dfbada80f902c6b1c44027759d750f96d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/download.py
@@ -0,0 +1,906 @@
+from __future__ import absolute_import
+
+import cgi
+import email.utils
+import getpass
+import json
+import logging
+import mimetypes
+import os
+import platform
+import re
+import shutil
+import sys
+import tempfile
+
+try:
+    import ssl  # noqa
+    HAS_TLS = True
+except ImportError:
+    HAS_TLS = False
+
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+from pip._vendor.six.moves.urllib import request as urllib_request
+
+import pip
+
+from pip.exceptions import InstallationError, HashMismatch
+from pip.models import PyPI
+from pip.utils import (splitext, rmtree, format_size, display_path,
+                       backup_dir, ask_path_exists, unpack_file,
+                       ARCHIVE_EXTENSIONS, consume, call_subprocess)
+from pip.utils.encoding import auto_decode
+from pip.utils.filesystem import check_path_owner
+from pip.utils.logging import indent_log
+from pip.utils.setuptools_build import SETUPTOOLS_SHIM
+from pip.utils.glibc import libc_ver
+from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner
+from pip.locations import write_delete_marker_file
+from pip.vcs import vcs
+from pip._vendor import requests, six
+from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
+from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
+from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
+from pip._vendor.requests.utils import get_netrc_auth
+from pip._vendor.requests.structures import CaseInsensitiveDict
+from pip._vendor.requests.packages import urllib3
+from pip._vendor.cachecontrol import CacheControlAdapter
+from pip._vendor.cachecontrol.caches import FileCache
+from pip._vendor.lockfile import LockError
+from pip._vendor.six.moves import xmlrpc_client
+
+
+__all__ = ['get_file_content',
+           'is_url', 'url_to_path', 'path_to_url',
+           'is_archive_file', 'unpack_vcs_link',
+           'unpack_file_url', 'is_vcs_url', 'is_file_url',
+           'unpack_http_url', 'unpack_url']
+
+
+logger = logging.getLogger(__name__)
+
+
+def user_agent():
+    """
+    Return a string representing the user agent.
+    """
+    data = {
+        "installer": {"name": "pip", "version": pip.__version__},
+        "python": platform.python_version(),
+        "implementation": {
+            "name": platform.python_implementation(),
+        },
+    }
+
+    if data["implementation"]["name"] == 'CPython':
+        data["implementation"]["version"] = platform.python_version()
+    elif data["implementation"]["name"] == 'PyPy':
+        if sys.pypy_version_info.releaselevel == 'final':
+            pypy_version_info = sys.pypy_version_info[:3]
+        else:
+            pypy_version_info = sys.pypy_version_info
+        data["implementation"]["version"] = ".".join(
+            [str(x) for x in pypy_version_info]
+        )
+    elif data["implementation"]["name"] == 'Jython':
+        # Complete Guess
+        data["implementation"]["version"] = platform.python_version()
+    elif data["implementation"]["name"] == 'IronPython':
+        # Complete Guess
+        data["implementation"]["version"] = platform.python_version()
+
+    if sys.platform.startswith("linux"):
+        from pip._vendor import distro
+        distro_infos = dict(filter(
+            lambda x: x[1],
+            zip(["name", "version", "id"], distro.linux_distribution()),
+        ))
+        libc = dict(filter(
+            lambda x: x[1],
+            zip(["lib", "version"], libc_ver()),
+        ))
+        if libc:
+            distro_infos["libc"] = libc
+        if distro_infos:
+            data["distro"] = distro_infos
+
+    if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
+        data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
+
+    if platform.system():
+        data.setdefault("system", {})["name"] = platform.system()
+
+    if platform.release():
+        data.setdefault("system", {})["release"] = platform.release()
+
+    if platform.machine():
+        data["cpu"] = platform.machine()
+
+    # Python 2.6 doesn't have ssl.OPENSSL_VERSION.
+    if HAS_TLS and sys.version_info[:2] > (2, 6):
+        data["openssl_version"] = ssl.OPENSSL_VERSION
+
+    return "{data[installer][name]}/{data[installer][version]} {json}".format(
+        data=data,
+        json=json.dumps(data, separators=(",", ":"), sort_keys=True),
+    )
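+    # Illustrative result (made-up version number, JSON truncated):
+    #   'pip/9.0.1 {"implementation":{"name":"CPython",...},"python":"3.6.8"}'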
+
+
+class MultiDomainBasicAuth(AuthBase):
+
+    def __init__(self, prompting=True):
+        self.prompting = prompting
+        self.passwords = {}
+
+    def __call__(self, req):
+        parsed = urllib_parse.urlparse(req.url)
+
+        # Get the netloc without any embedded credentials
+        netloc = parsed.netloc.rsplit("@", 1)[-1]
+
+        # Set the url of the request to the url without any credentials
+        req.url = urllib_parse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])
+
+        # Use any stored credentials that we have for this netloc
+        username, password = self.passwords.get(netloc, (None, None))
+
+        # Extract credentials embedded in the url if we have none stored
+        if username is None:
+            username, password = self.parse_credentials(parsed.netloc)
+
+        # Get creds from netrc if we still don't have them
+        if username is None and password is None:
+            netrc_auth = get_netrc_auth(req.url)
+            username, password = netrc_auth if netrc_auth else (None, None)
+
+        if username or password:
+            # Store the username and password
+            self.passwords[netloc] = (username, password)
+
+            # Send the basic auth with this request
+            req = HTTPBasicAuth(username or "", password or "")(req)
+
+        # Attach a hook to handle 401 responses
+        req.register_hook("response", self.handle_401)
+
+        return req
+
+    def handle_401(self, resp, **kwargs):
+        # We only care about 401 responses; for anything else we just
+        #   pass through the actual response
+        if resp.status_code != 401:
+            return resp
+
+        # We are not able to prompt the user so simply return the response
+        if not self.prompting:
+            return resp
+
+        parsed = urllib_parse.urlparse(resp.url)
+
+        # Prompt the user for a new username and password
+        username = six.moves.input("User for %s: " % parsed.netloc)
+        password = getpass.getpass("Password: ")
+
+        # Store the new username and password to use for future requests
+        if username or password:
+            self.passwords[parsed.netloc] = (username, password)
+
+        # Consume content and release the original connection to allow our new
+        #   request to reuse the same one.
+        resp.content
+        resp.raw.release_conn()
+
+        # Add our new username and password to the request
+        req = HTTPBasicAuth(username or "", password or "")(resp.request)
+
+        # Send our new request
+        new_resp = resp.connection.send(req, **kwargs)
+        new_resp.history.append(resp)
+
+        return new_resp
+
+    def parse_credentials(self, netloc):
+        if "@" in netloc:
+            userinfo = netloc.rsplit("@", 1)[0]
+            if ":" in userinfo:
+                return userinfo.split(":", 1)
+            return userinfo, None
+        return None, None
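+
+    # Illustrative sketch (hypothetical index URL): for
+    # https://user:secret@pypi.internal.example/simple/, __call__ strips the
+    # credentials from req.url, parse_credentials() yields ('user', 'secret'),
+    # and the pair is stored per netloc in self.passwords for later requests.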
+
+
+class LocalFSAdapter(BaseAdapter):
+
+    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
+             proxies=None):
+        pathname = url_to_path(request.url)
+
+        resp = Response()
+        resp.status_code = 200
+        resp.url = request.url
+
+        try:
+            stats = os.stat(pathname)
+        except OSError as exc:
+            resp.status_code = 404
+            resp.raw = exc
+        else:
+            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
+            content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
+            resp.headers = CaseInsensitiveDict({
+                "Content-Type": content_type,
+                "Content-Length": stats.st_size,
+                "Last-Modified": modified,
+            })
+
+            resp.raw = open(pathname, "rb")
+            resp.close = resp.raw.close
+
+        return resp
+
+    def close(self):
+        pass
+
+
+class SafeFileCache(FileCache):
+    """
+    A file based cache which is safe to use even when the target directory may
+    not be accessible or writable.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(SafeFileCache, self).__init__(*args, **kwargs)
+
+        # Check to ensure that the directory containing our cache directory
+        # is owned by the user currently executing pip. If it does not exist,
+        # we will check the parent directory until we find one that does exist.
+        # If it is not owned by the user executing pip, then we will disable
+        # the cache and log a warning.
+        if not check_path_owner(self.directory):
+            logger.warning(
+                "The directory '%s' or its parent directory is not owned by "
+                "the current user and the cache has been disabled. Please "
+                "check the permissions and owner of that directory. If "
+                "executing pip with sudo, you may want sudo's -H flag.",
+                self.directory,
+            )
+
+            # Set our directory to None to disable the Cache
+            self.directory = None
+
+    def get(self, *args, **kwargs):
+        # If we don't have a directory, then the cache should be a no-op.
+        if self.directory is None:
+            return
+
+        try:
+            return super(SafeFileCache, self).get(*args, **kwargs)
+        except (LockError, OSError, IOError):
+            # We intentionally silence this error; if we can't access the
+            # cache, we can just skip caching and process the request as if
+            # caching wasn't enabled.
+            pass
+
+    def set(self, *args, **kwargs):
+        # If we don't have a directory, then the cache should be a no-op.
+        if self.directory is None:
+            return
+
+        try:
+            return super(SafeFileCache, self).set(*args, **kwargs)
+        except (LockError, OSError, IOError):
+            # We intentionally silence this error; if we can't access the
+            # cache, we can just skip caching and process the request as if
+            # caching wasn't enabled.
+            pass
+
+    def delete(self, *args, **kwargs):
+        # If we don't have a directory, then the cache should be a no-op.
+        if self.directory is None:
+            return
+
+        try:
+            return super(SafeFileCache, self).delete(*args, **kwargs)
+        except (LockError, OSError, IOError):
+            # We intentionally silence this error; if we can't access the
+            # cache, we can just skip caching and process the request as if
+            # caching wasn't enabled.
+            pass
+
+
+class InsecureHTTPAdapter(HTTPAdapter):
+
+    def cert_verify(self, conn, url, verify, cert):
+        conn.cert_reqs = 'CERT_NONE'
+        conn.ca_certs = None
+
+
+class PipSession(requests.Session):
+
+    timeout = None
+
+    def __init__(self, *args, **kwargs):
+        retries = kwargs.pop("retries", 0)
+        cache = kwargs.pop("cache", None)
+        insecure_hosts = kwargs.pop("insecure_hosts", [])
+
+        super(PipSession, self).__init__(*args, **kwargs)
+
+        # Attach our User Agent to the request
+        self.headers["User-Agent"] = user_agent()
+
+        # Attach our Authentication handler to the session
+        self.auth = MultiDomainBasicAuth()
+
+        # Create our urllib3.Retry instance which will allow us to customize
+        # how we handle retries.
+        retries = urllib3.Retry(
+            # Set the total number of retries that a particular request can
+            # have.
+            total=retries,
+
+            # A 503 error from PyPI typically means that the Fastly -> Origin
+            # connection got interrupted in some way. A 503 error in general
+            # is typically considered a transient error so we'll go ahead and
+            # retry it.
+            status_forcelist=[503],
+
+            # Add a small amount of back off between failed requests in
+            # order to prevent hammering the service.
+            backoff_factor=0.25,
+        )
+
+        # We want to _only_ cache responses on securely fetched origins. We do
+        # this because we can't validate the response of an insecurely fetched
+        # origin, and we don't want someone to be able to poison the cache and
+        # require manual eviction from the cache to fix it.
+        if cache:
+            secure_adapter = CacheControlAdapter(
+                cache=SafeFileCache(cache, use_dir_lock=True),
+                max_retries=retries,
+            )
+        else:
+            secure_adapter = HTTPAdapter(max_retries=retries)
+
+        # Our Insecure HTTPAdapter disables HTTPS validation. It does not
+        # support caching (see above) so we'll use it for all http:// URLs as
+        # well as any https:// host that we've marked as ignoring TLS errors
+        # for.
+        insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
+
+        self.mount("https://", secure_adapter)
+        self.mount("http://", insecure_adapter)
+
+        # Enable file:// urls
+        self.mount("file://", LocalFSAdapter())
+
+        # We want to use a non-validating adapter for any requests which are
+        # deemed insecure.
+        for host in insecure_hosts:
+            self.mount("https://{0}/".format(host), insecure_adapter)
+
+    def request(self, method, url, *args, **kwargs):
+        # Allow setting a default timeout on a session
+        kwargs.setdefault("timeout", self.timeout)
+
+        # Dispatch the actual request
+        return super(PipSession, self).request(method, url, *args, **kwargs)
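+
+    # Illustrative construction sketch (the cache path and host names are
+    # made-up examples); this mirrors the keyword arguments popped in
+    # __init__ above:
+    #
+    #   session = PipSession(retries=3, cache='/tmp/pip-http-cache',
+    #                        insecure_hosts=['pypi.internal.example'])
+    #   session.timeout = 15   # picked up as the default in request()
+    #   resp = session.get('https://pypi.org/simple/')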
+
+
+def get_file_content(url, comes_from=None, session=None):
+    """Gets the content of a file; it may be a filename, file: URL, or
+    http: URL.  Returns (location, content).  Content is unicode."""
+    if session is None:
+        raise TypeError(
+            "get_file_content() missing 1 required keyword argument: 'session'"
+        )
+
+    match = _scheme_re.search(url)
+    if match:
+        scheme = match.group(1).lower()
+        if (scheme == 'file' and comes_from and
+                comes_from.startswith('http')):
+            raise InstallationError(
+                'Requirements file %s references URL %s, which is local'
+                % (comes_from, url))
+        if scheme == 'file':
+            path = url.split(':', 1)[1]
+            path = path.replace('\\', '/')
+            match = _url_slash_drive_re.match(path)
+            if match:
+                path = match.group(1) + ':' + path.split('|', 1)[1]
+            path = urllib_parse.unquote(path)
+            if path.startswith('/'):
+                path = '/' + path.lstrip('/')
+            url = path
+        else:
+            # FIXME: catch some errors
+            resp = session.get(url)
+            resp.raise_for_status()
+            return resp.url, resp.text
+    try:
+        with open(url, 'rb') as f:
+            content = auto_decode(f.read())
+    except IOError as exc:
+        raise InstallationError(
+            'Could not open requirements file: %s' % str(exc)
+        )
+    return url, content
+
+
+_scheme_re = re.compile(r'^(http|https|file):', re.I)
+_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
+
+
+def is_url(name):
+    """Returns true if the name looks like a URL"""
+    if ':' not in name:
+        return False
+    scheme = name.split(':', 1)[0].lower()
+    return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
+
+
+def url_to_path(url):
+    """
+    Convert a file: URL to a path.
+    """
+    assert url.startswith('file:'), (
+        "You can only turn file: urls into filenames (not %r)" % url)
+
+    _, netloc, path, _, _ = urllib_parse.urlsplit(url)
+
+    # if we have a UNC path, prepend UNC share notation
+    if netloc:
+        netloc = '\\\\' + netloc
+
+    path = urllib_request.url2pathname(netloc + path)
+    return path
+
+
+def path_to_url(path):
+    """
+    Convert a path to a file: URL.  The path will be made absolute and have
+    quoted path parts.
+    """
+    path = os.path.normpath(os.path.abspath(path))
+    url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
+    return url
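+
+# Illustrative round trip on a POSIX system (the path is a made-up example):
+#   path_to_url('/tmp/pkgs/foo.tar.gz') -> 'file:///tmp/pkgs/foo.tar.gz'
+#   url_to_path('file:///tmp/pkgs/foo.tar.gz') -> '/tmp/pkgs/foo.tar.gz'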
+
+
+def is_archive_file(name):
+    """Return True if `name` is a considered as an archive file."""
+    ext = splitext(name)[1].lower()
+    if ext in ARCHIVE_EXTENSIONS:
+        return True
+    return False
+
+
+def unpack_vcs_link(link, location):
+    vcs_backend = _get_used_vcs_backend(link)
+    vcs_backend.unpack(location)
+
+
+def _get_used_vcs_backend(link):
+    for backend in vcs.backends:
+        if link.scheme in backend.schemes:
+            vcs_backend = backend(link.url)
+            return vcs_backend
+
+
+def is_vcs_url(link):
+    return bool(_get_used_vcs_backend(link))
+
+
+def is_file_url(link):
+    return link.url.lower().startswith('file:')
+
+
+def is_dir_url(link):
+    """Return whether a file:// Link points to a directory.
+
+    ``link`` must not have any other scheme but file://. Call is_file_url()
+    first.
+
+    """
+    link_path = url_to_path(link.url_without_fragment)
+    return os.path.isdir(link_path)
+
+
+def _progress_indicator(iterable, *args, **kwargs):
+    return iterable
+
+
+def _download_url(resp, link, content_file, hashes):
+    try:
+        total_length = int(resp.headers['content-length'])
+    except (ValueError, KeyError, TypeError):
+        total_length = 0
+
+    cached_resp = getattr(resp, "from_cache", False)
+
+    if logger.getEffectiveLevel() > logging.INFO:
+        show_progress = False
+    elif cached_resp:
+        show_progress = False
+    elif total_length > (40 * 1000):
+        show_progress = True
+    elif not total_length:
+        show_progress = True
+    else:
+        show_progress = False
+
+    show_url = link.show_url
+
+    def resp_read(chunk_size):
+        try:
+            # Special case for urllib3.
+            for chunk in resp.raw.stream(
+                    chunk_size,
+                    # We use decode_content=False here because we don't
+                    # want urllib3 to mess with the raw bytes we get
+                    # from the server. If we decompress inside of
+                    # urllib3 then we cannot verify the checksum
+                    # because the checksum will be of the compressed
+                    # file. This breakage will only occur if the
+                    # server adds a Content-Encoding header, which
+                    # depends on how the server was configured:
+                    # - Some servers will notice that the file isn't a
+                    #   compressible file and will leave the file alone
+                    #   and with an empty Content-Encoding
+                    # - Some servers will notice that the file is
+                    #   already compressed and will leave the file
+                    #   alone and will add a Content-Encoding: gzip
+                    #   header
+                    # - Some servers won't notice anything at all and
+                    #   will take a file that's already been compressed
+                    #   and compress it again and set the
+                    #   Content-Encoding: gzip header
+                    #
+                    # By setting this not to decode automatically we
+                    # hope to eliminate problems with the second case.
+                    decode_content=False):
+                yield chunk
+        except AttributeError:
+            # Standard file-like object.
+            while True:
+                chunk = resp.raw.read(chunk_size)
+                if not chunk:
+                    break
+                yield chunk
+
+    def written_chunks(chunks):
+        for chunk in chunks:
+            content_file.write(chunk)
+            yield chunk
+
+    progress_indicator = _progress_indicator
+
+    if link.netloc == PyPI.netloc:
+        url = show_url
+    else:
+        url = link.url_without_fragment
+
+    if show_progress:  # We don't show progress on cached responses
+        if total_length:
+            logger.info("Downloading %s (%s)", url, format_size(total_length))
+            progress_indicator = DownloadProgressBar(max=total_length).iter
+        else:
+            logger.info("Downloading %s", url)
+            progress_indicator = DownloadProgressSpinner().iter
+    elif cached_resp:
+        logger.info("Using cached %s", url)
+    else:
+        logger.info("Downloading %s", url)
+
+    logger.debug('Downloading from URL %s', link)
+
+    downloaded_chunks = written_chunks(
+        progress_indicator(
+            resp_read(CONTENT_CHUNK_SIZE),
+            CONTENT_CHUNK_SIZE
+        )
+    )
+    if hashes:
+        hashes.check_against_chunks(downloaded_chunks)
+    else:
+        consume(downloaded_chunks)
+
+
+def _copy_file(filename, location, link):
+    copy = True
+    download_location = os.path.join(location, link.filename)
+    if os.path.exists(download_location):
+        response = ask_path_exists(
+            'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort' %
+            display_path(download_location), ('i', 'w', 'b', 'a'))
+        if response == 'i':
+            copy = False
+        elif response == 'w':
+            logger.warning('Deleting %s', display_path(download_location))
+            os.remove(download_location)
+        elif response == 'b':
+            dest_file = backup_dir(download_location)
+            logger.warning(
+                'Backing up %s to %s',
+                display_path(download_location),
+                display_path(dest_file),
+            )
+            shutil.move(download_location, dest_file)
+        elif response == 'a':
+            sys.exit(-1)
+    if copy:
+        shutil.copy(filename, download_location)
+        logger.info('Saved %s', display_path(download_location))
+
+
+def unpack_http_url(link, location, download_dir=None,
+                    session=None, hashes=None):
+    if session is None:
+        raise TypeError(
+            "unpack_http_url() missing 1 required keyword argument: 'session'"
+        )
+
+    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
+
+    # If a download dir is specified, is the file already downloaded there?
+    already_downloaded_path = None
+    if download_dir:
+        already_downloaded_path = _check_download_dir(link,
+                                                      download_dir,
+                                                      hashes)
+
+    if already_downloaded_path:
+        from_path = already_downloaded_path
+        content_type = mimetypes.guess_type(from_path)[0]
+    else:
+        # let's download to a tmp dir
+        from_path, content_type = _download_http_url(link,
+                                                     session,
+                                                     temp_dir,
+                                                     hashes)
+
+    # unpack the archive to the build dir location. even when only downloading
+    # archives, they have to be unpacked to parse dependencies
+    unpack_file(from_path, location, content_type, link)
+
+    # a download dir is specified; let's copy the archive there
+    if download_dir and not already_downloaded_path:
+        _copy_file(from_path, download_dir, link)
+
+    if not already_downloaded_path:
+        os.unlink(from_path)
+    rmtree(temp_dir)
+
+
+def unpack_file_url(link, location, download_dir=None, hashes=None):
+    """Unpack link into location.
+
+    If download_dir is provided and link points to a file, make a copy
+    of the link file inside download_dir.
+    """
+    link_path = url_to_path(link.url_without_fragment)
+
+    # If it's a url to a local directory
+    if is_dir_url(link):
+        if os.path.isdir(location):
+            rmtree(location)
+        shutil.copytree(link_path, location, symlinks=True)
+        if download_dir:
+            logger.info('Link is a directory, ignoring download_dir')
+        return
+
+    # If --require-hashes is off, `hashes` is either empty, the
+    # link's embedded hash, or MissingHashes; it is required to
+    # match. If --require-hashes is on, we are satisfied by any
+    # hash in `hashes` matching: a URL-based or an option-based
+    # one; no internet-sourced hash will be in `hashes`.
+    if hashes:
+        hashes.check_against_path(link_path)
+
+    # If a download dir is specified, is the file already there and valid?
+    already_downloaded_path = None
+    if download_dir:
+        already_downloaded_path = _check_download_dir(link,
+                                                      download_dir,
+                                                      hashes)
+
+    if already_downloaded_path:
+        from_path = already_downloaded_path
+    else:
+        from_path = link_path
+
+    content_type = mimetypes.guess_type(from_path)[0]
+
+    # unpack the archive to the build dir location. even when only downloading
+    # archives, they have to be unpacked to parse dependencies
+    unpack_file(from_path, location, content_type, link)
+
+    # a download dir is specified and not already downloaded
+    if download_dir and not already_downloaded_path:
+        _copy_file(from_path, download_dir, link)
+
+
+def _copy_dist_from_dir(link_path, location):
+    """Copy distribution files in `link_path` to `location`.
+
+    Invoked when user requests to install a local directory. E.g.:
+
+        pip install .
+        pip install ~/dev/git-repos/python-prompt-toolkit
+
+    """
+
+    # Note: This is currently VERY SLOW if you have a lot of data in the
+    # directory, because it copies everything with `shutil.copytree`.
+    # What it should really do is build an sdist and install that.
+    # See https://github.com/pypa/pip/issues/2195
+
+    if os.path.isdir(location):
+        rmtree(location)
+
+    # build an sdist
+    setup_py = 'setup.py'
+    sdist_args = [sys.executable]
+    sdist_args.append('-c')
+    sdist_args.append(SETUPTOOLS_SHIM % setup_py)
+    sdist_args.append('sdist')
+    sdist_args += ['--dist-dir', location]
+    logger.info('Running setup.py sdist for %s', link_path)
+
+    with indent_log():
+        call_subprocess(sdist_args, cwd=link_path, show_stdout=False)
+
+    # unpack sdist into `location`
+    sdist = os.path.join(location, os.listdir(location)[0])
+    logger.info('Unpacking sdist %s into %s', sdist, location)
+    unpack_file(sdist, location, content_type=None, link=None)
+
+
+class PipXmlrpcTransport(xmlrpc_client.Transport):
+    """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
+    object.
+    """
+
+    def __init__(self, index_url, session, use_datetime=False):
+        xmlrpc_client.Transport.__init__(self, use_datetime)
+        index_parts = urllib_parse.urlparse(index_url)
+        self._scheme = index_parts.scheme
+        self._session = session
+
+    def request(self, host, handler, request_body, verbose=False):
+        parts = (self._scheme, host, handler, None, None, None)
+        url = urllib_parse.urlunparse(parts)
+        try:
+            headers = {'Content-Type': 'text/xml'}
+            response = self._session.post(url, data=request_body,
+                                          headers=headers, stream=True)
+            response.raise_for_status()
+            self.verbose = verbose
+            return self.parse_response(response.raw)
+        except requests.HTTPError as exc:
+            logger.critical(
+                "HTTP error %s while getting %s",
+                exc.response.status_code, url,
+            )
+            raise
+
+
+def unpack_url(link, location, download_dir=None,
+               only_download=False, session=None, hashes=None):
+    """Unpack link.
+       If link is a VCS link:
+         if only_download, export into download_dir and ignore location
+         else unpack into location
+       For other types of link:
+         - unpack into location
+         - if download_dir, copy the file into download_dir
+         - if only_download, mark location for deletion
+
+    :param hashes: A Hashes object, one of whose embedded hashes must match,
+        or HashMismatch will be raised. If the Hashes is empty, no matches are
+        required, and unhashable types of requirements (like VCS ones, which
+        would ordinarily raise HashUnsupported) are allowed.
+    """
+    # non-editable vcs urls
+    if is_vcs_url(link):
+        unpack_vcs_link(link, location)
+
+    # file urls
+    elif is_file_url(link):
+        unpack_file_url(link, location, download_dir, hashes=hashes)
+
+    # http urls
+    else:
+        if session is None:
+            session = PipSession()
+
+        unpack_http_url(
+            link,
+            location,
+            download_dir,
+            session,
+            hashes=hashes
+        )
+    if only_download:
+        write_delete_marker_file(location)
+
+
+def _download_http_url(link, session, temp_dir, hashes):
+    """Download link url into temp_dir using provided session"""
+    target_url = link.url.split('#', 1)[0]
+    try:
+        resp = session.get(
+            target_url,
+            # We use Accept-Encoding: identity here because requests
+            # defaults to accepting compressed responses. This breaks in
+            # a variety of ways depending on how the server is configured.
+            # - Some servers will notice that the file isn't a compressible
+            #   file and will leave the file alone and with an empty
+            #   Content-Encoding
+            # - Some servers will notice that the file is already
+            #   compressed and will leave the file alone and will add a
+            #   Content-Encoding: gzip header
+            # - Some servers won't notice anything at all and will take
+            #   a file that's already been compressed and compress it again
+            #   and set the Content-Encoding: gzip header
+            # By setting this to request only the identity encoding, we're
+            # hoping to eliminate the third case. Hopefully there does not
+            # exist a server which when given a file will notice it is
+            # already compressed and that you're not asking for a
+            # compressed file and will then decompress it before sending
+            # because if that's the case I don't think it'll ever be
+            # possible to make this work.
+            headers={"Accept-Encoding": "identity"},
+            stream=True,
+        )
+        resp.raise_for_status()
+    except requests.HTTPError as exc:
+        logger.critical(
+            "HTTP error %s while getting %s", exc.response.status_code, link,
+        )
+        raise
+
+    content_type = resp.headers.get('content-type', '')
+    filename = link.filename  # fallback
+    # Have a look at the Content-Disposition header for a better guess
+    content_disposition = resp.headers.get('content-disposition')
+    if content_disposition:
+        type, params = cgi.parse_header(content_disposition)
+        # We use ``or`` here because we don't want to use an "empty" value
+        # from the filename param.
+        filename = params.get('filename') or filename
+    ext = splitext(filename)[1]
+    if not ext:
+        ext = mimetypes.guess_extension(content_type)
+        if ext:
+            filename += ext
+    if not ext and link.url != resp.url:
+        ext = os.path.splitext(resp.url)[1]
+        if ext:
+            filename += ext
+    file_path = os.path.join(temp_dir, filename)
+    with open(file_path, 'wb') as content_file:
+        _download_url(resp, link, content_file, hashes)
+    return file_path, content_type
+
+
+def _check_download_dir(link, download_dir, hashes):
+    """ Check download_dir for previously downloaded file with correct hash
+        If a correct file is found return its path else None
+    """
+    download_path = os.path.join(download_dir, link.filename)
+    if os.path.exists(download_path):
+        # If already downloaded, does its hash match?
+        logger.info('File was already downloaded %s', download_path)
+        if hashes:
+            try:
+                hashes.check_against_path(download_path)
+            except HashMismatch:
+                logger.warning(
+                    'Previously-downloaded file %s has bad hash. '
+                    'Re-downloading.',
+                    download_path
+                )
+                os.unlink(download_path)
+                return None
+        return download_path
+    return None
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/exceptions.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..50b527f90187ae7f2df69b822bf40f3069d3e6c2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/exceptions.py
@@ -0,0 +1,244 @@
+"""Exceptions used throughout package"""
+from __future__ import absolute_import
+
+from itertools import chain, groupby, repeat
+
+from pip._vendor.six import iteritems
+
+
+class PipError(Exception):
+    """Base pip exception"""
+
+
+class InstallationError(PipError):
+    """General exception during installation"""
+
+
+class UninstallationError(PipError):
+    """General exception during uninstallation"""
+
+
+class DistributionNotFound(InstallationError):
+    """Raised when a distribution cannot be found to satisfy a requirement"""
+
+
+class RequirementsFileParseError(InstallationError):
+    """Raised when a general error occurs parsing a requirements file line."""
+
+
+class BestVersionAlreadyInstalled(PipError):
+    """Raised when the most up-to-date version of a package is already
+    installed."""
+
+
+class BadCommand(PipError):
+    """Raised when virtualenv or a command is not found"""
+
+
+class CommandError(PipError):
+    """Raised when there is an error in command-line arguments"""
+
+
+class PreviousBuildDirError(PipError):
+    """Raised when there's a previous conflicting build directory"""
+
+
+class InvalidWheelFilename(InstallationError):
+    """Invalid wheel filename."""
+
+
+class UnsupportedWheel(InstallationError):
+    """Unsupported wheel."""
+
+
+class HashErrors(InstallationError):
+    """Multiple HashError instances rolled into one for reporting"""
+
+    def __init__(self):
+        self.errors = []
+
+    def append(self, error):
+        self.errors.append(error)
+
+    def __str__(self):
+        lines = []
+        self.errors.sort(key=lambda e: e.order)
+        for cls, errors_of_cls in groupby(self.errors, lambda e: e.__class__):
+            lines.append(cls.head)
+            lines.extend(e.body() for e in errors_of_cls)
+        if lines:
+            return '\n'.join(lines)
+
+    def __nonzero__(self):
+        return bool(self.errors)
+
+    def __bool__(self):
+        return self.__nonzero__()
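+
+    # Illustrative aggregation sketch (hypothetical caller code): individual
+    # HashError subclasses defined below are appended and raised as one.
+    #
+    #   errors = HashErrors()
+    #   errors.append(HashMissing('deadbeef'))   # made-up digest
+    #   if errors:
+    #       raise errors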
+
+
+class HashError(InstallationError):
+    """
+    A failure to verify a package against known-good hashes
+
+    :cvar order: An int sorting hash exception classes by difficulty of
+        recovery (lower being harder), so the user doesn't bother fretting
+        about unpinned packages when he has deeper issues, like VCS
+        dependencies, to deal with. Also keeps error reports in a
+        deterministic order.
+    :cvar head: A section heading for display above potentially many
+        exceptions of this kind
+    :ivar req: The InstallRequirement that triggered this error. This is
+        pasted on after the exception is instantiated, because it's not
+        typically available earlier.
+
+    """
+    req = None
+    head = ''
+
+    def body(self):
+        """Return a summary of me for display under the heading.
+
+        This default implementation simply prints a description of the
+        triggering requirement.
+
+        :param req: The InstallRequirement that provoked this error, with
+            populate_link() having already been called
+
+        """
+        return '    %s' % self._requirement_name()
+
+    def __str__(self):
+        return '%s\n%s' % (self.head, self.body())
+
+    def _requirement_name(self):
+        """Return a description of the requirement that triggered me.
+
+        This default implementation returns the long description of the req,
+        with line numbers.
+
+        """
+        return str(self.req) if self.req else 'unknown package'
+
+
+class VcsHashUnsupported(HashError):
+    """A hash was provided for a version-control-system-based requirement, but
+    we don't have a method for hashing those."""
+
+    order = 0
+    head = ("Can't verify hashes for these requirements because we don't "
+            "have a way to hash version control repositories:")
+
+
+class DirectoryUrlHashUnsupported(HashError):
+    """A hash was provided for a version-control-system-based requirement, but
+    we don't have a method for hashing those."""
+
+    order = 1
+    head = ("Can't verify hashes for these file:// requirements because they "
+            "point to directories:")
+
+
+class HashMissing(HashError):
+    """A hash was needed for a requirement but is absent."""
+
+    order = 2
+    head = ('Hashes are required in --require-hashes mode, but they are '
+            'missing from some requirements. Here is a list of those '
+            'requirements along with the hashes their downloaded archives '
+            'actually had. Add lines like these to your requirements files to '
+            'prevent tampering. (If you did not enable --require-hashes '
+            'manually, note that it turns on automatically when any package '
+            'has a hash.)')
+
+    def __init__(self, gotten_hash):
+        """
+        :param gotten_hash: The hash of the (possibly malicious) archive we
+            just downloaded
+        """
+        self.gotten_hash = gotten_hash
+
+    def body(self):
+        from pip.utils.hashes import FAVORITE_HASH  # Dodge circular import.
+
+        package = None
+        if self.req:
+            # In the case of URL-based requirements, display the original URL
+            # seen in the requirements file rather than the package name,
+            # so the output can be directly copied into the requirements file.
+            package = (self.req.original_link if self.req.original_link
+                       # Fall back if something unexpected was passed to
+                       # InstallRequirement's constructor.
+                       else getattr(self.req, 'req', None))
+        return '    %s --hash=%s:%s' % (package or 'unknown package',
+                                        FAVORITE_HASH,
+                                        self.gotten_hash)
+
+
+class HashUnpinned(HashError):
+    """A requirement had a hash specified but was not pinned to a specific
+    version."""
+
+    order = 3
+    head = ('In --require-hashes mode, all requirements must have their '
+            'versions pinned with ==. These do not:')
+
+
+class HashMismatch(HashError):
+    """
+    Distribution file hash values don't match.
+
+    :ivar package_name: The name of the package that triggered the hash
+        mismatch. Feel free to write to this after the exception is raised to
+        improve its error message.
+
+    """
+    order = 4
+    head = ('THESE PACKAGES DO NOT MATCH THE HASHES FROM THE REQUIREMENTS '
+            'FILE. If you have updated the package versions, please update '
+            'the hashes. Otherwise, examine the package contents carefully; '
+            'someone may have tampered with them.')
+
+    def __init__(self, allowed, gots):
+        """
+        :param allowed: A dict of algorithm names pointing to lists of allowed
+            hex digests
+        :param gots: A dict of algorithm names pointing to hashes we
+            actually got from the files under suspicion
+        """
+        self.allowed = allowed
+        self.gots = gots
+
+    def body(self):
+        return '    %s:\n%s' % (self._requirement_name(),
+                                self._hash_comparison())
+
+    def _hash_comparison(self):
+        """
+        Return a comparison of actual and expected hash values.
+
+        Example::
+
+               Expected sha256 abcdeabcdeabcdeabcdeabcdeabcdeabcdeabcdeabcde
+                            or 123451234512345123451234512345123451234512345
+                    Got        bcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdefbcdef
+
+        """
+        def hash_then_or(hash_name):
+            # For now, all the decent hashes have 6-char names, so we can get
+            # away with hard-coding space literals.
+            return chain([hash_name], repeat('    or'))
+
+        lines = []
+        for hash_name, expecteds in iteritems(self.allowed):
+            prefix = hash_then_or(hash_name)
+            lines.extend(('        Expected %s %s' % (next(prefix), e))
+                         for e in expecteds)
+            lines.append('             Got        %s\n' %
+                         self.gots[hash_name].hexdigest())
+            prefix = '    or'
+        return '\n'.join(lines)
+
+
+class UnsupportedPythonVersion(InstallationError):
+    """Unsupported python version according to Requires-Python package
+    metadata."""
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/index.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/index.py
new file mode 100644
index 0000000000000000000000000000000000000000..acd90d6cc6d38a0a0461df83bfda4fdf3aa8cf68
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/index.py
@@ -0,0 +1,1102 @@
+"""Routines related to PyPI, indexes"""
+from __future__ import absolute_import
+
+import logging
+import cgi
+from collections import namedtuple
+import itertools
+import sys
+import os
+import re
+import mimetypes
+import posixpath
+import warnings
+
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+from pip._vendor.six.moves.urllib import request as urllib_request
+
+from pip.compat import ipaddress
+from pip.utils import (
+    cached_property, splitext, normalize_path,
+    ARCHIVE_EXTENSIONS, SUPPORTED_EXTENSIONS,
+)
+from pip.utils.deprecation import RemovedInPip10Warning
+from pip.utils.logging import indent_log
+from pip.utils.packaging import check_requires_python
+from pip.exceptions import (
+    DistributionNotFound, BestVersionAlreadyInstalled, InvalidWheelFilename,
+    UnsupportedWheel,
+)
+from pip.download import HAS_TLS, is_url, path_to_url, url_to_path
+from pip.wheel import Wheel, wheel_ext
+from pip.pep425tags import get_supported
+from pip._vendor import html5lib, requests, six
+from pip._vendor.packaging.version import parse as parse_version
+from pip._vendor.packaging.utils import canonicalize_name
+from pip._vendor.packaging import specifiers
+from pip._vendor.requests.exceptions import HTTPError, SSLError
+from pip._vendor.distlib.compat import unescape
+
+
+__all__ = ['FormatControl', 'fmt_ctl_handle_mutual_exclude', 'PackageFinder']
+
+
+SECURE_ORIGINS = [
+    # protocol, hostname, port
+    # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
+    ("https", "*", "*"),
+    ("*", "localhost", "*"),
+    ("*", "127.0.0.0/8", "*"),
+    ("*", "::1/128", "*"),
+    ("file", "*", None),
+    # ssh is always secure.
+    ("ssh", "*", "*"),
+]
+
+
+logger = logging.getLogger(__name__)
+
+
+class InstallationCandidate(object):
+
+    def __init__(self, project, version, location):
+        self.project = project
+        self.version = parse_version(version)
+        self.location = location
+        self._key = (self.project, self.version, self.location)
+
+    def __repr__(self):
+        return "<InstallationCandidate({0!r}, {1!r}, {2!r})>".format(
+            self.project, self.version, self.location,
+        )
+
+    def __hash__(self):
+        return hash(self._key)
+
+    def __lt__(self, other):
+        return self._compare(other, lambda s, o: s < o)
+
+    def __le__(self, other):
+        return self._compare(other, lambda s, o: s <= o)
+
+    def __eq__(self, other):
+        return self._compare(other, lambda s, o: s == o)
+
+    def __ge__(self, other):
+        return self._compare(other, lambda s, o: s >= o)
+
+    def __gt__(self, other):
+        return self._compare(other, lambda s, o: s > o)
+
+    def __ne__(self, other):
+        return self._compare(other, lambda s, o: s != o)
+
+    def _compare(self, other, method):
+        if not isinstance(other, InstallationCandidate):
+            return NotImplemented
+
+        return method(self._key, other._key)
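+
+
+def _example_candidate_ordering():  # pragma: no cover
+    # Illustrative sketch only, not part of upstream pip: candidates compare
+    # on (project, parsed version, location), so '2.10' outranks '2.9'
+    # instead of sorting lexicographically. The URLs are made up, and plain
+    # strings stand in for the Link objects that real callers pass as the
+    # location.
+    older = InstallationCandidate(
+        'example', '2.9', 'https://host.invalid/example-2.9.tar.gz')
+    newer = InstallationCandidate(
+        'example', '2.10', 'https://host.invalid/example-2.10.tar.gz')
+    return max([older, newer]).version   # -> <Version('2.10')>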
+
+
+class PackageFinder(object):
+    """This finds packages.
+
+    This is meant to match easy_install's technique for looking for
+    packages, by reading pages and looking for appropriate links.
+    """
+
+    def __init__(self, find_links, index_urls, allow_all_prereleases=False,
+                 trusted_hosts=None, process_dependency_links=False,
+                 session=None, format_control=None, platform=None,
+                 versions=None, abi=None, implementation=None):
+        """Create a PackageFinder.
+
+        :param format_control: A FormatControl object or None. Used to control
+            the selection of source packages / binary packages when consulting
+            the index and links.
+        :param platform: A string or None. If None, searches for packages
+            that are supported by the current system. Otherwise, will find
+            packages that can be built on the platform passed in. These
+            packages will only be downloaded for distribution: they will
+            not be built locally.
+        :param versions: A list of strings or None. This is passed directly
+            to pep425tags.py in the get_supported() method.
+        :param abi: A string or None. This is passed directly
+            to pep425tags.py in the get_supported() method.
+        :param implementation: A string or None. This is passed directly
+            to pep425tags.py in the get_supported() method.
+        """
+        if session is None:
+            raise TypeError(
+                "PackageFinder() missing 1 required keyword argument: "
+                "'session'"
+            )
+
+        # Build find_links. If an argument starts with ~, it may be
+        # a local file relative to a home directory. So try normalizing
+        # it and if it exists, use the normalized version.
+        # This is deliberately conservative - it might be fine just to
+        # blindly normalize anything starting with a ~...
+        self.find_links = []
+        for link in find_links:
+            if link.startswith('~'):
+                new_link = normalize_path(link)
+                if os.path.exists(new_link):
+                    link = new_link
+            self.find_links.append(link)
+
+        self.index_urls = index_urls
+        self.dependency_links = []
+
+        # These are boring links that have already been logged somehow:
+        self.logged_links = set()
+
+        self.format_control = format_control or FormatControl(set(), set())
+
+        # Domains that we won't emit warnings for when not using HTTPS
+        self.secure_origins = [
+            ("*", host, "*")
+            for host in (trusted_hosts if trusted_hosts else [])
+        ]
+
+        # Do we want to allow _all_ pre-releases?
+        self.allow_all_prereleases = allow_all_prereleases
+
+        # Do we process dependency links?
+        self.process_dependency_links = process_dependency_links
+
+        # The Session we'll use to make requests
+        self.session = session
+
+        # The valid tags to check potential found wheel candidates against
+        self.valid_tags = get_supported(
+            versions=versions,
+            platform=platform,
+            abi=abi,
+            impl=implementation,
+        )
+
+        # If we don't have TLS enabled, then WARN if anyplace we're looking
+        # relies on TLS.
+        if not HAS_TLS:
+            for link in itertools.chain(self.index_urls, self.find_links):
+                parsed = urllib_parse.urlparse(link)
+                if parsed.scheme == "https":
+                    logger.warning(
+                        "pip is configured with locations that require "
+                        "TLS/SSL, however the ssl module in Python is not "
+                        "available."
+                    )
+                    break
+
+    def add_dependency_links(self, links):
+        # # FIXME: this shouldn't be global list this, it should only
+        # # apply to requirements of the package that specifies the
+        # # dependency_links value
+        # # FIXME: also, we should track comes_from (i.e., use Link)
+        if self.process_dependency_links:
+            warnings.warn(
+                "Dependency Links processing has been deprecated and will be "
+                "removed in a future release.",
+                RemovedInPip10Warning,
+            )
+            self.dependency_links.extend(links)
+
+    @staticmethod
+    def _sort_locations(locations, expand_dir=False):
+        """
+        Sort locations into "files" (archives) and "urls", and return
+        a pair of lists (files,urls)
+        """
+        files = []
+        urls = []
+
+        # puts the url for the given file path into the appropriate list
+        def sort_path(path):
+            url = path_to_url(path)
+            if mimetypes.guess_type(url, strict=False)[0] == 'text/html':
+                urls.append(url)
+            else:
+                files.append(url)
+
+        for url in locations:
+
+            is_local_path = os.path.exists(url)
+            is_file_url = url.startswith('file:')
+
+            if is_local_path or is_file_url:
+                if is_local_path:
+                    path = url
+                else:
+                    path = url_to_path(url)
+                if os.path.isdir(path):
+                    if expand_dir:
+                        path = os.path.realpath(path)
+                        for item in os.listdir(path):
+                            sort_path(os.path.join(path, item))
+                    elif is_file_url:
+                        urls.append(url)
+                elif os.path.isfile(path):
+                    sort_path(path)
+                else:
+                    logger.warning(
+                        "Url '%s' is ignored: it is neither a file "
+                        "nor a directory.", url)
+            elif is_url(url):
+                # Only add url with clear scheme
+                urls.append(url)
+            else:
+                logger.warning(
+                    "Url '%s' is ignored. It is either a non-existing "
+                    "path or lacks a specific scheme.", url)
+
+        return files, urls
+
+    def _candidate_sort_key(self, candidate):
+        """
+        Function used to generate the sort key for an InstallationCandidate.
+        The greater the return value, the more preferred the candidate is.
+        If not finding wheels, then sorted by version only.
+        If finding wheels, then the sort order is by version, then:
+          1. existing installs
+          2. wheels ordered via Wheel.support_index_min(self.valid_tags)
+          3. source archives
+        Note: it was considered to embed this logic into the Link
+              comparison operators, but then different sdist links
+              with the same version would have to be considered equal
+        """
+        support_num = len(self.valid_tags)
+        if candidate.location.is_wheel:
+            # can raise InvalidWheelFilename
+            wheel = Wheel(candidate.location.filename)
+            if not wheel.supported(self.valid_tags):
+                raise UnsupportedWheel(
+                    "%s is not a supported wheel for this platform. It "
+                    "can't be sorted." % wheel.filename
+                )
+            pri = -(wheel.support_index_min(self.valid_tags))
+        else:  # sdist
+            pri = -(support_num)
+        return (candidate.version, pri)
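+
+    # Illustrative note, not part of upstream pip: the key above is a
+    # (version, priority) tuple, so a newer version always wins; among
+    # candidates of the same version, a wheel whose best-matching tag sits
+    # earlier in self.valid_tags outranks one whose best tag sits later,
+    # and any supported wheel outranks an sdist, which always gets
+    # -len(self.valid_tags).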
+
+    def _validate_secure_origin(self, logger, location):
+        # Determine if this url used a secure transport mechanism
+        parsed = urllib_parse.urlparse(str(location))
+        origin = (parsed.scheme, parsed.hostname, parsed.port)
+
+        # The protocol to compare against the secure-origin list.
+        # Don't count the repository type as part of the protocol: in
+        # cases such as "git+ssh", only use "ssh" (i.e., only verify against
+        # the last scheme).
+        protocol = origin[0].rsplit('+', 1)[-1]
+
+        # Determine if our origin is a secure origin by looking through our
+        # hardcoded list of secure origins, as well as any additional ones
+        # configured on this PackageFinder instance.
+        for secure_origin in (SECURE_ORIGINS + self.secure_origins):
+            if protocol != secure_origin[0] and secure_origin[0] != "*":
+                continue
+
+            try:
+                # We need to do this decode dance to ensure that we have a
+                # unicode object, even on Python 2.x.
+                addr = ipaddress.ip_address(
+                    origin[1]
+                    if (
+                        isinstance(origin[1], six.text_type) or
+                        origin[1] is None
+                    )
+                    else origin[1].decode("utf8")
+                )
+                network = ipaddress.ip_network(
+                    secure_origin[1]
+                    if isinstance(secure_origin[1], six.text_type)
+                    else secure_origin[1].decode("utf8")
+                )
+            except ValueError:
+                # We couldn't parse a valid address and network, so fall
+                # back to checking this origin against hostnames.
+                if (origin[1] and
+                        origin[1].lower() != secure_origin[1].lower() and
+                        secure_origin[1] != "*"):
+                    continue
+            else:
+                # We have a valid address and network, so see if the address
+                # is contained within the network.
+                if addr not in network:
+                    continue
+
+            # Check to see if the port matches
+            if (origin[2] != secure_origin[2] and
+                    secure_origin[2] != "*" and
+                    secure_origin[2] is not None):
+                continue
+
+            # If we've gotten here, then this origin matches the current
+            # secure origin and we should return True
+            return True
+
+        # If we've gotten to this point, then the origin isn't secure and we
+        # will not accept it as a valid location to search. We will however
+        # log a warning that we are ignoring it.
+        logger.warning(
+            "The repository located at %s is not a trusted or secure host and "
+            "is being ignored. If this repository is available via HTTPS it "
+            "is recommended to use HTTPS instead, otherwise you may silence "
+            "this warning and allow it anyways with '--trusted-host %s'.",
+            parsed.hostname,
+            parsed.hostname,
+        )
+
+        return False
+
+    def _get_index_urls_locations(self, project_name):
+        """Returns the locations found via self.index_urls
+
+        Checks the url_name on the main (first in the list) index and
+        uses this url_name to produce all locations
+        """
+
+        def mkurl_pypi_url(url):
+            loc = posixpath.join(
+                url,
+                urllib_parse.quote(canonicalize_name(project_name)))
+            # For maximum compatibility with easy_install, ensure the path
+            # ends in a trailing slash.  Although this isn't in the spec
+            # (and PyPI can handle it without the slash) some other index
+            # implementations might break if they relied on easy_install's
+            # behavior.
+            if not loc.endswith('/'):
+                loc = loc + '/'
+            return loc
+
+        return [mkurl_pypi_url(url) for url in self.index_urls]
+
+    def find_all_candidates(self, project_name):
+        """Find all available InstallationCandidate for project_name
+
+        This checks index_urls, find_links and dependency_links.
+        All versions found are returned as an InstallationCandidate list.
+
+        See _link_package_versions for details on which files are accepted
+        """
+        index_locations = self._get_index_urls_locations(project_name)
+        index_file_loc, index_url_loc = self._sort_locations(index_locations)
+        fl_file_loc, fl_url_loc = self._sort_locations(
+            self.find_links, expand_dir=True)
+        dep_file_loc, dep_url_loc = self._sort_locations(self.dependency_links)
+
+        file_locations = (
+            Link(url) for url in itertools.chain(
+                index_file_loc, fl_file_loc, dep_file_loc)
+        )
+
+        # We trust every url that the user has given us whether it was given
+        #   via --index-url or --find-links
+        # We explicitly do not trust links that came from dependency_links
+        # We want to filter out anything which does not have a secure origin.
+        url_locations = [
+            link for link in itertools.chain(
+                (Link(url) for url in index_url_loc),
+                (Link(url) for url in fl_url_loc),
+                (Link(url) for url in dep_url_loc),
+            )
+            if self._validate_secure_origin(logger, link)
+        ]
+
+        logger.debug('%d location(s) to search for versions of %s:',
+                     len(url_locations), project_name)
+
+        for location in url_locations:
+            logger.debug('* %s', location)
+
+        canonical_name = canonicalize_name(project_name)
+        formats = fmt_ctl_formats(self.format_control, canonical_name)
+        search = Search(project_name, canonical_name, formats)
+        find_links_versions = self._package_versions(
+            # We trust every directly linked archive in find_links
+            (Link(url, '-f') for url in self.find_links),
+            search
+        )
+
+        page_versions = []
+        for page in self._get_pages(url_locations, project_name):
+            logger.debug('Analyzing links from page %s', page.url)
+            with indent_log():
+                page_versions.extend(
+                    self._package_versions(page.links, search)
+                )
+
+        dependency_versions = self._package_versions(
+            (Link(url) for url in self.dependency_links), search
+        )
+        if dependency_versions:
+            logger.debug(
+                'dependency_links found: %s',
+                ', '.join([
+                    version.location.url for version in dependency_versions
+                ])
+            )
+
+        file_versions = self._package_versions(file_locations, search)
+        if file_versions:
+            file_versions.sort(reverse=True)
+            logger.debug(
+                'Local files found: %s',
+                ', '.join([
+                    url_to_path(candidate.location.url)
+                    for candidate in file_versions
+                ])
+            )
+
+        # This is an intentional priority ordering
+        return (
+            file_versions + find_links_versions + page_versions +
+            dependency_versions
+        )
+
+    def find_requirement(self, req, upgrade):
+        """Try to find a Link matching req
+
+        Expects req, an InstallRequirement and upgrade, a boolean
+        Returns a Link if found,
+        Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise
+        """
+        all_candidates = self.find_all_candidates(req.name)
+
+        # Filter out anything which doesn't match our specifier
+        compatible_versions = set(
+            req.specifier.filter(
+                # We turn the version object into a str here because otherwise
+                # when we're debundled but setuptools isn't, Python will see
+                # packaging.version.Version and
+                # pkg_resources._vendor.packaging.version.Version as different
+                # types. This way we'll use a str as a common data interchange
+                # format. If we stop using the pkg_resources provided specifier
+                # and start using our own, we can drop the cast to str().
+                [str(c.version) for c in all_candidates],
+                prereleases=(
+                    self.allow_all_prereleases
+                    if self.allow_all_prereleases else None
+                ),
+            )
+        )
+        applicable_candidates = [
+            # Again, converting to str to deal with debundling.
+            c for c in all_candidates if str(c.version) in compatible_versions
+        ]
+
+        if applicable_candidates:
+            best_candidate = max(applicable_candidates,
+                                 key=self._candidate_sort_key)
+        else:
+            best_candidate = None
+
+        if req.satisfied_by is not None:
+            installed_version = parse_version(req.satisfied_by.version)
+        else:
+            installed_version = None
+
+        if installed_version is None and best_candidate is None:
+            logger.critical(
+                'Could not find a version that satisfies the requirement %s '
+                '(from versions: %s)',
+                req,
+                ', '.join(
+                    sorted(
+                        set(str(c.version) for c in all_candidates),
+                        key=parse_version,
+                    )
+                )
+            )
+
+            raise DistributionNotFound(
+                'No matching distribution found for %s' % req
+            )
+
+        best_installed = False
+        if installed_version and (
+                best_candidate is None or
+                best_candidate.version <= installed_version):
+            best_installed = True
+
+        if not upgrade and installed_version is not None:
+            if best_installed:
+                logger.debug(
+                    'Existing installed version (%s) is most up-to-date and '
+                    'satisfies requirement',
+                    installed_version,
+                )
+            else:
+                logger.debug(
+                    'Existing installed version (%s) satisfies requirement '
+                    '(most up-to-date version is %s)',
+                    installed_version,
+                    best_candidate.version,
+                )
+            return None
+
+        if best_installed:
+            # We have an existing version, and it's the best version
+            logger.debug(
+                'Installed version (%s) is most up-to-date (past versions: '
+                '%s)',
+                installed_version,
+                ', '.join(sorted(compatible_versions, key=parse_version)) or
+                "none",
+            )
+            raise BestVersionAlreadyInstalled
+
+        logger.debug(
+            'Using version %s (newest of versions: %s)',
+            best_candidate.version,
+            ', '.join(sorted(compatible_versions, key=parse_version))
+        )
+        return best_candidate.location
+
+    def _get_pages(self, locations, project_name):
+        """
+        Yields HTMLPage objects from the given locations, skipping
+        locations that have errors.
+        """
+        seen = set()
+        for location in locations:
+            if location in seen:
+                continue
+            seen.add(location)
+
+            page = self._get_page(location)
+            if page is None:
+                continue
+
+            yield page
+
+    _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$')
+
+    def _sort_links(self, links):
+        """
+        Returns elements of links in order, non-egg links first, egg links
+        second, while eliminating duplicates
+        """
+        eggs, no_eggs = [], []
+        seen = set()
+        for link in links:
+            if link not in seen:
+                seen.add(link)
+                if link.egg_fragment:
+                    eggs.append(link)
+                else:
+                    no_eggs.append(link)
+        return no_eggs + eggs
+
+    def _package_versions(self, links, search):
+        result = []
+        for link in self._sort_links(links):
+            v = self._link_package_versions(link, search)
+            if v is not None:
+                result.append(v)
+        return result
+
+    def _log_skipped_link(self, link, reason):
+        if link not in self.logged_links:
+            logger.debug('Skipping link %s; %s', link, reason)
+            self.logged_links.add(link)
+
+    def _link_package_versions(self, link, search):
+        """Return an InstallationCandidate or None"""
+        version = None
+        if link.egg_fragment:
+            egg_info = link.egg_fragment
+            ext = link.ext
+        else:
+            egg_info, ext = link.splitext()
+            if not ext:
+                self._log_skipped_link(link, 'not a file')
+                return
+            if ext not in SUPPORTED_EXTENSIONS:
+                self._log_skipped_link(
+                    link, 'unsupported archive format: %s' % ext)
+                return
+            if "binary" not in search.formats and ext == wheel_ext:
+                self._log_skipped_link(
+                    link, 'No binaries permitted for %s' % search.supplied)
+                return
+            if "macosx10" in link.path and ext == '.zip':
+                self._log_skipped_link(link, 'macosx10 one')
+                return
+            if ext == wheel_ext:
+                try:
+                    wheel = Wheel(link.filename)
+                except InvalidWheelFilename:
+                    self._log_skipped_link(link, 'invalid wheel filename')
+                    return
+                if canonicalize_name(wheel.name) != search.canonical:
+                    self._log_skipped_link(
+                        link, 'wrong project name (not %s)' % search.supplied)
+                    return
+
+                if not wheel.supported(self.valid_tags):
+                    self._log_skipped_link(
+                        link, 'it is not compatible with this Python')
+                    return
+
+                version = wheel.version
+
+        # This should be up by the search.ok_binary check, but see issue 2700.
+        if "source" not in search.formats and ext != wheel_ext:
+            self._log_skipped_link(
+                link, 'No sources permitted for %s' % search.supplied)
+            return
+
+        if not version:
+            version = egg_info_matches(egg_info, search.supplied, link)
+        if version is None:
+            self._log_skipped_link(
+                link, 'wrong project name (not %s)' % search.supplied)
+            return
+
+        match = self._py_version_re.search(version)
+        if match:
+            version = version[:match.start()]
+            py_version = match.group(1)
+            if py_version != sys.version[:3]:
+                self._log_skipped_link(
+                    link, 'Python version is incorrect')
+                return
+        try:
+            support_this_python = check_requires_python(link.requires_python)
+        except specifiers.InvalidSpecifier:
+            logger.debug("Package %s has an invalid Requires-Python entry: %s",
+                         link.filename, link.requires_python)
+            support_this_python = True
+
+        if not support_this_python:
+            logger.debug("The package %s is incompatible with the python"
+                         "version in use. Acceptable python versions are:%s",
+                         link, link.requires_python)
+            return
+        logger.debug('Found link %s, version: %s', link, version)
+
+        return InstallationCandidate(search.supplied, version, link)
+
+    def _get_page(self, link):
+        return HTMLPage.get_page(link, session=self.session)
+
+
+def egg_info_matches(
+        egg_info, search_name, link,
+        _egg_info_re=re.compile(r'([a-z0-9_.]+)-([a-z0-9_.!+-]+)', re.I)):
+    """Pull the version part out of a string.
+
+    :param egg_info: The string to parse. E.g. foo-2.1
+    :param search_name: The name of the package this belongs to. None to
+        infer the name. Note that this cannot unambiguously parse strings
+        like foo-2-2 which might be foo, 2-2 or foo-2, 2.
+    :param link: The link the string came from, for logging on failure.
+    """
+    match = _egg_info_re.search(egg_info)
+    if not match:
+        logger.debug('Could not parse version from link: %s', link)
+        return None
+    if search_name is None:
+        full_match = match.group(0)
+        return full_match[full_match.index('-'):]
+    name = match.group(0).lower()
+    # To match the "safe" name that pkg_resources creates:
+    name = name.replace('_', '-')
+    # project name and version must be separated by a dash
+    look_for = search_name.lower() + "-"
+    if name.startswith(look_for):
+        return match.group(0)[len(look_for):]
+    else:
+        return None
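+
+
+def _example_egg_info_matches():  # pragma: no cover
+    # Illustrative sketch only, not part of upstream pip: pulling the version
+    # out of a filename stem. 'pip-9.0.1' and 'notaversion' are arbitrary
+    # example strings; link=None only affects the failure log message.
+    return (
+        egg_info_matches('pip-9.0.1', 'pip', link=None),    # -> '9.0.1'
+        egg_info_matches('notaversion', 'pip', link=None),  # -> None
+    )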
+
+
+class HTMLPage(object):
+    """Represents one page, along with its URL"""
+
+    def __init__(self, content, url, headers=None):
+        # Determine if we have any encoding information in our headers
+        encoding = None
+        if headers and "Content-Type" in headers:
+            content_type, params = cgi.parse_header(headers["Content-Type"])
+
+            if "charset" in params:
+                encoding = params['charset']
+
+        self.content = content
+        self.parsed = html5lib.parse(
+            self.content,
+            transport_encoding=encoding,
+            namespaceHTMLElements=False,
+        )
+        self.url = url
+        self.headers = headers
+
+    def __str__(self):
+        return self.url
+
+    @classmethod
+    def get_page(cls, link, skip_archives=True, session=None):
+        if session is None:
+            raise TypeError(
+                "get_page() missing 1 required keyword argument: 'session'"
+            )
+
+        url = link.url
+        url = url.split('#', 1)[0]
+
+        # Check for VCS schemes that do not support lookup as web pages.
+        from pip.vcs import VcsSupport
+        for scheme in VcsSupport.schemes:
+            if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
+                logger.debug('Cannot look at %s URL %s', scheme, link)
+                return None
+
+        try:
+            if skip_archives:
+                filename = link.filename
+                for bad_ext in ARCHIVE_EXTENSIONS:
+                    if filename.endswith(bad_ext):
+                        content_type = cls._get_content_type(
+                            url, session=session,
+                        )
+                        if content_type.lower().startswith('text/html'):
+                            break
+                        else:
+                            logger.debug(
+                                'Skipping page %s because of Content-Type: %s',
+                                link,
+                                content_type,
+                            )
+                            return
+
+            logger.debug('Getting page %s', url)
+
+            # Tack index.html onto file:// URLs that point to directories
+            (scheme, netloc, path, params, query, fragment) = \
+                urllib_parse.urlparse(url)
+            if (scheme == 'file' and
+                    os.path.isdir(urllib_request.url2pathname(path))):
+                # add trailing slash if not present so urljoin doesn't trim
+                # final segment
+                if not url.endswith('/'):
+                    url += '/'
+                url = urllib_parse.urljoin(url, 'index.html')
+                logger.debug(' file: URL is directory, getting %s', url)
+
+            resp = session.get(
+                url,
+                headers={
+                    "Accept": "text/html",
+                    "Cache-Control": "max-age=600",
+                },
+            )
+            resp.raise_for_status()
+
+            # The check for archives above only works if the url ends with
+            # something that looks like an archive. However, that is not a
+            # requirement of a url. Unless we issue a HEAD request on every
+            # url we cannot know ahead of time for sure if something is HTML
+            # or not. However, we can check after we've downloaded it.
+            content_type = resp.headers.get('Content-Type', 'unknown')
+            if not content_type.lower().startswith("text/html"):
+                logger.debug(
+                    'Skipping page %s because of Content-Type: %s',
+                    link,
+                    content_type,
+                )
+                return
+
+            inst = cls(resp.content, resp.url, resp.headers)
+        except HTTPError as exc:
+            cls._handle_fail(link, exc, url)
+        except SSLError as exc:
+            reason = ("There was a problem confirming the ssl certificate: "
+                      "%s" % exc)
+            cls._handle_fail(link, reason, url, meth=logger.info)
+        except requests.ConnectionError as exc:
+            cls._handle_fail(link, "connection error: %s" % exc, url)
+        except requests.Timeout:
+            cls._handle_fail(link, "timed out", url)
+        else:
+            return inst
+
+    @staticmethod
+    def _handle_fail(link, reason, url, meth=None):
+        if meth is None:
+            meth = logger.debug
+
+        meth("Could not fetch URL %s: %s - skipping", link, reason)
+
+    @staticmethod
+    def _get_content_type(url, session):
+        """Get the Content-Type of the given url, using a HEAD request"""
+        scheme, netloc, path, query, fragment = urllib_parse.urlsplit(url)
+        if scheme not in ('http', 'https'):
+            # FIXME: some warning or something?
+            # assertion error?
+            return ''
+
+        resp = session.head(url, allow_redirects=True)
+        resp.raise_for_status()
+
+        return resp.headers.get("Content-Type", "")
+
+    @cached_property
+    def base_url(self):
+        bases = [
+            x for x in self.parsed.findall(".//base")
+            if x.get("href") is not None
+        ]
+        if bases and bases[0].get("href"):
+            return bases[0].get("href")
+        else:
+            return self.url
+
+    @property
+    def links(self):
+        """Yields all links in the page"""
+        for anchor in self.parsed.findall(".//a"):
+            if anchor.get("href"):
+                href = anchor.get("href")
+                url = self.clean_link(
+                    urllib_parse.urljoin(self.base_url, href)
+                )
+                pyrequire = anchor.get('data-requires-python')
+                pyrequire = unescape(pyrequire) if pyrequire else None
+                yield Link(url, self, requires_python=pyrequire)
+
+    _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
+
+    def clean_link(self, url):
+        """Makes sure a link is fully encoded.  That is, if a ' ' shows up in
+        the link, it will be rewritten to %20 (while not over-quoting
+        % or other characters)."""
+        return self._clean_re.sub(
+            lambda match: '%%%2x' % ord(match.group(0)), url)
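+
+
+def _example_clean_link():  # pragma: no cover
+    # Illustrative sketch only, not part of upstream pip: a space in an
+    # anchor href gets percent-encoded without re-quoting characters that
+    # are already safe. 'host.invalid' is a made-up hostname.
+    page = HTMLPage(b'<html></html>', 'https://host.invalid/simple/demo/')
+    return page.clean_link('https://host.invalid/demo/demo 1.0.tar.gz')
+    # -> 'https://host.invalid/demo/demo%201.0.tar.gz'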
+
+
+class Link(object):
+
+    def __init__(self, url, comes_from=None, requires_python=None):
+        """
+        Object representing a parsed link from https://pypi.python.org/simple/*
+
+        url:
+            url of the resource pointed to (href of the link)
+        comes_from:
+            instance of HTMLPage where the link was found, or string.
+        requires_python:
+            String containing the `Requires-Python` metadata field, specified
+            in PEP 345. This may be specified by a data-requires-python
+            attribute in the HTML link tag, as described in PEP 503.
+        """
+
+        # url can be a UNC windows share
+        if url.startswith('\\\\'):
+            url = path_to_url(url)
+
+        self.url = url
+        self.comes_from = comes_from
+        self.requires_python = requires_python if requires_python else None
+
+    def __str__(self):
+        if self.requires_python:
+            rp = ' (requires-python:%s)' % self.requires_python
+        else:
+            rp = ''
+        if self.comes_from:
+            return '%s (from %s)%s' % (self.url, self.comes_from, rp)
+        else:
+            return str(self.url)
+
+    def __repr__(self):
+        return '<Link %s>' % self
+
+    def __eq__(self, other):
+        if not isinstance(other, Link):
+            return NotImplemented
+        return self.url == other.url
+
+    def __ne__(self, other):
+        if not isinstance(other, Link):
+            return NotImplemented
+        return self.url != other.url
+
+    def __lt__(self, other):
+        if not isinstance(other, Link):
+            return NotImplemented
+        return self.url < other.url
+
+    def __le__(self, other):
+        if not isinstance(other, Link):
+            return NotImplemented
+        return self.url <= other.url
+
+    def __gt__(self, other):
+        if not isinstance(other, Link):
+            return NotImplemented
+        return self.url > other.url
+
+    def __ge__(self, other):
+        if not isinstance(other, Link):
+            return NotImplemented
+        return self.url >= other.url
+
+    def __hash__(self):
+        return hash(self.url)
+
+    @property
+    def filename(self):
+        _, netloc, path, _, _ = urllib_parse.urlsplit(self.url)
+        name = posixpath.basename(path.rstrip('/')) or netloc
+        name = urllib_parse.unquote(name)
+        assert name, ('URL %r produced no filename' % self.url)
+        return name
+
+    @property
+    def scheme(self):
+        return urllib_parse.urlsplit(self.url)[0]
+
+    @property
+    def netloc(self):
+        return urllib_parse.urlsplit(self.url)[1]
+
+    @property
+    def path(self):
+        return urllib_parse.unquote(urllib_parse.urlsplit(self.url)[2])
+
+    def splitext(self):
+        return splitext(posixpath.basename(self.path.rstrip('/')))
+
+    @property
+    def ext(self):
+        return self.splitext()[1]
+
+    @property
+    def url_without_fragment(self):
+        scheme, netloc, path, query, fragment = urllib_parse.urlsplit(self.url)
+        return urllib_parse.urlunsplit((scheme, netloc, path, query, None))
+
+    _egg_fragment_re = re.compile(r'[#&]egg=([^&]*)')
+
+    @property
+    def egg_fragment(self):
+        match = self._egg_fragment_re.search(self.url)
+        if not match:
+            return None
+        return match.group(1)
+
+    _subdirectory_fragment_re = re.compile(r'[#&]subdirectory=([^&]*)')
+
+    @property
+    def subdirectory_fragment(self):
+        match = self._subdirectory_fragment_re.search(self.url)
+        if not match:
+            return None
+        return match.group(1)
+
+    _hash_re = re.compile(
+        r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)'
+    )
+
+    @property
+    def hash(self):
+        match = self._hash_re.search(self.url)
+        if match:
+            return match.group(2)
+        return None
+
+    @property
+    def hash_name(self):
+        match = self._hash_re.search(self.url)
+        if match:
+            return match.group(1)
+        return None
+
+    @property
+    def show_url(self):
+        return posixpath.basename(self.url.split('#', 1)[0].split('?', 1)[0])
+
+    @property
+    def is_wheel(self):
+        return self.ext == wheel_ext
+
+    @property
+    def is_artifact(self):
+        """
+        Determines if this points to an actual artifact (e.g. a tarball) or if
+        it points to an "abstract" thing like a path or a VCS location.
+        """
+        from pip.vcs import vcs
+
+        if self.scheme in vcs.all_schemes:
+            return False
+
+        return True
+
+
+FormatControl = namedtuple('FormatControl', 'no_binary only_binary')
+"""This object has two fields, no_binary and only_binary.
+
+If a field is falsy, it isn't set. If it is {':all:'}, it should match all
+packages except those listed in the other field. Only one field can be set
+to {':all:'} at a time. Otherwise the fields hold exact package names, and
+any given package may appear in only one field at a time.
+"""
+
+
+def fmt_ctl_handle_mutual_exclude(value, target, other):
+    new = value.split(',')
+    while ':all:' in new:
+        other.clear()
+        target.clear()
+        target.add(':all:')
+        del new[:new.index(':all:') + 1]
+        if ':none:' not in new:
+            # Without a ':none:', we want to discard everything as :all: covers it
+            return
+    for name in new:
+        if name == ':none:':
+            target.clear()
+            continue
+        name = canonicalize_name(name)
+        other.discard(name)
+        target.add(name)
+
+
+def fmt_ctl_formats(fmt_ctl, canonical_name):
+    result = set(["binary", "source"])
+    if canonical_name in fmt_ctl.only_binary:
+        result.discard('source')
+    elif canonical_name in fmt_ctl.no_binary:
+        result.discard('binary')
+    elif ':all:' in fmt_ctl.only_binary:
+        result.discard('source')
+    elif ':all:' in fmt_ctl.no_binary:
+        result.discard('binary')
+    return frozenset(result)
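+
+
+def _example_format_control():  # pragma: no cover
+    # Illustrative sketch only, not part of upstream pip: with 'numpy' in
+    # no_binary, wheels are ruled out for numpy while other (example)
+    # projects may still use either format.
+    fmt_ctl = FormatControl(no_binary={'numpy'}, only_binary=set())
+    return (
+        fmt_ctl_formats(fmt_ctl, 'numpy'),     # -> frozenset({'source'})
+        fmt_ctl_formats(fmt_ctl, 'requests'),  # -> frozenset({'binary', 'source'})
+    )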
+
+
+def fmt_ctl_no_binary(fmt_ctl):
+    fmt_ctl_handle_mutual_exclude(
+        ':all:', fmt_ctl.no_binary, fmt_ctl.only_binary)
+
+
+def fmt_ctl_no_use_wheel(fmt_ctl):
+    fmt_ctl_no_binary(fmt_ctl)
+    warnings.warn(
+        '--no-use-wheel is deprecated and will be removed in the future. '
+        ' Please use --no-binary :all: instead.', RemovedInPip10Warning,
+        stacklevel=2)
+
+
+Search = namedtuple('Search', 'supplied canonical formats')
+"""Capture key aspects of a search.
+
+:attribute supplied: The user supplied package.
+:attribute canonical: The canonical package name.
+:attribute formats: The formats allowed for this package. Should be a set
+    with 'binary' or 'source' or both in it.
+"""
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/locations.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/locations.py
new file mode 100644
index 0000000000000000000000000000000000000000..e598ef105a440e371a9127da646ed1dcf877844f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/locations.py
@@ -0,0 +1,182 @@
+"""Locations where we look for configs, install stuff, etc"""
+from __future__ import absolute_import
+
+import os
+import os.path
+import site
+import sys
+
+from distutils import sysconfig
+from distutils.command.install import install, SCHEME_KEYS  # noqa
+
+from pip.compat import WINDOWS, expanduser
+from pip.utils import appdirs
+
+
+# Application Directories
+USER_CACHE_DIR = appdirs.user_cache_dir("pip")
+
+
+DELETE_MARKER_MESSAGE = '''\
+This file is placed here by pip to indicate that the source in this
+directory was put here by pip.
+
+Once this package is successfully installed this source code will be
+deleted (unless you remove this file).
+'''
+PIP_DELETE_MARKER_FILENAME = 'pip-delete-this-directory.txt'
+
+
+def write_delete_marker_file(directory):
+    """
+    Write the pip delete marker file into this directory.
+    """
+    filepath = os.path.join(directory, PIP_DELETE_MARKER_FILENAME)
+    with open(filepath, 'w') as marker_fp:
+        marker_fp.write(DELETE_MARKER_MESSAGE)
+
+
+def running_under_virtualenv():
+    """
+    Return True if we're running inside a virtualenv, False otherwise.
+
+    """
+    if hasattr(sys, 'real_prefix'):
+        return True
+    elif sys.prefix != getattr(sys, "base_prefix", sys.prefix):
+        return True
+
+    return False
+
+
+def virtualenv_no_global():
+    """
+    Return True if in a venv and no system site packages.
+    """
+    # this mirrors the logic in virtualenv.py for locating the
+    # no-global-site-packages.txt file
+    site_mod_dir = os.path.dirname(os.path.abspath(site.__file__))
+    no_global_file = os.path.join(site_mod_dir, 'no-global-site-packages.txt')
+    if running_under_virtualenv() and os.path.isfile(no_global_file):
+        return True
+
+
+if running_under_virtualenv():
+    src_prefix = os.path.join(sys.prefix, 'src')
+else:
+    # FIXME: keep src in cwd for now (it is not a temporary folder)
+    try:
+        src_prefix = os.path.join(os.getcwd(), 'src')
+    except OSError:
+        # In case the current working directory has been renamed or deleted
+        sys.exit(
+            "The folder you are executing pip from can no longer be found."
+        )
+
+# under macOS + virtualenv sys.prefix is not properly resolved
+# it is something like /path/to/python/bin/..
+# Note: using realpath due to tmp dirs on OSX being symlinks
+src_prefix = os.path.abspath(src_prefix)
+
+# FIXME doesn't account for venv linked to global site-packages
+
+site_packages = sysconfig.get_python_lib()
+user_site = site.USER_SITE
+user_dir = expanduser('~')
+if WINDOWS:
+    bin_py = os.path.join(sys.prefix, 'Scripts')
+    bin_user = os.path.join(user_site, 'Scripts')
+    # buildout uses 'bin' on Windows too?
+    if not os.path.exists(bin_py):
+        bin_py = os.path.join(sys.prefix, 'bin')
+        bin_user = os.path.join(user_site, 'bin')
+
+    config_basename = 'pip.ini'
+
+    legacy_storage_dir = os.path.join(user_dir, 'pip')
+    legacy_config_file = os.path.join(
+        legacy_storage_dir,
+        config_basename,
+    )
+else:
+    bin_py = os.path.join(sys.prefix, 'bin')
+    bin_user = os.path.join(user_site, 'bin')
+
+    config_basename = 'pip.conf'
+
+    legacy_storage_dir = os.path.join(user_dir, '.pip')
+    legacy_config_file = os.path.join(
+        legacy_storage_dir,
+        config_basename,
+    )
+
+    # Forcing to use /usr/local/bin for standard macOS framework installs
+    # Also log to ~/Library/Logs/ for use with the Console.app log viewer
+    if sys.platform[:6] == 'darwin' and sys.prefix[:16] == '/System/Library/':
+        bin_py = '/usr/local/bin'
+
+site_config_files = [
+    os.path.join(path, config_basename)
+    for path in appdirs.site_config_dirs('pip')
+]
+
+
+def distutils_scheme(dist_name, user=False, home=None, root=None,
+                     isolated=False, prefix=None):
+    """
+    Return a distutils install scheme
+    """
+    from distutils.dist import Distribution
+
+    scheme = {}
+
+    if isolated:
+        extra_dist_args = {"script_args": ["--no-user-cfg"]}
+    else:
+        extra_dist_args = {}
+    dist_args = {'name': dist_name}
+    dist_args.update(extra_dist_args)
+
+    d = Distribution(dist_args)
+    d.parse_config_files()
+    i = d.get_command_obj('install', create=True)
+    # NOTE: setting user or home has the side-effect of creating the home dir
+    # or user base for installations during finalize_options()
+    # ideally, we'd prefer a scheme class that has no side-effects.
+    assert not (user and prefix), "user={0} prefix={1}".format(user, prefix)
+    i.user = user or i.user
+    if user:
+        i.prefix = ""
+    i.prefix = prefix or i.prefix
+    i.home = home or i.home
+    i.root = root or i.root
+    i.finalize_options()
+    for key in SCHEME_KEYS:
+        scheme[key] = getattr(i, 'install_' + key)
+
+    # install_lib specified in setup.cfg should install *everything*
+    # into there (i.e. it takes precedence over both purelib and
+    # platlib).  Note: i.install_lib is *always* set after
+    # finalize_options(); we only want to override here if the user
+    # has explicitly requested it, hence going back to the config.
+    if 'install_lib' in d.get_option_dict('install'):
+        scheme.update(dict(purelib=i.install_lib, platlib=i.install_lib))
+
+    if running_under_virtualenv():
+        scheme['headers'] = os.path.join(
+            sys.prefix,
+            'include',
+            'site',
+            'python' + sys.version[:3],
+            dist_name,
+        )
+
+        if root is not None:
+            path_no_drive = os.path.splitdrive(
+                os.path.abspath(scheme["headers"]))[1]
+            scheme["headers"] = os.path.join(
+                root,
+                path_no_drive[1:],
+            )
+
+    return scheme
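+
+
+def _example_distutils_scheme():  # pragma: no cover
+    # Illustrative sketch only, not part of upstream pip: the returned dict
+    # maps the distutils SCHEME_KEYS ('purelib', 'platlib', 'headers',
+    # 'scripts', 'data') to the paths a plain (non --user, non --prefix)
+    # install would use. 'example-dist' is an arbitrary project name.
+    scheme = distutils_scheme('example-dist')
+    return sorted(scheme)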
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/models/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d727d7eea7ba617ebb5921be0c2f4d314cd06ed
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/models/__init__.py
@@ -0,0 +1,4 @@
+from pip.models.index import Index, PyPI
+
+
+__all__ = ["Index", "PyPI"]
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/models/index.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/models/index.py
new file mode 100644
index 0000000000000000000000000000000000000000..be9911988cf7d7e4231ca7256f3e7311d8e8a5f5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/models/index.py
@@ -0,0 +1,16 @@
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+
+
+class Index(object):
+    def __init__(self, url):
+        self.url = url
+        self.netloc = urllib_parse.urlsplit(url).netloc
+        self.simple_url = self.url_to_path('simple')
+        self.pypi_url = self.url_to_path('pypi')
+        self.pip_json_url = self.url_to_path('pypi/pip/json')
+
+    def url_to_path(self, path):
+        return urllib_parse.urljoin(self.url, path)
+
+
+PyPI = Index('https://pypi.python.org/')
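+
+
+def _example_mirror_index():  # pragma: no cover
+    # Illustrative sketch only, not part of upstream pip: an Index rooted at
+    # another URL derives the same endpoints via urljoin.
+    # 'https://mirror.invalid/' is a made-up host.
+    mirror = Index('https://mirror.invalid/')
+    return mirror.simple_url, mirror.pypi_url
+    # -> ('https://mirror.invalid/simple', 'https://mirror.invalid/pypi')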
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/check.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/check.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cf67aaeaf4023887e383482ef6086826caeb225
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/check.py
@@ -0,0 +1,49 @@
+
+
+def check_requirements(installed_dists):
+    missing_reqs_dict = {}
+    incompatible_reqs_dict = {}
+
+    for dist in installed_dists:
+        key = '%s==%s' % (dist.project_name, dist.version)
+
+        missing_reqs = list(get_missing_reqs(dist, installed_dists))
+        if missing_reqs:
+            missing_reqs_dict[key] = missing_reqs
+
+        incompatible_reqs = list(get_incompatible_reqs(
+            dist, installed_dists))
+        if incompatible_reqs:
+            incompatible_reqs_dict[key] = incompatible_reqs
+
+    return (missing_reqs_dict, incompatible_reqs_dict)
+
+
+def get_missing_reqs(dist, installed_dists):
+    """Return all of the requirements of `dist` that aren't present in
+    `installed_dists`.
+
+    """
+    installed_names = set(d.project_name.lower() for d in installed_dists)
+    missing_requirements = set()
+
+    for requirement in dist.requires():
+        if requirement.project_name.lower() not in installed_names:
+            missing_requirements.add(requirement)
+            yield requirement
+
+
+def get_incompatible_reqs(dist, installed_dists):
+    """Return all of the requirements of `dist` that are present in
+    `installed_dists`, but have incompatible versions.
+
+    """
+    installed_dists_by_name = {}
+    for installed_dist in installed_dists:
+        installed_dists_by_name[installed_dist.project_name] = installed_dist
+
+    for requirement in dist.requires():
+        present_dist = installed_dists_by_name.get(requirement.project_name)
+
+        if present_dist and present_dist not in requirement:
+            yield (requirement, present_dist)
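+
+
+def _example_check_working_set():  # pragma: no cover
+    # Illustrative sketch only, not part of upstream pip: one way to run the
+    # checks above against every distribution in the current environment.
+    # Keys of the returned dicts look like 'name==version'.
+    from pip._vendor import pkg_resources
+
+    missing, incompatible = check_requirements(list(pkg_resources.working_set))
+    return missing, incompatible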
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/freeze.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/freeze.py
new file mode 100644
index 0000000000000000000000000000000000000000..920c2c12848c31853af6cefc35efddc8e08d3b9e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/operations/freeze.py
@@ -0,0 +1,132 @@
+from __future__ import absolute_import
+
+import logging
+import re
+
+import pip
+from pip.req import InstallRequirement
+from pip.req.req_file import COMMENT_RE
+from pip.utils import get_installed_distributions
+from pip._vendor import pkg_resources
+from pip._vendor.packaging.utils import canonicalize_name
+from pip._vendor.pkg_resources import RequirementParseError
+
+
+logger = logging.getLogger(__name__)
+
+
+def freeze(
+        requirement=None,
+        find_links=None, local_only=None, user_only=None, skip_regex=None,
+        default_vcs=None,
+        isolated=False,
+        wheel_cache=None,
+        skip=()):
+    find_links = find_links or []
+    skip_match = None
+
+    if skip_regex:
+        skip_match = re.compile(skip_regex).search
+
+    dependency_links = []
+
+    for dist in pkg_resources.working_set:
+        if dist.has_metadata('dependency_links.txt'):
+            dependency_links.extend(
+                dist.get_metadata_lines('dependency_links.txt')
+            )
+    for link in find_links:
+        if '#egg=' in link:
+            dependency_links.append(link)
+    for link in find_links:
+        yield '-f %s' % link
+    installations = {}
+    for dist in get_installed_distributions(local_only=local_only,
+                                            skip=(),
+                                            user_only=user_only):
+        try:
+            req = pip.FrozenRequirement.from_dist(
+                dist,
+                dependency_links
+            )
+        except RequirementParseError:
+            logger.warning(
+                "Could not parse requirement: %s",
+                dist.project_name
+            )
+            continue
+        installations[req.name] = req
+
+    if requirement:
+        # Options that don't get turned into an InstallRequirement should
+        # only be emitted once, even if the same option appears in multiple
+        # requirements files, so keep track of what has already been emitted
+        # to avoid repeating it.
+        emitted_options = set()
+        for req_file_path in requirement:
+            with open(req_file_path) as req_file:
+                for line in req_file:
+                    if (not line.strip() or
+                            line.strip().startswith('#') or
+                            (skip_match and skip_match(line)) or
+                            line.startswith((
+                                '-r', '--requirement',
+                                '-Z', '--always-unzip',
+                                '-f', '--find-links',
+                                '-i', '--index-url',
+                                '--pre',
+                                '--trusted-host',
+                                '--process-dependency-links',
+                                '--extra-index-url'))):
+                        line = line.rstrip()
+                        if line not in emitted_options:
+                            emitted_options.add(line)
+                            yield line
+                        continue
+
+                    if line.startswith('-e') or line.startswith('--editable'):
+                        if line.startswith('-e'):
+                            line = line[2:].strip()
+                        else:
+                            line = line[len('--editable'):].strip().lstrip('=')
+                        line_req = InstallRequirement.from_editable(
+                            line,
+                            default_vcs=default_vcs,
+                            isolated=isolated,
+                            wheel_cache=wheel_cache,
+                        )
+                    else:
+                        line_req = InstallRequirement.from_line(
+                            COMMENT_RE.sub('', line).strip(),
+                            isolated=isolated,
+                            wheel_cache=wheel_cache,
+                        )
+
+                    if not line_req.name:
+                        logger.info(
+                            "Skipping line in requirement file [%s] because "
+                            "it's not clear what it would install: %s",
+                            req_file_path, line.strip(),
+                        )
+                        logger.info(
+                            "  (add #egg=PackageName to the URL to avoid"
+                            " this warning)"
+                        )
+                    elif line_req.name not in installations:
+                        logger.warning(
+                            "Requirement file [%s] contains %s, but that "
+                            "package is not installed",
+                            req_file_path, COMMENT_RE.sub('', line).strip(),
+                        )
+                    else:
+                        yield str(installations[line_req.name]).rstrip()
+                        del installations[line_req.name]
+
+        yield(
+            '## The following requirements were added by '
+            'pip freeze:'
+        )
+    for installation in sorted(
+            installations.values(), key=lambda x: x.name.lower()):
+        if canonicalize_name(installation.name) not in skip:
+            yield str(installation).rstrip()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/pep425tags.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/pep425tags.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad202ef313f57697c79dc0a4802956d1d49a7b4e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/pep425tags.py
@@ -0,0 +1,324 @@
+"""Generate and work with PEP 425 Compatibility Tags."""
+from __future__ import absolute_import
+
+import re
+import sys
+import warnings
+import platform
+import logging
+
+try:
+    import sysconfig
+except ImportError:  # pragma nocover
+    # Python < 2.7
+    import distutils.sysconfig as sysconfig
+import distutils.util
+
+from pip.compat import OrderedDict
+import pip.utils.glibc
+
+logger = logging.getLogger(__name__)
+
+_osx_arch_pat = re.compile(r'(.+)_(\d+)_(\d+)_(.+)')
+
+
+def get_config_var(var):
+    try:
+        return sysconfig.get_config_var(var)
+    except IOError as e:  # Issue #1074
+        warnings.warn("{0}".format(e), RuntimeWarning)
+        return None
+
+
+def get_abbr_impl():
+    """Return abbreviated implementation name."""
+    if hasattr(sys, 'pypy_version_info'):
+        pyimpl = 'pp'
+    elif sys.platform.startswith('java'):
+        pyimpl = 'jy'
+    elif sys.platform == 'cli':
+        pyimpl = 'ip'
+    else:
+        pyimpl = 'cp'
+    return pyimpl
+
+
+def get_impl_ver():
+    """Return implementation version."""
+    impl_ver = get_config_var("py_version_nodot")
+    if not impl_ver or get_abbr_impl() == 'pp':
+        impl_ver = ''.join(map(str, get_impl_version_info()))
+    return impl_ver
+
+
+def get_impl_version_info():
+    """Return sys.version_info-like tuple for use in decrementing the minor
+    version."""
+    if get_abbr_impl() == 'pp':
+        # as per https://github.com/pypa/pip/issues/2882
+        return (sys.version_info[0], sys.pypy_version_info.major,
+                sys.pypy_version_info.minor)
+    else:
+        return sys.version_info[0], sys.version_info[1]
+
+
+def get_impl_tag():
+    """
+    Returns the Tag for this specific implementation.
+    """
+    return "{0}{1}".format(get_abbr_impl(), get_impl_ver())
+
+
+def get_flag(var, fallback, expected=True, warn=True):
+    """Use a fallback method for determining SOABI flags if the needed config
+    var is unset or unavailable."""
+    val = get_config_var(var)
+    if val is None:
+        if warn:
+            logger.debug("Config variable '%s' is unset, Python ABI tag may "
+                         "be incorrect", var)
+        return fallback()
+    return val == expected
+
+
+def get_abi_tag():
+    """Return the ABI tag based on SOABI (if available) or emulate SOABI
+    (CPython 2, PyPy)."""
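+    # For example (illustrative): a typical CPython 3.6 build with pymalloc
+    # enabled and Py_DEBUG disabled ends up with the ABI tag 'cp36m'.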
+    soabi = get_config_var('SOABI')
+    impl = get_abbr_impl()
+    if not soabi and impl in ('cp', 'pp') and hasattr(sys, 'maxunicode'):
+        d = ''
+        m = ''
+        u = ''
+        if get_flag('Py_DEBUG',
+                    lambda: hasattr(sys, 'gettotalrefcount'),
+                    warn=(impl == 'cp')):
+            d = 'd'
+        if get_flag('WITH_PYMALLOC',
+                    lambda: impl == 'cp',
+                    warn=(impl == 'cp')):
+            m = 'm'
+        if get_flag('Py_UNICODE_SIZE',
+                    lambda: sys.maxunicode == 0x10ffff,
+                    expected=4,
+                    warn=(impl == 'cp' and
+                          sys.version_info < (3, 3))) \
+                and sys.version_info < (3, 3):
+            u = 'u'
+        abi = '%s%s%s%s%s' % (impl, get_impl_ver(), d, m, u)
+    elif soabi and soabi.startswith('cpython-'):
+        abi = 'cp' + soabi.split('-')[1]
+    elif soabi:
+        abi = soabi.replace('.', '_').replace('-', '_')
+    else:
+        abi = None
+    return abi
+
+
+def _is_running_32bit():
+    return sys.maxsize == 2147483647
+
+
+def get_platform():
+    """Return our platform name 'win32', 'linux_x86_64'"""
+    if sys.platform == 'darwin':
+        # distutils.util.get_platform() returns the release based on the value
+        # of MACOSX_DEPLOYMENT_TARGET on which Python was built, which may
+        # be significantly older than the user's current machine.
+        release, _, machine = platform.mac_ver()
+        split_ver = release.split('.')
+
+        if machine == "x86_64" and _is_running_32bit():
+            machine = "i386"
+        elif machine == "ppc64" and _is_running_32bit():
+            machine = "ppc"
+
+        return 'macosx_{0}_{1}_{2}'.format(split_ver[0], split_ver[1], machine)
+
+    # XXX remove distutils dependency
+    result = distutils.util.get_platform().replace('.', '_').replace('-', '_')
+    if result == "linux_x86_64" and _is_running_32bit():
+        # 32 bit Python program (running on a 64 bit Linux): pip should only
+        # install and run 32 bit compiled extensions in that case.
+        result = "linux_i686"
+
+    return result
+
+
+def is_manylinux1_compatible():
+    # Only Linux, and only x86-64 / i686
+    if get_platform() not in ("linux_x86_64", "linux_i686"):
+        return False
+
+    # Check for presence of _manylinux module
+    try:
+        import _manylinux
+        return bool(_manylinux.manylinux1_compatible)
+    except (ImportError, AttributeError):
+        # Fall through to heuristic check below
+        pass
+
+    # Check glibc version. CentOS 5 uses glibc 2.5.
+    return pip.utils.glibc.have_compatible_glibc(2, 5)
+
+
+def get_darwin_arches(major, minor, machine):
+    """Return a list of supported arches (including group arches) for
+    the given major, minor and machine architecture of a macOS machine.
+    """
+    arches = []
+
+    def _supports_arch(major, minor, arch):
+        # Looking at the application support for macOS versions in the chart
+        # provided by https://en.wikipedia.org/wiki/OS_X#Versions it appears
+        # our timeline looks roughly like:
+        #
+        # 10.0 - Introduces ppc support.
+        # 10.4 - Introduces ppc64, i386, and x86_64 support, however the ppc64
+        #        and x86_64 support is CLI only, and cannot be used for GUI
+        #        applications.
+        # 10.5 - Extends ppc64 and x86_64 support to cover GUI applications.
+        # 10.6 - Drops support for ppc64
+        # 10.7 - Drops support for ppc
+        #
+        # Given that we do not know if we're installing a CLI or a GUI
+        # application, we must be conservative and assume it might be a GUI
+        # application and behave as if ppc64 and x86_64 support did not occur
+        # until 10.5.
+        #
+        # Note: The above information is taken from the "Application support"
+        #       column in the chart not the "Processor support" since I believe
+        #       that we care about what instruction sets an application can use
+        #       not which processors the OS supports.
+        if arch == 'ppc':
+            return (major, minor) <= (10, 5)
+        if arch == 'ppc64':
+            return (major, minor) == (10, 5)
+        if arch == 'i386':
+            return (major, minor) >= (10, 4)
+        if arch == 'x86_64':
+            return (major, minor) >= (10, 5)
+        if arch in groups:
+            for garch in groups[arch]:
+                if _supports_arch(major, minor, garch):
+                    return True
+        return False
+
+    groups = OrderedDict([
+        ("fat", ("i386", "ppc")),
+        ("intel", ("x86_64", "i386")),
+        ("fat64", ("x86_64", "ppc64")),
+        ("fat32", ("x86_64", "i386", "ppc")),
+    ])
+
+    if _supports_arch(major, minor, machine):
+        arches.append(machine)
+
+    for garch in groups:
+        if machine in groups[garch] and _supports_arch(major, minor, garch):
+            arches.append(garch)
+
+    arches.append('universal')
+
+    return arches
+
+
+def get_supported(versions=None, noarch=False, platform=None,
+                  impl=None, abi=None):
+    """Return a list of supported tags for each version specified in
+    `versions`.
+
+    :param versions: a list of string versions, of the form ["33", "32"],
+        or None. The first version will be assumed to support our ABI.
+    :param platform: specify the exact platform you want valid
+        tags for, or None. If None, use the local system platform.
+    :param impl: specify the exact implementation you want valid
+        tags for, or None. If None, use the local interpreter impl.
+    :param abi: specify the exact abi you want valid
+        tags for, or None. If None, use the local interpreter abi.
+    """
+    supported = []
+
+    # Versions must be given with respect to the preference
+    if versions is None:
+        versions = []
+        version_info = get_impl_version_info()
+        major = version_info[:-1]
+        # Support all previous minor Python versions.
+        for minor in range(version_info[-1], -1, -1):
+            versions.append(''.join(map(str, major + (minor,))))
+
+    impl = impl or get_abbr_impl()
+
+    abis = []
+
+    abi = abi or get_abi_tag()
+    if abi:
+        abis[0:0] = [abi]
+
+    abi3s = set()
+    import imp
+    for suffix in imp.get_suffixes():
+        if suffix[0].startswith('.abi'):
+            abi3s.add(suffix[0].split('.', 2)[1])
+
+    abis.extend(sorted(list(abi3s)))
+
+    abis.append('none')
+
+    if not noarch:
+        arch = platform or get_platform()
+        if arch.startswith('macosx'):
+            # support macosx-10.6-intel on macosx-10.9-x86_64
+            match = _osx_arch_pat.match(arch)
+            if match:
+                name, major, minor, actual_arch = match.groups()
+                tpl = '{0}_{1}_%i_%s'.format(name, major)
+                arches = []
+                for m in reversed(range(int(minor) + 1)):
+                    for a in get_darwin_arches(int(major), m, actual_arch):
+                        arches.append(tpl % (m, a))
+            else:
+                # arch pattern didn't match (?!)
+                arches = [arch]
+        elif platform is None and is_manylinux1_compatible():
+            arches = [arch.replace('linux', 'manylinux1'), arch]
+        else:
+            arches = [arch]
+
+        # Current version, current API (built specifically for our Python):
+        for abi in abis:
+            for arch in arches:
+                supported.append(('%s%s' % (impl, versions[0]), abi, arch))
+
+        # abi3 modules compatible with older version of Python
+        for version in versions[1:]:
+            # abi3 was introduced in Python 3.2
+            if version in ('31', '30'):
+                break
+            for abi in abi3s:   # empty set if not Python 3
+                for arch in arches:
+                    supported.append(("%s%s" % (impl, version), abi, arch))
+
+        # Has binaries, does not use the Python API:
+        for arch in arches:
+            supported.append(('py%s' % (versions[0][0]), 'none', arch))
+
+    # No abi / arch, but requires our implementation:
+    supported.append(('%s%s' % (impl, versions[0]), 'none', 'any'))
+    # Tagged specifically as being cross-version compatible
+    # (with just the major version specified)
+    supported.append(('%s%s' % (impl, versions[0][0]), 'none', 'any'))
+
+    # No abi / arch, generic Python
+    for i, version in enumerate(versions):
+        supported.append(('py%s' % (version,), 'none', 'any'))
+        if i == 0:
+            supported.append(('py%s' % (version[0]), 'none', 'any'))
+
+    return supported
+
+supported_tags = get_supported()
+supported_tags_noarch = get_supported(noarch=True)
+
+implementation_tag = get_impl_tag()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..00185a4308337a11602f40dc238e93d9b3d47f2d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/__init__.py
@@ -0,0 +1,10 @@
+from __future__ import absolute_import
+
+from .req_install import InstallRequirement
+from .req_set import RequirementSet, Requirements
+from .req_file import parse_requirements
+
+__all__ = [
+    "RequirementSet", "Requirements", "InstallRequirement",
+    "parse_requirements",
+]
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_file.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_file.py
new file mode 100644
index 0000000000000000000000000000000000000000..821df2271db505f1f6cb4ad612868251361cb99b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_file.py
@@ -0,0 +1,342 @@
+"""
+Requirements file parsing
+"""
+
+from __future__ import absolute_import
+
+import os
+import re
+import shlex
+import sys
+import optparse
+import warnings
+
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+from pip._vendor.six.moves import filterfalse
+
+import pip
+from pip.download import get_file_content
+from pip.req.req_install import InstallRequirement
+from pip.exceptions import (RequirementsFileParseError)
+from pip.utils.deprecation import RemovedInPip10Warning
+from pip import cmdoptions
+
+__all__ = ['parse_requirements']
+
+SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
+COMMENT_RE = re.compile(r'(^|\s)+#.*$')
+
+SUPPORTED_OPTIONS = [
+    cmdoptions.constraints,
+    cmdoptions.editable,
+    cmdoptions.requirements,
+    cmdoptions.no_index,
+    cmdoptions.index_url,
+    cmdoptions.find_links,
+    cmdoptions.extra_index_url,
+    cmdoptions.allow_external,
+    cmdoptions.allow_all_external,
+    cmdoptions.no_allow_external,
+    cmdoptions.allow_unsafe,
+    cmdoptions.no_allow_unsafe,
+    cmdoptions.use_wheel,
+    cmdoptions.no_use_wheel,
+    cmdoptions.always_unzip,
+    cmdoptions.no_binary,
+    cmdoptions.only_binary,
+    cmdoptions.pre,
+    cmdoptions.process_dependency_links,
+    cmdoptions.trusted_host,
+    cmdoptions.require_hashes,
+]
+
+# options to be passed to requirements
+SUPPORTED_OPTIONS_REQ = [
+    cmdoptions.install_options,
+    cmdoptions.global_options,
+    cmdoptions.hash,
+]
+
+# the 'dest' string values
+SUPPORTED_OPTIONS_REQ_DEST = [o().dest for o in SUPPORTED_OPTIONS_REQ]
+
+
+def parse_requirements(filename, finder=None, comes_from=None, options=None,
+                       session=None, constraint=False, wheel_cache=None):
+    """Parse a requirements file and yield InstallRequirement instances.
+
+    :param filename:    Path or url of requirements file.
+    :param finder:      Instance of pip.index.PackageFinder.
+    :param comes_from:  Origin description of requirements.
+    :param options:     cli options.
+    :param session:     Instance of pip.download.PipSession.
+    :param constraint:  If true, parsing a constraint file rather than
+        requirements file.
+    :param wheel_cache: Instance of pip.wheel.WheelCache
+    """
+    if session is None:
+        raise TypeError(
+            "parse_requirements() missing 1 required keyword argument: "
+            "'session'"
+        )
+
+    _, content = get_file_content(
+        filename, comes_from=comes_from, session=session
+    )
+
+    lines_enum = preprocess(content, options)
+
+    for line_number, line in lines_enum:
+        req_iter = process_line(line, filename, line_number, finder,
+                                comes_from, options, session, wheel_cache,
+                                constraint=constraint)
+        for req in req_iter:
+            yield req
+
+
+def preprocess(content, options):
+    """Split, filter, and join lines, and return a line iterator
+
+    :param content: the content of the requirements file
+    :param options: cli options
+    """
+    lines_enum = enumerate(content.splitlines(), start=1)
+    lines_enum = join_lines(lines_enum)
+    lines_enum = ignore_comments(lines_enum)
+    lines_enum = skip_regex(lines_enum, options)
+    return lines_enum
+
+
+def process_line(line, filename, line_number, finder=None, comes_from=None,
+                 options=None, session=None, wheel_cache=None,
+                 constraint=False):
+    """Process a single requirements line; This can result in creating/yielding
+    requirements, or updating the finder.
+
+    For lines that contain requirements, the only options that have an effect
+    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
+    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
+    ignored.
+
+    For lines that do not contain requirements, the only options that have an
+    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
+    be present, but are ignored. These lines may contain multiple options
+    (although our docs imply only one is supported), and all are parsed and
+    affect the finder.
+
+    :param constraint: If True, parsing a constraints file.
+    :param options: OptionParser options that we may update
+    """
+    parser = build_parser()
+    defaults = parser.get_default_values()
+    defaults.index_url = None
+    if finder:
+        # `finder.format_control` will be updated during parsing
+        defaults.format_control = finder.format_control
+    args_str, options_str = break_args_options(line)
+    if sys.version_info < (2, 7, 3):
+        # Prior to 2.7.3, shlex cannot deal with unicode entries
+        options_str = options_str.encode('utf8')
+    opts, _ = parser.parse_args(shlex.split(options_str), defaults)
+
+    # preserve for the nested code path
+    line_comes_from = '%s %s (line %s)' % (
+        '-c' if constraint else '-r', filename, line_number)
+
+    # yield a line requirement
+    if args_str:
+        isolated = options.isolated_mode if options else False
+        if options:
+            cmdoptions.check_install_build_global(options, opts)
+        # get the options that apply to requirements
+        req_options = {}
+        for dest in SUPPORTED_OPTIONS_REQ_DEST:
+            if dest in opts.__dict__ and opts.__dict__[dest]:
+                req_options[dest] = opts.__dict__[dest]
+        yield InstallRequirement.from_line(
+            args_str, line_comes_from, constraint=constraint,
+            isolated=isolated, options=req_options, wheel_cache=wheel_cache
+        )
+
+    # yield an editable requirement
+    elif opts.editables:
+        isolated = options.isolated_mode if options else False
+        default_vcs = options.default_vcs if options else None
+        yield InstallRequirement.from_editable(
+            opts.editables[0], comes_from=line_comes_from,
+            constraint=constraint, default_vcs=default_vcs, isolated=isolated,
+            wheel_cache=wheel_cache
+        )
+
+    # parse a nested requirements file
+    elif opts.requirements or opts.constraints:
+        if opts.requirements:
+            req_path = opts.requirements[0]
+            nested_constraint = False
+        else:
+            req_path = opts.constraints[0]
+            nested_constraint = True
+        # original file is over http
+        if SCHEME_RE.search(filename):
+            # do a url join so relative paths work
+            req_path = urllib_parse.urljoin(filename, req_path)
+        # original file and nested file are paths
+        elif not SCHEME_RE.search(req_path):
+            # do a join so relative paths work
+            req_path = os.path.join(os.path.dirname(filename), req_path)
+        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
+        parser = parse_requirements(
+            req_path, finder, comes_from, options, session,
+            constraint=nested_constraint, wheel_cache=wheel_cache
+        )
+        for req in parser:
+            yield req
+
+    # percolate hash-checking option upward
+    elif opts.require_hashes:
+        options.require_hashes = opts.require_hashes
+
+    # set finder options
+    elif finder:
+        if opts.allow_external:
+            warnings.warn(
+                "--allow-external has been deprecated and will be removed in "
+                "the future. Due to changes in the repository protocol, it no "
+                "longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if opts.allow_all_external:
+            warnings.warn(
+                "--allow-all-external has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if opts.allow_unverified:
+            warnings.warn(
+                "--allow-unverified has been deprecated and will be removed "
+                "in the future. Due to changes in the repository protocol, it "
+                "no longer has any effect.",
+                RemovedInPip10Warning,
+            )
+
+        if opts.index_url:
+            finder.index_urls = [opts.index_url]
+        if opts.use_wheel is False:
+            finder.use_wheel = False
+            pip.index.fmt_ctl_no_use_wheel(finder.format_control)
+        if opts.no_index is True:
+            finder.index_urls = []
+        if opts.extra_index_urls:
+            finder.index_urls.extend(opts.extra_index_urls)
+        if opts.find_links:
+            # FIXME: it would be nice to keep track of the source
+            # of the find_links: support a find-links local path
+            # relative to a requirements file.
+            value = opts.find_links[0]
+            req_dir = os.path.dirname(os.path.abspath(filename))
+            relative_to_reqs_file = os.path.join(req_dir, value)
+            if os.path.exists(relative_to_reqs_file):
+                value = relative_to_reqs_file
+            finder.find_links.append(value)
+        if opts.pre:
+            finder.allow_all_prereleases = True
+        if opts.process_dependency_links:
+            finder.process_dependency_links = True
+        if opts.trusted_hosts:
+            finder.secure_origins.extend(
+                ("*", host, "*") for host in opts.trusted_hosts)
+
+
+def break_args_options(line):
+    """Break up the line into an args and options string.  We only want to shlex
+    (and then optparse) the options, not the args.  args can contain markers
+    which are corrupted by shlex.
+    """
+    tokens = line.split(' ')
+    args = []
+    options = tokens[:]
+    for token in tokens:
+        if token.startswith('-') or token.startswith('--'):
+            break
+        else:
+            args.append(token)
+            options.pop(0)
+    return ' '.join(args), ' '.join(options)
+
+
+def build_parser():
+    """
+    Return a parser for parsing requirement lines
+    """
+    parser = optparse.OptionParser(add_help_option=False)
+
+    option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ
+    for option_factory in option_factories:
+        option = option_factory()
+        parser.add_option(option)
+
+    # By default optparse sys.exits on parsing errors. We want to wrap
+    # that in our own exception.
+    def parser_exit(self, msg):
+        raise RequirementsFileParseError(msg)
+    parser.exit = parser_exit
+
+    return parser
+
+
+def join_lines(lines_enum):
+    """Joins a line ending in '\' with the previous line (except when following
+    comments).  The joined line takes on the index of the first line.
+    """
+    primary_line_number = None
+    new_line = []
+    for line_number, line in lines_enum:
+        if not line.endswith('\\') or COMMENT_RE.match(line):
+            if COMMENT_RE.match(line):
+                # this ensures comments are always matched later
+                line = ' ' + line
+            if new_line:
+                new_line.append(line)
+                yield primary_line_number, ''.join(new_line)
+                new_line = []
+            else:
+                yield line_number, line
+        else:
+            if not new_line:
+                primary_line_number = line_number
+            new_line.append(line.strip('\\'))
+
+    # last line contains \
+    if new_line:
+        yield primary_line_number, ''.join(new_line)
+
+    # TODO: handle space after '\'.
+
+
+def ignore_comments(lines_enum):
+    """
+    Strips comments and filters empty lines.
+    """
+    for line_number, line in lines_enum:
+        line = COMMENT_RE.sub('', line)
+        line = line.strip()
+        if line:
+            yield line_number, line
+
+
+def skip_regex(lines_enum, options):
+    """
+    Skip lines that match '--skip-requirements-regex' pattern
+
+    Note: the regex pattern is only built once
+    """
+    skip_regex = options.skip_requirements_regex if options else None
+    if skip_regex:
+        pattern = re.compile(skip_regex)
+        lines_enum = filterfalse(
+            lambda e: pattern.search(e[1]),
+            lines_enum)
+    return lines_enum
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_install.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_install.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a98f377b6f2a85cbf173b8a49027e51b8b79c7b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_install.py
@@ -0,0 +1,1204 @@
+from __future__ import absolute_import
+
+import logging
+import os
+import re
+import shutil
+import sys
+import tempfile
+import traceback
+import warnings
+import zipfile
+
+from distutils import sysconfig
+from distutils.util import change_root
+from email.parser import FeedParser
+
+from pip._vendor import pkg_resources, six
+from pip._vendor.packaging import specifiers
+from pip._vendor.packaging.markers import Marker
+from pip._vendor.packaging.requirements import InvalidRequirement, Requirement
+from pip._vendor.packaging.utils import canonicalize_name
+from pip._vendor.packaging.version import Version, parse as parse_version
+from pip._vendor.six.moves import configparser
+
+import pip.wheel
+
+from pip.compat import native_str, get_stdlib, WINDOWS
+from pip.download import is_url, url_to_path, path_to_url, is_archive_file
+from pip.exceptions import (
+    InstallationError, UninstallationError,
+)
+from pip.locations import (
+    bin_py, running_under_virtualenv, PIP_DELETE_MARKER_FILENAME, bin_user,
+)
+from pip.utils import (
+    display_path, rmtree, ask_path_exists, backup_dir, is_installable_dir,
+    dist_in_usersite, dist_in_site_packages, egg_link_path,
+    call_subprocess, read_text_file, FakeFile, _make_build_dir, ensure_dir,
+    get_installed_version, normalize_path, dist_is_local,
+)
+
+from pip.utils.hashes import Hashes
+from pip.utils.deprecation import RemovedInPip10Warning
+from pip.utils.logging import indent_log
+from pip.utils.setuptools_build import SETUPTOOLS_SHIM
+from pip.utils.ui import open_spinner
+from pip.req.req_uninstall import UninstallPathSet
+from pip.vcs import vcs
+from pip.wheel import move_wheel_files, Wheel
+
+
+logger = logging.getLogger(__name__)
+
+operators = specifiers.Specifier._operators.keys()
+
+
+def _strip_extras(path):
+    m = re.match(r'^(.+)(\[[^\]]+\])$', path)
+    extras = None
+    if m:
+        path_no_extras = m.group(1)
+        extras = m.group(2)
+    else:
+        path_no_extras = path
+
+    return path_no_extras, extras
+
+
+def _safe_extras(extras):
+    return set(pkg_resources.safe_extra(extra) for extra in extras)
+
+
+class InstallRequirement(object):
+
+    def __init__(self, req, comes_from, source_dir=None, editable=False,
+                 link=None, as_egg=False, update=True,
+                 pycompile=True, markers=None, isolated=False, options=None,
+                 wheel_cache=None, constraint=False):
+        self.extras = ()
+        if isinstance(req, six.string_types):
+            try:
+                req = Requirement(req)
+            except InvalidRequirement:
+                if os.path.sep in req:
+                    add_msg = "It looks like a path. Does it exist ?"
+                elif '=' in req and not any(op in req for op in operators):
+                    add_msg = "= is not a valid operator. Did you mean == ?"
+                else:
+                    add_msg = traceback.format_exc()
+                raise InstallationError(
+                    "Invalid requirement: '%s'\n%s" % (req, add_msg))
+            self.extras = _safe_extras(req.extras)
+
+        self.req = req
+        self.comes_from = comes_from
+        self.constraint = constraint
+        self.source_dir = source_dir
+        self.editable = editable
+
+        self._wheel_cache = wheel_cache
+        self.link = self.original_link = link
+        self.as_egg = as_egg
+        if markers is not None:
+            self.markers = markers
+        else:
+            self.markers = req and req.marker
+        self._egg_info_path = None
+        # This holds the pkg_resources.Distribution object if this requirement
+        # is already available:
+        self.satisfied_by = None
+        # This holds the pkg_resources.Distribution object if this requirement
+        # conflicts with another installed distribution:
+        self.conflicts_with = None
+        # Temporary build location
+        self._temp_build_dir = None
+        # Used to store the global directory where the _temp_build_dir should
+        # have been created. Cf _correct_build_location method.
+        self._ideal_build_dir = None
+        # True if the editable should be updated:
+        self.update = update
+        # Set to True after successful installation
+        self.install_succeeded = None
+        # UninstallPathSet of uninstalled distribution (for possible rollback)
+        self.uninstalled = None
+        # Set True if a legitimate do-nothing-on-uninstall has happened - e.g.
+        # system site packages, stdlib packages.
+        self.nothing_to_uninstall = False
+        self.use_user_site = False
+        self.target_dir = None
+        self.options = options if options else {}
+        self.pycompile = pycompile
+        # Set to True after successful preparation of this requirement
+        self.prepared = False
+
+        self.isolated = isolated
+
+    @classmethod
+    def from_editable(cls, editable_req, comes_from=None, default_vcs=None,
+                      isolated=False, options=None, wheel_cache=None,
+                      constraint=False):
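+        # Note (descriptive summary, not upstream pip text): builds an
+        # InstallRequirement from a '-e' / '--editable' argument such as a
+        # local project path or a 'vcs+URL#egg=name' string; parse_editable()
+        # extracts the name, URL and any extras.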
+        from pip.index import Link
+
+        name, url, extras_override = parse_editable(
+            editable_req, default_vcs)
+        if url.startswith('file:'):
+            source_dir = url_to_path(url)
+        else:
+            source_dir = None
+
+        res = cls(name, comes_from, source_dir=source_dir,
+                  editable=True,
+                  link=Link(url),
+                  constraint=constraint,
+                  isolated=isolated,
+                  options=options if options else {},
+                  wheel_cache=wheel_cache)
+
+        if extras_override is not None:
+            res.extras = _safe_extras(extras_override)
+
+        return res
+
+    @classmethod
+    def from_line(
+            cls, name, comes_from=None, isolated=False, options=None,
+            wheel_cache=None, constraint=False):
+        """Creates an InstallRequirement from a name, which might be a
+        requirement, directory containing 'setup.py', filename, or URL.
+        """
+        from pip.index import Link
+
+        if is_url(name):
+            marker_sep = '; '
+        else:
+            marker_sep = ';'
+        if marker_sep in name:
+            name, markers = name.split(marker_sep, 1)
+            markers = markers.strip()
+            if not markers:
+                markers = None
+            else:
+                markers = Marker(markers)
+        else:
+            markers = None
+        name = name.strip()
+        req = None
+        path = os.path.normpath(os.path.abspath(name))
+        link = None
+        extras = None
+
+        if is_url(name):
+            link = Link(name)
+        else:
+            p, extras = _strip_extras(path)
+            if (os.path.isdir(p) and
+                    (os.path.sep in name or name.startswith('.'))):
+
+                if not is_installable_dir(p):
+                    raise InstallationError(
+                        "Directory %r is not installable. File 'setup.py' "
+                        "not found." % name
+                    )
+                link = Link(path_to_url(p))
+            elif is_archive_file(p):
+                if not os.path.isfile(p):
+                    logger.warning(
+                        'Requirement %r looks like a filename, but the '
+                        'file does not exist',
+                        name
+                    )
+                link = Link(path_to_url(p))
+
+        # it's a local file, dir, or url
+        if link:
+            # Handle relative file URLs
+            if link.scheme == 'file' and re.search(r'\.\./', link.url):
+                link = Link(
+                    path_to_url(os.path.normpath(os.path.abspath(link.path))))
+            # wheel file
+            if link.is_wheel:
+                wheel = Wheel(link.filename)  # can raise InvalidWheelFilename
+                req = "%s==%s" % (wheel.name, wheel.version)
+            else:
+                # set the req to the egg fragment.  when it's not there, this
+                # will become an 'unnamed' requirement
+                req = link.egg_fragment
+
+        # a requirement specifier
+        else:
+            req = name
+
+        options = options if options else {}
+        res = cls(req, comes_from, link=link, markers=markers,
+                  isolated=isolated, options=options,
+                  wheel_cache=wheel_cache, constraint=constraint)
+
+        if extras:
+            res.extras = _safe_extras(
+                Requirement('placeholder' + extras).extras)
+
+        return res
+
+    def __str__(self):
+        if self.req:
+            s = str(self.req)
+            if self.link:
+                s += ' from %s' % self.link.url
+        else:
+            s = self.link.url if self.link else None
+        if self.satisfied_by is not None:
+            s += ' in %s' % display_path(self.satisfied_by.location)
+        if self.comes_from:
+            if isinstance(self.comes_from, six.string_types):
+                comes_from = self.comes_from
+            else:
+                comes_from = self.comes_from.from_path()
+            if comes_from:
+                s += ' (from %s)' % comes_from
+        return s
+
+    def __repr__(self):
+        return '<%s object: %s editable=%r>' % (
+            self.__class__.__name__, str(self), self.editable)
+
+    def populate_link(self, finder, upgrade, require_hashes):
+        """Ensure that if a link can be found for this, that it is found.
+
+        Note that self.link may still be None - if upgrade is False and the
+        requirement is already installed.
+
+        If require_hashes is True, don't use the wheel cache, because cached
+        wheels, always built locally, have different hashes than the files
+        downloaded from the index server and thus throw false hash mismatches.
+        Furthermore, cached wheels at present have nondeterministic contents due
+        to file modification times.
+        """
+        if self.link is None:
+            self.link = finder.find_requirement(self, upgrade)
+        if self._wheel_cache is not None and not require_hashes:
+            old_link = self.link
+            self.link = self._wheel_cache.cached_wheel(self.link, self.name)
+            if old_link != self.link:
+                logger.debug('Using cached wheel link: %s', self.link)
+
+    @property
+    def specifier(self):
+        return self.req.specifier
+
+    @property
+    def is_pinned(self):
+        """Return whether I am pinned to an exact version.
+
+        For example, some-package==1.2 is pinned; some-package>1.2 is not.
+        """
+        specifiers = self.specifier
+        return (len(specifiers) == 1 and
+                next(iter(specifiers)).operator in ('==', '==='))
+
+    def from_path(self):
+        if self.req is None:
+            return None
+        s = str(self.req)
+        if self.comes_from:
+            if isinstance(self.comes_from, six.string_types):
+                comes_from = self.comes_from
+            else:
+                comes_from = self.comes_from.from_path()
+            if comes_from:
+                s += '->' + comes_from
+        return s
+
+    def build_location(self, build_dir):
+        if self._temp_build_dir is not None:
+            return self._temp_build_dir
+        if self.req is None:
+            # For a requirement given as a path to a directory, the name of
+            # the package is not available yet, so we create a temp directory.
+            # Once run_egg_info has run, we'll be able to fix it via
+            # _correct_build_location().
+            # Some systems have /tmp as a symlink which confuses custom
+            # builds (such as numpy). Thus, we ensure that the real path
+            # is returned.
+            self._temp_build_dir = os.path.realpath(
+                tempfile.mkdtemp('-build', 'pip-')
+            )
+            self._ideal_build_dir = build_dir
+            return self._temp_build_dir
+        if self.editable:
+            name = self.name.lower()
+        else:
+            name = self.name
+        # FIXME: Is there a better place to create the build_dir? (hg and bzr
+        # need this)
+        if not os.path.exists(build_dir):
+            logger.debug('Creating directory %s', build_dir)
+            _make_build_dir(build_dir)
+        return os.path.join(build_dir, name)
+
+    def _correct_build_location(self):
+        """Move self._temp_build_dir to self._ideal_build_dir/self.req.name
+
+        For some requirements (e.g. a path to a directory), the name of the
+        package is not available until we run egg_info, so the build_location
+        will return a temporary directory and store the _ideal_build_dir.
+
+        This is only called by self.egg_info_path to fix the temporary build
+        directory.
+        """
+        if self.source_dir is not None:
+            return
+        assert self.req is not None
+        assert self._temp_build_dir
+        assert self._ideal_build_dir
+        old_location = self._temp_build_dir
+        self._temp_build_dir = None
+        new_location = self.build_location(self._ideal_build_dir)
+        if os.path.exists(new_location):
+            raise InstallationError(
+                'A package already exists in %s; please remove it to continue'
+                % display_path(new_location))
+        logger.debug(
+            'Moving package %s from %s to new location %s',
+            self, display_path(old_location), display_path(new_location),
+        )
+        shutil.move(old_location, new_location)
+        self._temp_build_dir = new_location
+        self._ideal_build_dir = None
+        self.source_dir = new_location
+        self._egg_info_path = None
+
+    @property
+    def name(self):
+        if self.req is None:
+            return None
+        return native_str(pkg_resources.safe_name(self.req.name))
+
+    @property
+    def setup_py_dir(self):
+        return os.path.join(
+            self.source_dir,
+            self.link and self.link.subdirectory_fragment or '')
+
+    @property
+    def setup_py(self):
+        assert self.source_dir, "No source dir for %s" % self
+        try:
+            import setuptools  # noqa
+        except ImportError:
+            if get_installed_version('setuptools') is None:
+                add_msg = "Please install setuptools."
+            else:
+                add_msg = traceback.format_exc()
+            # Setuptools is not available
+            raise InstallationError(
+                "Could not import setuptools which is required to "
+                "install from a source distribution.\n%s" % add_msg
+            )
+
+        setup_py = os.path.join(self.setup_py_dir, 'setup.py')
+
+        # Python2 __file__ should not be unicode
+        if six.PY2 and isinstance(setup_py, six.text_type):
+            setup_py = setup_py.encode(sys.getfilesystemencoding())
+
+        return setup_py
+
+    def run_egg_info(self):
+        assert self.source_dir
+        if self.name:
+            logger.debug(
+                'Running setup.py (path:%s) egg_info for package %s',
+                self.setup_py, self.name,
+            )
+        else:
+            logger.debug(
+                'Running setup.py (path:%s) egg_info for package from %s',
+                self.setup_py, self.link,
+            )
+
+        with indent_log():
+            script = SETUPTOOLS_SHIM % self.setup_py
+            base_cmd = [sys.executable, '-c', script]
+            if self.isolated:
+                base_cmd += ["--no-user-cfg"]
+            egg_info_cmd = base_cmd + ['egg_info']
+            # We can't put the .egg-info files at the root, because then the
+            # source code will be mistaken for an installed egg, causing
+            # problems
+            if self.editable:
+                egg_base_option = []
+            else:
+                egg_info_dir = os.path.join(self.setup_py_dir, 'pip-egg-info')
+                ensure_dir(egg_info_dir)
+                egg_base_option = ['--egg-base', 'pip-egg-info']
+            call_subprocess(
+                egg_info_cmd + egg_base_option,
+                cwd=self.setup_py_dir,
+                show_stdout=False,
+                command_desc='python setup.py egg_info')
+
+        if not self.req:
+            if isinstance(parse_version(self.pkg_info()["Version"]), Version):
+                op = "=="
+            else:
+                op = "==="
+            self.req = Requirement(
+                "".join([
+                    self.pkg_info()["Name"],
+                    op,
+                    self.pkg_info()["Version"],
+                ])
+            )
+            self._correct_build_location()
+        else:
+            metadata_name = canonicalize_name(self.pkg_info()["Name"])
+            if canonicalize_name(self.req.name) != metadata_name:
+                logger.warning(
+                    'Running setup.py (path:%s) egg_info for package %s '
+                    'produced metadata for project name %s. Fix your '
+                    '#egg=%s fragments.',
+                    self.setup_py, self.name, metadata_name, self.name
+                )
+                self.req = Requirement(metadata_name)
+
+    def egg_info_data(self, filename):
+        if self.satisfied_by is not None:
+            if not self.satisfied_by.has_metadata(filename):
+                return None
+            return self.satisfied_by.get_metadata(filename)
+        assert self.source_dir
+        filename = self.egg_info_path(filename)
+        if not os.path.exists(filename):
+            return None
+        data = read_text_file(filename)
+        return data
+
+    def egg_info_path(self, filename):
+        if self._egg_info_path is None:
+            if self.editable:
+                base = self.source_dir
+            else:
+                base = os.path.join(self.setup_py_dir, 'pip-egg-info')
+            filenames = os.listdir(base)
+            if self.editable:
+                filenames = []
+                for root, dirs, files in os.walk(base):
+                    for dir in vcs.dirnames:
+                        if dir in dirs:
+                            dirs.remove(dir)
+                    # Iterate over a copy of ``dirs``, since mutating
+                    # a list while iterating over it can cause trouble.
+                    # (See https://github.com/pypa/pip/pull/462.)
+                    for dir in list(dirs):
+                        # Don't search in anything that looks like a virtualenv
+                        # environment
+                        if (
+                                os.path.lexists(
+                                    os.path.join(root, dir, 'bin', 'python')
+                                ) or
+                                os.path.exists(
+                                    os.path.join(
+                                        root, dir, 'Scripts', 'Python.exe'
+                                    )
+                                )):
+                            dirs.remove(dir)
+                        # Also don't search through tests
+                        elif dir == 'test' or dir == 'tests':
+                            dirs.remove(dir)
+                    filenames.extend([os.path.join(root, dir)
+                                     for dir in dirs])
+                filenames = [f for f in filenames if f.endswith('.egg-info')]
+
+            if not filenames:
+                raise InstallationError(
+                    'No files/directories in %s (from %s)' % (base, filename)
+                )
+            assert filenames, \
+                "No files/directories in %s (from %s)" % (base, filename)
+
+            # if we have more than one match, we pick the toplevel one.  This
+            # can easily be the case if there is a dist folder which contains
+            # an extracted tarball for testing purposes.
+            if len(filenames) > 1:
+                filenames.sort(
+                    key=lambda x: x.count(os.path.sep) +
+                    (os.path.altsep and x.count(os.path.altsep) or 0)
+                )
+            self._egg_info_path = os.path.join(base, filenames[0])
+        return os.path.join(self._egg_info_path, filename)
+
+    def pkg_info(self):
+        p = FeedParser()
+        data = self.egg_info_data('PKG-INFO')
+        if not data:
+            logger.warning(
+                'No PKG-INFO file found in %s',
+                display_path(self.egg_info_path('PKG-INFO')),
+            )
+        p.feed(data or '')
+        return p.close()
+
+    _requirements_section_re = re.compile(r'\[(.*?)\]')
+
+    @property
+    def installed_version(self):
+        return get_installed_version(self.name)
+
+    def assert_source_matches_version(self):
+        assert self.source_dir
+        version = self.pkg_info()['version']
+        if self.req.specifier and version not in self.req.specifier:
+            logger.warning(
+                'Requested %s, but installing version %s',
+                self,
+                self.installed_version,
+            )
+        else:
+            logger.debug(
+                'Source in %s has version %s, which satisfies requirement %s',
+                display_path(self.source_dir),
+                version,
+                self,
+            )
+
+    def update_editable(self, obtain=True):
+        if not self.link:
+            logger.debug(
+                "Cannot update repository at %s; repository location is "
+                "unknown",
+                self.source_dir,
+            )
+            return
+        assert self.editable
+        assert self.source_dir
+        if self.link.scheme == 'file':
+            # Static paths don't get updated
+            return
+        assert '+' in self.link.url, "bad url: %r" % self.link.url
+        if not self.update:
+            return
+        vc_type, url = self.link.url.split('+', 1)
+        backend = vcs.get_backend(vc_type)
+        if backend:
+            vcs_backend = backend(self.link.url)
+            if obtain:
+                vcs_backend.obtain(self.source_dir)
+            else:
+                vcs_backend.export(self.source_dir)
+        else:
+            assert 0, (
+                'Unexpected version control type (in %s): %s'
+                % (self.link, vc_type))
+
+    def uninstall(self, auto_confirm=False):
+        """
+        Uninstall the distribution currently satisfying this requirement.
+
+        Prompts before removing or modifying files unless
+        ``auto_confirm`` is True.
+
+        Refuses to delete or modify files outside of ``sys.prefix`` -
+        thus uninstallation within a virtual environment can only
+        modify that virtual environment, even if the virtualenv is
+        linked to global site-packages.
+
+        """
+        if not self.check_if_exists():
+            raise UninstallationError(
+                "Cannot uninstall requirement %s, not installed" % (self.name,)
+            )
+        dist = self.satisfied_by or self.conflicts_with
+
+        dist_path = normalize_path(dist.location)
+        if not dist_is_local(dist):
+            logger.info(
+                "Not uninstalling %s at %s, outside environment %s",
+                dist.key,
+                dist_path,
+                sys.prefix,
+            )
+            self.nothing_to_uninstall = True
+            return
+
+        if dist_path in get_stdlib():
+            logger.info(
+                "Not uninstalling %s at %s, as it is in the standard library.",
+                dist.key,
+                dist_path,
+            )
+            self.nothing_to_uninstall = True
+            return
+
+        paths_to_remove = UninstallPathSet(dist)
+        develop_egg_link = egg_link_path(dist)
+        develop_egg_link_egg_info = '{0}.egg-info'.format(
+            pkg_resources.to_filename(dist.project_name))
+        egg_info_exists = dist.egg_info and os.path.exists(dist.egg_info)
+        # Special case for distutils installed package
+        distutils_egg_info = getattr(dist._provider, 'path', None)
+
+        # Uninstall cases order does matter: with two installs of the same
+        # package, pip needs to uninstall the currently detected version
+        if (egg_info_exists and dist.egg_info.endswith('.egg-info') and
+                not dist.egg_info.endswith(develop_egg_link_egg_info)):
+            # if dist.egg_info.endswith(develop_egg_link_egg_info), we
+            # are in fact in the develop_egg_link case
+            paths_to_remove.add(dist.egg_info)
+            if dist.has_metadata('installed-files.txt'):
+                for installed_file in dist.get_metadata(
+                        'installed-files.txt').splitlines():
+                    path = os.path.normpath(
+                        os.path.join(dist.egg_info, installed_file)
+                    )
+                    paths_to_remove.add(path)
+            # FIXME: need a test for this elif block
+            # occurs with --single-version-externally-managed/--record outside
+            # of pip
+            elif dist.has_metadata('top_level.txt'):
+                if dist.has_metadata('namespace_packages.txt'):
+                    namespaces = dist.get_metadata('namespace_packages.txt')
+                else:
+                    namespaces = []
+                for top_level_pkg in [
+                        p for p
+                        in dist.get_metadata('top_level.txt').splitlines()
+                        if p and p not in namespaces]:
+                    path = os.path.join(dist.location, top_level_pkg)
+                    paths_to_remove.add(path)
+                    paths_to_remove.add(path + '.py')
+                    paths_to_remove.add(path + '.pyc')
+                    paths_to_remove.add(path + '.pyo')
+
+        elif distutils_egg_info:
+            warnings.warn(
+                "Uninstalling a distutils installed project ({0}) has been "
+                "deprecated and will be removed in a future version. This is "
+                "due to the fact that uninstalling a distutils project will "
+                "only partially uninstall the project.".format(self.name),
+                RemovedInPip10Warning,
+            )
+            paths_to_remove.add(distutils_egg_info)
+
+        elif dist.location.endswith('.egg'):
+            # package installed by easy_install
+            # We cannot match on dist.egg_name because it can slightly vary,
+            # e.g. setuptools-0.6c11-py2.6.egg vs setuptools-0.6rc11-py2.6.egg
+            paths_to_remove.add(dist.location)
+            easy_install_egg = os.path.split(dist.location)[1]
+            easy_install_pth = os.path.join(os.path.dirname(dist.location),
+                                            'easy-install.pth')
+            paths_to_remove.add_pth(easy_install_pth, './' + easy_install_egg)
+
+        elif egg_info_exists and dist.egg_info.endswith('.dist-info'):
+            for path in pip.wheel.uninstallation_paths(dist):
+                paths_to_remove.add(path)
+
+        elif develop_egg_link:
+            # develop egg
+            with open(develop_egg_link, 'r') as fh:
+                link_pointer = os.path.normcase(fh.readline().strip())
+            assert (link_pointer == dist.location), (
+                'Egg-link %s does not match installed location of %s '
+                '(at %s)' % (link_pointer, self.name, dist.location)
+            )
+            paths_to_remove.add(develop_egg_link)
+            easy_install_pth = os.path.join(os.path.dirname(develop_egg_link),
+                                            'easy-install.pth')
+            paths_to_remove.add_pth(easy_install_pth, dist.location)
+
+        else:
+            logger.debug(
+                'Not sure how to uninstall: %s - Check: %s',
+                dist, dist.location)
+
+        # find scripts installed via the distutils scripts= option
+        if dist.has_metadata('scripts') and dist.metadata_isdir('scripts'):
+            for script in dist.metadata_listdir('scripts'):
+                if dist_in_usersite(dist):
+                    bin_dir = bin_user
+                else:
+                    bin_dir = bin_py
+                paths_to_remove.add(os.path.join(bin_dir, script))
+                if WINDOWS:
+                    paths_to_remove.add(os.path.join(bin_dir, script) + '.bat')
+
+        # find console_scripts
+        if dist.has_metadata('entry_points.txt'):
+            if six.PY2:
+                options = {}
+            else:
+                options = {"delimiters": ('=', )}
+            config = configparser.SafeConfigParser(**options)
+            config.readfp(
+                FakeFile(dist.get_metadata_lines('entry_points.txt'))
+            )
+            if config.has_section('console_scripts'):
+                for name, value in config.items('console_scripts'):
+                    if dist_in_usersite(dist):
+                        bin_dir = bin_user
+                    else:
+                        bin_dir = bin_py
+                    paths_to_remove.add(os.path.join(bin_dir, name))
+                    if WINDOWS:
+                        paths_to_remove.add(
+                            os.path.join(bin_dir, name) + '.exe'
+                        )
+                        paths_to_remove.add(
+                            os.path.join(bin_dir, name) + '.exe.manifest'
+                        )
+                        paths_to_remove.add(
+                            os.path.join(bin_dir, name) + '-script.py'
+                        )
+
+        paths_to_remove.remove(auto_confirm)
+        self.uninstalled = paths_to_remove
+
+    def rollback_uninstall(self):
+        if self.uninstalled:
+            self.uninstalled.rollback()
+        else:
+            logger.error(
+                "Can't rollback %s, nothing uninstalled.", self.name,
+            )
+
+    def commit_uninstall(self):
+        if self.uninstalled:
+            self.uninstalled.commit()
+        elif not self.nothing_to_uninstall:
+            logger.error(
+                "Can't commit %s, nothing uninstalled.", self.name,
+            )
+
+    def archive(self, build_dir):
+        assert self.source_dir
+        create_archive = True
+        archive_name = '%s-%s.zip' % (self.name, self.pkg_info()["version"])
+        archive_path = os.path.join(build_dir, archive_name)
+        if os.path.exists(archive_path):
+            response = ask_path_exists(
+                'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort ' %
+                display_path(archive_path), ('i', 'w', 'b', 'a'))
+            if response == 'i':
+                create_archive = False
+            elif response == 'w':
+                logger.warning('Deleting %s', display_path(archive_path))
+                os.remove(archive_path)
+            elif response == 'b':
+                dest_file = backup_dir(archive_path)
+                logger.warning(
+                    'Backing up %s to %s',
+                    display_path(archive_path),
+                    display_path(dest_file),
+                )
+                shutil.move(archive_path, dest_file)
+            elif response == 'a':
+                sys.exit(-1)
+        if create_archive:
+            zip = zipfile.ZipFile(
+                archive_path, 'w', zipfile.ZIP_DEFLATED,
+                allowZip64=True
+            )
+            dir = os.path.normcase(os.path.abspath(self.setup_py_dir))
+            for dirpath, dirnames, filenames in os.walk(dir):
+                if 'pip-egg-info' in dirnames:
+                    dirnames.remove('pip-egg-info')
+                for dirname in dirnames:
+                    dirname = os.path.join(dirpath, dirname)
+                    name = self._clean_zip_name(dirname, dir)
+                    zipdir = zipfile.ZipInfo(self.name + '/' + name + '/')
+                    zipdir.external_attr = 0x1ED << 16  # 0o755
+                    zip.writestr(zipdir, '')
+                for filename in filenames:
+                    if filename == PIP_DELETE_MARKER_FILENAME:
+                        continue
+                    filename = os.path.join(dirpath, filename)
+                    name = self._clean_zip_name(filename, dir)
+                    zip.write(filename, self.name + '/' + name)
+            zip.close()
+            logger.info('Saved %s', display_path(archive_path))
+
+    def _clean_zip_name(self, name, prefix):
+        assert name.startswith(prefix + os.path.sep), (
+            "name %r doesn't start with prefix %r" % (name, prefix)
+        )
+        name = name[len(prefix) + 1:]
+        name = name.replace(os.path.sep, '/')
+        return name
+
+    def match_markers(self, extras_requested=None):
+        if not extras_requested:
+            # Provide an extra to safely evaluate the markers
+            # without matching any extra
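+            # (Illustrative note, not in the original source: with extra=''
+            # a marker like "extra == 'test'" evaluates to False, while
+            # environment-only markers such as "python_version < '3'" are
+            # evaluated as usual.)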
+            extras_requested = ('',)
+        if self.markers is not None:
+            return any(
+                self.markers.evaluate({'extra': extra})
+                for extra in extras_requested)
+        else:
+            return True
+
+    def install(self, install_options, global_options=[], root=None,
+                prefix=None):
+        if self.editable:
+            self.install_editable(
+                install_options, global_options, prefix=prefix)
+            return
+        if self.is_wheel:
+            version = pip.wheel.wheel_version(self.source_dir)
+            pip.wheel.check_compatibility(version, self.name)
+
+            self.move_wheel_files(self.source_dir, root=root, prefix=prefix)
+            self.install_succeeded = True
+            return
+
+        # Extend the list of global and install options passed on to
+        # the setup.py call with the ones from the requirements file.
+        # Options specified in requirements file override those
+        # specified on the command line, since the last option given
+        # to setup.py is the one that is used.
+        global_options += self.options.get('global_options', [])
+        install_options += self.options.get('install_options', [])
+
+        if self.isolated:
+            global_options = list(global_options) + ["--no-user-cfg"]
+
+        temp_location = tempfile.mkdtemp('-record', 'pip-')
+        record_filename = os.path.join(temp_location, 'install-record.txt')
+        try:
+            install_args = self.get_install_args(
+                global_options, record_filename, root, prefix)
+            msg = 'Running setup.py install for %s' % (self.name,)
+            with open_spinner(msg) as spinner:
+                with indent_log():
+                    call_subprocess(
+                        install_args + install_options,
+                        cwd=self.setup_py_dir,
+                        show_stdout=False,
+                        spinner=spinner,
+                    )
+
+            if not os.path.exists(record_filename):
+                logger.debug('Record file %s not found', record_filename)
+                return
+            self.install_succeeded = True
+            if self.as_egg:
+                # there's no --always-unzip option we can pass to the install
+                # command, so we are unable to save installed-files.txt
+                return
+
+            def prepend_root(path):
+                if root is None or not os.path.isabs(path):
+                    return path
+                else:
+                    return change_root(root, path)
+
+            with open(record_filename) as f:
+                for line in f:
+                    directory = os.path.dirname(line)
+                    if directory.endswith('.egg-info'):
+                        egg_info_dir = prepend_root(directory)
+                        break
+                else:
+                    logger.warning(
+                        'Could not find .egg-info directory in install record'
+                        ' for %s',
+                        self,
+                    )
+                    # FIXME: put the record somewhere
+                    # FIXME: should this be an error?
+                    return
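+            # Rewrite the recorded paths relative to the .egg-info directory,
+            # which is the layout expected in installed-files.txt.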
+            new_lines = []
+            with open(record_filename) as f:
+                for line in f:
+                    filename = line.strip()
+                    if os.path.isdir(filename):
+                        filename += os.path.sep
+                    new_lines.append(
+                        os.path.relpath(
+                            prepend_root(filename), egg_info_dir)
+                    )
+            inst_files_path = os.path.join(egg_info_dir, 'installed-files.txt')
+            with open(inst_files_path, 'w') as f:
+                f.write('\n'.join(new_lines) + '\n')
+        finally:
+            if os.path.exists(record_filename):
+                os.remove(record_filename)
+            rmtree(temp_location)
+
+    def ensure_has_source_dir(self, parent_dir):
+        """Ensure that a source_dir is set.
+
+        This will create a temporary build dir if the name of the requirement
+        isn't known yet.
+
+        :param parent_dir: The ideal pip parent_dir for the source_dir.
+            Generally src_dir for editables and build_dir for sdists.
+        :return: self.source_dir
+        """
+        if self.source_dir is None:
+            self.source_dir = self.build_location(parent_dir)
+        return self.source_dir
+
+    def get_install_args(self, global_options, record_filename, root, prefix):
+        install_args = [sys.executable, "-u"]
+        install_args.append('-c')
+        install_args.append(SETUPTOOLS_SHIM % self.setup_py)
+        install_args += list(global_options) + \
+            ['install', '--record', record_filename]
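+        # At this point install_args is roughly (illustrative only):
+        #   [sys.executable, '-u', '-c', SETUPTOOLS_SHIM % self.setup_py,
+        #    <global options...>, 'install', '--record', record_filename]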
+
+        if not self.as_egg:
+            install_args += ['--single-version-externally-managed']
+
+        if root is not None:
+            install_args += ['--root', root]
+        if prefix is not None:
+            install_args += ['--prefix', prefix]
+
+        if self.pycompile:
+            install_args += ["--compile"]
+        else:
+            install_args += ["--no-compile"]
+
+        if running_under_virtualenv():
+            py_ver_str = 'python' + sysconfig.get_python_version()
+            install_args += ['--install-headers',
+                             os.path.join(sys.prefix, 'include', 'site',
+                                          py_ver_str, self.name)]
+
+        return install_args
+
+    def remove_temporary_source(self):
+        """Remove the source files from this requirement, if they are marked
+        for deletion"""
+        if self.source_dir and os.path.exists(
+                os.path.join(self.source_dir, PIP_DELETE_MARKER_FILENAME)):
+            logger.debug('Removing source in %s', self.source_dir)
+            rmtree(self.source_dir)
+        self.source_dir = None
+        if self._temp_build_dir and os.path.exists(self._temp_build_dir):
+            rmtree(self._temp_build_dir)
+        self._temp_build_dir = None
+
+    def install_editable(self, install_options,
+                         global_options=(), prefix=None):
+        logger.info('Running setup.py develop for %s', self.name)
+
+        if self.isolated:
+            global_options = list(global_options) + ["--no-user-cfg"]
+
+        if prefix:
+            prefix_param = ['--prefix={0}'.format(prefix)]
+            install_options = list(install_options) + prefix_param
+
+        with indent_log():
+            # FIXME: should we do --install-headers here too?
+            call_subprocess(
+                [
+                    sys.executable,
+                    '-c',
+                    SETUPTOOLS_SHIM % self.setup_py
+                ] +
+                list(global_options) +
+                ['develop', '--no-deps'] +
+                list(install_options),
+
+                cwd=self.setup_py_dir,
+                show_stdout=False)
+
+        self.install_succeeded = True
+
+    def check_if_exists(self):
+        """Find an installed distribution that satisfies or conflicts
+        with this requirement, and set self.satisfied_by or
+        self.conflicts_with appropriately.
+        """
+        if self.req is None:
+            return False
+        try:
+            # get_distribution() will resolve the entire list of requirements
+            # anyway, and we've already determined that we need the requirement
+            # in question, so strip the marker so that we don't try to
+            # evaluate it.
+            no_marker = Requirement(str(self.req))
+            no_marker.marker = None
+            self.satisfied_by = pkg_resources.get_distribution(str(no_marker))
+            if self.editable and self.satisfied_by:
+                self.conflicts_with = self.satisfied_by
+                # when installing editables, nothing pre-existing should ever
+                # satisfy the requirement
+                self.satisfied_by = None
+                return True
+        except pkg_resources.DistributionNotFound:
+            return False
+        except pkg_resources.VersionConflict:
+            existing_dist = pkg_resources.get_distribution(
+                self.req.name
+            )
+            if self.use_user_site:
+                if dist_in_usersite(existing_dist):
+                    self.conflicts_with = existing_dist
+                elif (running_under_virtualenv() and
+                        dist_in_site_packages(existing_dist)):
+                    raise InstallationError(
+                        "Will not install to the user site because it will "
+                        "lack sys.path precedence to %s in %s" %
+                        (existing_dist.project_name, existing_dist.location)
+                    )
+            else:
+                self.conflicts_with = existing_dist
+        return True
+
+    @property
+    def is_wheel(self):
+        return self.link and self.link.is_wheel
+
+    def move_wheel_files(self, wheeldir, root=None, prefix=None):
+        move_wheel_files(
+            self.name, self.req, wheeldir,
+            user=self.use_user_site,
+            home=self.target_dir,
+            root=root,
+            prefix=prefix,
+            pycompile=self.pycompile,
+            isolated=self.isolated,
+        )
+
+    def get_dist(self):
+        """Return a pkg_resources.Distribution built from self.egg_info_path"""
+        egg_info = self.egg_info_path('').rstrip('/')
+        base_dir = os.path.dirname(egg_info)
+        metadata = pkg_resources.PathMetadata(base_dir, egg_info)
+        dist_name = os.path.splitext(os.path.basename(egg_info))[0]
+        return pkg_resources.Distribution(
+            os.path.dirname(egg_info),
+            project_name=dist_name,
+            metadata=metadata)
+
+    @property
+    def has_hash_options(self):
+        """Return whether any known-good hashes are specified as options.
+
+        These activate --require-hashes mode; hashes specified as part of a
+        URL do not.
+
+        """
+        return bool(self.options.get('hashes', {}))
+
+    def hashes(self, trust_internet=True):
+        """Return a hash-comparer that considers my option- and URL-based
+        hashes to be known-good.
+
+        Hashes in URLs--ones embedded in the requirements file, not ones
+        downloaded from an index server--are almost peers with ones from
+        flags. They satisfy --require-hashes (whether it was implicitly or
+        explicitly activated) but do not activate it. md5 and sha224 are not
+        allowed in flags, which should nudge people toward good algos. We
+        always OR all hashes together, even ones from URLs.
+
+        :param trust_internet: Whether to trust URL-based (#md5=...) hashes
+            downloaded from the internet, as by populate_link()
+
+        """
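+        # Illustrative example (not from the original source): with
+        # options == {'hashes': {'sha256': ['abc...']}} and a link ending in
+        # '#md5=123...', the returned Hashes object accepts either digest.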
+        good_hashes = self.options.get('hashes', {}).copy()
+        link = self.link if trust_internet else self.original_link
+        if link and link.hash:
+            good_hashes.setdefault(link.hash_name, []).append(link.hash)
+        return Hashes(good_hashes)
+
+
+def _strip_postfix(req):
+    """
+        Strip req postfix (-dev, -0.2, etc.)
+    """
+    # FIXME: use package_to_requirement?
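+    # Illustrative behaviour (comment only, not in the original source):
+    # 'Foo-dev' -> 'Foo' and 'Foo-0.2' -> 'Foo', while a name without such
+    # a postfix is returned unchanged.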
+    match = re.search(r'^(.*?)(?:-dev|-\d.*)$', req)
+    if match:
+        # Strip off -dev, -0.2, etc.
+        req = match.group(1)
+    return req
+
+
+def parse_editable(editable_req, default_vcs=None):
+    """Parses an editable requirement into:
+        - a requirement name
+        - a URL
+        - extras
+    Accepted requirements:
+        svn+http://blahblah@rev#egg=Foobar[baz]&subdirectory=version_subdir
+        .[some_extra]
+    """
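+    # Rough examples of the (name, url, extras) return value (added for
+    # clarity, not part of the original source):
+    #   'git+https://host/repo.git#egg=Foo'  ->  ('Foo', <same url>, None)
+    #   '.[tests]'  ->  (None, 'file:///<abs path>', {'tests'})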
+
+    from pip.index import Link
+
+    url = editable_req
+    extras = None
+
+    # If a file path is specified with extras, strip off the extras.
+    m = re.match(r'^(.+)(\[[^\]]+\])$', url)
+    if m:
+        url_no_extras = m.group(1)
+        extras = m.group(2)
+    else:
+        url_no_extras = url
+
+    if os.path.isdir(url_no_extras):
+        if not os.path.exists(os.path.join(url_no_extras, 'setup.py')):
+            raise InstallationError(
+                "Directory %r is not installable. File 'setup.py' not found." %
+                url_no_extras
+            )
+        # Treating it as code that has already been checked out
+        url_no_extras = path_to_url(url_no_extras)
+
+    if url_no_extras.lower().startswith('file:'):
+        package_name = Link(url_no_extras).egg_fragment
+        if extras:
+            return (
+                package_name,
+                url_no_extras,
+                Requirement("placeholder" + extras.lower()).extras,
+            )
+        else:
+            return package_name, url_no_extras, None
+
+    for version_control in vcs:
+        if url.lower().startswith('%s:' % version_control):
+            url = '%s+%s' % (version_control, url)
+            break
+
+    if '+' not in url:
+        if default_vcs:
+            warnings.warn(
+                "--default-vcs has been deprecated and will be removed in "
+                "the future.",
+                RemovedInPip10Warning,
+            )
+            url = default_vcs + '+' + url
+        else:
+            raise InstallationError(
+                '%s should either be a path to a local project or a VCS url '
+                'beginning with svn+, git+, hg+, or bzr+' %
+                editable_req
+            )
+
+    vc_type = url.split('+', 1)[0].lower()
+
+    if not vcs.get_backend(vc_type):
+        error_message = 'For --editable=%s only ' % editable_req + \
+            ', '.join([backend.name + '+URL' for backend in vcs.backends]) + \
+            ' is currently supported'
+        raise InstallationError(error_message)
+
+    package_name = Link(url).egg_fragment
+    if not package_name:
+        raise InstallationError(
+            "Could not detect requirement name; --editable=%s is not the "
+            "right format, it must have #egg=Package" % editable_req
+        )
+    return _strip_postfix(package_name), url, None
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_set.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_set.py
new file mode 100644
index 0000000000000000000000000000000000000000..76aec0616612e22f2864fe8dba13886c10b579bc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_set.py
@@ -0,0 +1,798 @@
+from __future__ import absolute_import
+
+from collections import defaultdict
+from itertools import chain
+import logging
+import os
+
+from pip._vendor import pkg_resources
+from pip._vendor import requests
+
+from pip.compat import expanduser
+from pip.download import (is_file_url, is_dir_url, is_vcs_url, url_to_path,
+                          unpack_url)
+from pip.exceptions import (InstallationError, BestVersionAlreadyInstalled,
+                            DistributionNotFound, PreviousBuildDirError,
+                            HashError, HashErrors, HashUnpinned,
+                            DirectoryUrlHashUnsupported, VcsHashUnsupported,
+                            UnsupportedPythonVersion)
+from pip.req.req_install import InstallRequirement
+from pip.utils import (
+    display_path, dist_in_usersite, ensure_dir, normalize_path)
+from pip.utils.hashes import MissingHashes
+from pip.utils.logging import indent_log
+from pip.utils.packaging import check_dist_requires_python
+from pip.vcs import vcs
+from pip.wheel import Wheel
+
+logger = logging.getLogger(__name__)
+
+
+class Requirements(object):
+
+    def __init__(self):
+        self._keys = []
+        self._dict = {}
+
+    def keys(self):
+        return self._keys
+
+    def values(self):
+        return [self._dict[key] for key in self._keys]
+
+    def __contains__(self, item):
+        return item in self._keys
+
+    def __setitem__(self, key, value):
+        if key not in self._keys:
+            self._keys.append(key)
+        self._dict[key] = value
+
+    def __getitem__(self, key):
+        return self._dict[key]
+
+    def __repr__(self):
+        values = ['%s: %s' % (repr(k), repr(self[k])) for k in self.keys()]
+        return 'Requirements({%s})' % ', '.join(values)
+
+
+class DistAbstraction(object):
+    """Abstracts out the wheel vs non-wheel prepare_files logic.
+
+    The requirements for anything installable are as follows:
+     - we must be able to determine the requirement name
+       (or we can't correctly handle the non-upgrade case).
+     - we must be able to generate a list of run-time dependencies
+       without installing any additional packages (or we would
+       have to either burn time by doing temporary isolated installs
+       or alternatively violate pip's 'don't start installing unless
+       all requirements are available' rule - neither of which are
+       desirable).
+     - for packages with setup requirements, we must also be able
+       to determine their requirements without installing additional
+       packages (for the same reason as run-time dependencies)
+     - we must be able to create a Distribution object exposing the
+       above metadata.
+    """
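+    # The concrete subclasses below are IsWheel (an already-built wheel),
+    # IsSDist (source distributions and editable requirements) and Installed
+    # (an already-installed distribution).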
+
+    def __init__(self, req_to_install):
+        self.req_to_install = req_to_install
+
+    def dist(self, finder):
+        """Return a setuptools Dist object."""
+        raise NotImplementedError(self.dist)
+
+    def prep_for_dist(self):
+        """Ensure that we can get a Dist for this requirement."""
+        raise NotImplementedError(self.prep_for_dist)
+
+
+def make_abstract_dist(req_to_install):
+    """Factory to make an abstract dist object.
+
+    Preconditions: Either an editable req with a source_dir, or a req that is
+    satisfied_by or has a wheel link, or a non-editable req with a source_dir.
+
+    :return: A concrete DistAbstraction.
+    """
+    if req_to_install.editable:
+        return IsSDist(req_to_install)
+    elif req_to_install.link and req_to_install.link.is_wheel:
+        return IsWheel(req_to_install)
+    else:
+        return IsSDist(req_to_install)
+
+
+class IsWheel(DistAbstraction):
+
+    def dist(self, finder):
+        return list(pkg_resources.find_distributions(
+            self.req_to_install.source_dir))[0]
+
+    def prep_for_dist(self):
+        # FIXME: https://github.com/pypa/pip/issues/1112
+        pass
+
+
+class IsSDist(DistAbstraction):
+
+    def dist(self, finder):
+        dist = self.req_to_install.get_dist()
+        # FIXME: shouldn't be globally added:
+        if dist.has_metadata('dependency_links.txt'):
+            finder.add_dependency_links(
+                dist.get_metadata_lines('dependency_links.txt')
+            )
+        return dist
+
+    def prep_for_dist(self):
+        self.req_to_install.run_egg_info()
+        self.req_to_install.assert_source_matches_version()
+
+
+class Installed(DistAbstraction):
+
+    def dist(self, finder):
+        return self.req_to_install.satisfied_by
+
+    def prep_for_dist(self):
+        pass
+
+
+class RequirementSet(object):
+
+    def __init__(self, build_dir, src_dir, download_dir, upgrade=False,
+                 upgrade_strategy=None, ignore_installed=False, as_egg=False,
+                 target_dir=None, ignore_dependencies=False,
+                 force_reinstall=False, use_user_site=False, session=None,
+                 pycompile=True, isolated=False, wheel_download_dir=None,
+                 wheel_cache=None, require_hashes=False,
+                 ignore_requires_python=False):
+        """Create a RequirementSet.
+
+        :param wheel_download_dir: Where still-packed .whl files should be
+            written to. If None, they are written to download_dir instead.
+            Kept separate from download_dir to permit keeping only the wheel
+            archives for pip wheel.
+        :param download_dir: Where still packed archives should be written to.
+            If None they are not saved, and are deleted immediately after
+            unpacking.
+        :param wheel_cache: The pip wheel cache, for passing to
+            InstallRequirement.
+        """
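+        # Illustrative construction (not from the original source):
+        #   RequirementSet(build_dir, src_dir, download_dir=None,
+        #                  session=<a pip download session>)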
+        if session is None:
+            raise TypeError(
+                "RequirementSet() missing 1 required keyword argument: "
+                "'session'"
+            )
+
+        self.build_dir = build_dir
+        self.src_dir = src_dir
+        # XXX: download_dir and wheel_download_dir overlap semantically and may
+        # be combined if we're willing to have non-wheel archives present in
+        # the wheelhouse output by 'pip wheel'.
+        self.download_dir = download_dir
+        self.upgrade = upgrade
+        self.upgrade_strategy = upgrade_strategy
+        self.ignore_installed = ignore_installed
+        self.force_reinstall = force_reinstall
+        self.requirements = Requirements()
+        # Mapping of alias: real_name
+        self.requirement_aliases = {}
+        self.unnamed_requirements = []
+        self.ignore_dependencies = ignore_dependencies
+        self.ignore_requires_python = ignore_requires_python
+        self.successfully_downloaded = []
+        self.successfully_installed = []
+        self.reqs_to_cleanup = []
+        self.as_egg = as_egg
+        self.use_user_site = use_user_site
+        self.target_dir = target_dir  # set from --target option
+        self.session = session
+        self.pycompile = pycompile
+        self.isolated = isolated
+        if wheel_download_dir:
+            wheel_download_dir = normalize_path(wheel_download_dir)
+        self.wheel_download_dir = wheel_download_dir
+        self._wheel_cache = wheel_cache
+        self.require_hashes = require_hashes
+        # Maps from install_req -> dependencies_of_install_req
+        self._dependencies = defaultdict(list)
+
+    def __str__(self):
+        reqs = [req for req in self.requirements.values()
+                if not req.comes_from]
+        reqs.sort(key=lambda req: req.name.lower())
+        return ' '.join([str(req.req) for req in reqs])
+
+    def __repr__(self):
+        reqs = [req for req in self.requirements.values()]
+        reqs.sort(key=lambda req: req.name.lower())
+        reqs_str = ', '.join([str(req.req) for req in reqs])
+        return ('<%s object; %d requirement(s): %s>'
+                % (self.__class__.__name__, len(reqs), reqs_str))
+
+    def add_requirement(self, install_req, parent_req_name=None,
+                        extras_requested=None):
+        """Add install_req as a requirement to install.
+
+        :param parent_req_name: The name of the requirement that needed this
+            added. The name is used because when multiple unnamed requirements
+            resolve to the same name, we could otherwise end up with dependency
+            links that point outside the Requirements set. parent_req must
+            already be added. Note that None implies that this is a user
+            supplied requirement, vs an inferred one.
+        :param extras_requested: an iterable of extras used to evaluate the
+            environment markers.
+        :return: Additional requirements to scan. That is either [] if
+            the requirement is not applicable, or [install_req] if the
+            requirement is applicable and has just been added.
+        """
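+        # Example (illustrative): adding the same requirement a second time
+        # typically returns [], because it resolves to the already-added
+        # object, whereas the first call returned [install_req].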
+        name = install_req.name
+        if not install_req.match_markers(extras_requested):
+            logger.warning("Ignoring %s: markers '%s' don't match your "
+                           "environment", install_req.name,
+                           install_req.markers)
+            return []
+
+        # This check has to come after we filter requirements with the
+        # environment markers.
+        if install_req.link and install_req.link.is_wheel:
+            wheel = Wheel(install_req.link.filename)
+            if not wheel.supported():
+                raise InstallationError(
+                    "%s is not a supported wheel on this platform." %
+                    wheel.filename
+                )
+
+        install_req.as_egg = self.as_egg
+        install_req.use_user_site = self.use_user_site
+        install_req.target_dir = self.target_dir
+        install_req.pycompile = self.pycompile
+        install_req.is_direct = (parent_req_name is None)
+
+        if not name:
+            # url or path requirement w/o an egg fragment
+            self.unnamed_requirements.append(install_req)
+            return [install_req]
+        else:
+            try:
+                existing_req = self.get_requirement(name)
+            except KeyError:
+                existing_req = None
+            if (parent_req_name is None and existing_req and not
+                    existing_req.constraint and
+                    existing_req.extras == install_req.extras and not
+                    existing_req.req.specifier == install_req.req.specifier):
+                raise InstallationError(
+                    'Double requirement given: %s (already in %s, name=%r)'
+                    % (install_req, existing_req, name))
+            if not existing_req:
+                # Add requirement
+                self.requirements[name] = install_req
+                # FIXME: what about other normalizations?  E.g., _ vs. -?
+                if name.lower() != name:
+                    self.requirement_aliases[name.lower()] = name
+                result = [install_req]
+            else:
+                # Assume there's no need to scan, and that we've already
+                # encountered this for scanning.
+                result = []
+                if not install_req.constraint and existing_req.constraint:
+                    if (install_req.link and not (existing_req.link and
+                       install_req.link.path == existing_req.link.path)):
+                        self.reqs_to_cleanup.append(install_req)
+                        raise InstallationError(
+                            "Could not satisfy constraints for '%s': "
+                            "installation from path or url cannot be "
+                            "constrained to a version" % name)
+                    # If we're now installing a constraint, mark the existing
+                    # object for real installation.
+                    existing_req.constraint = False
+                    existing_req.extras = tuple(
+                        sorted(set(existing_req.extras).union(
+                               set(install_req.extras))))
+                    logger.debug("Setting %s extras to: %s",
+                                 existing_req, existing_req.extras)
+                    # And now we need to scan this.
+                    result = [existing_req]
+                # Canonicalise to the already-added object for the backref
+                # check below.
+                install_req = existing_req
+            if parent_req_name:
+                parent_req = self.get_requirement(parent_req_name)
+                self._dependencies[parent_req].append(install_req)
+            return result
+
+    def has_requirement(self, project_name):
+        name = project_name.lower()
+        if (name in self.requirements and
+                not self.requirements[name].constraint):
+            return True
+        if (name in self.requirement_aliases and
+                not self.requirements[
+                    self.requirement_aliases[name]].constraint):
+            return True
+        return False
+
+    @property
+    def has_requirements(self):
+        return list(req for req in self.requirements.values() if not
+                    req.constraint) or self.unnamed_requirements
+
+    @property
+    def is_download(self):
+        if self.download_dir:
+            self.download_dir = expanduser(self.download_dir)
+            if os.path.exists(self.download_dir):
+                return True
+            else:
+                logger.critical('Could not find download directory')
+                raise InstallationError(
+                    "Could not find or access download directory '%s'"
+                    % display_path(self.download_dir))
+        return False
+
+    def get_requirement(self, project_name):
+        for name in project_name, project_name.lower():
+            if name in self.requirements:
+                return self.requirements[name]
+            if name in self.requirement_aliases:
+                return self.requirements[self.requirement_aliases[name]]
+        raise KeyError("No project with the name %r" % project_name)
+
+    def uninstall(self, auto_confirm=False):
+        for req in self.requirements.values():
+            if req.constraint:
+                continue
+            req.uninstall(auto_confirm=auto_confirm)
+            req.commit_uninstall()
+
+    def prepare_files(self, finder):
+        """
+        Prepare process. Create temp directories, download and/or unpack files.
+        """
+        # make the wheelhouse
+        if self.wheel_download_dir:
+            ensure_dir(self.wheel_download_dir)
+
+        # If any top-level requirement has a hash specified, enter
+        # hash-checking mode, which requires hashes from all.
+        root_reqs = self.unnamed_requirements + self.requirements.values()
+        require_hashes = (self.require_hashes or
+                          any(req.has_hash_options for req in root_reqs))
+        if require_hashes and self.as_egg:
+            raise InstallationError(
+                '--egg is not allowed with --require-hashes mode, since it '
+                'delegates dependency resolution to setuptools and could thus '
+                'result in installation of unhashed packages.')
+
+        # Actually prepare the files, and collect any exceptions. Most hash
+        # exceptions cannot be checked ahead of time, because
+        # req.populate_link() needs to be called before we can make decisions
+        # based on link type.
+        discovered_reqs = []
+        hash_errors = HashErrors()
+        for req in chain(root_reqs, discovered_reqs):
+            try:
+                discovered_reqs.extend(self._prepare_file(
+                    finder,
+                    req,
+                    require_hashes=require_hashes,
+                    ignore_dependencies=self.ignore_dependencies))
+            except HashError as exc:
+                exc.req = req
+                hash_errors.append(exc)
+
+        if hash_errors:
+            raise hash_errors
+
+    def _is_upgrade_allowed(self, req):
+        return self.upgrade and (
+            self.upgrade_strategy == "eager" or (
+                self.upgrade_strategy == "only-if-needed" and req.is_direct
+            )
+        )
+
+    def _check_skip_installed(self, req_to_install, finder):
+        """Check if req_to_install should be skipped.
+
+        This will check if the req is installed, and whether we should upgrade
+        or reinstall it, taking into account all the relevant user options.
+
+        After calling this req_to_install will only have satisfied_by set to
+        None if the req_to_install is to be upgraded/reinstalled etc. Any
+        other value will be a dist recording the current thing installed that
+        satisfies the requirement.
+
+        Note that for vcs urls and the like we can't assess skipping in this
+        routine - we simply identify that we need to pull the thing down,
+        then later on it is pulled down and introspected to assess upgrade/
+        reinstalls etc.
+
+        :return: A text reason for why it was skipped, or None.
+        """
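+        # The skip reasons produced below are 'already up-to-date',
+        # 'not upgraded as not directly required' and 'already satisfied';
+        # None is returned when nothing installed satisfies the requirement.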
+        # Check whether to upgrade/reinstall this req or not.
+        req_to_install.check_if_exists()
+        if req_to_install.satisfied_by:
+            upgrade_allowed = self._is_upgrade_allowed(req_to_install)
+
+            # Is the best version already installed?
+            best_installed = False
+
+            if upgrade_allowed:
+                # For link-based requirements we have to pull the
+                # tree down and inspect it to assess the version number, so
+                # that is handled further down.
+                if not (self.force_reinstall or req_to_install.link):
+                    try:
+                        finder.find_requirement(
+                            req_to_install, upgrade_allowed)
+                    except BestVersionAlreadyInstalled:
+                        best_installed = True
+                    except DistributionNotFound:
+                        # No distribution found, so we squash the
+                        # error - it will be raised again later when we
+                        # re-try the install.
+                        # Why don't we just raise here?
+                        pass
+
+                if not best_installed:
+                    # don't uninstall conflict if user install and
+                    # conflict is not user install
+                    if not (self.use_user_site and not
+                            dist_in_usersite(req_to_install.satisfied_by)):
+                        req_to_install.conflicts_with = \
+                            req_to_install.satisfied_by
+                    req_to_install.satisfied_by = None
+
+            # Figure out a nice message to say why we're skipping this.
+            if best_installed:
+                skip_reason = 'already up-to-date'
+            elif self.upgrade_strategy == "only-if-needed":
+                skip_reason = 'not upgraded as not directly required'
+            else:
+                skip_reason = 'already satisfied'
+
+            return skip_reason
+        else:
+            return None
+
+    def _prepare_file(self,
+                      finder,
+                      req_to_install,
+                      require_hashes=False,
+                      ignore_dependencies=False):
+        """Prepare a single requirement.
+
+        :return: A list of additional InstallRequirements to also install.
+        """
+        # Tell user what we are doing for this requirement:
+        # obtain (editable), skipping, processing (local url), collecting
+        # (remote url or package name)
+        if req_to_install.constraint or req_to_install.prepared:
+            return []
+
+        req_to_install.prepared = True
+
+        # ###################### #
+        # # print log messages # #
+        # ###################### #
+        if req_to_install.editable:
+            logger.info('Obtaining %s', req_to_install)
+        else:
+            # satisfied_by is only evaluated by calling _check_skip_installed,
+            # so it must be None here.
+            assert req_to_install.satisfied_by is None
+            if not self.ignore_installed:
+                skip_reason = self._check_skip_installed(
+                    req_to_install, finder)
+
+            if req_to_install.satisfied_by:
+                assert skip_reason is not None, (
+                    '_check_skip_installed returned None but '
+                    'req_to_install.satisfied_by is set to %r'
+                    % (req_to_install.satisfied_by,))
+                logger.info(
+                    'Requirement %s: %s', skip_reason,
+                    req_to_install)
+            else:
+                if (req_to_install.link and
+                        req_to_install.link.scheme == 'file'):
+                    path = url_to_path(req_to_install.link.url)
+                    logger.info('Processing %s', display_path(path))
+                else:
+                    logger.info('Collecting %s', req_to_install)
+
+        with indent_log():
+            # ################################ #
+            # # vcs update or unpack archive # #
+            # ################################ #
+            if req_to_install.editable:
+                if require_hashes:
+                    raise InstallationError(
+                        'The editable requirement %s cannot be installed when '
+                        'requiring hashes, because there is no single file to '
+                        'hash.' % req_to_install)
+                req_to_install.ensure_has_source_dir(self.src_dir)
+                req_to_install.update_editable(not self.is_download)
+                abstract_dist = make_abstract_dist(req_to_install)
+                abstract_dist.prep_for_dist()
+                if self.is_download:
+                    req_to_install.archive(self.download_dir)
+                req_to_install.check_if_exists()
+            elif req_to_install.satisfied_by:
+                if require_hashes:
+                    logger.debug(
+                        'Since it is already installed, we are trusting this '
+                        'package without checking its hash. To ensure a '
+                        'completely repeatable environment, install into an '
+                        'empty virtualenv.')
+                abstract_dist = Installed(req_to_install)
+            else:
+                # @@ if filesystem packages are not marked
+                # editable in a req, a non deterministic error
+                # occurs when the script attempts to unpack the
+                # build directory
+                req_to_install.ensure_has_source_dir(self.build_dir)
+                # If a checkout exists, it's unwise to keep going.  Version
+                # inconsistencies are logged later, but do not fail the
+                # installation.
+                # FIXME: this won't upgrade when there's an existing
+                # package unpacked in `req_to_install.source_dir`
+                if os.path.exists(
+                        os.path.join(req_to_install.source_dir, 'setup.py')):
+                    raise PreviousBuildDirError(
+                        "pip can't proceed with requirements '%s' due to a"
+                        " pre-existing build directory (%s). This is "
+                        "likely due to a previous installation that failed"
+                        ". pip is being responsible and not assuming it "
+                        "can delete this. Please delete it and try again."
+                        % (req_to_install, req_to_install.source_dir)
+                    )
+                req_to_install.populate_link(
+                    finder,
+                    self._is_upgrade_allowed(req_to_install),
+                    require_hashes
+                )
+                # We can't hit this spot and have populate_link return None.
+                # req_to_install.satisfied_by is None here (because we're
+                # guarded) and upgrade has no impact except when satisfied_by
+                # is not None.
+                # Then inside find_requirement existing_applicable -> False
+                # If no new versions are found, DistributionNotFound is raised,
+                # otherwise a result is guaranteed.
+                assert req_to_install.link
+                link = req_to_install.link
+
+                # Now that we have the real link, we can tell what kind of
+                # requirements we have and raise some more informative errors
+                # than otherwise. (For example, we can raise VcsHashUnsupported
+                # for a VCS URL rather than HashMissing.)
+                if require_hashes:
+                    # We could check these first 2 conditions inside
+                    # unpack_url and save repetition of conditions, but then
+                    # we would report less-useful error messages for
+                    # unhashable requirements, complaining that there's no
+                    # hash provided.
+                    if is_vcs_url(link):
+                        raise VcsHashUnsupported()
+                    elif is_file_url(link) and is_dir_url(link):
+                        raise DirectoryUrlHashUnsupported()
+                    if (not req_to_install.original_link and
+                            not req_to_install.is_pinned):
+                        # Unpinned packages are asking for trouble when a new
+                        # version is uploaded. This isn't a security check, but
+                        # it saves users a surprising hash mismatch in the
+                        # future.
+                        #
+                        # file:/// URLs aren't pinnable, so don't complain
+                        # about them not being pinned.
+                        raise HashUnpinned()
+                hashes = req_to_install.hashes(
+                    trust_internet=not require_hashes)
+                if require_hashes and not hashes:
+                    # Known-good hashes are missing for this requirement, so
+                    # shim it with a facade object that will provoke hash
+                    # computation and then raise a HashMissing exception
+                    # showing the user what the hash should be.
+                    hashes = MissingHashes()
+
+                try:
+                    download_dir = self.download_dir
+                    # We always delete unpacked sdists after pip has run.
+                    autodelete_unpacked = True
+                    if req_to_install.link.is_wheel \
+                            and self.wheel_download_dir:
+                        # when doing 'pip wheel' we download wheels to a
+                        # dedicated dir.
+                        download_dir = self.wheel_download_dir
+                    if req_to_install.link.is_wheel:
+                        if download_dir:
+                            # When downloading, we only unpack wheels to get
+                            # metadata.
+                            autodelete_unpacked = True
+                        else:
+                            # When installing a wheel, we use the unpacked
+                            # wheel.
+                            autodelete_unpacked = False
+                    unpack_url(
+                        req_to_install.link, req_to_install.source_dir,
+                        download_dir, autodelete_unpacked,
+                        session=self.session, hashes=hashes)
+                except requests.HTTPError as exc:
+                    logger.critical(
+                        'Could not install requirement %s because '
+                        'of error %s',
+                        req_to_install,
+                        exc,
+                    )
+                    raise InstallationError(
+                        'Could not install requirement %s because '
+                        'of HTTP error %s for URL %s' %
+                        (req_to_install, exc, req_to_install.link)
+                    )
+                abstract_dist = make_abstract_dist(req_to_install)
+                abstract_dist.prep_for_dist()
+                if self.is_download:
+                    # Make a .zip of the source_dir we already created.
+                    if req_to_install.link.scheme in vcs.all_schemes:
+                        req_to_install.archive(self.download_dir)
+                # req_to_install.req is only available after unpacking for URL
+                # packages; repeat check_if_exists to uninstall-on-upgrade
+                # (#14)
+                if not self.ignore_installed:
+                    req_to_install.check_if_exists()
+                if req_to_install.satisfied_by:
+                    if self.upgrade or self.ignore_installed:
+                        # don't uninstall conflict if user install and
+                        # conflict is not user install
+                        if not (self.use_user_site and not
+                                dist_in_usersite(
+                                    req_to_install.satisfied_by)):
+                            req_to_install.conflicts_with = \
+                                req_to_install.satisfied_by
+                        req_to_install.satisfied_by = None
+                    else:
+                        logger.info(
+                            'Requirement already satisfied (use '
+                            '--upgrade to upgrade): %s',
+                            req_to_install,
+                        )
+
+            # ###################### #
+            # # parse dependencies # #
+            # ###################### #
+            dist = abstract_dist.dist(finder)
+            try:
+                check_dist_requires_python(dist)
+            except UnsupportedPythonVersion as e:
+                if self.ignore_requires_python:
+                    logger.warning(e.args[0])
+                else:
+                    req_to_install.remove_temporary_source()
+                    raise
+            more_reqs = []
+
+            def add_req(subreq, extras_requested):
+                sub_install_req = InstallRequirement(
+                    str(subreq),
+                    req_to_install,
+                    isolated=self.isolated,
+                    wheel_cache=self._wheel_cache,
+                )
+                more_reqs.extend(self.add_requirement(
+                    sub_install_req, req_to_install.name,
+                    extras_requested=extras_requested))
+
+            # We add req_to_install before its dependencies, so that we
+            # can refer to it when adding dependencies.
+            if not self.has_requirement(req_to_install.name):
+                # 'unnamed' requirements will get added here
+                self.add_requirement(req_to_install, None)
+
+            if not ignore_dependencies:
+                if (req_to_install.extras):
+                    logger.debug(
+                        "Installing extra requirements: %r",
+                        ','.join(req_to_install.extras),
+                    )
+                missing_requested = sorted(
+                    set(req_to_install.extras) - set(dist.extras)
+                )
+                for missing in missing_requested:
+                    logger.warning(
+                        '%s does not provide the extra \'%s\'',
+                        dist, missing
+                    )
+
+                available_requested = sorted(
+                    set(dist.extras) & set(req_to_install.extras)
+                )
+                for subreq in dist.requires(available_requested):
+                    add_req(subreq, extras_requested=available_requested)
+
+            # cleanup tmp src
+            self.reqs_to_cleanup.append(req_to_install)
+
+            if not req_to_install.editable and not req_to_install.satisfied_by:
+                # XXX: --no-install leads this to report 'Successfully
+                # downloaded' for only non-editable reqs, even though we took
+                # action on them.
+                self.successfully_downloaded.append(req_to_install)
+
+        return more_reqs
+
+    def cleanup_files(self):
+        """Clean up files, remove builds."""
+        logger.debug('Cleaning up...')
+        with indent_log():
+            for req in self.reqs_to_cleanup:
+                req.remove_temporary_source()
+
+    def _to_install(self):
+        """Create the installation order.
+
+        The installation order is topological - requirements are installed
+        before the requiring thing. We break cycles at an arbitrary point,
+        and make no other guarantees.
+        """
+        # The current implementation, which we may change at any point
+        # installs the user specified things in the order given, except when
+        # dependencies must come earlier to achieve topological order.
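+        # Illustrative example (comment only): if the user requested A and A
+        # depends on B, self._dependencies maps A -> [B], so schedule()
+        # appends B before A and the resulting order is [B, A].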
+        order = []
+        ordered_reqs = set()
+
+        def schedule(req):
+            if req.satisfied_by or req in ordered_reqs:
+                return
+            if req.constraint:
+                return
+            ordered_reqs.add(req)
+            for dep in self._dependencies[req]:
+                schedule(dep)
+            order.append(req)
+        for install_req in self.requirements.values():
+            schedule(install_req)
+        return order
+
+    def install(self, install_options, global_options=(), *args, **kwargs):
+        """
+        Install everything in this set (after having downloaded and unpacked
+        the packages)
+        """
+        to_install = self._to_install()
+
+        if to_install:
+            logger.info(
+                'Installing collected packages: %s',
+                ', '.join([req.name for req in to_install]),
+            )
+
+        with indent_log():
+            for requirement in to_install:
+                if requirement.conflicts_with:
+                    logger.info(
+                        'Found existing installation: %s',
+                        requirement.conflicts_with,
+                    )
+                    with indent_log():
+                        requirement.uninstall(auto_confirm=True)
+                try:
+                    requirement.install(
+                        install_options,
+                        global_options,
+                        *args,
+                        **kwargs
+                    )
+                except:
+                    # if install did not succeed, rollback previous uninstall
+                    if (requirement.conflicts_with and not
+                            requirement.install_succeeded):
+                        requirement.rollback_uninstall()
+                    raise
+                else:
+                    if (requirement.conflicts_with and
+                            requirement.install_succeeded):
+                        requirement.commit_uninstall()
+                requirement.remove_temporary_source()
+
+        self.successfully_installed = to_install
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_uninstall.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_uninstall.py
new file mode 100644
index 0000000000000000000000000000000000000000..5248430a9b17c102fcabe4f9ccbafd2a307f5701
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/req/req_uninstall.py
@@ -0,0 +1,195 @@
+from __future__ import absolute_import
+
+import logging
+import os
+import tempfile
+
+from pip.compat import uses_pycache, WINDOWS, cache_from_source
+from pip.exceptions import UninstallationError
+from pip.utils import rmtree, ask, is_local, renames, normalize_path
+from pip.utils.logging import indent_log
+
+
+logger = logging.getLogger(__name__)
+
+
+class UninstallPathSet(object):
+    """A set of file paths to be removed in the uninstallation of a
+    requirement."""
+    def __init__(self, dist):
+        self.paths = set()
+        self._refuse = set()
+        self.pth = {}
+        self.dist = dist
+        self.save_dir = None
+        self._moved_paths = []
+
+    def _permitted(self, path):
+        """
+        Return True if the given path is one we are permitted to
+        remove/modify, False otherwise.
+
+        """
+        return is_local(path)
+
+    def add(self, path):
+        head, tail = os.path.split(path)
+
+        # we normalize the head to resolve parent directory symlinks, but not
+        # the tail, since we only want to uninstall symlinks, not their targets
+        path = os.path.join(normalize_path(head), os.path.normcase(tail))
+
+        if not os.path.exists(path):
+            return
+        if self._permitted(path):
+            self.paths.add(path)
+        else:
+            self._refuse.add(path)
+
+        # __pycache__ files can show up after 'installed-files.txt' is created,
+        # due to imports
+        if os.path.splitext(path)[1] == '.py' and uses_pycache:
+            self.add(cache_from_source(path))
+
+    def add_pth(self, pth_file, entry):
+        pth_file = normalize_path(pth_file)
+        if self._permitted(pth_file):
+            if pth_file not in self.pth:
+                self.pth[pth_file] = UninstallPthEntries(pth_file)
+            self.pth[pth_file].add(entry)
+        else:
+            self._refuse.add(pth_file)
+
+    def compact(self, paths):
+        """Compact a path set to contain the minimal number of paths
+        necessary to contain all paths in the set. If /a/path/ and
+        /a/path/to/a/file.txt are both in the set, leave only the
+        shorter path."""
+        short_paths = set()
+        for path in sorted(paths, key=len):
+            if not any([
+                    (path.startswith(shortpath) and
+                     path[len(shortpath.rstrip(os.path.sep))] == os.path.sep)
+                    for shortpath in short_paths]):
+                short_paths.add(path)
+        return short_paths
+
+    def _stash(self, path):
+        return os.path.join(
+            self.save_dir, os.path.splitdrive(path)[1].lstrip(os.path.sep))
+
+    def remove(self, auto_confirm=False):
+        """Remove paths in ``self.paths`` with confirmation (unless
+        ``auto_confirm`` is True)."""
+        if not self.paths:
+            logger.info(
+                "Can't uninstall '%s'. No files were found to uninstall.",
+                self.dist.project_name,
+            )
+            return
+        logger.info(
+            'Uninstalling %s-%s:',
+            self.dist.project_name, self.dist.version
+        )
+
+        with indent_log():
+            paths = sorted(self.compact(self.paths))
+
+            if auto_confirm:
+                response = 'y'
+            else:
+                for path in paths:
+                    logger.info(path)
+                response = ask('Proceed (y/n)? ', ('y', 'n'))
+            if self._refuse:
+                logger.info('Not removing or modifying (outside of prefix):')
+                for path in self.compact(self._refuse):
+                    logger.info(path)
+            if response == 'y':
+                self.save_dir = tempfile.mkdtemp(suffix='-uninstall',
+                                                 prefix='pip-')
+                for path in paths:
+                    new_path = self._stash(path)
+                    logger.debug('Removing file or directory %s', path)
+                    self._moved_paths.append(path)
+                    renames(path, new_path)
+                for pth in self.pth.values():
+                    pth.remove()
+                logger.info(
+                    'Successfully uninstalled %s-%s',
+                    self.dist.project_name, self.dist.version
+                )
+
+    def rollback(self):
+        """Rollback the changes previously made by remove()."""
+        if self.save_dir is None:
+            logger.error(
+                "Can't roll back %s; was not uninstalled",
+                self.dist.project_name,
+            )
+            return False
+        logger.info('Rolling back uninstall of %s', self.dist.project_name)
+        for path in self._moved_paths:
+            tmp_path = self._stash(path)
+            logger.debug('Replacing %s', path)
+            renames(tmp_path, path)
+        for pth in self.pth.values():
+            pth.rollback()
+
+    def commit(self):
+        """Remove temporary save dir: rollback will no longer be possible."""
+        if self.save_dir is not None:
+            rmtree(self.save_dir)
+            self.save_dir = None
+            self._moved_paths = []
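+
+    # Typical flow (sketch, not pip's literal call sites): populate the set
+    # with add()/add_pth(), call remove() to stash the files in a temporary
+    # save_dir, then either commit() to make the uninstall permanent or
+    # rollback() to restore everything that was moved.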
+
+
+class UninstallPthEntries(object):
+    def __init__(self, pth_file):
+        if not os.path.isfile(pth_file):
+            raise UninstallationError(
+                "Cannot remove entries from nonexistent file %s" % pth_file
+            )
+        self.file = pth_file
+        self.entries = set()
+        self._saved_lines = None
+
+    def add(self, entry):
+        entry = os.path.normcase(entry)
+        # On Windows, os.path.normcase converts the entry to use
+        # backslashes.  This is correct for entries that describe absolute
+        # paths outside of site-packages, but all the others use forward
+        # slashes.
+        if WINDOWS and not os.path.splitdrive(entry)[0]:
+            entry = entry.replace('\\', '/')
+        self.entries.add(entry)
+
+    def remove(self):
+        logger.debug('Removing pth entries from %s:', self.file)
+        with open(self.file, 'rb') as fh:
+            # windows uses '\r\n' with py3k, but uses '\n' with py2.x
+            lines = fh.readlines()
+            self._saved_lines = lines
+        if any(b'\r\n' in line for line in lines):
+            endline = '\r\n'
+        else:
+            endline = '\n'
+        for entry in self.entries:
+            try:
+                logger.debug('Removing entry: %s', entry)
+                lines.remove((entry + endline).encode("utf-8"))
+            except ValueError:
+                pass
+        with open(self.file, 'wb') as fh:
+            fh.writelines(lines)
+
+    def rollback(self):
+        if self._saved_lines is None:
+            logger.error(
+                'Cannot roll back changes to %s, none were made', self.file
+            )
+            return False
+        logger.debug('Rolling %s back to previous state', self.file)
+        with open(self.file, 'wb') as fh:
+            fh.writelines(self._saved_lines)
+        return True
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/status_codes.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/status_codes.py
new file mode 100644
index 0000000000000000000000000000000000000000..275360a3175abaeab86148d61b735904f96d72f6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/status_codes.py
@@ -0,0 +1,8 @@
+from __future__ import absolute_import
+
+SUCCESS = 0
+ERROR = 1
+UNKNOWN_ERROR = 2
+VIRTUALENV_NOT_FOUND = 3
+PREVIOUS_BUILD_DIR_ERROR = 4
+NO_MATCHES_FOUND = 23
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d25d917a1b382c1b2dca3bbb54bbff4b36a25b7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/__init__.py
@@ -0,0 +1,870 @@
+from __future__ import absolute_import
+
+from collections import deque
+import contextlib
+import errno
+import io
+import locale
+# we have a submodule named 'logging' which would shadow this if we used the
+# regular name:
+import logging as std_logging
+import re
+import os
+import posixpath
+import shutil
+import stat
+import subprocess
+import sys
+import tarfile
+import zipfile
+
+from pip.exceptions import InstallationError
+from pip.compat import console_to_str, expanduser, stdlib_pkgs
+from pip.locations import (
+    site_packages, user_site, running_under_virtualenv, virtualenv_no_global,
+    write_delete_marker_file,
+)
+from pip._vendor import pkg_resources
+from pip._vendor.six.moves import input
+from pip._vendor.six import PY2
+from pip._vendor.retrying import retry
+
+if PY2:
+    from io import BytesIO as StringIO
+else:
+    from io import StringIO
+
+__all__ = ['rmtree', 'display_path', 'backup_dir',
+           'ask', 'splitext',
+           'format_size', 'is_installable_dir',
+           'is_svn_page', 'file_contents',
+           'split_leading_dir', 'has_leading_dir',
+           'normalize_path',
+           'renames', 'get_terminal_size', 'get_prog',
+           'unzip_file', 'untar_file', 'unpack_file', 'call_subprocess',
+           'captured_stdout', 'ensure_dir',
+           'ARCHIVE_EXTENSIONS', 'SUPPORTED_EXTENSIONS',
+           'get_installed_version']
+
+
+logger = std_logging.getLogger(__name__)
+
+BZ2_EXTENSIONS = ('.tar.bz2', '.tbz')
+XZ_EXTENSIONS = ('.tar.xz', '.txz', '.tlz', '.tar.lz', '.tar.lzma')
+ZIP_EXTENSIONS = ('.zip', '.whl')
+TAR_EXTENSIONS = ('.tar.gz', '.tgz', '.tar')
+ARCHIVE_EXTENSIONS = (
+    ZIP_EXTENSIONS + BZ2_EXTENSIONS + TAR_EXTENSIONS + XZ_EXTENSIONS)
+SUPPORTED_EXTENSIONS = ZIP_EXTENSIONS + TAR_EXTENSIONS
+try:
+    import bz2  # noqa
+    SUPPORTED_EXTENSIONS += BZ2_EXTENSIONS
+except ImportError:
+    logger.debug('bz2 module is not available')
+
+try:
+    # Only for Python 3.3+
+    import lzma  # noqa
+    SUPPORTED_EXTENSIONS += XZ_EXTENSIONS
+except ImportError:
+    logger.debug('lzma module is not available')
+
+
+def import_or_raise(pkg_or_module_string, ExceptionType, *args, **kwargs):
+    try:
+        return __import__(pkg_or_module_string)
+    except ImportError:
+        raise ExceptionType(*args, **kwargs)
+
+
+def ensure_dir(path):
+    """os.path.makedirs without EEXIST."""
+    try:
+        os.makedirs(path)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
+
+
+def get_prog():
+    try:
+        if os.path.basename(sys.argv[0]) in ('__main__.py', '-c'):
+            return "%s -m pip" % sys.executable
+    except (AttributeError, TypeError, IndexError):
+        pass
+    return 'pip'
+
+
+# Retry every half second for up to 3 seconds
+@retry(stop_max_delay=3000, wait_fixed=500)
+def rmtree(dir, ignore_errors=False):
+    shutil.rmtree(dir, ignore_errors=ignore_errors,
+                  onerror=rmtree_errorhandler)
+
+
+def rmtree_errorhandler(func, path, exc_info):
+    """On Windows, the files in .svn are read-only, so when rmtree() tries to
+    remove them, an exception is thrown.  We catch that here, remove the
+    read-only attribute, and hopefully continue without problems."""
+    # if file type currently read only
+    if os.stat(path).st_mode & stat.S_IREAD:
+        # convert to read/write
+        os.chmod(path, stat.S_IWRITE)
+        # use the original function to repeat the operation
+        func(path)
+        return
+    else:
+        raise
+
+
+def display_path(path):
+    """Gives the display value for a given path, making it relative to cwd
+    if possible."""
+    path = os.path.normcase(os.path.abspath(path))
+    if sys.version_info[0] == 2:
+        path = path.decode(sys.getfilesystemencoding(), 'replace')
+        path = path.encode(sys.getdefaultencoding(), 'replace')
+    if path.startswith(os.getcwd() + os.path.sep):
+        path = '.' + path[len(os.getcwd()):]
+    return path
+
+
+def backup_dir(dir, ext='.bak'):
+    """Figure out the name of a directory to back up the given dir to
+    (adding .bak, .bak2, etc)"""
+    n = 1
+    extension = ext
+    while os.path.exists(dir + extension):
+        n += 1
+        extension = ext + str(n)
+    return dir + extension
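+
+# Illustrative example: backup_dir('/tmp/build') returns '/tmp/build.bak' if
+# that name is free, otherwise '/tmp/build.bak2', '/tmp/build.bak3', etc.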
+
+
+def ask_path_exists(message, options):
+    for action in os.environ.get('PIP_EXISTS_ACTION', '').split():
+        if action in options:
+            return action
+    return ask(message, options)
+
+
+def ask(message, options):
+    """Ask the message interactively, with the given possible responses"""
+    while 1:
+        if os.environ.get('PIP_NO_INPUT'):
+            raise Exception(
+                'No input was expected ($PIP_NO_INPUT set); question: %s' %
+                message
+            )
+        response = input(message)
+        response = response.strip().lower()
+        if response not in options:
+            print(
+                'Your response (%r) was not one of the expected responses: '
+                '%s' % (response, ', '.join(options))
+            )
+        else:
+            return response
+
+
+def format_size(bytes):
+    if bytes > 1000 * 1000:
+        return '%.1fMB' % (bytes / 1000.0 / 1000)
+    elif bytes > 10 * 1000:
+        return '%ikB' % (bytes / 1000)
+    elif bytes > 1000:
+        return '%.1fkB' % (bytes / 1000.0)
+    else:
+        return '%ibytes' % bytes
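+
+# Illustrative values: format_size(1234567) == '1.2MB',
+# format_size(123456) == '123kB', format_size(1234) == '1.2kB',
+# format_size(500) == '500bytes'.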
+
+
+def is_installable_dir(path):
+    """Return True if `path` is a directory containing a setup.py file."""
+    if not os.path.isdir(path):
+        return False
+    setup_py = os.path.join(path, 'setup.py')
+    if os.path.isfile(setup_py):
+        return True
+    return False
+
+
+def is_svn_page(html):
+    """
+    Returns true if the page appears to be the index page of an svn repository
+    """
+    return (re.search(r'<title>[^<]*Revision \d+:', html) and
+            re.search(r'Powered by (?:<a[^>]*?>)?Subversion', html, re.I))
+
+
+def file_contents(filename):
+    with open(filename, 'rb') as fp:
+        return fp.read().decode('utf-8')
+
+
+def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
+    """Yield pieces of data from a file-like object until EOF."""
+    while True:
+        chunk = file.read(size)
+        if not chunk:
+            break
+        yield chunk
+
+
+def split_leading_dir(path):
+    path = path.lstrip('/').lstrip('\\')
+    if '/' in path and (('\\' in path and path.find('/') < path.find('\\')) or
+                        '\\' not in path):
+        return path.split('/', 1)
+    elif '\\' in path:
+        return path.split('\\', 1)
+    else:
+        return path, ''
+
+
+def has_leading_dir(paths):
+    """Returns true if all the paths have the same leading path name
+    (i.e., everything is in one subdirectory in an archive)"""
+    common_prefix = None
+    for path in paths:
+        prefix, rest = split_leading_dir(path)
+        if not prefix:
+            return False
+        elif common_prefix is None:
+            common_prefix = prefix
+        elif prefix != common_prefix:
+            return False
+    return True
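+
+# Illustrative examples: split_leading_dir('pkg-1.0/setup.py') returns
+# ('pkg-1.0', 'setup.py'); has_leading_dir(['pkg/a.py', 'pkg/sub/b.py']) is
+# True, while has_leading_dir(['a.py', 'pkg/b.py']) is False.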
+
+
+def normalize_path(path, resolve_symlinks=True):
+    """
+    Convert a path to its canonical, case-normalized, absolute version.
+
+    """
+    path = expanduser(path)
+    if resolve_symlinks:
+        path = os.path.realpath(path)
+    else:
+        path = os.path.abspath(path)
+    return os.path.normcase(path)
+
+
+def splitext(path):
+    """Like os.path.splitext, but take off .tar too"""
+    base, ext = posixpath.splitext(path)
+    if base.lower().endswith('.tar'):
+        ext = base[-4:] + ext
+        base = base[:-4]
+    return base, ext
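+
+# Illustrative example: splitext('pip-9.0.1.tar.gz') returns
+# ('pip-9.0.1', '.tar.gz'), whereas os.path.splitext would stop at '.gz'.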
+
+
+def renames(old, new):
+    """Like os.renames(), but handles renaming across devices."""
+    # Implementation borrowed from os.renames().
+    head, tail = os.path.split(new)
+    if head and tail and not os.path.exists(head):
+        os.makedirs(head)
+
+    shutil.move(old, new)
+
+    head, tail = os.path.split(old)
+    if head and tail:
+        try:
+            os.removedirs(head)
+        except OSError:
+            pass
+
+
+def is_local(path):
+    """
+    Return True if this is a path pip is allowed to modify.
+
+    If we're in a virtualenv, sys.prefix points to the virtualenv's
+    prefix; only sys.prefix is considered local.
+
+    If we're not in a virtualenv, in general we can modify anything.
+    However, if the OS vendor has configured distutils to install
+    somewhere other than sys.prefix (which could be a subdirectory of
+    sys.prefix, e.g. /usr/local), we consider sys.prefix itself nonlocal
+    and the domain of the OS vendor. (In other words, everything _other
+    than_ sys.prefix is considered local.)
+
+    """
+
+    path = normalize_path(path)
+    prefix = normalize_path(sys.prefix)
+
+    if running_under_virtualenv():
+        return path.startswith(normalize_path(sys.prefix))
+    else:
+        from pip.locations import distutils_scheme
+        if path.startswith(prefix):
+            for local_path in distutils_scheme("").values():
+                if path.startswith(normalize_path(local_path)):
+                    return True
+            return False
+        else:
+            return True
+
+
+def dist_is_local(dist):
+    """
+    Return True if given Distribution object is installed somewhere pip
+    is allowed to modify.
+
+    """
+    return is_local(dist_location(dist))
+
+
+def dist_in_usersite(dist):
+    """
+    Return True if given Distribution is installed in user site.
+    """
+    norm_path = normalize_path(dist_location(dist))
+    return norm_path.startswith(normalize_path(user_site))
+
+
+def dist_in_site_packages(dist):
+    """
+    Return True if given Distribution is installed in
+    distutils.sysconfig.get_python_lib().
+    """
+    return normalize_path(
+        dist_location(dist)
+    ).startswith(normalize_path(site_packages))
+
+
+def dist_is_editable(dist):
+    """Is distribution an editable install?"""
+    for path_item in sys.path:
+        egg_link = os.path.join(path_item, dist.project_name + '.egg-link')
+        if os.path.isfile(egg_link):
+            return True
+    return False
+
+
+def get_installed_distributions(local_only=True,
+                                skip=stdlib_pkgs,
+                                include_editables=True,
+                                editables_only=False,
+                                user_only=False):
+    """
+    Return a list of installed Distribution objects.
+
+    If ``local_only`` is True (default), only return installations
+    local to the current virtualenv, if in a virtualenv.
+
+    ``skip`` argument is an iterable of lower-case project names to
+    ignore; defaults to stdlib_pkgs
+
+    If ``include_editables`` is False, don't report editables.
+
+    If ``editables_only`` is True, only report editables.
+
+    If ``user_only`` is True, only report installations in the user
+    site directory.
+
+    """
+    if local_only:
+        local_test = dist_is_local
+    else:
+        def local_test(d):
+            return True
+
+    if include_editables:
+        def editable_test(d):
+            return True
+    else:
+        def editable_test(d):
+            return not dist_is_editable(d)
+
+    if editables_only:
+        def editables_only_test(d):
+            return dist_is_editable(d)
+    else:
+        def editables_only_test(d):
+            return True
+
+    if user_only:
+        user_test = dist_in_usersite
+    else:
+        def user_test(d):
+            return True
+
+    return [d for d in pkg_resources.working_set
+            if local_test(d) and
+            d.key not in skip and
+            editable_test(d) and
+            editables_only_test(d) and
+            user_test(d)
+            ]
+
+
+def egg_link_path(dist):
+    """
+    Return the path for the .egg-link file if it exists, otherwise, None.
+
+    There are 3 scenarios:
+    1) not in a virtualenv
+       try to find in site.USER_SITE, then site_packages
+    2) in a no-global virtualenv
+       try to find in site_packages
+    3) in a yes-global virtualenv
+       try to find in site_packages, then site.USER_SITE
+       (don't look in global location)
+
+    For #1 and #3, there could be odd cases, where there's an egg-link in 2
+    locations.
+
+    This method will just return the first one found.
+    """
+    sites = []
+    if running_under_virtualenv():
+        if virtualenv_no_global():
+            sites.append(site_packages)
+        else:
+            sites.append(site_packages)
+            if user_site:
+                sites.append(user_site)
+    else:
+        if user_site:
+            sites.append(user_site)
+        sites.append(site_packages)
+
+    for site in sites:
+        egglink = os.path.join(site, dist.project_name) + '.egg-link'
+        if os.path.isfile(egglink):
+            return egglink
+
+
+def dist_location(dist):
+    """
+    Get the site-packages location of this distribution. Generally
+    this is dist.location, except in the case of develop-installed
+    packages, where dist.location is the source code location, and we
+    want to know where the egg-link file is.
+
+    """
+    egg_link = egg_link_path(dist)
+    if egg_link:
+        return egg_link
+    return dist.location
+
+
+def get_terminal_size():
+    """Returns a tuple (x, y) representing the width(x) and the height(x)
+    in characters of the terminal window."""
+    def ioctl_GWINSZ(fd):
+        try:
+            import fcntl
+            import termios
+            import struct
+            cr = struct.unpack(
+                'hh',
+                fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')
+            )
+        except:
+            return None
+        if cr == (0, 0):
+            return None
+        return cr
+    cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
+    if not cr:
+        try:
+            fd = os.open(os.ctermid(), os.O_RDONLY)
+            cr = ioctl_GWINSZ(fd)
+            os.close(fd)
+        except:
+            pass
+    if not cr:
+        cr = (os.environ.get('LINES', 25), os.environ.get('COLUMNS', 80))
+    return int(cr[1]), int(cr[0])
+
+
+def current_umask():
+    """Get the current umask which involves having to set it temporarily."""
+    mask = os.umask(0)
+    os.umask(mask)
+    return mask
+
+
+def unzip_file(filename, location, flatten=True):
+    """
+    Unzip the file (with path `filename`) to the destination `location`.  All
+    files are written based on system defaults and umask (i.e. permissions are
+    not preserved), except that regular file members with any execute
+    permissions (user, group, or world) have "chmod +x" applied after being
+    written. Note that for windows, any execute changes using os.chmod are
+    no-ops per the python docs.
+    """
+    ensure_dir(location)
+    zipfp = open(filename, 'rb')
+    try:
+        zip = zipfile.ZipFile(zipfp, allowZip64=True)
+        leading = has_leading_dir(zip.namelist()) and flatten
+        for info in zip.infolist():
+            name = info.filename
+            data = zip.read(name)
+            fn = name
+            if leading:
+                fn = split_leading_dir(name)[1]
+            fn = os.path.join(location, fn)
+            dir = os.path.dirname(fn)
+            if fn.endswith('/') or fn.endswith('\\'):
+                # A directory
+                ensure_dir(fn)
+            else:
+                ensure_dir(dir)
+                fp = open(fn, 'wb')
+                try:
+                    fp.write(data)
+                finally:
+                    fp.close()
+                    mode = info.external_attr >> 16
+                    # if mode and regular file and any execute permissions for
+                    # user/group/world?
+                    if mode and stat.S_ISREG(mode) and mode & 0o111:
+                        # make dest file have execute for user/group/world
+                        # (chmod +x) no-op on windows per python docs
+                        os.chmod(fn, (0o777 - current_umask() | 0o111))
+    finally:
+        zipfp.close()
+
+
+def untar_file(filename, location):
+    """
+    Untar the file (with path `filename`) to the destination `location`.
+    All files are written based on system defaults and umask (i.e. permissions
+    are not preserved), except that regular file members with any execute
+    permissions (user, group, or world) have "chmod +x" applied after being
+    written.  Note that for windows, any execute changes using os.chmod are
+    no-ops per the python docs.
+    """
+    ensure_dir(location)
+    if filename.lower().endswith('.gz') or filename.lower().endswith('.tgz'):
+        mode = 'r:gz'
+    elif filename.lower().endswith(BZ2_EXTENSIONS):
+        mode = 'r:bz2'
+    elif filename.lower().endswith(XZ_EXTENSIONS):
+        mode = 'r:xz'
+    elif filename.lower().endswith('.tar'):
+        mode = 'r'
+    else:
+        logger.warning(
+            'Cannot determine compression type for file %s', filename,
+        )
+        mode = 'r:*'
+    tar = tarfile.open(filename, mode)
+    try:
+        # note: python<=2.5 doesn't seem to know about pax headers, filter them
+        leading = has_leading_dir([
+            member.name for member in tar.getmembers()
+            if member.name != 'pax_global_header'
+        ])
+        for member in tar.getmembers():
+            fn = member.name
+            if fn == 'pax_global_header':
+                continue
+            if leading:
+                fn = split_leading_dir(fn)[1]
+            path = os.path.join(location, fn)
+            if member.isdir():
+                ensure_dir(path)
+            elif member.issym():
+                try:
+                    tar._extract_member(member, path)
+                except Exception as exc:
+                    # Some corrupt tar files seem to produce this
+                    # (specifically bad symlinks)
+                    logger.warning(
+                        'In the tar file %s the member %s is invalid: %s',
+                        filename, member.name, exc,
+                    )
+                    continue
+            else:
+                try:
+                    fp = tar.extractfile(member)
+                except (KeyError, AttributeError) as exc:
+                    # Some corrupt tar files seem to produce this
+                    # (specifically bad symlinks)
+                    logger.warning(
+                        'In the tar file %s the member %s is invalid: %s',
+                        filename, member.name, exc,
+                    )
+                    continue
+                ensure_dir(os.path.dirname(path))
+                with open(path, 'wb') as destfp:
+                    shutil.copyfileobj(fp, destfp)
+                fp.close()
+                # Update the timestamp (useful for cython compiled files)
+                tar.utime(member, path)
+                # does the member have any execute permissions for
+                # user/group/world?
+                if member.mode & 0o111:
+                    # make dest file have execute for user/group/world
+                    # no-op on windows per python docs
+                    os.chmod(path, (0o777 - current_umask() | 0o111))
+    finally:
+        tar.close()
+
+
+def unpack_file(filename, location, content_type, link):
+    filename = os.path.realpath(filename)
+    if (content_type == 'application/zip' or
+            filename.lower().endswith(ZIP_EXTENSIONS) or
+            zipfile.is_zipfile(filename)):
+        unzip_file(
+            filename,
+            location,
+            flatten=not filename.endswith('.whl')
+        )
+    elif (content_type == 'application/x-gzip' or
+            tarfile.is_tarfile(filename) or
+            filename.lower().endswith(
+                TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS)):
+        untar_file(filename, location)
+    elif (content_type and content_type.startswith('text/html') and
+            is_svn_page(file_contents(filename))):
+        # We don't really care about this
+        from pip.vcs.subversion import Subversion
+        Subversion('svn+' + link.url).unpack(location)
+    else:
+        # FIXME: handle?
+        # FIXME: magic signatures?
+        logger.critical(
+            'Cannot unpack file %s (downloaded from %s, content-type: %s); '
+            'cannot detect archive format',
+            filename, location, content_type,
+        )
+        raise InstallationError(
+            'Cannot determine archive format of %s' % location
+        )
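+
+# Illustrative dispatch (hypothetical filenames): a 'pkg-1.0.tar.gz' download
+# goes through untar_file(), a '.whl' is unzipped without flattening its
+# leading directory, and anything unrecognized raises InstallationError.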
+
+
+def call_subprocess(cmd, show_stdout=True, cwd=None,
+                    on_returncode='raise',
+                    command_desc=None,
+                    extra_environ=None, spinner=None):
+    # This function's handling of subprocess output is confusing and I
+    # previously broke it terribly, so as penance I will write a long comment
+    # explaining things.
+    #
+    # The obvious thing that affects output is the show_stdout=
+    # kwarg. show_stdout=True means, let the subprocess write directly to our
+    # stdout. Even though it is nominally the default, it is almost never used
+    # inside pip (and should not be used in new code without a very good
+    # reason); as of 2016-02-22 it is only used in a few places inside the VCS
+    # wrapper code. Ideally we should get rid of it entirely, because it
+    # creates a lot of complexity here for a rarely used feature.
+    #
+    # Most places in pip set show_stdout=False. What this means is:
+    # - We connect the child stdout to a pipe, which we read.
+    # - By default, we hide the output but show a spinner -- unless the
+    #   subprocess exits with an error, in which case we show the output.
+    # - If the --verbose option was passed (= loglevel is DEBUG), then we show
+    #   the output unconditionally. (But in this case we don't want to show
+    #   the output a second time if it turns out that there was an error.)
+    #
+    # stderr is always merged with stdout (even if show_stdout=True).
+    if show_stdout:
+        stdout = None
+    else:
+        stdout = subprocess.PIPE
+    if command_desc is None:
+        cmd_parts = []
+        for part in cmd:
+            if ' ' in part or '\n' in part or '"' in part or "'" in part:
+                part = '"%s"' % part.replace('"', '\\"')
+            cmd_parts.append(part)
+        command_desc = ' '.join(cmd_parts)
+    logger.debug("Running command %s", command_desc)
+    env = os.environ.copy()
+    if extra_environ:
+        env.update(extra_environ)
+    try:
+        proc = subprocess.Popen(
+            cmd, stderr=subprocess.STDOUT, stdin=None, stdout=stdout,
+            cwd=cwd, env=env)
+    except Exception as exc:
+        logger.critical(
+            "Error %s while executing command %s", exc, command_desc,
+        )
+        raise
+    if stdout is not None:
+        all_output = []
+        while True:
+            line = console_to_str(proc.stdout.readline())
+            if not line:
+                break
+            line = line.rstrip()
+            all_output.append(line + '\n')
+            if logger.getEffectiveLevel() <= std_logging.DEBUG:
+                # Show the line immediately
+                logger.debug(line)
+            else:
+                # Update the spinner
+                if spinner is not None:
+                    spinner.spin()
+    proc.wait()
+    if spinner is not None:
+        if proc.returncode:
+            spinner.finish("error")
+        else:
+            spinner.finish("done")
+    if proc.returncode:
+        if on_returncode == 'raise':
+            if (logger.getEffectiveLevel() > std_logging.DEBUG and
+                    not show_stdout):
+                logger.info(
+                    'Complete output from command %s:', command_desc,
+                )
+                logger.info(
+                    ''.join(all_output) +
+                    '\n----------------------------------------'
+                )
+            raise InstallationError(
+                'Command "%s" failed with error code %s in %s'
+                % (command_desc, proc.returncode, cwd))
+        elif on_returncode == 'warn':
+            logger.warning(
+                'Command "%s" had error code %s in %s',
+                command_desc, proc.returncode, cwd,
+            )
+        elif on_returncode == 'ignore':
+            pass
+        else:
+            raise ValueError('Invalid value: on_returncode=%s' %
+                             repr(on_returncode))
+    if not show_stdout:
+        return ''.join(all_output)
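+
+# Illustrative usage (output is captured because show_stdout=False):
+#
+#     call_subprocess([sys.executable, '-c', 'print("hello")'],
+#                     show_stdout=False)
+#
+# returns 'hello\n' and raises InstallationError if the command exits with a
+# non-zero status (the default on_returncode='raise').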
+
+
+def read_text_file(filename):
+    """Return the contents of *filename*.
+
+    Try to decode the file contents with utf-8, the preferred system encoding
+    (e.g., cp1252 on some Windows machines), and latin1, in that order.
+    Decoding a byte string with latin1 will never raise an error. In the worst
+    case, the returned string will contain some garbage characters.
+
+    """
+    with open(filename, 'rb') as fp:
+        data = fp.read()
+
+    encodings = ['utf-8', locale.getpreferredencoding(False), 'latin1']
+    for enc in encodings:
+        try:
+            data = data.decode(enc)
+        except UnicodeDecodeError:
+            continue
+        break
+
+    assert type(data) != bytes  # Latin1 should have worked.
+    return data
+
+
+def _make_build_dir(build_dir):
+    os.makedirs(build_dir)
+    write_delete_marker_file(build_dir)
+
+
+class FakeFile(object):
+    """Wrap a list of lines in an object with readline() to make
+    ConfigParser happy."""
+    def __init__(self, lines):
+        self._gen = (l for l in lines)
+
+    def readline(self):
+        try:
+            try:
+                return next(self._gen)
+            except NameError:
+                return self._gen.next()
+        except StopIteration:
+            return ''
+
+    def __iter__(self):
+        return self._gen
+
+
+class StreamWrapper(StringIO):
+
+    @classmethod
+    def from_stream(cls, orig_stream):
+        cls.orig_stream = orig_stream
+        return cls()
+
+    # compileall.compile_dir() needs stdout.encoding to print to stdout
+    @property
+    def encoding(self):
+        return self.orig_stream.encoding
+
+
+@contextlib.contextmanager
+def captured_output(stream_name):
+    """Return a context manager used by captured_stdout/stdin/stderr
+    that temporarily replaces the sys stream *stream_name* with a StringIO.
+
+    Taken from Lib/support/__init__.py in the CPython repo.
+    """
+    orig_stdout = getattr(sys, stream_name)
+    setattr(sys, stream_name, StreamWrapper.from_stream(orig_stdout))
+    try:
+        yield getattr(sys, stream_name)
+    finally:
+        setattr(sys, stream_name, orig_stdout)
+
+
+def captured_stdout():
+    """Capture the output of sys.stdout:
+
+       with captured_stdout() as stdout:
+           print('hello')
+       self.assertEqual(stdout.getvalue(), 'hello\n')
+
+    Taken from Lib/support/__init__.py in the CPython repo.
+    """
+    return captured_output('stdout')
+
+
+class cached_property(object):
+    """A property that is only computed once per instance and then replaces
+       itself with an ordinary attribute. Deleting the attribute resets the
+       property.
+
+       Source: https://github.com/bottlepy/bottle/blob/0.11.5/bottle.py#L175
+    """
+
+    def __init__(self, func):
+        self.__doc__ = getattr(func, '__doc__')
+        self.func = func
+
+    def __get__(self, obj, cls):
+        if obj is None:
+            # We're being accessed from the class itself, not from an object
+            return self
+        value = obj.__dict__[self.func.__name__] = self.func(obj)
+        return value
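+
+# Illustrative usage (hypothetical class, not part of pip):
+#
+#     class Example(object):
+#         @cached_property
+#         def answer(self):
+#             return compute_expensive_value()  # runs only on first access
+#
+# Later reads of example.answer come straight from the instance __dict__;
+# 'del example.answer' clears the cache.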
+
+
+def get_installed_version(dist_name, lookup_dirs=None):
+    """Get the installed version of dist_name avoiding pkg_resources cache"""
+    # Create a requirement that we'll look for inside of setuptools.
+    req = pkg_resources.Requirement.parse(dist_name)
+
+    # We want to avoid having this cached, so we need to construct a new
+    # working set each time.
+    if lookup_dirs is None:
+        working_set = pkg_resources.WorkingSet()
+    else:
+        working_set = pkg_resources.WorkingSet(lookup_dirs)
+
+    # Get the installed distribution from our working set
+    dist = working_set.find(req)
+
+    # Check to see if we got an installed distribution or not; if we did,
+    # we want to return its version.
+    return dist.version if dist else None
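+
+# Illustrative usage: get_installed_version('pip') returns the installed
+# version string (for example '9.0.1') when pip is visible in the lookup
+# dirs, and None otherwise.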
+
+
+def consume(iterator):
+    """Consume an iterable at C speed."""
+    deque(iterator, maxlen=0)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/appdirs.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/appdirs.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b8280144d75690ed46f82dfcd961547f58cc1b1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/appdirs.py
@@ -0,0 +1,248 @@
+"""
+This code was taken from https://github.com/ActiveState/appdirs and modified
+to suit our purposes.
+"""
+from __future__ import absolute_import
+
+import os
+import sys
+
+from pip.compat import WINDOWS, expanduser
+from pip._vendor.six import PY2, text_type
+
+
+def user_cache_dir(appname):
+    r"""
+    Return full path to the user-specific cache dir for this application.
+
+        "appname" is the name of application.
+
+    Typical user cache directories are:
+        macOS:      ~/Library/Caches/<AppName>
+        Unix:       ~/.cache/<AppName> (XDG default)
+        Windows:    C:\Users\<username>\AppData\Local\<AppName>\Cache
+
+    On Windows the only suggestion in the MSDN docs is that local settings go
+    in the `CSIDL_LOCAL_APPDATA` directory. This is identical to the
+    non-roaming app data dir (the default returned by `user_data_dir`). Apps
+    typically put cache data somewhere *under* the given dir here. Some
+    examples:
+        ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
+        ...\Acme\SuperApp\Cache\1.0
+
+    OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
+    """
+    if WINDOWS:
+        # Get the base path
+        path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
+
+        # When using Python 2, return paths as bytes on Windows like we do on
+        # other operating systems. See helper function docs for more details.
+        if PY2 and isinstance(path, text_type):
+            path = _win_path_to_bytes(path)
+
+        # Add our app name and Cache directory to it
+        path = os.path.join(path, appname, "Cache")
+    elif sys.platform == "darwin":
+        # Get the base path
+        path = expanduser("~/Library/Caches")
+
+        # Add our app name to it
+        path = os.path.join(path, appname)
+    else:
+        # Get the base path
+        path = os.getenv("XDG_CACHE_HOME", expanduser("~/.cache"))
+
+        # Add our app name to it
+        path = os.path.join(path, appname)
+
+    return path
+
+
+def user_data_dir(appname, roaming=False):
+    """
+    Return full path to the user-specific data dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "roaming" (boolean, default False) can be set True to use the Windows
+            roaming appdata directory. That means that for users on a Windows
+            network setup for roaming profiles, this user data will be
+            sync'd on login. See
+            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+            for a discussion of issues.
+
+    Typical user data directories are:
+        macOS:                  ~/Library/Application Support/<AppName>
+        Unix:                   ~/.local/share/<AppName>    # or in
+                                $XDG_DATA_HOME, if defined
+        Win XP (not roaming):   C:\Documents and Settings\<username>\ ...
+                                ...Application Data\<AppName>
+        Win XP (roaming):       C:\Documents and Settings\<username>\Local ...
+                                ...Settings\Application Data\<AppName>
+        Win 7  (not roaming):   C:\\Users\<username>\AppData\Local\<AppName>
+        Win 7  (roaming):       C:\\Users\<username>\AppData\Roaming\<AppName>
+
+    For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
+    That means, by default "~/.local/share/<AppName>".
+    """
+    if WINDOWS:
+        const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
+        path = os.path.join(os.path.normpath(_get_win_folder(const)), appname)
+    elif sys.platform == "darwin":
+        path = os.path.join(
+            expanduser('~/Library/Application Support/'),
+            appname,
+        )
+    else:
+        path = os.path.join(
+            os.getenv('XDG_DATA_HOME', expanduser("~/.local/share")),
+            appname,
+        )
+
+    return path
+
+
+def user_config_dir(appname, roaming=True):
+    """Return full path to the user-specific config dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "roaming" (boolean, default True) can be set False to not use the
+            Windows roaming appdata directory. That means that for users on a
+            Windows network setup for roaming profiles, this user data will be
+            sync'd on login. See
+            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+            for a discussion of issues.
+
+    Typical user config directories are:
+        macOS:                  same as user_data_dir
+        Unix:                   ~/.config/<AppName>
+        Win *:                  same as user_data_dir
+
+    For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
+    That means, by default "~/.config/<AppName>".
+    """
+    if WINDOWS:
+        path = user_data_dir(appname, roaming=roaming)
+    elif sys.platform == "darwin":
+        path = user_data_dir(appname)
+    else:
+        path = os.getenv('XDG_CONFIG_HOME', expanduser("~/.config"))
+        path = os.path.join(path, appname)
+
+    return path
+
+
+# for the discussion regarding site_config_dirs locations
+# see <https://github.com/pypa/pip/issues/1733>
+def site_config_dirs(appname):
+    """Return a list of potential user-shared config dirs for this application.
+
+        "appname" is the name of application.
+
+    Typical user config directories are:
+        macOS:      /Library/Application Support/<AppName>/
+        Unix:       /etc or $XDG_CONFIG_DIRS[i]/<AppName>/ for each value in
+                    $XDG_CONFIG_DIRS
+        Win XP:     C:\Documents and Settings\All Users\Application ...
+                    ...Data\<AppName>\
+        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory
+                    on Vista.)
+        Win 7:      Hidden, but writeable on Win 7:
+                    C:\ProgramData\<AppName>\
+    """
+    if WINDOWS:
+        path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
+        pathlist = [os.path.join(path, appname)]
+    elif sys.platform == 'darwin':
+        pathlist = [os.path.join('/Library/Application Support', appname)]
+    else:
+        # try looking in $XDG_CONFIG_DIRS
+        xdg_config_dirs = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
+        if xdg_config_dirs:
+            pathlist = [
+                os.path.join(expanduser(x), appname)
+                for x in xdg_config_dirs.split(os.pathsep)
+            ]
+        else:
+            pathlist = []
+
+        # always look in /etc directly as well
+        pathlist.append('/etc')
+
+    return pathlist
+
+
+# -- Windows support functions --
+
+def _get_win_folder_from_registry(csidl_name):
+    """
+    This is a fallback technique at best. I'm not sure if using the
+    registry for this guarantees us the correct answer for all CSIDL_*
+    names.
+    """
+    import _winreg
+
+    shell_folder_name = {
+        "CSIDL_APPDATA": "AppData",
+        "CSIDL_COMMON_APPDATA": "Common AppData",
+        "CSIDL_LOCAL_APPDATA": "Local AppData",
+    }[csidl_name]
+
+    key = _winreg.OpenKey(
+        _winreg.HKEY_CURRENT_USER,
+        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
+    )
+    directory, _type = _winreg.QueryValueEx(key, shell_folder_name)
+    return directory
+
+
+def _get_win_folder_with_ctypes(csidl_name):
+    csidl_const = {
+        "CSIDL_APPDATA": 26,
+        "CSIDL_COMMON_APPDATA": 35,
+        "CSIDL_LOCAL_APPDATA": 28,
+    }[csidl_name]
+
+    buf = ctypes.create_unicode_buffer(1024)
+    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
+
+    # Downgrade to short path name if have highbit chars. See
+    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+    has_high_char = False
+    for c in buf:
+        if ord(c) > 255:
+            has_high_char = True
+            break
+    if has_high_char:
+        buf2 = ctypes.create_unicode_buffer(1024)
+        if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
+            buf = buf2
+
+    return buf.value
+
+if WINDOWS:
+    try:
+        import ctypes
+        _get_win_folder = _get_win_folder_with_ctypes
+    except ImportError:
+        _get_win_folder = _get_win_folder_from_registry
+
+
+def _win_path_to_bytes(path):
+    """Encode Windows paths to bytes. Only used on Python 2.
+
+    Motivation is to be consistent with other operating systems where paths
+    are also returned as bytes. This avoids problems mixing bytes and Unicode
+    elsewhere in the codebase. For more details and discussion see
+    <https://github.com/pypa/pip/issues/3463>.
+
+    If encoding using ASCII and MBCS fails, return the original Unicode path.
+    """
+    for encoding in ('ASCII', 'MBCS'):
+        try:
+            return path.encode(encoding)
+        except (UnicodeEncodeError, LookupError):
+            pass
+    return path
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/build.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc65cfab3e01ec5c2a092dadb1ca9a05737225f9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/build.py
@@ -0,0 +1,42 @@
+from __future__ import absolute_import
+
+import os.path
+import tempfile
+
+from pip.utils import rmtree
+
+
+class BuildDirectory(object):
+
+    def __init__(self, name=None, delete=None):
+        # If we were not given an explicit directory, and we were not given an
+        # explicit delete option, then we'll default to deleting.
+        if name is None and delete is None:
+            delete = True
+
+        if name is None:
+            # We realpath here because some systems have their default tmpdir
+            # symlinked to another directory.  This tends to confuse build
+            # scripts, so we canonicalize the path by traversing potential
+            # symlinks here.
+            name = os.path.realpath(tempfile.mkdtemp(prefix="pip-build-"))
+            # If we were not given an explicit directory, and we were not given
+            # an explicit delete option, then we'll default to deleting.
+            if delete is None:
+                delete = True
+
+        self.name = name
+        self.delete = delete
+
+    def __repr__(self):
+        return "<{} {!r}>".format(self.__class__.__name__, self.name)
+
+    def __enter__(self):
+        return self.name
+
+    def __exit__(self, exc, value, tb):
+        self.cleanup()
+
+    def cleanup(self):
+        if self.delete:
+            rmtree(self.name)
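+
+# Illustrative usage: 'with BuildDirectory() as build_dir: ...' yields a fresh
+# temporary directory and removes it on exit; passing an explicit name keeps
+# the directory unless delete=True is also given.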
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/deprecation.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/deprecation.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3f799e64a8a503accaf9923850db6e9883be5d6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/deprecation.py
@@ -0,0 +1,76 @@
+"""
+A module that implements tooling to enable easy warnings about deprecations.
+"""
+from __future__ import absolute_import
+
+import logging
+import warnings
+
+
+class PipDeprecationWarning(Warning):
+    pass
+
+
+class Pending(object):
+    pass
+
+
+class RemovedInPip10Warning(PipDeprecationWarning):
+    pass
+
+
+class RemovedInPip11Warning(PipDeprecationWarning, Pending):
+    pass
+
+
+class Python26DeprecationWarning(PipDeprecationWarning):
+    pass
+
+
+# Warnings <-> Logging Integration
+
+
+_warnings_showwarning = None
+
+
+def _showwarning(message, category, filename, lineno, file=None, line=None):
+    if file is not None:
+        if _warnings_showwarning is not None:
+            _warnings_showwarning(
+                message, category, filename, lineno, file, line,
+            )
+    else:
+        if issubclass(category, PipDeprecationWarning):
+            # We use a specially named logger which will handle all of the
+            # deprecation messages for pip.
+            logger = logging.getLogger("pip.deprecations")
+
+            # This is purposely using the % formatter here instead of letting
+            # the logging module handle the interpolation. This is because we
+            # want it to appear as if someone typed this entire message out.
+            log_message = "DEPRECATION: %s" % message
+
+            # PipDeprecationWarnings that are Pending still have at least 2
+            # versions to go until they are removed so they can just be
+            # warnings.  Otherwise, they will be removed in the very next
+            # version of pip. We want these to be more obvious so we use the
+            # ERROR logging level.
+            if issubclass(category, Pending):
+                logger.warning(log_message)
+            else:
+                logger.error(log_message)
+        else:
+            _warnings_showwarning(
+                message, category, filename, lineno, file, line,
+            )
+
+
+def install_warning_logger():
+    # Enable our Deprecation Warnings
+    warnings.simplefilter("default", PipDeprecationWarning, append=True)
+
+    global _warnings_showwarning
+
+    if _warnings_showwarning is None:
+        _warnings_showwarning = warnings.showwarning
+        warnings.showwarning = _showwarning
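+
+# Illustrative effect: after install_warning_logger(),
+# warnings.warn('msg', RemovedInPip10Warning) is routed to the
+# 'pip.deprecations' logger at ERROR level, while Pending subclasses such as
+# RemovedInPip11Warning are logged at WARNING level instead.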
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/encoding.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/encoding.py
new file mode 100644
index 0000000000000000000000000000000000000000..24831686cf4058c18e04eb2b1dc1201d22852cef
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/encoding.py
@@ -0,0 +1,31 @@
+import codecs
+import locale
+import re
+
+
+BOMS = [
+    (codecs.BOM_UTF8, 'utf8'),
+    (codecs.BOM_UTF16, 'utf16'),
+    (codecs.BOM_UTF16_BE, 'utf16-be'),
+    (codecs.BOM_UTF16_LE, 'utf16-le'),
+    (codecs.BOM_UTF32, 'utf32'),
+    (codecs.BOM_UTF32_BE, 'utf32-be'),
+    (codecs.BOM_UTF32_LE, 'utf32-le'),
+]
+
+ENCODING_RE = re.compile(br'coding[:=]\s*([-\w.]+)')
+
+
+def auto_decode(data):
+    """Check a bytes string for a BOM to correctly detect the encoding
+
+    Fall back to locale.getpreferredencoding(False), like open() does on
+    Python 3."""
+    for bom, encoding in BOMS:
+        if data.startswith(bom):
+            return data[len(bom):].decode(encoding)
+    # Let's check the first two lines, as in PEP 263
+    for line in data.split(b'\n')[:2]:
+        if line[0:1] == b'#' and ENCODING_RE.search(line):
+            encoding = ENCODING_RE.search(line).groups()[0].decode('ascii')
+            return data.decode(encoding)
+    return data.decode(locale.getpreferredencoding(False))
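+
+# Illustrative examples: auto_decode(codecs.BOM_UTF8 + b'hello') returns
+# 'hello', and data whose first line is '# -*- coding: latin-1 -*-' is
+# decoded as latin-1.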
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/filesystem.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/filesystem.py
new file mode 100644
index 0000000000000000000000000000000000000000..25ad51660d4b4d89d8d2c09e91b9516bb982a308
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/filesystem.py
@@ -0,0 +1,28 @@
+import os
+import os.path
+
+from pip.compat import get_path_uid
+
+
+def check_path_owner(path):
+    # If we don't have a way to check the effective uid of this process, then
+    # we'll just assume that we own the directory.
+    if not hasattr(os, "geteuid"):
+        return True
+
+    previous = None
+    while path != previous:
+        if os.path.lexists(path):
+            # Check if path is writable by current user.
+            if os.geteuid() == 0:
+                # Special handling for root user in order to handle properly
+                # cases where users use sudo without -H flag.
+                try:
+                    path_uid = get_path_uid(path)
+                except OSError:
+                    return False
+                return path_uid == 0
+            else:
+                return os.access(path, os.W_OK)
+        else:
+            previous, path = path, os.path.dirname(path)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/glibc.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/glibc.py
new file mode 100644
index 0000000000000000000000000000000000000000..7847885c4f93946545549bb79e5989b83271a7e1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/glibc.py
@@ -0,0 +1,81 @@
+from __future__ import absolute_import
+
+import re
+import ctypes
+import platform
+import warnings
+
+
+def glibc_version_string():
+    "Returns glibc version string, or None if not using glibc."
+
+    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
+    # manpage says, "If filename is NULL, then the returned handle is for the
+    # main program". This way we can let the linker do the work to figure out
+    # which libc our process is actually using.
+    process_namespace = ctypes.CDLL(None)
+    try:
+        gnu_get_libc_version = process_namespace.gnu_get_libc_version
+    except AttributeError:
+        # Symbol doesn't exist -> therefore, we are not linked to
+        # glibc.
+        return None
+
+    # Call gnu_get_libc_version, which returns a string like "2.5"
+    gnu_get_libc_version.restype = ctypes.c_char_p
+    version_str = gnu_get_libc_version()
+    # py2 / py3 compatibility:
+    if not isinstance(version_str, str):
+        version_str = version_str.decode("ascii")
+
+    return version_str
+
+
+# Separated out from have_compatible_glibc for easier unit testing
+def check_glibc_version(version_str, required_major, minimum_minor):
+    # Parse string and check against requested version.
+    #
+    # We use a regexp instead of str.split because we want to discard any
+    # random junk that might come after the minor version -- this might happen
+    # in patched/forked versions of glibc (e.g. Linaro's version of glibc
+    # uses version strings like "2.20-2014.11"). See gh-3588.
+    m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
+    if not m:
+        warnings.warn("Expected glibc version with 2 components major.minor,"
+                      " got: %s" % version_str, RuntimeWarning)
+        return False
+    return (int(m.group("major")) == required_major and
+            int(m.group("minor")) >= minimum_minor)
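+
+# Illustrative examples: check_glibc_version('2.20-2014.11', 2, 17) is True
+# (the regexp ignores the Linaro-style suffix), while
+# check_glibc_version('2.5', 2, 17) is False.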
+
+
+def have_compatible_glibc(required_major, minimum_minor):
+    version_str = glibc_version_string()
+    if version_str is None:
+        return False
+    return check_glibc_version(version_str, required_major, minimum_minor)
+
+
+# platform.libc_ver regularly returns completely nonsensical glibc
+# versions. E.g. on my computer, platform says:
+#
+#   ~$ python2.7 -c 'import platform; print(platform.libc_ver())'
+#   ('glibc', '2.7')
+#   ~$ python3.5 -c 'import platform; print(platform.libc_ver())'
+#   ('glibc', '2.9')
+#
+# But the truth is:
+#
+#   ~$ ldd --version
+#   ldd (Debian GLIBC 2.22-11) 2.22
+#
+# This is unfortunate, because it means that the linehaul data on libc
+# versions that was generated by pip 8.1.2 and earlier is useless and
+# misleading. Solution: instead of using platform, use our code that actually
+# works.
+def libc_ver():
+    glibc_version = glibc_version_string()
+    if glibc_version is None:
+        # For non-glibc platforms, fall back on platform.libc_ver
+        return platform.libc_ver()
+    else:
+        return ("glibc", glibc_version)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/hashes.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/hashes.py
new file mode 100644
index 0000000000000000000000000000000000000000..960297007ae54a8ff0316850f92c49437fe85c72
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/hashes.py
@@ -0,0 +1,92 @@
+from __future__ import absolute_import
+
+import hashlib
+
+from pip.exceptions import HashMismatch, HashMissing, InstallationError
+from pip.utils import read_chunks
+from pip._vendor.six import iteritems, iterkeys, itervalues
+
+
+# The recommended hash algo of the moment. Change this whenever the state of
+# the art changes; it won't hurt backward compatibility.
+FAVORITE_HASH = 'sha256'
+
+
+# Names of hashlib algorithms allowed by the --hash option and ``pip hash``
+# Currently, those are the ones at least as collision-resistant as sha256.
+STRONG_HASHES = ['sha256', 'sha384', 'sha512']
+
+
+class Hashes(object):
+    """A wrapper that builds multiple hashes at once and checks them against
+    known-good values
+
+    """
+    def __init__(self, hashes=None):
+        """
+        :param hashes: A dict of algorithm names pointing to lists of allowed
+            hex digests
+        """
+        self._allowed = {} if hashes is None else hashes
+
+    def check_against_chunks(self, chunks):
+        """Check good hashes against ones built from iterable of chunks of
+        data.
+
+        Raise HashMismatch if none match.
+
+        """
+        gots = {}
+        for hash_name in iterkeys(self._allowed):
+            try:
+                gots[hash_name] = hashlib.new(hash_name)
+            except (ValueError, TypeError):
+                raise InstallationError('Unknown hash name: %s' % hash_name)
+
+        for chunk in chunks:
+            for hash in itervalues(gots):
+                hash.update(chunk)
+
+        for hash_name, got in iteritems(gots):
+            if got.hexdigest() in self._allowed[hash_name]:
+                return
+        self._raise(gots)
+
+    def _raise(self, gots):
+        raise HashMismatch(self._allowed, gots)
+
+    def check_against_file(self, file):
+        """Check good hashes against a file-like object
+
+        Raise HashMismatch if none match.
+
+        """
+        return self.check_against_chunks(read_chunks(file))
+
+    def check_against_path(self, path):
+        with open(path, 'rb') as file:
+            return self.check_against_file(file)
+
+    def __nonzero__(self):
+        """Return whether I know any known-good hashes."""
+        return bool(self._allowed)
+
+    def __bool__(self):
+        return self.__nonzero__()
+
+
+class MissingHashes(Hashes):
+    """A workalike for Hashes used when we're missing a hash for a requirement
+
+    It computes the actual hash of the requirement and raises a HashMissing
+    exception showing it to the user.
+
+    """
+    def __init__(self):
+        """Don't offer the ``hashes`` kwarg."""
+        # Pass our favorite hash in to generate a "gotten hash". With the
+        # empty list, it will never match, so an error will always be raised.
+        super(MissingHashes, self).__init__(hashes={FAVORITE_HASH: []})
+
+    def _raise(self, gots):
+        raise HashMissing(gots[FAVORITE_HASH].hexdigest())
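+
+
+if __name__ == "__main__":
+    # A minimal usage sketch (illustrative addition, runnable via
+    # ``python -m pip.utils.hashes``): hash this very file with the favorite
+    # algorithm and verify it with a Hashes object built from that digest,
+    # which by construction succeeds silently.
+    with open(__file__, 'rb') as f:
+        digest = hashlib.new(FAVORITE_HASH, f.read()).hexdigest()
+    Hashes({FAVORITE_HASH: [digest]}).check_against_path(__file__)
+    print("%s check passed for %s" % (FAVORITE_HASH, __file__))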
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/logging.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/logging.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c1053abfb4d2f086b2a94f7679a970a5bd269e2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/logging.py
@@ -0,0 +1,130 @@
+from __future__ import absolute_import
+
+import contextlib
+import logging
+import logging.handlers
+import os
+
+try:
+    import threading
+except ImportError:
+    import dummy_threading as threading
+
+from pip.compat import WINDOWS
+from pip.utils import ensure_dir
+
+try:
+    from pip._vendor import colorama
+# Lots of different errors can come from this, including SystemError and
+# ImportError.
+except Exception:
+    colorama = None
+
+
+_log_state = threading.local()
+_log_state.indentation = 0
+
+
+@contextlib.contextmanager
+def indent_log(num=2):
+    """
+    A context manager which will cause the log output to be indented for any
+    log messages emitted inside it.
+    """
+    _log_state.indentation += num
+    try:
+        yield
+    finally:
+        _log_state.indentation -= num
+
+
+def get_indentation():
+    return getattr(_log_state, 'indentation', 0)
+
+
+class IndentingFormatter(logging.Formatter):
+
+    def format(self, record):
+        """
+        Calls the standard formatter, but will indent all of the log messages
+        by our current indentation level.
+        """
+        formatted = logging.Formatter.format(self, record)
+        formatted = "".join([
+            (" " * get_indentation()) + line
+            for line in formatted.splitlines(True)
+        ])
+        return formatted
+
+
+def _color_wrap(*colors):
+    def wrapped(inp):
+        return "".join(list(colors) + [inp, colorama.Style.RESET_ALL])
+    return wrapped
+
+
+class ColorizedStreamHandler(logging.StreamHandler):
+
+    # Don't build up a list of colors if we don't have colorama
+    if colorama:
+        COLORS = [
+            # This needs to be in order from highest logging level to lowest.
+            (logging.ERROR, _color_wrap(colorama.Fore.RED)),
+            (logging.WARNING, _color_wrap(colorama.Fore.YELLOW)),
+        ]
+    else:
+        COLORS = []
+
+    def __init__(self, stream=None):
+        logging.StreamHandler.__init__(self, stream)
+
+        if WINDOWS and colorama:
+            self.stream = colorama.AnsiToWin32(self.stream)
+
+    def should_color(self):
+        # Don't colorize things if we do not have colorama
+        if not colorama:
+            return False
+
+        real_stream = (
+            self.stream if not isinstance(self.stream, colorama.AnsiToWin32)
+            else self.stream.wrapped
+        )
+
+        # If the stream is a tty we should color it
+        if hasattr(real_stream, "isatty") and real_stream.isatty():
+            return True
+
+        # If we have an ANSI term we should color it
+        if os.environ.get("TERM") == "ANSI":
+            return True
+
+        # Otherwise, we should not color it
+        return False
+
+    def format(self, record):
+        msg = logging.StreamHandler.format(self, record)
+
+        if self.should_color():
+            for level, color in self.COLORS:
+                if record.levelno >= level:
+                    msg = color(msg)
+                    break
+
+        return msg
+
+
+class BetterRotatingFileHandler(logging.handlers.RotatingFileHandler):
+
+    def _open(self):
+        ensure_dir(os.path.dirname(self.baseFilename))
+        return logging.handlers.RotatingFileHandler._open(self)
+
+
+class MaxLevelFilter(logging.Filter):
+
+    def __init__(self, level):
+        self.level = level
+
+    def filter(self, record):
+        return record.levelno < self.level
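+
+
+# A hedged usage sketch (illustrative, not part of the module's public API
+# documentation): ``indent_log()`` is a context manager, so a caller whose
+# handlers use ``IndentingFormatter`` can nest log output, e.g.
+#
+#     logger.info("Collecting example-package")    # printed flush left
+#     with indent_log():
+#         logger.info("Downloading ...")           # printed two spaces deeper
+#
+# ``get_indentation()`` is consulted at format time, so only records emitted
+# while the context manager is active pick up the extra indentation.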
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/outdated.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/outdated.py
new file mode 100644
index 0000000000000000000000000000000000000000..2164cc3cc2f2eb53e954fb80281ccdc927d10600
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/outdated.py
@@ -0,0 +1,162 @@
+from __future__ import absolute_import
+
+import datetime
+import json
+import logging
+import os.path
+import sys
+
+from pip._vendor import lockfile
+from pip._vendor.packaging import version as packaging_version
+
+from pip.compat import total_seconds, WINDOWS
+from pip.models import PyPI
+from pip.locations import USER_CACHE_DIR, running_under_virtualenv
+from pip.utils import ensure_dir, get_installed_version
+from pip.utils.filesystem import check_path_owner
+
+
+SELFCHECK_DATE_FMT = "%Y-%m-%dT%H:%M:%SZ"
+
+
+logger = logging.getLogger(__name__)
+
+
+class VirtualenvSelfCheckState(object):
+    def __init__(self):
+        self.statefile_path = os.path.join(sys.prefix, "pip-selfcheck.json")
+
+        # Load the existing state
+        try:
+            with open(self.statefile_path) as statefile:
+                self.state = json.load(statefile)
+        except (IOError, ValueError):
+            self.state = {}
+
+    def save(self, pypi_version, current_time):
+        # Attempt to write out our version check file
+        with open(self.statefile_path, "w") as statefile:
+            json.dump(
+                {
+                    "last_check": current_time.strftime(SELFCHECK_DATE_FMT),
+                    "pypi_version": pypi_version,
+                },
+                statefile,
+                sort_keys=True,
+                separators=(",", ":")
+            )
+
+
+class GlobalSelfCheckState(object):
+    def __init__(self):
+        self.statefile_path = os.path.join(USER_CACHE_DIR, "selfcheck.json")
+
+        # Load the existing state
+        try:
+            with open(self.statefile_path) as statefile:
+                self.state = json.load(statefile)[sys.prefix]
+        except (IOError, ValueError, KeyError):
+            self.state = {}
+
+    def save(self, pypi_version, current_time):
+        # Check to make sure that we own the directory
+        if not check_path_owner(os.path.dirname(self.statefile_path)):
+            return
+
+        # Now that we've ensured the directory is owned by this user, we'll go
+        # ahead and make sure that all our directories are created.
+        ensure_dir(os.path.dirname(self.statefile_path))
+
+        # Attempt to write out our version check file
+        with lockfile.LockFile(self.statefile_path):
+            if os.path.exists(self.statefile_path):
+                with open(self.statefile_path) as statefile:
+                    state = json.load(statefile)
+            else:
+                state = {}
+
+            state[sys.prefix] = {
+                "last_check": current_time.strftime(SELFCHECK_DATE_FMT),
+                "pypi_version": pypi_version,
+            }
+
+            with open(self.statefile_path, "w") as statefile:
+                json.dump(state, statefile, sort_keys=True,
+                          separators=(",", ":"))
+
+
+def load_selfcheck_statefile():
+    if running_under_virtualenv():
+        return VirtualenvSelfCheckState()
+    else:
+        return GlobalSelfCheckState()
+
+
+def pip_version_check(session):
+    """Check for an update for pip.
+
+    Limit the frequency of checks to once per week. State is stored either in
+    the active virtualenv or in the user's USER_CACHE_DIR keyed off the prefix
+    of the pip script path.
+    """
+    installed_version = get_installed_version("pip")
+    if installed_version is None:
+        return
+
+    pip_version = packaging_version.parse(installed_version)
+    pypi_version = None
+
+    try:
+        state = load_selfcheck_statefile()
+
+        current_time = datetime.datetime.utcnow()
+        # Determine if we need to refresh the state
+        if "last_check" in state.state and "pypi_version" in state.state:
+            last_check = datetime.datetime.strptime(
+                state.state["last_check"],
+                SELFCHECK_DATE_FMT
+            )
+            if total_seconds(current_time - last_check) < 7 * 24 * 60 * 60:
+                pypi_version = state.state["pypi_version"]
+
+        # Refresh the version if we need to or just see if we need to warn
+        if pypi_version is None:
+            resp = session.get(
+                PyPI.pip_json_url,
+                headers={"Accept": "application/json"},
+            )
+            resp.raise_for_status()
+            pypi_version = [
+                v for v in sorted(
+                    list(resp.json()["releases"]),
+                    key=packaging_version.parse,
+                )
+                if not packaging_version.parse(v).is_prerelease
+            ][-1]
+
+            # save that we've performed a check
+            state.save(pypi_version, current_time)
+
+        remote_version = packaging_version.parse(pypi_version)
+
+        # Determine if our pypi_version is older
+        if (pip_version < remote_version and
+                pip_version.base_version != remote_version.base_version):
+            # Advise "python -m pip" on Windows to avoid issues
+            # with overwriting pip.exe.
+            if WINDOWS:
+                pip_cmd = "python -m pip"
+            else:
+                pip_cmd = "pip"
+            logger.warning(
+                "You are using pip version %s, however version %s is "
+                "available.\nYou should consider upgrading via the "
+                "'%s install --upgrade pip' command.",
+                pip_version, pypi_version, pip_cmd
+            )
+
+    except Exception:
+        logger.debug(
+            "There was an error checking the latest version of pip",
+            exc_info=True,
+        )
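+
+
+# A hedged sketch of the persisted state (field values below are
+# illustrative): GlobalSelfCheckState keys entries by ``sys.prefix`` inside
+# ``USER_CACHE_DIR/selfcheck.json``, e.g.
+#
+#     {"/usr": {"last_check": "2018-11-02T12:00:00Z", "pypi_version": "18.1"}}
+#
+# while VirtualenvSelfCheckState stores the same two fields, unkeyed, in
+# ``<sys.prefix>/pip-selfcheck.json``. pip_version_check() only queries PyPI
+# again once the stored "last_check" is more than seven days old.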
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/packaging.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/packaging.py
new file mode 100644
index 0000000000000000000000000000000000000000..e93b20d158d51265a4ed9178717061ad830b5112
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/packaging.py
@@ -0,0 +1,63 @@
+from __future__ import absolute_import
+
+from email.parser import FeedParser
+
+import logging
+import sys
+
+from pip._vendor.packaging import specifiers
+from pip._vendor.packaging import version
+from pip._vendor import pkg_resources
+
+from pip import exceptions
+
+logger = logging.getLogger(__name__)
+
+
+def check_requires_python(requires_python):
+    """
+    Check if the python version in use matches the `requires_python`
+    specifier.
+
+    Returns `True` if the version of python in use matches the requirement.
+    Returns `False` if the version of python in use does not match the
+    requirement.
+
+    Raises an InvalidSpecifier if `requires_python` has an invalid format.
+    """
+    if requires_python is None:
+        # The package provides no information
+        return True
+    requires_python_specifier = specifiers.SpecifierSet(requires_python)
+
+    # We only use major.minor.micro
+    python_version = version.parse('.'.join(map(str, sys.version_info[:3])))
+    return python_version in requires_python_specifier
+
+
+def get_metadata(dist):
+    if (isinstance(dist, pkg_resources.DistInfoDistribution) and
+            dist.has_metadata('METADATA')):
+        return dist.get_metadata('METADATA')
+    elif dist.has_metadata('PKG-INFO'):
+        return dist.get_metadata('PKG-INFO')
+
+
+def check_dist_requires_python(dist):
+    metadata = get_metadata(dist)
+    feed_parser = FeedParser()
+    feed_parser.feed(metadata)
+    pkg_info_dict = feed_parser.close()
+    requires_python = pkg_info_dict.get('Requires-Python')
+    try:
+        if not check_requires_python(requires_python):
+            raise exceptions.UnsupportedPythonVersion(
+                "%s requires Python '%s' but the running Python is %s" % (
+                    dist.project_name,
+                    requires_python,
+                    '.'.join(map(str, sys.version_info[:3])),)
+            )
+    except specifiers.InvalidSpecifier as e:
+        logger.warning(
+            "Package %s has an invalid Requires-Python entry %s - %s" % (
+                dist.project_name, requires_python, e))
+        return
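+
+
+# A hedged usage sketch (assuming the module is imported as
+# ``pip.utils.packaging``; the specifier below is only an example):
+#
+#     >>> check_requires_python(">=2.7,!=3.0.*,!=3.1.*,!=3.2.*")
+#     True    # when the running interpreter satisfies the specifier
+#     >>> check_requires_python(None)
+#     True    # packages that declare no Requires-Python are accepted
+#
+# check_dist_requires_python() performs the same check using a distribution's
+# METADATA/PKG-INFO and raises UnsupportedPythonVersion on a mismatch.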
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/setuptools_build.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/setuptools_build.py
new file mode 100644
index 0000000000000000000000000000000000000000..03973e976cad1b9f3363fafb9f3513dffa1b2a5e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/setuptools_build.py
@@ -0,0 +1,8 @@
+# Shim to wrap setup.py invocation with setuptools
+SETUPTOOLS_SHIM = (
+    "import setuptools, tokenize;__file__=%r;"
+    "f=getattr(tokenize, 'open', open)(__file__);"
+    "code=f.read().replace('\\r\\n', '\\n');"
+    "f.close();"
+    "exec(compile(code, __file__, 'exec'))"
+)
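+
+
+# A hedged usage sketch (names below are illustrative): the shim is
+# interpolated with the path to a project's setup.py and handed to the
+# interpreter, roughly
+#
+#     args = [sys.executable, '-c', SETUPTOOLS_SHIM % setup_py_path]
+#     args += ['bdist_wheel', '-d', wheel_dir]
+#
+# so that even plain-distutils setup scripts are executed with setuptools
+# imported first.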
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/ui.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/ui.py
new file mode 100644
index 0000000000000000000000000000000000000000..bba73e3b13307dc4863740da67244f46d3e23ff6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/utils/ui.py
@@ -0,0 +1,344 @@
+from __future__ import absolute_import
+from __future__ import division
+
+import itertools
+import sys
+from signal import signal, SIGINT, default_int_handler
+import time
+import contextlib
+import logging
+
+from pip.compat import WINDOWS
+from pip.utils import format_size
+from pip.utils.logging import get_indentation
+from pip._vendor import six
+from pip._vendor.progress.bar import Bar, IncrementalBar
+from pip._vendor.progress.helpers import (WritelnMixin,
+                                          HIDE_CURSOR, SHOW_CURSOR)
+from pip._vendor.progress.spinner import Spinner
+
+try:
+    from pip._vendor import colorama
+# Lots of different errors can come from this, including SystemError and
+# ImportError.
+except Exception:
+    colorama = None
+
+logger = logging.getLogger(__name__)
+
+
+def _select_progress_class(preferred, fallback):
+    encoding = getattr(preferred.file, "encoding", None)
+
+    # If we don't know what encoding this file is in, then we'll just assume
+    # that it doesn't support unicode and use the ASCII bar.
+    if not encoding:
+        return fallback
+
+    # Collect all of the possible characters we want to use with the preferred
+    # bar.
+    characters = [
+        getattr(preferred, "empty_fill", six.text_type()),
+        getattr(preferred, "fill", six.text_type()),
+    ]
+    characters += list(getattr(preferred, "phases", []))
+
+    # Try to decode the characters we're using for the bar using the encoding
+    # of the given file, if this works then we'll assume that we can use the
+    # fancier bar and if not we'll fall back to the plaintext bar.
+    try:
+        six.text_type().join(characters).encode(encoding)
+    except UnicodeEncodeError:
+        return fallback
+    else:
+        return preferred
+
+
+_BaseBar = _select_progress_class(IncrementalBar, Bar)
+
+
+class InterruptibleMixin(object):
+    """
+    Helper to ensure that self.finish() gets called on keyboard interrupt.
+
+    This allows downloads to be interrupted without leaving temporary state
+    (like hidden cursors) behind.
+
+    This class is similar to the progress library's existing SigIntMixin
+    helper, but as of version 1.2, that helper has the following problems:
+
+    1. It calls sys.exit().
+    2. It discards the existing SIGINT handler completely.
+    3. It leaves its own handler in place even after an uninterrupted finish,
+       which will have unexpected delayed effects if the user triggers an
+       unrelated keyboard interrupt some time after a progress-displaying
+       download has already completed, for example.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Save the original SIGINT handler for later.
+        """
+        super(InterruptibleMixin, self).__init__(*args, **kwargs)
+
+        self.original_handler = signal(SIGINT, self.handle_sigint)
+
+        # If signal() returns None, the previous handler was not installed from
+        # Python, and we cannot restore it. This probably should not happen,
+        # but if it does, we must restore something sensible instead, at least.
+        # The least bad option should be Python's default SIGINT handler, which
+        # just raises KeyboardInterrupt.
+        if self.original_handler is None:
+            self.original_handler = default_int_handler
+
+    def finish(self):
+        """
+        Restore the original SIGINT handler after finishing.
+
+        This should happen regardless of whether the progress display finishes
+        normally, or gets interrupted.
+        """
+        super(InterruptibleMixin, self).finish()
+        signal(SIGINT, self.original_handler)
+
+    def handle_sigint(self, signum, frame):
+        """
+        Call self.finish() before delegating to the original SIGINT handler.
+
+        This handler should only be in place while the progress display is
+        active.
+        """
+        self.finish()
+        self.original_handler(signum, frame)
+
+
+class DownloadProgressMixin(object):
+
+    def __init__(self, *args, **kwargs):
+        super(DownloadProgressMixin, self).__init__(*args, **kwargs)
+        self.message = (" " * (get_indentation() + 2)) + self.message
+
+    @property
+    def downloaded(self):
+        return format_size(self.index)
+
+    @property
+    def download_speed(self):
+        # Avoid zero division errors...
+        if self.avg == 0.0:
+            return "..."
+        return format_size(1 / self.avg) + "/s"
+
+    @property
+    def pretty_eta(self):
+        if self.eta:
+            return "eta %s" % self.eta_td
+        return ""
+
+    def iter(self, it, n=1):
+        for x in it:
+            yield x
+            self.next(n)
+        self.finish()
+
+
+class WindowsMixin(object):
+
+    def __init__(self, *args, **kwargs):
+        # The Windows terminal does not support the hide/show cursor ANSI codes
+        # even with colorama. So we'll ensure that hide_cursor is False on
+        # Windows.
+        # This call needs to go before the super() call, so that hide_cursor
+        # is set in time. The base progress bar class writes the "hide cursor"
+        # code to the terminal in its init, so if we don't set this soon
+        # enough, we get a "hide" with no corresponding "show"...
+        if WINDOWS and self.hide_cursor:
+            self.hide_cursor = False
+
+        super(WindowsMixin, self).__init__(*args, **kwargs)
+
+        # Check if we are running on Windows and we have the colorama module,
+        # if we do then wrap our file with it.
+        if WINDOWS and colorama:
+            self.file = colorama.AnsiToWin32(self.file)
+            # The progress code expects to be able to call self.file.isatty()
+            # but the colorama.AnsiToWin32() object doesn't have that, so we'll
+            # add it.
+            self.file.isatty = lambda: self.file.wrapped.isatty()
+            # The progress code expects to be able to call self.file.flush()
+            # but the colorama.AnsiToWin32() object doesn't have that, so we'll
+            # add it.
+            self.file.flush = lambda: self.file.wrapped.flush()
+
+
+class DownloadProgressBar(WindowsMixin, InterruptibleMixin,
+                          DownloadProgressMixin, _BaseBar):
+
+    file = sys.stdout
+    message = "%(percent)d%%"
+    suffix = "%(downloaded)s %(download_speed)s %(pretty_eta)s"
+
+
+class DownloadProgressSpinner(WindowsMixin, InterruptibleMixin,
+                              DownloadProgressMixin, WritelnMixin, Spinner):
+
+    file = sys.stdout
+    suffix = "%(downloaded)s %(download_speed)s"
+
+    def next_phase(self):
+        if not hasattr(self, "_phaser"):
+            self._phaser = itertools.cycle(self.phases)
+        return next(self._phaser)
+
+    def update(self):
+        message = self.message % self
+        phase = self.next_phase()
+        suffix = self.suffix % self
+        line = ''.join([
+            message,
+            " " if message else "",
+            phase,
+            " " if suffix else "",
+            suffix,
+        ])
+
+        self.writeln(line)
+
+
+################################################################
+# Generic "something is happening" spinners
+#
+# We don't even try using progress.spinner.Spinner here because it's actually
+# simpler to reimplement from scratch than to coerce their code into doing
+# what we need.
+################################################################
+
+@contextlib.contextmanager
+def hidden_cursor(file):
+    # The Windows terminal does not support the hide/show cursor ANSI codes,
+    # even via colorama. So don't even try.
+    if WINDOWS:
+        yield
+    # We don't want to clutter the output with control characters if we're
+    # writing to a file, or if the user is running with --quiet.
+    # See https://github.com/pypa/pip/issues/3418
+    elif not file.isatty() or logger.getEffectiveLevel() > logging.INFO:
+        yield
+    else:
+        file.write(HIDE_CURSOR)
+        try:
+            yield
+        finally:
+            file.write(SHOW_CURSOR)
+
+
+class RateLimiter(object):
+    def __init__(self, min_update_interval_seconds):
+        self._min_update_interval_seconds = min_update_interval_seconds
+        self._last_update = 0
+
+    def ready(self):
+        now = time.time()
+        delta = now - self._last_update
+        return delta >= self._min_update_interval_seconds
+
+    def reset(self):
+        self._last_update = time.time()
+
+
+class InteractiveSpinner(object):
+    def __init__(self, message, file=None, spin_chars="-\\|/",
+                 # Empirically, 8 updates/second looks nice
+                 min_update_interval_seconds=0.125):
+        self._message = message
+        if file is None:
+            file = sys.stdout
+        self._file = file
+        self._rate_limiter = RateLimiter(min_update_interval_seconds)
+        self._finished = False
+
+        self._spin_cycle = itertools.cycle(spin_chars)
+
+        self._file.write(" " * get_indentation() + self._message + " ... ")
+        self._width = 0
+
+    def _write(self, status):
+        assert not self._finished
+        # Erase what we wrote before by backspacing to the beginning, writing
+        # spaces to overwrite the old text, and then backspacing again
+        backup = "\b" * self._width
+        self._file.write(backup + " " * self._width + backup)
+        # Now we have a blank slate to add our status
+        self._file.write(status)
+        self._width = len(status)
+        self._file.flush()
+        self._rate_limiter.reset()
+
+    def spin(self):
+        if self._finished:
+            return
+        if not self._rate_limiter.ready():
+            return
+        self._write(next(self._spin_cycle))
+
+    def finish(self, final_status):
+        if self._finished:
+            return
+        self._write(final_status)
+        self._file.write("\n")
+        self._file.flush()
+        self._finished = True
+
+
+# Used for dumb terminals, non-interactive installs (no tty), etc.
+# We still print updates occasionally (once every 60 seconds by default) to
+# act as a keep-alive for systems like Travis-CI that take lack-of-output as
+# an indication that a task has frozen.
+class NonInteractiveSpinner(object):
+    def __init__(self, message, min_update_interval_seconds=60):
+        self._message = message
+        self._finished = False
+        self._rate_limiter = RateLimiter(min_update_interval_seconds)
+        self._update("started")
+
+    def _update(self, status):
+        assert not self._finished
+        self._rate_limiter.reset()
+        logger.info("%s: %s", self._message, status)
+
+    def spin(self):
+        if self._finished:
+            return
+        if not self._rate_limiter.ready():
+            return
+        self._update("still running...")
+
+    def finish(self, final_status):
+        if self._finished:
+            return
+        self._update("finished with status '%s'" % (final_status,))
+        self._finished = True
+
+
+@contextlib.contextmanager
+def open_spinner(message):
+    # Interactive spinner goes directly to sys.stdout rather than being routed
+    # through the logging system, but it acts like it has level INFO,
+    # i.e. it's only displayed if we're at level INFO or better.
+    # Non-interactive spinner goes through the logging system, so it is always
+    # in sync with logging configuration.
+    if sys.stdout.isatty() and logger.getEffectiveLevel() <= logging.INFO:
+        spinner = InteractiveSpinner(message)
+    else:
+        spinner = NonInteractiveSpinner(message)
+    try:
+        with hidden_cursor(sys.stdout):
+            yield spinner
+    except KeyboardInterrupt:
+        spinner.finish("canceled")
+        raise
+    except Exception:
+        spinner.finish("error")
+        raise
+    else:
+        spinner.finish("done")
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d3dbb271d8ec0863629125ca9788a7b11e81b21
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/__init__.py
@@ -0,0 +1,366 @@
+"""Handles all VCS (version control) support"""
+from __future__ import absolute_import
+
+import errno
+import logging
+import os
+import shutil
+import sys
+
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+
+from pip.exceptions import BadCommand
+from pip.utils import (display_path, backup_dir, call_subprocess,
+                       rmtree, ask_path_exists)
+
+
+__all__ = ['vcs', 'get_src_requirement']
+
+
+logger = logging.getLogger(__name__)
+
+
+class VcsSupport(object):
+    _registry = {}
+    schemes = ['ssh', 'git', 'hg', 'bzr', 'sftp', 'svn']
+
+    def __init__(self):
+        # Register more schemes with urlparse for various version control
+        # systems
+        urllib_parse.uses_netloc.extend(self.schemes)
+        # Python >= 2.7.4 and >= 3.3 no longer have uses_fragment
+        if getattr(urllib_parse, 'uses_fragment', None):
+            urllib_parse.uses_fragment.extend(self.schemes)
+        super(VcsSupport, self).__init__()
+
+    def __iter__(self):
+        return self._registry.__iter__()
+
+    @property
+    def backends(self):
+        return list(self._registry.values())
+
+    @property
+    def dirnames(self):
+        return [backend.dirname for backend in self.backends]
+
+    @property
+    def all_schemes(self):
+        schemes = []
+        for backend in self.backends:
+            schemes.extend(backend.schemes)
+        return schemes
+
+    def register(self, cls):
+        if not hasattr(cls, 'name'):
+            logger.warning('Cannot register VCS %s', cls.__name__)
+            return
+        if cls.name not in self._registry:
+            self._registry[cls.name] = cls
+            logger.debug('Registered VCS backend: %s', cls.name)
+
+    def unregister(self, cls=None, name=None):
+        if name in self._registry:
+            del self._registry[name]
+        elif cls in self._registry.values():
+            del self._registry[cls.name]
+        else:
+            logger.warning('Cannot unregister because no class or name given')
+
+    def get_backend_name(self, location):
+        """
+        Return the name of the version control backend if found at given
+        location, e.g. vcs.get_backend_name('/path/to/vcs/checkout')
+        """
+        for vc_type in self._registry.values():
+            if vc_type.controls_location(location):
+                logger.debug('Determined that %s uses VCS: %s',
+                             location, vc_type.name)
+                return vc_type.name
+        return None
+
+    def get_backend(self, name):
+        name = name.lower()
+        if name in self._registry:
+            return self._registry[name]
+
+    def get_backend_from_location(self, location):
+        vc_type = self.get_backend_name(location)
+        if vc_type:
+            return self.get_backend(vc_type)
+        return None
+
+
+vcs = VcsSupport()
+
+
+class VersionControl(object):
+    name = ''
+    dirname = ''
+    # List of supported schemes for this Version Control
+    schemes = ()
+
+    def __init__(self, url=None, *args, **kwargs):
+        self.url = url
+        super(VersionControl, self).__init__(*args, **kwargs)
+
+    def _is_local_repository(self, repo):
+        """
+           posix absolute paths start with os.path.sep,
+           win32 ones start with drive (like c:\\folder)
+        """
+        drive, tail = os.path.splitdrive(repo)
+        return repo.startswith(os.path.sep) or drive
+
+    # See issue #1083 for why this method was introduced:
+    # https://github.com/pypa/pip/issues/1083
+    def translate_egg_surname(self, surname):
+        # For example, Django has branches of the form "stable/1.7.x".
+        return surname.replace('/', '_')
+
+    def export(self, location):
+        """
+        Export the repository at the url to the destination location
+        i.e. only download the files, without vcs information
+        """
+        raise NotImplementedError
+
+    def get_url_rev(self):
+        """
+        Returns the correct repository URL and revision by parsing the given
+        repository URL
+        """
+        error_message = (
+            "Sorry, '%s' is a malformed VCS url. "
+            "The format is <vcs>+<protocol>://<url>, "
+            "e.g. svn+http://myrepo/svn/MyApp#egg=MyApp"
+        )
+        assert '+' in self.url, error_message % self.url
+        url = self.url.split('+', 1)[1]
+        scheme, netloc, path, query, frag = urllib_parse.urlsplit(url)
+        rev = None
+        if '@' in path:
+            path, rev = path.rsplit('@', 1)
+        url = urllib_parse.urlunsplit((scheme, netloc, path, query, ''))
+        return url, rev
+
+    def get_info(self, location):
+        """
+        Returns (url, revision), where both are strings
+        """
+        assert not location.rstrip('/').endswith(self.dirname), \
+            'Bad directory: %s' % location
+        return self.get_url(location), self.get_revision(location)
+
+    def normalize_url(self, url):
+        """
+        Normalize a URL for comparison by unquoting it and removing any
+        trailing slash.
+        """
+        return urllib_parse.unquote(url).rstrip('/')
+
+    def compare_urls(self, url1, url2):
+        """
+        Compare two repo URLs for identity, ignoring incidental differences.
+        """
+        return (self.normalize_url(url1) == self.normalize_url(url2))
+
+    def obtain(self, dest):
+        """
+        Called when installing or updating an editable package, takes the
+        source path of the checkout.
+        """
+        raise NotImplementedError
+
+    def switch(self, dest, url, rev_options):
+        """
+        Switch the repo at ``dest`` to point to ``url``.
+        """
+        raise NotImplementedError
+
+    def update(self, dest, rev_options):
+        """
+        Update an already-existing repo to the given ``rev_options``.
+        """
+        raise NotImplementedError
+
+    def check_version(self, dest, rev_options):
+        """
+        Return True if the version is identical to what exists and
+        doesn't need to be updated.
+        """
+        raise NotImplementedError
+
+    def check_destination(self, dest, url, rev_options, rev_display):
+        """
+        Prepare a location to receive a checkout/clone.
+
+        Return True if the location is ready for (and requires) a
+        checkout/clone, False otherwise.
+        """
+        checkout = True
+        prompt = False
+        if os.path.exists(dest):
+            checkout = False
+            if os.path.exists(os.path.join(dest, self.dirname)):
+                existing_url = self.get_url(dest)
+                if self.compare_urls(existing_url, url):
+                    logger.debug(
+                        '%s in %s exists, and has correct URL (%s)',
+                        self.repo_name.title(),
+                        display_path(dest),
+                        url,
+                    )
+                    if not self.check_version(dest, rev_options):
+                        logger.info(
+                            'Updating %s %s%s',
+                            display_path(dest),
+                            self.repo_name,
+                            rev_display,
+                        )
+                        self.update(dest, rev_options)
+                    else:
+                        logger.info(
+                            'Skipping because already up-to-date.')
+                else:
+                    logger.warning(
+                        '%s %s in %s exists with URL %s',
+                        self.name,
+                        self.repo_name,
+                        display_path(dest),
+                        existing_url,
+                    )
+                    prompt = ('(s)witch, (i)gnore, (w)ipe, (b)ackup ',
+                              ('s', 'i', 'w', 'b'))
+            else:
+                logger.warning(
+                    'Directory %s already exists, and is not a %s %s.',
+                    dest,
+                    self.name,
+                    self.repo_name,
+                )
+                prompt = ('(i)gnore, (w)ipe, (b)ackup ', ('i', 'w', 'b'))
+        if prompt:
+            logger.warning(
+                'The plan is to install the %s repository %s',
+                self.name,
+                url,
+            )
+            response = ask_path_exists('What to do?  %s' % prompt[0],
+                                       prompt[1])
+
+            if response == 's':
+                logger.info(
+                    'Switching %s %s to %s%s',
+                    self.repo_name,
+                    display_path(dest),
+                    url,
+                    rev_display,
+                )
+                self.switch(dest, url, rev_options)
+            elif response == 'i':
+                # do nothing
+                pass
+            elif response == 'w':
+                logger.warning('Deleting %s', display_path(dest))
+                rmtree(dest)
+                checkout = True
+            elif response == 'b':
+                dest_dir = backup_dir(dest)
+                logger.warning(
+                    'Backing up %s to %s', display_path(dest), dest_dir,
+                )
+                shutil.move(dest, dest_dir)
+                checkout = True
+            elif response == 'a':
+                sys.exit(-1)
+        return checkout
+
+    def unpack(self, location):
+        """
+        Clean up the current location and download the repository at the url
+        (including vcs info) into location
+        """
+        if os.path.exists(location):
+            rmtree(location)
+        self.obtain(location)
+
+    def get_src_requirement(self, dist, location):
+        """
+        Return a string representing the requirement needed to
+        redownload the files currently present in location, something
+        like:
+          {repository_url}@{revision}#egg={project_name}-{version_identifier}
+        """
+        raise NotImplementedError
+
+    def get_url(self, location):
+        """
+        Return the url used at location
+        Used in get_info or check_destination
+        """
+        raise NotImplementedError
+
+    def get_revision(self, location):
+        """
+        Return the current revision of the files at location
+        Used in get_info
+        """
+        raise NotImplementedError
+
+    def run_command(self, cmd, show_stdout=True, cwd=None,
+                    on_returncode='raise',
+                    command_desc=None,
+                    extra_environ=None, spinner=None):
+        """
+        Run a VCS subcommand
+        This is simply a wrapper around call_subprocess that adds the VCS
+        command name, and checks that the VCS is available
+        """
+        cmd = [self.name] + cmd
+        try:
+            return call_subprocess(cmd, show_stdout, cwd,
+                                   on_returncode,
+                                   command_desc, extra_environ,
+                                   spinner)
+        except OSError as e:
+            # errno.ENOENT = no such file or directory
+            # In other words, the VCS executable isn't available
+            if e.errno == errno.ENOENT:
+                raise BadCommand('Cannot find command %r' % self.name)
+            else:
+                raise  # re-raise exception if a different error occurred
+
+    @classmethod
+    def controls_location(cls, location):
+        """
+        Check if a location is controlled by the vcs.
+        It is meant to be overridden to implement smarter detection
+        mechanisms for specific vcs.
+        """
+        logger.debug('Checking in %s for %s (%s)...',
+                     location, cls.dirname, cls.name)
+        path = os.path.join(location, cls.dirname)
+        return os.path.exists(path)
+
+
+def get_src_requirement(dist, location):
+    version_control = vcs.get_backend_from_location(location)
+    if version_control:
+        try:
+            return version_control().get_src_requirement(dist,
+                                                         location)
+        except BadCommand:
+            logger.warning(
+                'cannot determine version of editable source in %s '
+                '(%s command not found in path)',
+                location,
+                version_control.name,
+            )
+            return dist.as_requirement()
+    logger.warning(
+        'cannot determine version of editable source in %s (is not SVN '
+        'checkout, Git clone, Mercurial clone or Bazaar branch)',
+        location,
+    )
+    return dist.as_requirement()
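+
+
+# A hedged usage sketch (assuming the normal import path, i.e.
+# ``from pip.vcs import vcs``, so the concrete backends have registered
+# themselves; the URL is illustrative):
+#
+#     backend_cls = vcs.get_backend('git')
+#     repo = backend_cls('git+https://example.com/proj.git@v1.0#egg=proj')
+#     url, rev = repo.get_url_rev()
+#     # ('https://example.com/proj.git', 'v1.0')
+#
+# get_backend_from_location('/path/to/checkout') instead inspects the
+# directory for a marker such as .git, .hg or .bzr.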
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/bazaar.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/bazaar.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f095841d361f43925af6d766bf9ec3a86a4a22c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/bazaar.py
@@ -0,0 +1,116 @@
+from __future__ import absolute_import
+
+import logging
+import os
+import tempfile
+
+# TODO: Get this into six.moves.urllib.parse
+try:
+    from urllib import parse as urllib_parse
+except ImportError:
+    import urlparse as urllib_parse
+
+from pip.utils import rmtree, display_path
+from pip.vcs import vcs, VersionControl
+from pip.download import path_to_url
+
+
+logger = logging.getLogger(__name__)
+
+
+class Bazaar(VersionControl):
+    name = 'bzr'
+    dirname = '.bzr'
+    repo_name = 'branch'
+    schemes = (
+        'bzr', 'bzr+http', 'bzr+https', 'bzr+ssh', 'bzr+sftp', 'bzr+ftp',
+        'bzr+lp',
+    )
+
+    def __init__(self, url=None, *args, **kwargs):
+        super(Bazaar, self).__init__(url, *args, **kwargs)
+        # Python >= 2.7.4 and >= 3.3 no longer have uses_fragment or non_hierarchical
+        # Register lp but do not expose as a scheme to support bzr+lp.
+        if getattr(urllib_parse, 'uses_fragment', None):
+            urllib_parse.uses_fragment.extend(['lp'])
+            urllib_parse.non_hierarchical.extend(['lp'])
+
+    def export(self, location):
+        """
+        Export the Bazaar repository at the url to the destination location
+        """
+        temp_dir = tempfile.mkdtemp('-export', 'pip-')
+        self.unpack(temp_dir)
+        if os.path.exists(location):
+            # Remove the location to make sure Bazaar can export it correctly
+            rmtree(location)
+        try:
+            self.run_command(['export', location], cwd=temp_dir,
+                             show_stdout=False)
+        finally:
+            rmtree(temp_dir)
+
+    def switch(self, dest, url, rev_options):
+        self.run_command(['switch', url], cwd=dest)
+
+    def update(self, dest, rev_options):
+        self.run_command(['pull', '-q'] + rev_options, cwd=dest)
+
+    def obtain(self, dest):
+        url, rev = self.get_url_rev()
+        if rev:
+            rev_options = ['-r', rev]
+            rev_display = ' (to revision %s)' % rev
+        else:
+            rev_options = []
+            rev_display = ''
+        if self.check_destination(dest, url, rev_options, rev_display):
+            logger.info(
+                'Checking out %s%s to %s',
+                url,
+                rev_display,
+                display_path(dest),
+            )
+            self.run_command(['branch', '-q'] + rev_options + [url, dest])
+
+    def get_url_rev(self):
+        # hotfix the URL scheme after removing bzr+ from bzr+ssh://; re-add it
+        url, rev = super(Bazaar, self).get_url_rev()
+        if url.startswith('ssh://'):
+            url = 'bzr+' + url
+        return url, rev
+
+    def get_url(self, location):
+        urls = self.run_command(['info'], show_stdout=False, cwd=location)
+        for line in urls.splitlines():
+            line = line.strip()
+            for x in ('checkout of branch: ',
+                      'parent branch: '):
+                if line.startswith(x):
+                    repo = line.split(x)[1]
+                    if self._is_local_repository(repo):
+                        return path_to_url(repo)
+                    return repo
+        return None
+
+    def get_revision(self, location):
+        revision = self.run_command(
+            ['revno'], show_stdout=False, cwd=location)
+        return revision.splitlines()[-1]
+
+    def get_src_requirement(self, dist, location):
+        repo = self.get_url(location)
+        if not repo:
+            return None
+        if not repo.lower().startswith('bzr:'):
+            repo = 'bzr+' + repo
+        egg_project_name = dist.egg_name().split('-', 1)[0]
+        current_rev = self.get_revision(location)
+        return '%s@%s#egg=%s' % (repo, current_rev, egg_project_name)
+
+    def check_version(self, dest, rev_options):
+        """Always assume the versions don't match"""
+        return False
+
+
+vcs.register(Bazaar)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/git.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/git.py
new file mode 100644
index 0000000000000000000000000000000000000000..2187dd84629b4cacc585ca019774764b81d30e4a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/git.py
@@ -0,0 +1,300 @@
+from __future__ import absolute_import
+
+import logging
+import tempfile
+import os.path
+
+from pip.compat import samefile
+from pip.exceptions import BadCommand
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+from pip._vendor.six.moves.urllib import request as urllib_request
+from pip._vendor.packaging.version import parse as parse_version
+
+from pip.utils import display_path, rmtree
+from pip.vcs import vcs, VersionControl
+
+
+urlsplit = urllib_parse.urlsplit
+urlunsplit = urllib_parse.urlunsplit
+
+
+logger = logging.getLogger(__name__)
+
+
+class Git(VersionControl):
+    name = 'git'
+    dirname = '.git'
+    repo_name = 'clone'
+    schemes = (
+        'git', 'git+http', 'git+https', 'git+ssh', 'git+git', 'git+file',
+    )
+
+    def __init__(self, url=None, *args, **kwargs):
+
+        # Works around an apparent Git bug
+        # (see http://article.gmane.org/gmane.comp.version-control.git/146500)
+        if url:
+            scheme, netloc, path, query, fragment = urlsplit(url)
+            if scheme.endswith('file'):
+                initial_slashes = path[:-len(path.lstrip('/'))]
+                newpath = (
+                    initial_slashes +
+                    urllib_request.url2pathname(path)
+                    .replace('\\', '/').lstrip('/')
+                )
+                url = urlunsplit((scheme, netloc, newpath, query, fragment))
+                after_plus = scheme.find('+') + 1
+                url = scheme[:after_plus] + urlunsplit(
+                    (scheme[after_plus:], netloc, newpath, query, fragment),
+                )
+
+        super(Git, self).__init__(url, *args, **kwargs)
+
+    def get_git_version(self):
+        VERSION_PFX = 'git version '
+        version = self.run_command(['version'], show_stdout=False)
+        if version.startswith(VERSION_PFX):
+            version = version[len(VERSION_PFX):]
+        else:
+            version = ''
+        # get the first 3 positions of the git version because
+        # on windows it is x.y.z.windows.t, and this parses as
+        # LegacyVersion, which is always smaller than a Version.
+        version = '.'.join(version.split('.')[:3])
+        return parse_version(version)
+
+    def export(self, location):
+        """Export the Git repository at the url to the destination location"""
+        temp_dir = tempfile.mkdtemp('-export', 'pip-')
+        self.unpack(temp_dir)
+        try:
+            if not location.endswith('/'):
+                location = location + '/'
+            self.run_command(
+                ['checkout-index', '-a', '-f', '--prefix', location],
+                show_stdout=False, cwd=temp_dir)
+        finally:
+            rmtree(temp_dir)
+
+    def check_rev_options(self, rev, dest, rev_options):
+        """Check the revision options before checkout to compensate that tags
+        and branches may need origin/ as a prefix.
+        Returns the SHA1 of the branch or tag if found.
+        """
+        revisions = self.get_short_refs(dest)
+
+        origin_rev = 'origin/%s' % rev
+        if origin_rev in revisions:
+            # remote branch
+            return [revisions[origin_rev]]
+        elif rev in revisions:
+            # a local tag or branch name
+            return [revisions[rev]]
+        else:
+            logger.warning(
+                "Could not find a tag or branch '%s', assuming commit.", rev,
+            )
+            return rev_options
+
+    def check_version(self, dest, rev_options):
+        """
+        Compare the current sha to the ref. ref may be a branch or tag name,
+        but current rev will always point to a sha. This means that a branch
+        or tag will never compare as True. So this ultimately only matches
+        against exact shas.
+        """
+        return self.get_revision(dest).startswith(rev_options[0])
+
+    def switch(self, dest, url, rev_options):
+        self.run_command(['config', 'remote.origin.url', url], cwd=dest)
+        self.run_command(['checkout', '-q'] + rev_options, cwd=dest)
+
+        self.update_submodules(dest)
+
+    def update(self, dest, rev_options):
+        # First fetch changes from the default remote
+        if self.get_git_version() >= parse_version('1.9.0'):
+            # fetch tags in addition to everything else
+            self.run_command(['fetch', '-q', '--tags'], cwd=dest)
+        else:
+            self.run_command(['fetch', '-q'], cwd=dest)
+        # Then reset to wanted revision (maybe even origin/master)
+        if rev_options:
+            rev_options = self.check_rev_options(
+                rev_options[0], dest, rev_options,
+            )
+        self.run_command(['reset', '--hard', '-q'] + rev_options, cwd=dest)
+        #: update submodules
+        self.update_submodules(dest)
+
+    def obtain(self, dest):
+        url, rev = self.get_url_rev()
+        if rev:
+            rev_options = [rev]
+            rev_display = ' (to %s)' % rev
+        else:
+            rev_options = ['origin/master']
+            rev_display = ''
+        if self.check_destination(dest, url, rev_options, rev_display):
+            logger.info(
+                'Cloning %s%s to %s', url, rev_display, display_path(dest),
+            )
+            self.run_command(['clone', '-q', url, dest])
+
+            if rev:
+                rev_options = self.check_rev_options(rev, dest, rev_options)
+                # Only do a checkout if rev_options differs from HEAD
+                if not self.check_version(dest, rev_options):
+                    self.run_command(
+                        ['checkout', '-q'] + rev_options,
+                        cwd=dest,
+                    )
+            #: repo may contain submodules
+            self.update_submodules(dest)
+
+    def get_url(self, location):
+        """Return URL of the first remote encountered."""
+        remotes = self.run_command(
+            ['config', '--get-regexp', r'remote\..*\.url'],
+            show_stdout=False, cwd=location)
+        remotes = remotes.splitlines()
+        found_remote = remotes[0]
+        for remote in remotes:
+            if remote.startswith('remote.origin.url '):
+                found_remote = remote
+                break
+        url = found_remote.split(' ')[1]
+        return url.strip()
+
+    def get_revision(self, location):
+        current_rev = self.run_command(
+            ['rev-parse', 'HEAD'], show_stdout=False, cwd=location)
+        return current_rev.strip()
+
+    def get_full_refs(self, location):
+        """Yields tuples of (commit, ref) for branches and tags"""
+        output = self.run_command(['show-ref'],
+                                  show_stdout=False, cwd=location)
+        for line in output.strip().splitlines():
+            commit, ref = line.split(' ', 1)
+            yield commit.strip(), ref.strip()
+
+    def is_ref_remote(self, ref):
+        return ref.startswith('refs/remotes/')
+
+    def is_ref_branch(self, ref):
+        return ref.startswith('refs/heads/')
+
+    def is_ref_tag(self, ref):
+        return ref.startswith('refs/tags/')
+
+    def is_ref_commit(self, ref):
+        """A ref is a commit sha if it is not anything else"""
+        return not any((
+            self.is_ref_remote(ref),
+            self.is_ref_branch(ref),
+            self.is_ref_tag(ref),
+        ))
+
+    # Should deprecate `get_refs` since it's ambiguous
+    def get_refs(self, location):
+        return self.get_short_refs(location)
+
+    def get_short_refs(self, location):
+        """Return map of named refs (branches or tags) to commit hashes."""
+        rv = {}
+        for commit, ref in self.get_full_refs(location):
+            ref_name = None
+            if self.is_ref_remote(ref):
+                ref_name = ref[len('refs/remotes/'):]
+            elif self.is_ref_branch(ref):
+                ref_name = ref[len('refs/heads/'):]
+            elif self.is_ref_tag(ref):
+                ref_name = ref[len('refs/tags/'):]
+            if ref_name is not None:
+                rv[ref_name] = commit
+        return rv
+
+    def _get_subdirectory(self, location):
+        """Return the relative path of setup.py to the git repo root."""
+        # find the repo root
+        git_dir = self.run_command(['rev-parse', '--git-dir'],
+                                   show_stdout=False, cwd=location).strip()
+        if not os.path.isabs(git_dir):
+            git_dir = os.path.join(location, git_dir)
+        root_dir = os.path.join(git_dir, '..')
+        # find setup.py
+        orig_location = location
+        while not os.path.exists(os.path.join(location, 'setup.py')):
+            last_location = location
+            location = os.path.dirname(location)
+            if location == last_location:
+                # We've traversed up to the root of the filesystem without
+                # finding setup.py
+                logger.warning(
+                    "Could not find setup.py for directory %s (tried all "
+                    "parent directories)",
+                    orig_location,
+                )
+                return None
+        # relative path of setup.py to repo root
+        if samefile(root_dir, location):
+            return None
+        return os.path.relpath(location, root_dir)
+
+    def get_src_requirement(self, dist, location):
+        repo = self.get_url(location)
+        if not repo:
+            return None
+        if not repo.lower().startswith('git:'):
+            repo = 'git+' + repo
+        egg_project_name = dist.egg_name().split('-', 1)[0]
+        current_rev = self.get_revision(location)
+        req = '%s@%s#egg=%s' % (repo, current_rev, egg_project_name)
+        subdirectory = self._get_subdirectory(location)
+        if subdirectory:
+            req += '&subdirectory=' + subdirectory
+        return req
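+    # Hedged example (hypothetical values): for a clone of
+    # https://github.com/user/project with setup.py in a 'pkg/' subdirectory,
+    # the requirement built by get_src_requirement() would look roughly like
+    #     git+https://github.com/user/project@<commit-sha>#egg=project&subdirectory=pkg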
+
+    def get_url_rev(self):
+        """
+        Prefixes stub URLs like 'user@hostname:user/repo.git' with 'ssh://'.
+        That's required because although they use SSH they sometimes don't
+        work with an ssh:// scheme (e.g. GitHub). But we need a scheme for
+        parsing. Hence we remove it again afterwards and return it as a stub.
+        """
+        if '://' not in self.url:
+            assert 'file:' not in self.url
+            self.url = self.url.replace('git+', 'git+ssh://')
+            url, rev = super(Git, self).get_url_rev()
+            url = url.replace('ssh://', '')
+        else:
+            url, rev = super(Git, self).get_url_rev()
+
+        return url, rev
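+    # A hedged sketch of the round trip above (hypothetical URL):
+    # 'git+git@github.com:user/repo.git' contains no '://', so it is parsed
+    # as 'git+ssh://git@github.com:user/repo.git' and the temporary scheme is
+    # stripped again before returning, giving roughly
+    #     ('git@github.com:user/repo.git', None)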
+
+    def update_submodules(self, location):
+        if not os.path.exists(os.path.join(location, '.gitmodules')):
+            return
+        self.run_command(
+            ['submodule', 'update', '--init', '--recursive', '-q'],
+            cwd=location,
+        )
+
+    @classmethod
+    def controls_location(cls, location):
+        if super(Git, cls).controls_location(location):
+            return True
+        try:
+            r = cls().run_command(['rev-parse'],
+                                  cwd=location,
+                                  show_stdout=False,
+                                  on_returncode='ignore')
+            return not r
+        except BadCommand:
+            logger.debug("could not determine if %s is under git control "
+                         "because git is not available", location)
+            return False
+
+
+vcs.register(Git)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/mercurial.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/mercurial.py
new file mode 100644
index 0000000000000000000000000000000000000000..1aa83b914523839c133adf1649a913e8e94b14ef
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/mercurial.py
@@ -0,0 +1,103 @@
+from __future__ import absolute_import
+
+import logging
+import os
+import tempfile
+
+from pip.utils import display_path, rmtree
+from pip.vcs import vcs, VersionControl
+from pip.download import path_to_url
+from pip._vendor.six.moves import configparser
+
+
+logger = logging.getLogger(__name__)
+
+
+class Mercurial(VersionControl):
+    name = 'hg'
+    dirname = '.hg'
+    repo_name = 'clone'
+    schemes = ('hg', 'hg+http', 'hg+https', 'hg+ssh', 'hg+static-http')
+
+    def export(self, location):
+        """Export the Hg repository at the url to the destination location"""
+        temp_dir = tempfile.mkdtemp('-export', 'pip-')
+        self.unpack(temp_dir)
+        try:
+            self.run_command(
+                ['archive', location], show_stdout=False, cwd=temp_dir)
+        finally:
+            rmtree(temp_dir)
+
+    def switch(self, dest, url, rev_options):
+        repo_config = os.path.join(dest, self.dirname, 'hgrc')
+        config = configparser.SafeConfigParser()
+        try:
+            config.read(repo_config)
+            config.set('paths', 'default', url)
+            with open(repo_config, 'w') as config_file:
+                config.write(config_file)
+        except (OSError, configparser.NoSectionError) as exc:
+            logger.warning(
+                'Could not switch Mercurial repository to %s: %s', url, exc,
+            )
+        else:
+            self.run_command(['update', '-q'] + rev_options, cwd=dest)
+
+    def update(self, dest, rev_options):
+        self.run_command(['pull', '-q'], cwd=dest)
+        self.run_command(['update', '-q'] + rev_options, cwd=dest)
+
+    def obtain(self, dest):
+        url, rev = self.get_url_rev()
+        if rev:
+            rev_options = [rev]
+            rev_display = ' (to revision %s)' % rev
+        else:
+            rev_options = []
+            rev_display = ''
+        if self.check_destination(dest, url, rev_options, rev_display):
+            logger.info(
+                'Cloning hg %s%s to %s',
+                url,
+                rev_display,
+                display_path(dest),
+            )
+            self.run_command(['clone', '--noupdate', '-q', url, dest])
+            self.run_command(['update', '-q'] + rev_options, cwd=dest)
+
+    def get_url(self, location):
+        url = self.run_command(
+            ['showconfig', 'paths.default'],
+            show_stdout=False, cwd=location).strip()
+        if self._is_local_repository(url):
+            url = path_to_url(url)
+        return url.strip()
+
+    def get_revision(self, location):
+        current_revision = self.run_command(
+            ['parents', '--template={rev}'],
+            show_stdout=False, cwd=location).strip()
+        return current_revision
+
+    def get_revision_hash(self, location):
+        current_rev_hash = self.run_command(
+            ['parents', '--template={node}'],
+            show_stdout=False, cwd=location).strip()
+        return current_rev_hash
+
+    def get_src_requirement(self, dist, location):
+        repo = self.get_url(location)
+        if not repo:
+            return None
+        if not repo.lower().startswith('hg:'):
+            repo = 'hg+' + repo
+        egg_project_name = dist.egg_name().split('-', 1)[0]
+        current_rev_hash = self.get_revision_hash(location)
+        return '%s@%s#egg=%s' % (repo, current_rev_hash, egg_project_name)
+
+    def check_version(self, dest, rev_options):
+        """Always assume the versions don't match"""
+        return False
+
+vcs.register(Mercurial)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/subversion.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/subversion.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b2315667af539f0fc7dd87a3ce38699632f5d2a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/vcs/subversion.py
@@ -0,0 +1,269 @@
+from __future__ import absolute_import
+
+import logging
+import os
+import re
+
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+
+from pip.index import Link
+from pip.utils import rmtree, display_path
+from pip.utils.logging import indent_log
+from pip.vcs import vcs, VersionControl
+
+_svn_xml_url_re = re.compile(r'url="([^"]+)"')
+_svn_rev_re = re.compile(r'committed-rev="(\d+)"')
+_svn_url_re = re.compile(r'URL: (.+)')
+_svn_revision_re = re.compile(r'Revision: (.+)')
+_svn_info_xml_rev_re = re.compile(r'\s*revision="(\d+)"')
+_svn_info_xml_url_re = re.compile(r'<url>(.*)</url>')
+
+
+logger = logging.getLogger(__name__)
+
+
+class Subversion(VersionControl):
+    name = 'svn'
+    dirname = '.svn'
+    repo_name = 'checkout'
+    schemes = ('svn', 'svn+ssh', 'svn+http', 'svn+https', 'svn+svn')
+
+    def get_info(self, location):
+        """Returns (url, revision), where both are strings"""
+        assert not location.rstrip('/').endswith(self.dirname), \
+            'Bad directory: %s' % location
+        output = self.run_command(
+            ['info', location],
+            show_stdout=False,
+            extra_environ={'LANG': 'C'},
+        )
+        match = _svn_url_re.search(output)
+        if not match:
+            logger.warning(
+                'Cannot determine URL of svn checkout %s',
+                display_path(location),
+            )
+            logger.debug('Output that cannot be parsed: \n%s', output)
+            return None, None
+        url = match.group(1).strip()
+        match = _svn_revision_re.search(output)
+        if not match:
+            logger.warning(
+                'Cannot determine revision of svn checkout %s',
+                display_path(location),
+            )
+            logger.debug('Output that cannot be parsed: \n%s', output)
+            return url, None
+        return url, match.group(1)
+
+    def export(self, location):
+        """Export the svn repository at the url to the destination location"""
+        url, rev = self.get_url_rev()
+        rev_options = get_rev_options(url, rev)
+        url = self.remove_auth_from_url(url)
+        logger.info('Exporting svn repository %s to %s', url, location)
+        with indent_log():
+            if os.path.exists(location):
+                # Subversion doesn't like to check out over an existing
+                # directory; --force fixes this, but it was only added in
+                # svn 1.5
+                rmtree(location)
+            self.run_command(
+                ['export'] + rev_options + [url, location],
+                show_stdout=False)
+
+    def switch(self, dest, url, rev_options):
+        self.run_command(['switch'] + rev_options + [url, dest])
+
+    def update(self, dest, rev_options):
+        self.run_command(['update'] + rev_options + [dest])
+
+    def obtain(self, dest):
+        url, rev = self.get_url_rev()
+        rev_options = get_rev_options(url, rev)
+        url = self.remove_auth_from_url(url)
+        if rev:
+            rev_display = ' (to revision %s)' % rev
+        else:
+            rev_display = ''
+        if self.check_destination(dest, url, rev_options, rev_display):
+            logger.info(
+                'Checking out %s%s to %s',
+                url,
+                rev_display,
+                display_path(dest),
+            )
+            self.run_command(['checkout', '-q'] + rev_options + [url, dest])
+
+    def get_location(self, dist, dependency_links):
+        for url in dependency_links:
+            egg_fragment = Link(url).egg_fragment
+            if not egg_fragment:
+                continue
+            if '-' in egg_fragment:
+                # FIXME: will this work when a package has - in the name?
+                key = '-'.join(egg_fragment.split('-')[:-1]).lower()
+            else:
+                key = egg_fragment
+            if key == dist.key:
+                return url.split('#', 1)[0]
+        return None
+
+    def get_revision(self, location):
+        """
+        Return the maximum revision for all files under a given location
+        """
+        # Note: taken from setuptools.command.egg_info
+        revision = 0
+
+        for base, dirs, files in os.walk(location):
+            if self.dirname not in dirs:
+                dirs[:] = []
+                continue    # no sense walking uncontrolled subdirs
+            dirs.remove(self.dirname)
+            entries_fn = os.path.join(base, self.dirname, 'entries')
+            if not os.path.exists(entries_fn):
+                # FIXME: should we warn?
+                continue
+
+            dirurl, localrev = self._get_svn_url_rev(base)
+
+            if base == location:
+                base_url = dirurl + '/'   # save the root url
+            elif not dirurl or not dirurl.startswith(base_url):
+                dirs[:] = []
+                continue    # not part of the same svn tree, skip it
+            revision = max(revision, localrev)
+        return revision
+
+    def get_url_rev(self):
+        # hotfix the URL scheme: 'svn+' is stripped from 'svn+ssh://' URLs
+        # during parsing, so re-add it here
+        url, rev = super(Subversion, self).get_url_rev()
+        if url.startswith('ssh://'):
+            url = 'svn+' + url
+        return url, rev
+
+    def get_url(self, location):
+        # In cases where the source is in a subdirectory, not alongside
+        # setup.py, we have to look up from the location until we find a
+        # real setup.py
+        orig_location = location
+        while not os.path.exists(os.path.join(location, 'setup.py')):
+            last_location = location
+            location = os.path.dirname(location)
+            if location == last_location:
+                # We've traversed up to the root of the filesystem without
+                # finding setup.py
+                logger.warning(
+                    "Could not find setup.py for directory %s (tried all "
+                    "parent directories)",
+                    orig_location,
+                )
+                return None
+
+        return self._get_svn_url_rev(location)[0]
+
+    def _get_svn_url_rev(self, location):
+        from pip.exceptions import InstallationError
+
+        entries_path = os.path.join(location, self.dirname, 'entries')
+        if os.path.exists(entries_path):
+            with open(entries_path) as f:
+                data = f.read()
+        else:  # subversion >= 1.7 does not have the 'entries' file
+            data = ''
+
+        if (data.startswith('8') or
+                data.startswith('9') or
+                data.startswith('10')):
+            data = list(map(str.splitlines, data.split('\n\x0c\n')))
+            del data[0][0]  # get rid of the '8'
+            url = data[0][3]
+            revs = [int(d[9]) for d in data if len(d) > 9 and d[9]] + [0]
+        elif data.startswith('<?xml'):
+            match = _svn_xml_url_re.search(data)
+            if not match:
+                raise ValueError('Badly formatted data: %r' % data)
+            url = match.group(1)    # get repository URL
+            revs = [int(m.group(1)) for m in _svn_rev_re.finditer(data)] + [0]
+        else:
+            try:
+                # subversion >= 1.7
+                xml = self.run_command(
+                    ['info', '--xml', location],
+                    show_stdout=False,
+                )
+                url = _svn_info_xml_url_re.search(xml).group(1)
+                revs = [
+                    int(m.group(1)) for m in _svn_info_xml_rev_re.finditer(xml)
+                ]
+            except InstallationError:
+                url, revs = None, []
+
+        if revs:
+            rev = max(revs)
+        else:
+            rev = 0
+
+        return url, rev
+
+    def get_src_requirement(self, dist, location):
+        repo = self.get_url(location)
+        if repo is None:
+            return None
+        # FIXME: why not project name?
+        egg_project_name = dist.egg_name().split('-', 1)[0]
+        rev = self.get_revision(location)
+        return 'svn+%s@%s#egg=%s' % (repo, rev, egg_project_name)
+
+    def check_version(self, dest, rev_options):
+        """Always assume the versions don't match"""
+        return False
+
+    @staticmethod
+    def remove_auth_from_url(url):
+        # Return a copy of url with 'username:password@' removed.
+        # username/pass params are passed to subversion through flags
+        # and are not recognized in the url.
+
+        # parsed url
+        purl = urllib_parse.urlsplit(url)
+        stripped_netloc = purl.netloc.split('@')[-1]
+
+        # stripped url
+        url_pieces = (
+            purl.scheme, stripped_netloc, purl.path, purl.query, purl.fragment
+        )
+        surl = urllib_parse.urlunsplit(url_pieces)
+        return surl
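+    # Hedged example (hypothetical URL): remove_auth_from_url() turns
+    #     'svn+https://user:secret@svn.example.com/repo/trunk'
+    # into
+    #     'svn+https://svn.example.com/repo/trunk'
+    # leaving the credentials to be passed via --username/--password flags.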
+
+
+def get_rev_options(url, rev):
+    if rev:
+        rev_options = ['-r', rev]
+    else:
+        rev_options = []
+
+    r = urllib_parse.urlsplit(url)
+    if hasattr(r, 'username'):
+        # >= Python-2.5
+        username, password = r.username, r.password
+    else:
+        netloc = r[1]
+        if '@' in netloc:
+            auth = netloc.split('@')[0]
+            if ':' in auth:
+                username, password = auth.split(':', 1)
+            else:
+                username, password = auth, None
+        else:
+            username, password = None, None
+
+    if username:
+        rev_options += ['--username', username]
+    if password:
+        rev_options += ['--password', password]
+    return rev_options
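+# Hedged example (hypothetical values): for
+#     get_rev_options('svn+https://user:secret@svn.example.com/repo', '1234')
+# the options built above come out roughly as
+#     ['-r', '1234', '--username', 'user', '--password', 'secret']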
+
+
+vcs.register(Subversion)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/wheel.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/wheel.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ac9dffed6a13d59c439941552d06c537b3aff51
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pip/wheel.py
@@ -0,0 +1,853 @@
+"""
+Support for installing and building the "wheel" binary package format.
+"""
+from __future__ import absolute_import
+
+import compileall
+import csv
+import errno
+import functools
+import hashlib
+import logging
+import os
+import os.path
+import re
+import shutil
+import stat
+import sys
+import tempfile
+import warnings
+
+from base64 import urlsafe_b64encode
+from email.parser import Parser
+
+from pip._vendor.six import StringIO
+
+import pip
+from pip.compat import expanduser
+from pip.download import path_to_url, unpack_url
+from pip.exceptions import (
+    InstallationError, InvalidWheelFilename, UnsupportedWheel)
+from pip.locations import distutils_scheme, PIP_DELETE_MARKER_FILENAME
+from pip import pep425tags
+from pip.utils import (
+    call_subprocess, ensure_dir, captured_stdout, rmtree, read_chunks,
+)
+from pip.utils.ui import open_spinner
+from pip.utils.logging import indent_log
+from pip.utils.setuptools_build import SETUPTOOLS_SHIM
+from pip._vendor.distlib.scripts import ScriptMaker
+from pip._vendor import pkg_resources
+from pip._vendor.packaging.utils import canonicalize_name
+from pip._vendor.six.moves import configparser
+
+
+wheel_ext = '.whl'
+
+VERSION_COMPATIBLE = (1, 0)
+
+
+logger = logging.getLogger(__name__)
+
+
+class WheelCache(object):
+    """A cache of wheels for future installs."""
+
+    def __init__(self, cache_dir, format_control):
+        """Create a wheel cache.
+
+        :param cache_dir: The root of the cache.
+        :param format_control: A pip.index.FormatControl object to limit
+            binaries being read from the cache.
+        """
+        self._cache_dir = expanduser(cache_dir) if cache_dir else None
+        self._format_control = format_control
+
+    def cached_wheel(self, link, package_name):
+        return cached_wheel(
+            self._cache_dir, link, self._format_control, package_name)
+
+
+def _cache_for_link(cache_dir, link):
+    """
+    Return a directory to store cached wheels in for link.
+
+    Because there are M wheels for any one sdist, we provide a directory
+    to cache them in, and then consult that directory when looking up
+    cache hits.
+
+    We only insert things into the cache if they have plausible version
+    numbers, so that we don't contaminate the cache with things that were not
+    unique. E.g. ./package might have dozens of installs done for it and build
+    a version of 0.0...and if we built and cached a wheel, we'd end up using
+    the same wheel even if the source has been edited.
+
+    :param cache_dir: The cache_dir being used by pip.
+    :param link: The link of the sdist for which this will cache wheels.
+    """
+
+    # We want to generate a URL to use as our cache key; we don't want to just
+    # re-use the URL because it might have other items in the fragment and we
+    # don't care about those.
+    key_parts = [link.url_without_fragment]
+    if link.hash_name is not None and link.hash is not None:
+        key_parts.append("=".join([link.hash_name, link.hash]))
+    key_url = "#".join(key_parts)
+
+    # Encode our key URL with sha224; we'll use this because it has similar
+    # security properties to sha256, but with a shorter total output (and thus
+    # less secure). However, the difference doesn't matter much for our use
+    # case here.
+    hashed = hashlib.sha224(key_url.encode()).hexdigest()
+
+    # We want to nest the directories to avoid having a ton of top-level
+    # directories, where we might run out of subdirectories on some
+    # filesystems.
+    parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]
+
+    # Inside of the base location for cached wheels, expand our parts and join
+    # them all together.
+    return os.path.join(cache_dir, "wheels", *parts)
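+# Hedged sketch of the resulting layout: the sha224 hex digest of the key URL
+# is split into nested directories, so cached wheels for one sdist end up
+# under something like
+#     <cache_dir>/wheels/ab/cd/ef/<remaining-50-hex-chars>/
+# where 'ab', 'cd' and 'ef' stand in for the first six hex characters of the
+# digest.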
+
+
+def cached_wheel(cache_dir, link, format_control, package_name):
+    if not cache_dir:
+        return link
+    if not link:
+        return link
+    if link.is_wheel:
+        return link
+    if not link.is_artifact:
+        return link
+    if not package_name:
+        return link
+    canonical_name = canonicalize_name(package_name)
+    formats = pip.index.fmt_ctl_formats(format_control, canonical_name)
+    if "binary" not in formats:
+        return link
+    root = _cache_for_link(cache_dir, link)
+    try:
+        wheel_names = os.listdir(root)
+    except OSError as e:
+        if e.errno in (errno.ENOENT, errno.ENOTDIR):
+            return link
+        raise
+    candidates = []
+    for wheel_name in wheel_names:
+        try:
+            wheel = Wheel(wheel_name)
+        except InvalidWheelFilename:
+            continue
+        if not wheel.supported():
+            # Built for a different python/arch/etc
+            continue
+        candidates.append((wheel.support_index_min(), wheel_name))
+    if not candidates:
+        return link
+    candidates.sort()
+    path = os.path.join(root, candidates[0][1])
+    return pip.index.Link(path_to_url(path))
+
+
+def rehash(path, algo='sha256', blocksize=1 << 20):
+    """Return (hash, length) for path using hashlib.new(algo)"""
+    h = hashlib.new(algo)
+    length = 0
+    with open(path, 'rb') as f:
+        for block in read_chunks(f, size=blocksize):
+            length += len(block)
+            h.update(block)
+    digest = 'sha256=' + urlsafe_b64encode(
+        h.digest()
+    ).decode('latin1').rstrip('=')
+    return (digest, length)
+
+
+def open_for_csv(name, mode):
+    if sys.version_info[0] < 3:
+        nl = {}
+        bin = 'b'
+    else:
+        nl = {'newline': ''}
+        bin = ''
+    return open(name, mode + bin, **nl)
+
+
+def fix_script(path):
+    """Replace #!python with #!/path/to/python
+    Return True if file was changed."""
+    # XXX RECORD hashes will need to be updated
+    if os.path.isfile(path):
+        with open(path, 'rb') as script:
+            firstline = script.readline()
+            if not firstline.startswith(b'#!python'):
+                return False
+            exename = sys.executable.encode(sys.getfilesystemencoding())
+            firstline = b'#!' + exename + os.linesep.encode("ascii")
+            rest = script.read()
+        with open(path, 'wb') as script:
+            script.write(firstline)
+            script.write(rest)
+        return True
+
+dist_info_re = re.compile(r"""^(?P<namever>(?P<name>.+?)(-(?P<ver>\d.+?))?)
+                                \.dist-info$""", re.VERBOSE)
+
+
+def root_is_purelib(name, wheeldir):
+    """
+    Return True if the extracted wheel in wheeldir should go into purelib.
+    """
+    name_folded = name.replace("-", "_")
+    for item in os.listdir(wheeldir):
+        match = dist_info_re.match(item)
+        if match and match.group('name') == name_folded:
+            with open(os.path.join(wheeldir, item, 'WHEEL')) as wheel:
+                for line in wheel:
+                    line = line.lower().rstrip()
+                    if line == "root-is-purelib: true":
+                        return True
+    return False
+
+
+def get_entrypoints(filename):
+    if not os.path.exists(filename):
+        return {}, {}
+
+    # This is done because you can pass a string to entry_points wrappers which
+    # means that they may or may not be valid INI files. The attempt here is to
+    # strip leading and trailing whitespace in order to make them valid INI
+    # files.
+    with open(filename) as fp:
+        data = StringIO()
+        for line in fp:
+            data.write(line.strip())
+            data.write("\n")
+        data.seek(0)
+
+    cp = configparser.RawConfigParser()
+    cp.optionxform = lambda option: option
+    cp.readfp(data)
+
+    console = {}
+    gui = {}
+    if cp.has_section('console_scripts'):
+        console = dict(cp.items('console_scripts'))
+    if cp.has_section('gui_scripts'):
+        gui = dict(cp.items('gui_scripts'))
+    return console, gui
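+# Hedged example (hypothetical entry_points.txt): a file containing
+#     [console_scripts]
+#     mytool = mypkg.cli:main
+# would be parsed by get_entrypoints() into roughly
+#     ({'mytool': 'mypkg.cli:main'}, {})
+# with the second dict holding any [gui_scripts] entries.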
+
+
+def move_wheel_files(name, req, wheeldir, user=False, home=None, root=None,
+                     pycompile=True, scheme=None, isolated=False, prefix=None):
+    """Install a wheel"""
+
+    if not scheme:
+        scheme = distutils_scheme(
+            name, user=user, home=home, root=root, isolated=isolated,
+            prefix=prefix,
+        )
+
+    if root_is_purelib(name, wheeldir):
+        lib_dir = scheme['purelib']
+    else:
+        lib_dir = scheme['platlib']
+
+    info_dir = []
+    data_dirs = []
+    source = wheeldir.rstrip(os.path.sep) + os.path.sep
+
+    # Record details of the files moved
+    #   installed = files copied from the wheel to the destination
+    #   changed = files changed while installing (scripts #! line typically)
+    #   generated = files newly generated during the install (script wrappers)
+    installed = {}
+    changed = set()
+    generated = []
+
+    # Compile all of the pyc files that we're going to be installing
+    if pycompile:
+        with captured_stdout() as stdout:
+            with warnings.catch_warnings():
+                warnings.filterwarnings('ignore')
+                compileall.compile_dir(source, force=True, quiet=True)
+        logger.debug(stdout.getvalue())
+
+    def normpath(src, p):
+        return os.path.relpath(src, p).replace(os.path.sep, '/')
+
+    def record_installed(srcfile, destfile, modified=False):
+        """Map archive RECORD paths to installation RECORD paths."""
+        oldpath = normpath(srcfile, wheeldir)
+        newpath = normpath(destfile, lib_dir)
+        installed[oldpath] = newpath
+        if modified:
+            changed.add(destfile)
+
+    def clobber(source, dest, is_base, fixer=None, filter=None):
+        ensure_dir(dest)  # common for the 'include' path
+
+        for dir, subdirs, files in os.walk(source):
+            basedir = dir[len(source):].lstrip(os.path.sep)
+            destdir = os.path.join(dest, basedir)
+            if is_base and basedir.split(os.path.sep, 1)[0].endswith('.data'):
+                continue
+            for s in subdirs:
+                destsubdir = os.path.join(dest, basedir, s)
+                if is_base and basedir == '' and destsubdir.endswith('.data'):
+                    data_dirs.append(s)
+                    continue
+                elif (is_base and
+                        s.endswith('.dist-info') and
+                        canonicalize_name(s).startswith(
+                            canonicalize_name(req.name))):
+                    assert not info_dir, ('Multiple .dist-info directories: ' +
+                                          destsubdir + ', ' +
+                                          ', '.join(info_dir))
+                    info_dir.append(destsubdir)
+            for f in files:
+                # Skip unwanted files
+                if filter and filter(f):
+                    continue
+                srcfile = os.path.join(dir, f)
+                destfile = os.path.join(dest, basedir, f)
+                # Directory creation is deferred until after the file
+                # filtering above, to ensure we don't install empty dirs;
+                # empty dirs can't be uninstalled.
+                ensure_dir(destdir)
+
+                # We use copyfile (not move, copy, or copy2) to be extra sure
+                # that we are not moving directories over (copyfile fails for
+                # directories) as well as to ensure that we are not copying
+                # over any metadata because we want more control over what
+                # metadata we actually copy over.
+                shutil.copyfile(srcfile, destfile)
+
+                # Copy over the metadata for the file, currently this only
+                # includes the atime and mtime.
+                st = os.stat(srcfile)
+                if hasattr(os, "utime"):
+                    os.utime(destfile, (st.st_atime, st.st_mtime))
+
+                # If our file is executable, then make our destination file
+                # executable.
+                if os.access(srcfile, os.X_OK):
+                    st = os.stat(srcfile)
+                    permissions = (
+                        st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+                    )
+                    os.chmod(destfile, permissions)
+
+                changed = False
+                if fixer:
+                    changed = fixer(destfile)
+                record_installed(srcfile, destfile, changed)
+
+    clobber(source, lib_dir, True)
+
+    assert info_dir, "%s .dist-info directory not found" % req
+
+    # Get the defined entry points
+    ep_file = os.path.join(info_dir[0], 'entry_points.txt')
+    console, gui = get_entrypoints(ep_file)
+
+    def is_entrypoint_wrapper(name):
+        # EP, EP.exe and EP-script.py are scripts generated for
+        # entry point EP by setuptools
+        if name.lower().endswith('.exe'):
+            matchname = name[:-4]
+        elif name.lower().endswith('-script.py'):
+            matchname = name[:-10]
+        elif name.lower().endswith(".pya"):
+            matchname = name[:-4]
+        else:
+            matchname = name
+        # Ignore setuptools-generated scripts
+        return (matchname in console or matchname in gui)
+
+    for datadir in data_dirs:
+        fixer = None
+        filter = None
+        for subdir in os.listdir(os.path.join(wheeldir, datadir)):
+            fixer = None
+            if subdir == 'scripts':
+                fixer = fix_script
+                filter = is_entrypoint_wrapper
+            source = os.path.join(wheeldir, datadir, subdir)
+            dest = scheme[subdir]
+            clobber(source, dest, False, fixer=fixer, filter=filter)
+
+    maker = ScriptMaker(None, scheme['scripts'])
+
+    # Ensure old scripts are overwritten.
+    # See https://github.com/pypa/pip/issues/1800
+    maker.clobber = True
+
+    # Ensure we don't generate any variants for scripts because this is almost
+    # never what somebody wants.
+    # See https://bitbucket.org/pypa/distlib/issue/35/
+    maker.variants = set(('', ))
+
+    # This is required because otherwise distlib creates scripts that are not
+    # executable.
+    # See https://bitbucket.org/pypa/distlib/issue/32/
+    maker.set_mode = True
+
+    # Simplify the script and fix the fact that the default script swallows
+    # every single stack trace.
+    # See https://bitbucket.org/pypa/distlib/issue/34/
+    # See https://bitbucket.org/pypa/distlib/issue/33/
+    def _get_script_text(entry):
+        if entry.suffix is None:
+            raise InstallationError(
+                "Invalid script entry point: %s for req: %s - A callable "
+                "suffix is required. Cf https://packaging.python.org/en/"
+                "latest/distributing.html#console-scripts for more "
+                "information." % (entry, req)
+            )
+        return maker.script_template % {
+            "module": entry.prefix,
+            "import_name": entry.suffix.split(".")[0],
+            "func": entry.suffix,
+        }
+
+    maker._get_script_text = _get_script_text
+    maker.script_template = """# -*- coding: utf-8 -*-
+import re
+import sys
+
+from %(module)s import %(import_name)s
+
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+    sys.exit(%(func)s())
+"""
+
+    # Special case pip and setuptools to generate versioned wrappers
+    #
+    # The issue is that some projects (specifically, pip and setuptools) use
+    # code in setup.py to create "versioned" entry points - pip2.7 on Python
+    # 2.7, pip3.3 on Python 3.3, etc. But these entry points are baked into
+    # the wheel metadata at build time, and so if the wheel is installed with
+    # a *different* version of Python the entry points will be wrong. The
+    # correct fix for this is to enhance the metadata to be able to describe
+    # such versioned entry points, but that won't happen till Metadata 2.0 is
+    # available.
+    # In the meantime, projects using versioned entry points will either have
+    # incorrect versioned entry points, or they will not be able to distribute
+    # "universal" wheels (i.e., they will need a wheel per Python version).
+    #
+    # Because setuptools and pip are bundled with _ensurepip and virtualenv,
+    # we need to use universal wheels. So, as a stopgap until Metadata 2.0, we
+    # override the versioned entry points in the wheel and generate the
+    # correct ones. This code is purely a short-term measure until Metadata 2.0
+    # is available.
+    #
+    # To add to the level of hack in this section of code: in order to support
+    # ensurepip, this code will look for an ``ENSUREPIP_OPTIONS`` environment
+    # variable which controls which versioned scripts get installed.
+    #
+    # ENSUREPIP_OPTIONS=altinstall
+    #   - Only pipX.Y and easy_install-X.Y will be generated and installed
+    # ENSUREPIP_OPTIONS=install
+    #   - pipX.Y, pipX, easy_install-X.Y will be generated and installed. Note
+    #     that, technically, this branch is taken whenever ENSUREPIP_OPTIONS is
+    #     set and is not altinstall
+    # DEFAULT
+    #   - The default behavior is to install pip, pipX, pipX.Y, easy_install
+    #     and easy_install-X.Y.
+    pip_script = console.pop('pip', None)
+    if pip_script:
+        if "ENSUREPIP_OPTIONS" not in os.environ:
+            spec = 'pip = ' + pip_script
+            generated.extend(maker.make(spec))
+
+        if os.environ.get("ENSUREPIP_OPTIONS", "") != "altinstall":
+            spec = 'pip%s = %s' % (sys.version[:1], pip_script)
+            generated.extend(maker.make(spec))
+
+        spec = 'pip%s = %s' % (sys.version[:3], pip_script)
+        generated.extend(maker.make(spec))
+        # Delete any other versioned pip entry points
+        pip_ep = [k for k in console if re.match(r'pip(\d(\.\d)?)?$', k)]
+        for k in pip_ep:
+            del console[k]
+    easy_install_script = console.pop('easy_install', None)
+    if easy_install_script:
+        if "ENSUREPIP_OPTIONS" not in os.environ:
+            spec = 'easy_install = ' + easy_install_script
+            generated.extend(maker.make(spec))
+
+        spec = 'easy_install-%s = %s' % (sys.version[:3], easy_install_script)
+        generated.extend(maker.make(spec))
+        # Delete any other versioned easy_install entry points
+        easy_install_ep = [
+            k for k in console if re.match(r'easy_install(-\d\.\d)?$', k)
+        ]
+        for k in easy_install_ep:
+            del console[k]
+
+    # Generate the console and GUI entry points specified in the wheel
+    if len(console) > 0:
+        generated.extend(
+            maker.make_multiple(['%s = %s' % kv for kv in console.items()])
+        )
+    if len(gui) > 0:
+        generated.extend(
+            maker.make_multiple(
+                ['%s = %s' % kv for kv in gui.items()],
+                {'gui': True}
+            )
+        )
+
+    # Record pip as the installer
+    installer = os.path.join(info_dir[0], 'INSTALLER')
+    temp_installer = os.path.join(info_dir[0], 'INSTALLER.pip')
+    with open(temp_installer, 'wb') as installer_file:
+        installer_file.write(b'pip\n')
+    shutil.move(temp_installer, installer)
+    generated.append(installer)
+
+    # Record details of all files installed
+    record = os.path.join(info_dir[0], 'RECORD')
+    temp_record = os.path.join(info_dir[0], 'RECORD.pip')
+    with open_for_csv(record, 'r') as record_in:
+        with open_for_csv(temp_record, 'w+') as record_out:
+            reader = csv.reader(record_in)
+            writer = csv.writer(record_out)
+            for row in reader:
+                row[0] = installed.pop(row[0], row[0])
+                if row[0] in changed:
+                    row[1], row[2] = rehash(row[0])
+                writer.writerow(row)
+            for f in generated:
+                h, l = rehash(f)
+                writer.writerow((normpath(f, lib_dir), h, l))
+            for f in installed:
+                writer.writerow((installed[f], '', ''))
+    shutil.move(temp_record, record)
+
+
+def _unique(fn):
+    @functools.wraps(fn)
+    def unique(*args, **kw):
+        seen = set()
+        for item in fn(*args, **kw):
+            if item not in seen:
+                seen.add(item)
+                yield item
+    return unique
+
+
+# TODO: this goes somewhere besides the wheel module
+@_unique
+def uninstallation_paths(dist):
+    """
+    Yield all the uninstallation paths for dist based on RECORD-without-.pyc
+
+    Yield paths to all the files in RECORD. For each .py file in RECORD, add
+    the .pyc in the same directory.
+
+    UninstallPathSet.add() takes care of the __pycache__ .pyc.
+    """
+    from pip.utils import FakeFile  # circular import
+    r = csv.reader(FakeFile(dist.get_metadata_lines('RECORD')))
+    for row in r:
+        path = os.path.join(dist.location, row[0])
+        yield path
+        if path.endswith('.py'):
+            dn, fn = os.path.split(path)
+            base = fn[:-3]
+            path = os.path.join(dn, base + '.pyc')
+            yield path
+
+
+def wheel_version(source_dir):
+    """
+    Return the Wheel-Version of an extracted wheel, if possible.
+
+    Otherwise, return False if we couldn't parse / extract it.
+    """
+    try:
+        dist = [d for d in pkg_resources.find_on_path(None, source_dir)][0]
+
+        wheel_data = dist.get_metadata('WHEEL')
+        wheel_data = Parser().parsestr(wheel_data)
+
+        version = wheel_data['Wheel-Version'].strip()
+        version = tuple(map(int, version.split('.')))
+        return version
+    except:
+        return False
+
+
+def check_compatibility(version, name):
+    """
+    Raises errors or warns if called with an incompatible Wheel-Version.
+
+    Pip should refuse to install a Wheel-Version that's a major series
+    ahead of what it's compatible with (e.g. 2.0 > 1.1); and warn when
+    installing a version only a minor version ahead (e.g. 1.2 > 1.1).
+
+    version: a 2-tuple representing a Wheel-Version (Major, Minor)
+    name: name of wheel or package to raise exception about
+
+    :raises UnsupportedWheel: when an incompatible Wheel-Version is given
+    """
+    if not version:
+        raise UnsupportedWheel(
+            "%s is in an unsupported or invalid wheel" % name
+        )
+    if version[0] > VERSION_COMPATIBLE[0]:
+        raise UnsupportedWheel(
+            "%s's Wheel-Version (%s) is not compatible with this version "
+            "of pip" % (name, '.'.join(map(str, version)))
+        )
+    elif version > VERSION_COMPATIBLE:
+        logger.warning(
+            'Installing from a newer Wheel-Version (%s)',
+            '.'.join(map(str, version)),
+        )
+
+
+class Wheel(object):
+    """A wheel file"""
+
+    # TODO: maybe move the install code into this class
+
+    wheel_file_re = re.compile(
+        r"""^(?P<namever>(?P<name>.+?)-(?P<ver>\d.*?))
+        ((-(?P<build>\d.*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)
+        \.whl|\.dist-info)$""",
+        re.VERBOSE
+    )
+
+    def __init__(self, filename):
+        """
+        :raises InvalidWheelFilename: when the filename is invalid for a wheel
+        """
+        wheel_info = self.wheel_file_re.match(filename)
+        if not wheel_info:
+            raise InvalidWheelFilename(
+                "%s is not a valid wheel filename." % filename
+            )
+        self.filename = filename
+        self.name = wheel_info.group('name').replace('_', '-')
+        # we'll assume "_" means "-" due to wheel naming scheme
+        # (https://github.com/pypa/pip/issues/1150)
+        self.version = wheel_info.group('ver').replace('_', '-')
+        self.pyversions = wheel_info.group('pyver').split('.')
+        self.abis = wheel_info.group('abi').split('.')
+        self.plats = wheel_info.group('plat').split('.')
+
+        # All the tag combinations from this file
+        self.file_tags = set(
+            (x, y, z) for x in self.pyversions
+            for y in self.abis for z in self.plats
+        )
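+        # Hedged example (hypothetical filename): parsing
+        # 'Foo_bar-1.0-py2.py3-none-any.whl' would roughly give
+        #     name='Foo-bar', version='1.0', pyversions=['py2', 'py3'],
+        #     abis=['none'], plats=['any']
+        # and file_tags would contain ('py2', 'none', 'any') and
+        # ('py3', 'none', 'any').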
+
+    def support_index_min(self, tags=None):
+        """
+        Return the lowest index that one of the wheel's file_tag combinations
+        achieves in the supported_tags list, e.g. if there are 8 supported
+        tags, and one of the file tags is first in the list, then return 0.
+        Returns None if the wheel is not supported.
+        """
+        if tags is None:  # for mock
+            tags = pep425tags.supported_tags
+        indexes = [tags.index(c) for c in self.file_tags if c in tags]
+        return min(indexes) if indexes else None
+
+    def supported(self, tags=None):
+        """Is this wheel supported on this system?"""
+        if tags is None:  # for mock
+            tags = pep425tags.supported_tags
+        return bool(set(tags).intersection(self.file_tags))
+
+
+class WheelBuilder(object):
+    """Build wheels from a RequirementSet."""
+
+    def __init__(self, requirement_set, finder, build_options=None,
+                 global_options=None):
+        self.requirement_set = requirement_set
+        self.finder = finder
+        self._cache_root = requirement_set._wheel_cache._cache_dir
+        self._wheel_dir = requirement_set.wheel_download_dir
+        self.build_options = build_options or []
+        self.global_options = global_options or []
+
+    def _build_one(self, req, output_dir, python_tag=None):
+        """Build one wheel.
+
+        :return: The filename of the built wheel, or None if the build failed.
+        """
+        tempd = tempfile.mkdtemp('pip-wheel-')
+        try:
+            if self.__build_one(req, tempd, python_tag=python_tag):
+                try:
+                    wheel_name = os.listdir(tempd)[0]
+                    wheel_path = os.path.join(output_dir, wheel_name)
+                    shutil.move(os.path.join(tempd, wheel_name), wheel_path)
+                    logger.info('Stored in directory: %s', output_dir)
+                    return wheel_path
+                except:
+                    pass
+            # Ignore return, we can't do anything else useful.
+            self._clean_one(req)
+            return None
+        finally:
+            rmtree(tempd)
+
+    def _base_setup_args(self, req):
+        return [
+            sys.executable, "-u", '-c',
+            SETUPTOOLS_SHIM % req.setup_py
+        ] + list(self.global_options)
+
+    def __build_one(self, req, tempd, python_tag=None):
+        base_args = self._base_setup_args(req)
+
+        spin_message = 'Running setup.py bdist_wheel for %s' % (req.name,)
+        with open_spinner(spin_message) as spinner:
+            logger.debug('Destination directory: %s', tempd)
+            wheel_args = base_args + ['bdist_wheel', '-d', tempd] \
+                + self.build_options
+
+            if python_tag is not None:
+                wheel_args += ["--python-tag", python_tag]
+
+            try:
+                call_subprocess(wheel_args, cwd=req.setup_py_dir,
+                                show_stdout=False, spinner=spinner)
+                return True
+            except:
+                spinner.finish("error")
+                logger.error('Failed building wheel for %s', req.name)
+                return False
+
+    def _clean_one(self, req):
+        base_args = self._base_setup_args(req)
+
+        logger.info('Running setup.py clean for %s', req.name)
+        clean_args = base_args + ['clean', '--all']
+        try:
+            call_subprocess(clean_args, cwd=req.source_dir, show_stdout=False)
+            return True
+        except:
+            logger.error('Failed cleaning build dir for %s', req.name)
+            return False
+
+    def build(self, autobuilding=False):
+        """Build wheels.
+
+        :param autobuilding: If True, replace the sdist we built from with the
+            newly built wheel, in preparation for installation.
+        :return: True if all the wheels built correctly.
+        """
+        assert self._wheel_dir or (autobuilding and self._cache_root)
+        # unpack sdists and construct the req set
+        self.requirement_set.prepare_files(self.finder)
+
+        reqset = self.requirement_set.requirements.values()
+
+        buildset = []
+        for req in reqset:
+            if req.constraint:
+                continue
+            if req.is_wheel:
+                if not autobuilding:
+                    logger.info(
+                        'Skipping %s, due to already being wheel.', req.name)
+            elif autobuilding and req.editable:
+                pass
+            elif autobuilding and req.link and not req.link.is_artifact:
+                pass
+            elif autobuilding and not req.source_dir:
+                pass
+            else:
+                if autobuilding:
+                    link = req.link
+                    base, ext = link.splitext()
+                    if pip.index.egg_info_matches(base, None, link) is None:
+                        # Doesn't look like a package - don't autobuild a wheel
+                        # because we'll have no way to lookup the result sanely
+                        continue
+                    if "binary" not in pip.index.fmt_ctl_formats(
+                            self.finder.format_control,
+                            canonicalize_name(req.name)):
+                        logger.info(
+                            "Skipping bdist_wheel for %s, due to binaries "
+                            "being disabled for it.", req.name)
+                        continue
+                buildset.append(req)
+
+        if not buildset:
+            return True
+
+        # Build the wheels.
+        logger.info(
+            'Building wheels for collected packages: %s',
+            ', '.join([req.name for req in buildset]),
+        )
+        with indent_log():
+            build_success, build_failure = [], []
+            for req in buildset:
+                python_tag = None
+                if autobuilding:
+                    python_tag = pep425tags.implementation_tag
+                    output_dir = _cache_for_link(self._cache_root, req.link)
+                    try:
+                        ensure_dir(output_dir)
+                    except OSError as e:
+                        logger.warning("Building wheel for %s failed: %s",
+                                       req.name, e)
+                        build_failure.append(req)
+                        continue
+                else:
+                    output_dir = self._wheel_dir
+                wheel_file = self._build_one(
+                    req, output_dir,
+                    python_tag=python_tag,
+                )
+                if wheel_file:
+                    build_success.append(req)
+                    if autobuilding:
+                        # XXX: This is mildly duplicative with prepare_files,
+                        # but not close enough to pull out to a single common
+                        # method.
+                        # The code below assumes temporary source dirs -
+                        # prevent it doing bad things.
+                        if req.source_dir and not os.path.exists(os.path.join(
+                                req.source_dir, PIP_DELETE_MARKER_FILENAME)):
+                            raise AssertionError(
+                                "bad source dir - missing marker")
+                        # Delete the source we built the wheel from
+                        req.remove_temporary_source()
+                        # set the build directory again - name is known from
+                        # the work prepare_files did.
+                        req.source_dir = req.build_location(
+                            self.requirement_set.build_dir)
+                        # Update the link for this.
+                        req.link = pip.index.Link(
+                            path_to_url(wheel_file))
+                        assert req.link.is_wheel
+                        # extract the wheel into the dir
+                        unpack_url(
+                            req.link, req.source_dir, None, False,
+                            session=self.requirement_set.session)
+                else:
+                    build_failure.append(req)
+
+        # notify success/failure
+        if build_success:
+            logger.info(
+                'Successfully built %s',
+                ' '.join([req.name for req in build_success]),
+            )
+        if build_failure:
+            logger.info(
+                'Failed to build %s',
+                ' '.join([req.name for req in build_failure]),
+            )
+        # Return True if all builds were successful
+        return len(build_failure) == 0
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/DESCRIPTION.rst b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/DESCRIPTION.rst
new file mode 100644
index 0000000000000000000000000000000000000000..e1187231a3553e6c1bec45c6b5a4e4e62b8ef70d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/DESCRIPTION.rst
@@ -0,0 +1,3 @@
+UNKNOWN
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/INSTALLER b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/INSTALLER
new file mode 100644
index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/METADATA b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/METADATA
new file mode 100644
index 0000000000000000000000000000000000000000..7a504873a9e5e85911d44ecc0c58e84e435f8207
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/METADATA
@@ -0,0 +1,13 @@
+Metadata-Version: 2.0
+Name: pkg_resources
+Version: 0.0.0
+Summary: UNKNOWN
+Home-page: UNKNOWN
+Author: UNKNOWN
+Author-email: UNKNOWN
+License: UNKNOWN
+Platform: UNKNOWN
+
+UNKNOWN
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/RECORD b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/RECORD
new file mode 100644
index 0000000000000000000000000000000000000000..9f7ac8c910418cfb2ce835c5a723c54db35dc5a1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/RECORD
@@ -0,0 +1,38 @@
+pkg_resources/__init__.py,sha256=YQ4_WQnPztMsUy1yuvp7ZRBPK9IhOyhgosLpvkFso1I,103551
+pkg_resources/py31compat.py,sha256=-ysVqoxLetAnL94uM0kHkomKQTC1JZLN2ZUjqUhMeKE,600
+pkg_resources/_vendor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pkg_resources/_vendor/appdirs.py,sha256=mExjXejqnz0UifNzjccc-yJ8JS0TfAW0gXuikOdDQLU,22376
+pkg_resources/_vendor/pyparsing.py,sha256=LTlkTtpPBJ0ypOyi---Es1IvmxfI_vh28VJEh27qY8I,229871
+pkg_resources/_vendor/six.py,sha256=A6hdJZVjI3t_geebZ9BzUvwRrIXo0lfwzQlM2LcKyas,30098
+pkg_resources/_vendor/packaging/__about__.py,sha256=zkcCPTN_6TcLW0Nrlg0176-R1QQ_WVPTm8sz1R4-HjM,720
+pkg_resources/_vendor/packaging/__init__.py,sha256=_vNac5TrzwsrzbOFIbF-5cHqc_Y2aPT2D7zrIR06BOo,513
+pkg_resources/_vendor/packaging/_compat.py,sha256=Vi_A0rAQeHbU-a9X0tt1yQm9RqkgQbDSxzRw8WlU9kA,860
+pkg_resources/_vendor/packaging/_structures.py,sha256=RImECJ4c_wTlaTYYwZYLHEiebDMaAJmK1oPARhw1T5o,1416
+pkg_resources/_vendor/packaging/markers.py,sha256=uEcBBtGvzqltgnArqb9c4RrcInXezDLos14zbBHhWJo,8248
+pkg_resources/_vendor/packaging/requirements.py,sha256=SikL2UynbsT0qtY9ltqngndha_sfo0w6XGFhAhoSoaQ,4355
+pkg_resources/_vendor/packaging/specifiers.py,sha256=SAMRerzO3fK2IkFZCaZkuwZaL_EGqHNOz4pni4vhnN0,28025
+pkg_resources/_vendor/packaging/utils.py,sha256=3m6WvPm6NNxE8rkTGmn0r75B_GZSGg7ikafxHsBN1WA,421
+pkg_resources/_vendor/packaging/version.py,sha256=OwGnxYfr2ghNzYx59qWIBkrK3SnB6n-Zfd1XaLpnnM0,11556
+pkg_resources/extern/__init__.py,sha256=JUtlHHvlxHSNuB4pWqNjcx7n6kG-fwXg7qmJ2zNJlIY,2487
+pkg_resources-0.0.0.dist-info/DESCRIPTION.rst,sha256=OCTuuN6LcWulhHS3d5rfjdsQtW22n7HENFRh6jC6ego,10
+pkg_resources-0.0.0.dist-info/METADATA,sha256=FOYDX6cmnDUkWo-yhqWQYtjKIMZR2IW2G1GFZhA6gUQ,177
+pkg_resources-0.0.0.dist-info/RECORD,,
+pkg_resources-0.0.0.dist-info/WHEEL,sha256=kdsN-5OJAZIiHN-iO4Rhl82KyS0bDWf4uBwMbkNafr8,110
+pkg_resources-0.0.0.dist-info/metadata.json,sha256=jbGQ09fxsrPQMhbeLboAEDZERlOExjHIGENdQdPS6RU,221
+pkg_resources-0.0.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+pkg_resources/_vendor/__pycache__/pyparsing.cpython-36.pyc,,
+pkg_resources/_vendor/__pycache__/six.cpython-36.pyc,,
+pkg_resources/_vendor/__pycache__/appdirs.cpython-36.pyc,,
+pkg_resources/_vendor/__pycache__/__init__.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/specifiers.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/_compat.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/_structures.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/requirements.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/markers.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/__about__.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/__init__.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/utils.cpython-36.pyc,,
+pkg_resources/_vendor/packaging/__pycache__/version.cpython-36.pyc,,
+pkg_resources/__pycache__/py31compat.cpython-36.pyc,,
+pkg_resources/__pycache__/__init__.cpython-36.pyc,,
+pkg_resources/extern/__pycache__/__init__.cpython-36.pyc,,
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/WHEEL b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/WHEEL
new file mode 100644
index 0000000000000000000000000000000000000000..7332a419cda6903b61439f3bac93492b0747e6e7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.30.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/metadata.json b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..1069edb54ba310e7ff3375b7372876e877cc9de4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources-0.0.0.dist-info/metadata.json
@@ -0,0 +1 @@
+{"extensions": {"python.details": {"document_names": {"description": "DESCRIPTION.rst"}}}, "generator": "bdist_wheel (0.30.0)", "metadata_version": "2.0", "name": "pkg_resources", "summary": "UNKNOWN", "version": "0.0.0"}
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d95bd2912044ea80486f385d634376a244a0562
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/__init__.py
@@ -0,0 +1,3125 @@
+# coding: utf-8
+"""
+Package resource API
+--------------------
+
+A resource is a logical file contained within a package, or a logical
+subdirectory thereof.  The package resource API expects resource names
+to have their path parts separated with ``/``, *not* whatever the local
+path separator is.  Do not use os.path operations to manipulate resource
+names being passed into the API.
+
+The package resource API is designed to work with normal filesystem packages,
+.egg files, and unpacked .egg files.  It can also work in a limited way with
+.zip files and with custom PEP 302 loaders that support the ``get_data()``
+method.
+"""
+
+from __future__ import absolute_import
+
+import sys
+import os
+import io
+import time
+import re
+import types
+import zipfile
+import zipimport
+import warnings
+import stat
+import functools
+import pkgutil
+import operator
+import platform
+import collections
+import plistlib
+import email.parser
+import errno
+import tempfile
+import textwrap
+import itertools
+import inspect
+from pkgutil import get_importer
+
+try:
+    import _imp
+except ImportError:
+    # Python 3.2 compatibility
+    import imp as _imp
+
+from pkg_resources.extern import six
+from pkg_resources.extern.six.moves import urllib, map, filter
+
+# capture these to bypass sandboxing
+from os import utime
+try:
+    from os import mkdir, rename, unlink
+    WRITE_SUPPORT = True
+except ImportError:
+    # no write support, probably under GAE
+    WRITE_SUPPORT = False
+
+from os import open as os_open
+from os.path import isdir, split
+
+try:
+    import importlib.machinery as importlib_machinery
+    # access attribute to force import under delayed import mechanisms.
+    importlib_machinery.__name__
+except ImportError:
+    importlib_machinery = None
+
+from . import py31compat
+from pkg_resources.extern import appdirs
+from pkg_resources.extern import packaging
+__import__('pkg_resources.extern.packaging.version')
+__import__('pkg_resources.extern.packaging.specifiers')
+__import__('pkg_resources.extern.packaging.requirements')
+__import__('pkg_resources.extern.packaging.markers')
+
+
+if (3, 0) < sys.version_info < (3, 3):
+    raise RuntimeError("Python 3.3 or later is required")
+
+if six.PY2:
+    # Those builtin exceptions are only defined in Python 3
+    PermissionError = None
+    NotADirectoryError = None
+
+# declare some globals that will be defined later to
+# satisfy the linters.
+require = None
+working_set = None
+add_activation_listener = None
+resources_stream = None
+cleanup_resources = None
+resource_dir = None
+resource_stream = None
+set_extraction_path = None
+resource_isdir = None
+resource_string = None
+iter_entry_points = None
+resource_listdir = None
+resource_filename = None
+resource_exists = None
+_distribution_finders = None
+_namespace_handlers = None
+_namespace_packages = None
+
+
+class PEP440Warning(RuntimeWarning):
+    """
+    Used when there is an issue with a version or specifier not complying with
+    PEP 440.
+    """
+
+
+def parse_version(v):
+    try:
+        return packaging.version.Version(v)
+    except packaging.version.InvalidVersion:
+        return packaging.version.LegacyVersion(v)
+
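+
+# Illustrative sketch, not part of upstream pkg_resources: a minimal example of
+# the PEP 440-aware ordering that parse_version() provides. Defined as a
+# private helper that is never called, so importing this module is unaffected.
+def _example_parse_version_ordering():
+    # "1.10" sorts after "1.2", unlike a plain string comparison.
+    assert parse_version("1.2") < parse_version("1.10")
+    # Pre-releases sort before the corresponding final release.
+    assert parse_version("2.0.dev1") < parse_version("2.0")
+    # Non-PEP 440 strings fall back to LegacyVersion instead of raising.
+    assert parse_version("not-a-version") is not None
+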
+
+_state_vars = {}
+
+
+def _declare_state(vartype, **kw):
+    globals().update(kw)
+    _state_vars.update(dict.fromkeys(kw, vartype))
+
+
+def __getstate__():
+    state = {}
+    g = globals()
+    for k, v in _state_vars.items():
+        state[k] = g['_sget_' + v](g[k])
+    return state
+
+
+def __setstate__(state):
+    g = globals()
+    for k, v in state.items():
+        g['_sset_' + _state_vars[k]](k, g[k], v)
+    return state
+
+
+def _sget_dict(val):
+    return val.copy()
+
+
+def _sset_dict(key, ob, state):
+    ob.clear()
+    ob.update(state)
+
+
+def _sget_object(val):
+    return val.__getstate__()
+
+
+def _sset_object(key, ob, state):
+    ob.__setstate__(state)
+
+
+_sget_none = _sset_none = lambda *args: None
+
+
+def get_supported_platform():
+    """Return this platform's maximum compatible version.
+
+    distutils.util.get_platform() normally reports the minimum version
+    of Mac OS X that would be required to *use* extensions produced by
+    distutils.  But what we want when checking compatibility is to know the
+    version of Mac OS X that we are *running*.  To allow usage of packages that
+    explicitly require a newer version of Mac OS X, we must also know the
+    current version of the OS.
+
+    If this condition occurs for any other platform with a version in its
+    platform strings, this function should be extended accordingly.
+    """
+    plat = get_build_platform()
+    m = macosVersionString.match(plat)
+    if m is not None and sys.platform == "darwin":
+        try:
+            plat = 'macosx-%s-%s' % ('.'.join(_macosx_vers()[:2]), m.group(3))
+        except ValueError:
+            # not Mac OS X
+            pass
+    return plat
+
+
+__all__ = [
+    # Basic resource access and distribution/entry point discovery
+    'require', 'run_script', 'get_provider', 'get_distribution',
+    'load_entry_point', 'get_entry_map', 'get_entry_info',
+    'iter_entry_points',
+    'resource_string', 'resource_stream', 'resource_filename',
+    'resource_listdir', 'resource_exists', 'resource_isdir',
+
+    # Environmental control
+    'declare_namespace', 'working_set', 'add_activation_listener',
+    'find_distributions', 'set_extraction_path', 'cleanup_resources',
+    'get_default_cache',
+
+    # Primary implementation classes
+    'Environment', 'WorkingSet', 'ResourceManager',
+    'Distribution', 'Requirement', 'EntryPoint',
+
+    # Exceptions
+    'ResolutionError', 'VersionConflict', 'DistributionNotFound',
+    'UnknownExtra', 'ExtractionError',
+
+    # Warnings
+    'PEP440Warning',
+
+    # Parsing functions and string utilities
+    'parse_requirements', 'parse_version', 'safe_name', 'safe_version',
+    'get_platform', 'compatible_platforms', 'yield_lines', 'split_sections',
+    'safe_extra', 'to_filename', 'invalid_marker', 'evaluate_marker',
+
+    # filesystem utilities
+    'ensure_directory', 'normalize_path',
+
+    # Distribution "precedence" constants
+    'EGG_DIST', 'BINARY_DIST', 'SOURCE_DIST', 'CHECKOUT_DIST', 'DEVELOP_DIST',
+
+    # "Provider" interfaces, implementations, and registration/lookup APIs
+    'IMetadataProvider', 'IResourceProvider', 'FileMetadata',
+    'PathMetadata', 'EggMetadata', 'EmptyProvider', 'empty_provider',
+    'NullProvider', 'EggProvider', 'DefaultProvider', 'ZipProvider',
+    'register_finder', 'register_namespace_handler', 'register_loader_type',
+    'fixup_namespace_packages', 'get_importer',
+
+    # Deprecated/backward compatibility only
+    'run_main', 'AvailableDistributions',
+]
+
+
+class ResolutionError(Exception):
+    """Abstract base for dependency resolution errors"""
+
+    def __repr__(self):
+        return self.__class__.__name__ + repr(self.args)
+
+
+class VersionConflict(ResolutionError):
+    """
+    An already-installed version conflicts with the requested version.
+
+    Should be initialized with the installed Distribution and the requested
+    Requirement.
+    """
+
+    _template = "{self.dist} is installed but {self.req} is required"
+
+    @property
+    def dist(self):
+        return self.args[0]
+
+    @property
+    def req(self):
+        return self.args[1]
+
+    def report(self):
+        return self._template.format(**locals())
+
+    def with_context(self, required_by):
+        """
+        If required_by is non-empty, return a version of self that is a
+        ContextualVersionConflict.
+        """
+        if not required_by:
+            return self
+        args = self.args + (required_by,)
+        return ContextualVersionConflict(*args)
+
+
+class ContextualVersionConflict(VersionConflict):
+    """
+    A VersionConflict that accepts a third parameter, the set of the
+    requirements that required the installed Distribution.
+    """
+
+    _template = VersionConflict._template + ' by {self.required_by}'
+
+    @property
+    def required_by(self):
+        return self.args[2]
+
+
+class DistributionNotFound(ResolutionError):
+    """A requested distribution was not found"""
+
+    _template = ("The '{self.req}' distribution was not found "
+                 "and is required by {self.requirers_str}")
+
+    @property
+    def req(self):
+        return self.args[0]
+
+    @property
+    def requirers(self):
+        return self.args[1]
+
+    @property
+    def requirers_str(self):
+        if not self.requirers:
+            return 'the application'
+        return ', '.join(self.requirers)
+
+    def report(self):
+        return self._template.format(**locals())
+
+    def __str__(self):
+        return self.report()
+
+
+class UnknownExtra(ResolutionError):
+    """Distribution doesn't have an "extra feature" of the given name"""
+
+
+_provider_factories = {}
+
+PY_MAJOR = sys.version[:3]
+EGG_DIST = 3
+BINARY_DIST = 2
+SOURCE_DIST = 1
+CHECKOUT_DIST = 0
+DEVELOP_DIST = -1
+
+
+def register_loader_type(loader_type, provider_factory):
+    """Register `provider_factory` to make providers for `loader_type`
+
+    `loader_type` is the type or class of a PEP 302 ``module.__loader__``,
+    and `provider_factory` is a function that, passed a *module* object,
+    returns an ``IResourceProvider`` for that module.
+    """
+    _provider_factories[loader_type] = provider_factory
+
+
+def get_provider(moduleOrReq):
+    """Return an IResourceProvider for the named module or requirement"""
+    if isinstance(moduleOrReq, Requirement):
+        return working_set.find(moduleOrReq) or require(str(moduleOrReq))[0]
+    try:
+        module = sys.modules[moduleOrReq]
+    except KeyError:
+        __import__(moduleOrReq)
+        module = sys.modules[moduleOrReq]
+    loader = getattr(module, '__loader__', None)
+    return _find_adapter(_provider_factories, loader)(module)
+
+
+def _macosx_vers(_cache=[]):
+    if not _cache:
+        version = platform.mac_ver()[0]
+        # fallback for MacPorts
+        if version == '':
+            plist = '/System/Library/CoreServices/SystemVersion.plist'
+            if os.path.exists(plist):
+                if hasattr(plistlib, 'readPlist'):
+                    plist_content = plistlib.readPlist(plist)
+                    if 'ProductVersion' in plist_content:
+                        version = plist_content['ProductVersion']
+
+        _cache.append(version.split('.'))
+    return _cache[0]
+
+
+def _macosx_arch(machine):
+    return {'PowerPC': 'ppc', 'Power_Macintosh': 'ppc'}.get(machine, machine)
+
+
+def get_build_platform():
+    """Return this platform's string for platform-specific distributions
+
+    XXX Currently this is the same as ``distutils.util.get_platform()``, but it
+    needs some hacks for Linux and Mac OS X.
+    """
+    try:
+        # Python 2.7 or >=3.2
+        from sysconfig import get_platform
+    except ImportError:
+        from distutils.util import get_platform
+
+    plat = get_platform()
+    if sys.platform == "darwin" and not plat.startswith('macosx-'):
+        try:
+            version = _macosx_vers()
+            machine = os.uname()[4].replace(" ", "_")
+            return "macosx-%d.%d-%s" % (
+                int(version[0]), int(version[1]),
+                _macosx_arch(machine),
+            )
+        except ValueError:
+            # if someone is running a non-Mac darwin system, this will fall
+            # through to the default implementation
+            pass
+    return plat
+
+
+macosVersionString = re.compile(r"macosx-(\d+)\.(\d+)-(.*)")
+darwinVersionString = re.compile(r"darwin-(\d+)\.(\d+)\.(\d+)-(.*)")
+# XXX backward compat
+get_platform = get_build_platform
+
+
+def compatible_platforms(provided, required):
+    """Can code for the `provided` platform run on the `required` platform?
+
+    Returns true if either platform is ``None``, or the platforms are equal.
+
+    XXX Needs compatibility checks for Linux and other unixy OSes.
+    """
+    if provided is None or required is None or provided == required:
+        # easy case
+        return True
+
+    # Mac OS X special cases
+    reqMac = macosVersionString.match(required)
+    if reqMac:
+        provMac = macosVersionString.match(provided)
+
+        # is this a Mac package?
+        if not provMac:
+            # this is backwards compatibility for packages built before
+            # setuptools 0.6. All packages built after this point will
+            # use the new macosx designation.
+            provDarwin = darwinVersionString.match(provided)
+            if provDarwin:
+                dversion = int(provDarwin.group(1))
+                macosversion = "%s.%s" % (reqMac.group(1), reqMac.group(2))
+                if dversion == 7 and macosversion >= "10.3" or \
+                        dversion == 8 and macosversion >= "10.4":
+                    return True
+            # egg isn't macosx or legacy darwin
+            return False
+
+        # are they the same major version and machine type?
+        if provMac.group(1) != reqMac.group(1) or \
+                provMac.group(3) != reqMac.group(3):
+            return False
+
+        # is the required OS major update >= the provided one?
+        if int(provMac.group(2)) > int(reqMac.group(2)):
+            return False
+
+        return True
+
+    # XXX Linux and other platforms' special cases should go here
+    return False
+
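+
+# Illustrative sketch, not part of upstream pkg_resources: compatible_platforms()
+# on concrete platform strings, per the rules documented above.
+def _example_compatible_platforms():
+    assert compatible_platforms(None, "linux-x86_64")            # None matches anything
+    assert compatible_platforms("linux-x86_64", "linux-x86_64")  # equality matches
+    # A macosx egg built for an older minor release runs on a newer one with
+    # the same major version and machine type...
+    assert compatible_platforms("macosx-10.9-x86_64", "macosx-10.12-x86_64")
+    # ...but not the other way around, and not across machine types.
+    assert not compatible_platforms("macosx-10.12-x86_64", "macosx-10.9-x86_64")
+    assert not compatible_platforms("macosx-10.9-ppc", "macosx-10.9-x86_64")
+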
+
+def run_script(dist_spec, script_name):
+    """Locate distribution `dist_spec` and run its `script_name` script"""
+    ns = sys._getframe(1).f_globals
+    name = ns['__name__']
+    ns.clear()
+    ns['__name__'] = name
+    require(dist_spec)[0].run_script(script_name, ns)
+
+
+# backward compatibility
+run_main = run_script
+
+
+def get_distribution(dist):
+    """Return a current distribution object for a Requirement or string"""
+    if isinstance(dist, six.string_types):
+        dist = Requirement.parse(dist)
+    if isinstance(dist, Requirement):
+        dist = get_provider(dist)
+    if not isinstance(dist, Distribution):
+        raise TypeError("Expected string, Requirement, or Distribution", dist)
+    return dist
+
+
+def load_entry_point(dist, group, name):
+    """Return `name` entry point of `group` for `dist` or raise ImportError"""
+    return get_distribution(dist).load_entry_point(group, name)
+
+
+def get_entry_map(dist, group=None):
+    """Return the entry point map for `group`, or the full entry map"""
+    return get_distribution(dist).get_entry_map(group)
+
+
+def get_entry_info(dist, group, name):
+    """Return the EntryPoint object for `group`+`name`, or ``None``"""
+    return get_distribution(dist).get_entry_info(group, name)
+
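+
+# Illustrative sketch, not part of upstream pkg_resources: the lookup helpers
+# above on a hypothetical installed project. The "setuptools" name, the
+# "console_scripts" group and the "easy_install" entry point are assumptions
+# about the running environment, not guarantees made by this module.
+def _example_distribution_lookup(dist_name="setuptools"):
+    dist = get_distribution(dist_name)      # Distribution for a name or Requirement
+    entry_map = get_entry_map(dist_name)    # full {group: {name: EntryPoint}} mapping
+    ep = get_entry_info(dist_name, "console_scripts", "easy_install")
+    # ep is an EntryPoint or None; load_entry_point() would go one step further
+    # and import the object the entry point refers to.
+    return dist.version, sorted(entry_map), ep
+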
+
+class IMetadataProvider:
+    def has_metadata(name):
+        """Does the package's distribution contain the named metadata?"""
+
+    def get_metadata(name):
+        """The named metadata resource as a string"""
+
+    def get_metadata_lines(name):
+        """Yield named metadata resource as list of non-blank non-comment lines
+
+        Leading and trailing whitespace is stripped from each line, and lines
+        with ``#`` as the first non-blank character are omitted."""
+
+    def metadata_isdir(name):
+        """Is the named metadata a directory?  (like ``os.path.isdir()``)"""
+
+    def metadata_listdir(name):
+        """List of metadata names in the directory (like ``os.listdir()``)"""
+
+    def run_script(script_name, namespace):
+        """Execute the named script in the supplied namespace dictionary"""
+
+
+class IResourceProvider(IMetadataProvider):
+    """An object that provides access to package resources"""
+
+    def get_resource_filename(manager, resource_name):
+        """Return a true filesystem path for `resource_name`
+
+        `manager` must be an ``IResourceManager``"""
+
+    def get_resource_stream(manager, resource_name):
+        """Return a readable file-like object for `resource_name`
+
+        `manager` must be an ``IResourceManager``"""
+
+    def get_resource_string(manager, resource_name):
+        """Return a string containing the contents of `resource_name`
+
+        `manager` must be an ``IResourceManager``"""
+
+    def has_resource(resource_name):
+        """Does the package contain the named resource?"""
+
+    def resource_isdir(resource_name):
+        """Is the named resource a directory?  (like ``os.path.isdir()``)"""
+
+    def resource_listdir(resource_name):
+        """List of resource names in the directory (like ``os.listdir()``)"""
+
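+
+# Illustrative sketch, not part of upstream pkg_resources: how callers reach an
+# IResourceProvider in practice. get_provider() picks the provider registered
+# for the module's loader type; resource names always use '/' separators.
+def _example_provider_access():
+    provider = get_provider(__name__)           # provider for this very package
+    present = provider.has_resource('py31compat.py')
+    listing = provider.resource_listdir('')     # names in the package directory
+    return present, listing
+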
+
+class WorkingSet(object):
+    """A collection of active distributions on sys.path (or a similar list)"""
+
+    def __init__(self, entries=None):
+        """Create working set from list of path entries (default=sys.path)"""
+        self.entries = []
+        self.entry_keys = {}
+        self.by_key = {}
+        self.callbacks = []
+
+        if entries is None:
+            entries = sys.path
+
+        for entry in entries:
+            self.add_entry(entry)
+
+    @classmethod
+    def _build_master(cls):
+        """
+        Prepare the master working set.
+        """
+        ws = cls()
+        try:
+            from __main__ import __requires__
+        except ImportError:
+            # The main program does not list any requirements
+            return ws
+
+        # ensure the requirements are met
+        try:
+            ws.require(__requires__)
+        except VersionConflict:
+            return cls._build_from_requirements(__requires__)
+
+        return ws
+
+    @classmethod
+    def _build_from_requirements(cls, req_spec):
+        """
+        Build a working set from a requirement spec. Rewrites sys.path.
+        """
+        # try it without defaults already on sys.path
+        # by starting with an empty path
+        ws = cls([])
+        reqs = parse_requirements(req_spec)
+        dists = ws.resolve(reqs, Environment())
+        for dist in dists:
+            ws.add(dist)
+
+        # add any missing entries from sys.path
+        for entry in sys.path:
+            if entry not in ws.entries:
+                ws.add_entry(entry)
+
+        # then copy back to sys.path
+        sys.path[:] = ws.entries
+        return ws
+
+    def add_entry(self, entry):
+        """Add a path item to ``.entries``, finding any distributions on it
+
+        ``find_distributions(entry, True)`` is used to find distributions
+        corresponding to the path entry, and they are added.  `entry` is
+        always appended to ``.entries``, even if it is already present.
+        (This is because ``sys.path`` can contain the same value more than
+        once, and the ``.entries`` of the ``sys.path`` WorkingSet should always
+        equal ``sys.path``.)
+        """
+        self.entry_keys.setdefault(entry, [])
+        self.entries.append(entry)
+        for dist in find_distributions(entry, True):
+            self.add(dist, entry, False)
+
+    def __contains__(self, dist):
+        """True if `dist` is the active distribution for its project"""
+        return self.by_key.get(dist.key) == dist
+
+    def find(self, req):
+        """Find a distribution matching requirement `req`
+
+        If there is an active distribution for the requested project, this
+        returns it as long as it meets the version requirement specified by
+        `req`.  But, if there is an active distribution for the project and it
+        does *not* meet the `req` requirement, ``VersionConflict`` is raised.
+        If there is no active distribution for the requested project, ``None``
+        is returned.
+        """
+        dist = self.by_key.get(req.key)
+        if dist is not None and dist not in req:
+            # XXX add more info
+            raise VersionConflict(dist, req)
+        return dist
+
+    def iter_entry_points(self, group, name=None):
+        """Yield entry point objects from `group` matching `name`
+
+        If `name` is None, yields all entry points in `group` from all
+        distributions in the working set, otherwise only ones matching
+        both `group` and `name` are yielded (in distribution order).
+        """
+        for dist in self:
+            entries = dist.get_entry_map(group)
+            if name is None:
+                for ep in entries.values():
+                    yield ep
+            elif name in entries:
+                yield entries[name]
+
+    def run_script(self, requires, script_name):
+        """Locate distribution for `requires` and run `script_name` script"""
+        ns = sys._getframe(1).f_globals
+        name = ns['__name__']
+        ns.clear()
+        ns['__name__'] = name
+        self.require(requires)[0].run_script(script_name, ns)
+
+    def __iter__(self):
+        """Yield distributions for non-duplicate projects in the working set
+
+        The yield order is the order in which the items' path entries were
+        added to the working set.
+        """
+        seen = {}
+        for item in self.entries:
+            if item not in self.entry_keys:
+                # workaround a cache issue
+                continue
+
+            for key in self.entry_keys[item]:
+                if key not in seen:
+                    seen[key] = 1
+                    yield self.by_key[key]
+
+    def add(self, dist, entry=None, insert=True, replace=False):
+        """Add `dist` to working set, associated with `entry`
+
+        If `entry` is unspecified, it defaults to the ``.location`` of `dist`.
+        On exit from this routine, `entry` is added to the end of the working
+        set's ``.entries`` (if it wasn't already present).
+
+        `dist` is only added to the working set if it's for a project that
+        doesn't already have a distribution in the set, unless `replace=True`.
+        If it's added, any callbacks registered with the ``subscribe()`` method
+        will be called.
+        """
+        if insert:
+            dist.insert_on(self.entries, entry, replace=replace)
+
+        if entry is None:
+            entry = dist.location
+        keys = self.entry_keys.setdefault(entry, [])
+        keys2 = self.entry_keys.setdefault(dist.location, [])
+        if not replace and dist.key in self.by_key:
+            # ignore hidden distros
+            return
+
+        self.by_key[dist.key] = dist
+        if dist.key not in keys:
+            keys.append(dist.key)
+        if dist.key not in keys2:
+            keys2.append(dist.key)
+        self._added_new(dist)
+
+    def resolve(self, requirements, env=None, installer=None,
+                replace_conflicting=False, extras=None):
+        """List all distributions needed to (recursively) meet `requirements`
+
+        `requirements` must be a sequence of ``Requirement`` objects.  `env`,
+        if supplied, should be an ``Environment`` instance.  If
+        not supplied, it defaults to all distributions available within any
+        entry or distribution in the working set.  `installer`, if supplied,
+        will be invoked with each requirement that cannot be met by an
+        already-installed distribution; it should return a ``Distribution`` or
+        ``None``.
+
+        Unless `replace_conflicting=True`, raises a VersionConflict exception
+        if any requirements are found on the path that have the correct name
+        but the wrong version.  Otherwise, if an `installer` is supplied it
+        will be invoked to obtain the correct version of the requirement and
+        activate it.
+
+        `extras` is a list of the extras to be used with these requirements.
+        This is important because extra requirements may look like `my_req;
+        extra = "my_extra"`, which would otherwise be interpreted as a purely
+        optional requirement.  Instead, we want to be able to assert that these
+        requirements are truly required.
+        """
+
+        # set up the stack
+        requirements = list(requirements)[::-1]
+        # set of processed requirements
+        processed = {}
+        # key -> dist
+        best = {}
+        to_activate = []
+
+        req_extras = _ReqExtras()
+
+        # Mapping of requirement to set of distributions that required it;
+        # useful for reporting info about conflicts.
+        required_by = collections.defaultdict(set)
+
+        while requirements:
+            # process dependencies breadth-first
+            req = requirements.pop(0)
+            if req in processed:
+                # Ignore cyclic or redundant dependencies
+                continue
+
+            if not req_extras.markers_pass(req, extras):
+                continue
+
+            dist = best.get(req.key)
+            if dist is None:
+                # Find the best distribution and add it to the map
+                dist = self.by_key.get(req.key)
+                if dist is None or (dist not in req and replace_conflicting):
+                    ws = self
+                    if env is None:
+                        if dist is None:
+                            env = Environment(self.entries)
+                        else:
+                            # Use an empty environment and workingset to avoid
+                            # any further conflicts with the conflicting
+                            # distribution
+                            env = Environment([])
+                            ws = WorkingSet([])
+                    dist = best[req.key] = env.best_match(
+                        req, ws, installer,
+                        replace_conflicting=replace_conflicting
+                    )
+                    if dist is None:
+                        requirers = required_by.get(req, None)
+                        raise DistributionNotFound(req, requirers)
+                to_activate.append(dist)
+            if dist not in req:
+                # Oops, the "best" so far conflicts with a dependency
+                dependent_req = required_by[req]
+                raise VersionConflict(dist, req).with_context(dependent_req)
+
+            # push the new requirements onto the stack
+            new_requirements = dist.requires(req.extras)[::-1]
+            requirements.extend(new_requirements)
+
+            # Register the new requirements needed by req
+            for new_requirement in new_requirements:
+                required_by[new_requirement].add(req.project_name)
+                req_extras[new_requirement] = req.extras
+
+            processed[req] = True
+
+        # return list of distros to activate
+        return to_activate
+
+    def find_plugins(
+            self, plugin_env, full_env=None, installer=None, fallback=True):
+        """Find all activatable distributions in `plugin_env`
+
+        Example usage::
+
+            distributions, errors = working_set.find_plugins(
+                Environment(plugin_dirlist)
+            )
+            # add plugins+libs to sys.path
+            list(map(working_set.add, distributions))
+            # display errors
+            print('Could not load', errors)
+
+        The `plugin_env` should be an ``Environment`` instance that contains
+        only distributions that are in the project's "plugin directory" or
+        directories. The `full_env`, if supplied, should be an ``Environment``
+        that contains all currently-available distributions.  If `full_env` is
+        not supplied, one is created automatically from the ``WorkingSet`` this
+        method is called on, which will typically mean that every directory on
+        ``sys.path`` will be scanned for distributions.
+
+        `installer` is a standard installer callback as used by the
+        ``resolve()`` method. The `fallback` flag indicates whether we should
+        attempt to resolve older versions of a plugin if the newest version
+        cannot be resolved.
+
+        This method returns a 2-tuple: (`distributions`, `error_info`), where
+        `distributions` is a list of the distributions found in `plugin_env`
+        that were loadable, along with any other distributions that are needed
+        to resolve their dependencies.  `error_info` is a dictionary mapping
+        unloadable plugin distributions to an exception instance describing the
+        error that occurred. Usually this will be a ``DistributionNotFound`` or
+        ``VersionConflict`` instance.
+        """
+
+        plugin_projects = list(plugin_env)
+        # scan project names in alphabetic order
+        plugin_projects.sort()
+
+        error_info = {}
+        distributions = {}
+
+        if full_env is None:
+            env = Environment(self.entries)
+            env += plugin_env
+        else:
+            env = full_env + plugin_env
+
+        shadow_set = self.__class__([])
+        # put all our entries in shadow_set
+        list(map(shadow_set.add, self))
+
+        for project_name in plugin_projects:
+
+            for dist in plugin_env[project_name]:
+
+                req = [dist.as_requirement()]
+
+                try:
+                    resolvees = shadow_set.resolve(req, env, installer)
+
+                except ResolutionError as v:
+                    # save error info
+                    error_info[dist] = v
+                    if fallback:
+                        # try the next older version of project
+                        continue
+                    else:
+                        # give up on this project, keep going
+                        break
+
+                else:
+                    list(map(shadow_set.add, resolvees))
+                    distributions.update(dict.fromkeys(resolvees))
+
+                    # success, no need to try any more versions of this project
+                    break
+
+        distributions = list(distributions)
+        distributions.sort()
+
+        return distributions, error_info
+
+    def require(self, *requirements):
+        """Ensure that distributions matching `requirements` are activated
+
+        `requirements` must be a string or a (possibly-nested) sequence
+        thereof, specifying the distributions and versions required.  The
+        return value is a sequence of the distributions that needed to be
+        activated to fulfill the requirements; all relevant distributions are
+        included, even if they were already activated in this working set.
+        """
+        needed = self.resolve(parse_requirements(requirements))
+
+        for dist in needed:
+            self.add(dist)
+
+        return needed
+
+    def subscribe(self, callback, existing=True):
+        """Invoke `callback` for all distributions
+
+        If `existing=True` (default), call on all existing ones, as well.
+        """
+        if callback in self.callbacks:
+            return
+        self.callbacks.append(callback)
+        if not existing:
+            return
+        for dist in self:
+            callback(dist)
+
+    def _added_new(self, dist):
+        for callback in self.callbacks:
+            callback(dist)
+
+    def __getstate__(self):
+        return (
+            self.entries[:], self.entry_keys.copy(), self.by_key.copy(),
+            self.callbacks[:]
+        )
+
+    def __setstate__(self, e_k_b_c):
+        entries, keys, by_key, callbacks = e_k_b_c
+        self.entries = entries[:]
+        self.entry_keys = keys.copy()
+        self.by_key = by_key.copy()
+        self.callbacks = callbacks[:]
+
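+
+# Illustrative sketch, not part of upstream pkg_resources: typical WorkingSet
+# usage. The "setuptools" requirement and the "console_scripts" group are
+# assumptions about what happens to be installed.
+def _example_working_set(requirement="setuptools"):
+    ws = WorkingSet()                  # snapshot of sys.path
+    # require() resolves the requirement plus its dependencies and activates
+    # the matching distributions, returning everything that was needed.
+    needed = ws.require(requirement)
+    # iter_entry_points() walks entry points advertised by active distributions.
+    script_names = [ep.name for ep in ws.iter_entry_points("console_scripts")]
+    return needed, script_names
+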
+
+class _ReqExtras(dict):
+    """
+    Map each requirement to the extras that demanded it.
+    """
+
+    def markers_pass(self, req, extras=None):
+        """
+        Evaluate markers for req against each extra that
+        demanded it.
+
+        Return False if the req has a marker and fails
+        evaluation. Otherwise, return True.
+        """
+        extra_evals = (
+            req.marker.evaluate({'extra': extra})
+            for extra in self.get(req, ()) + (extras or (None,))
+        )
+        return not req.marker or any(extra_evals)
+
+
+class Environment(object):
+    """Searchable snapshot of distributions on a search path"""
+
+    def __init__(
+            self, search_path=None, platform=get_supported_platform(),
+            python=PY_MAJOR):
+        """Snapshot distributions available on a search path
+
+        Any distributions found on `search_path` are added to the environment.
+        `search_path` should be a sequence of ``sys.path`` items.  If not
+        supplied, ``sys.path`` is used.
+
+        `platform` is an optional string specifying the name of the platform
+        that platform-specific distributions must be compatible with.  If
+        unspecified, it defaults to the current platform.  `python` is an
+        optional string naming the desired version of Python (e.g. ``'3.3'``);
+        it defaults to the current version.
+
+        You may explicitly set `platform` (and/or `python`) to ``None`` if you
+        wish to map *all* distributions, not just those compatible with the
+        running platform or Python version.
+        """
+        self._distmap = {}
+        self.platform = platform
+        self.python = python
+        self.scan(search_path)
+
+    def can_add(self, dist):
+        """Is distribution `dist` acceptable for this environment?
+
+        The distribution must match the platform and python version
+        requirements specified when this environment was created, or False
+        is returned.
+        """
+        py_compat = (
+            self.python is None
+            or dist.py_version is None
+            or dist.py_version == self.python
+        )
+        return py_compat and compatible_platforms(dist.platform, self.platform)
+
+    def remove(self, dist):
+        """Remove `dist` from the environment"""
+        self._distmap[dist.key].remove(dist)
+
+    def scan(self, search_path=None):
+        """Scan `search_path` for distributions usable in this environment
+
+        Any distributions found are added to the environment.
+        `search_path` should be a sequence of ``sys.path`` items.  If not
+        supplied, ``sys.path`` is used.  Only distributions conforming to
+        the platform/python version defined at initialization are added.
+        """
+        if search_path is None:
+            search_path = sys.path
+
+        for item in search_path:
+            for dist in find_distributions(item):
+                self.add(dist)
+
+    def __getitem__(self, project_name):
+        """Return a newest-to-oldest list of distributions for `project_name`
+
+        Uses case-insensitive `project_name` comparison, assuming all the
+        project's distributions use their project's name converted to all
+        lowercase as their key.
+
+        """
+        distribution_key = project_name.lower()
+        return self._distmap.get(distribution_key, [])
+
+    def add(self, dist):
+        """Add `dist` if we ``can_add()`` it and it has not already been added
+        """
+        if self.can_add(dist) and dist.has_version():
+            dists = self._distmap.setdefault(dist.key, [])
+            if dist not in dists:
+                dists.append(dist)
+                dists.sort(key=operator.attrgetter('hashcmp'), reverse=True)
+
+    def best_match(
+            self, req, working_set, installer=None, replace_conflicting=False):
+        """Find distribution best matching `req` and usable on `working_set`
+
+        This calls the ``find(req)`` method of the `working_set` to see if a
+        suitable distribution is already active.  (This may raise
+        ``VersionConflict`` if an unsuitable version of the project is already
+        active in the specified `working_set`.)  If a suitable distribution
+        isn't active, this method returns the newest distribution in the
+        environment that meets the ``Requirement`` in `req`.  If no suitable
+        distribution is found, and `installer` is supplied, then the result of
+        calling the environment's ``obtain(req, installer)`` method will be
+        returned.
+        """
+        try:
+            dist = working_set.find(req)
+        except VersionConflict:
+            if not replace_conflicting:
+                raise
+            dist = None
+        if dist is not None:
+            return dist
+        for dist in self[req.key]:
+            if dist in req:
+                return dist
+        # try to download/install
+        return self.obtain(req, installer)
+
+    def obtain(self, requirement, installer=None):
+        """Obtain a distribution matching `requirement` (e.g. via download)
+
+        Obtain a distro that matches requirement (e.g. via download).  In the
+        base ``Environment`` class, this routine just returns
+        ``installer(requirement)``, unless `installer` is None, in which case
+        None is returned instead.  This method is a hook that allows subclasses
+        to attempt other ways of obtaining a distribution before falling back
+        to the `installer` argument."""
+        if installer is not None:
+            return installer(requirement)
+
+    def __iter__(self):
+        """Yield the unique project names of the available distributions"""
+        for key in self._distmap.keys():
+            if self[key]:
+                yield key
+
+    def __iadd__(self, other):
+        """In-place addition of a distribution or environment"""
+        if isinstance(other, Distribution):
+            self.add(other)
+        elif isinstance(other, Environment):
+            for project in other:
+                for dist in other[project]:
+                    self.add(dist)
+        else:
+            raise TypeError("Can't add %r to environment" % (other,))
+        return self
+
+    def __add__(self, other):
+        """Add an environment or distribution to an environment"""
+        new = self.__class__([], platform=None, python=None)
+        for env in self, other:
+            new += env
+        return new
+
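+
+# Illustrative sketch, not part of upstream pkg_resources: Environment as a
+# searchable index of what *could* be activated, next to the WorkingSet of
+# what *is* active. The requirement string is an assumption for illustration.
+def _example_environment(requirement="setuptools>=0.6"):
+    env = Environment()                 # scans sys.path by default
+    req = Requirement.parse(requirement)
+    candidates = env[req.key]           # newest-to-oldest list of Distributions
+    # best_match() prefers an already-active distribution, then the newest
+    # compatible candidate, then whatever obtain()/installer can provide.
+    best = env.best_match(req, WorkingSet([]))
+    return candidates, best
+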
+
+# XXX backward compatibility
+AvailableDistributions = Environment
+
+
+class ExtractionError(RuntimeError):
+    """An error occurred extracting a resource
+
+    The following attributes are available from instances of this exception:
+
+    manager
+        The resource manager that raised this exception
+
+    cache_path
+        The base directory for resource extraction
+
+    original_error
+        The exception instance that caused extraction to fail
+    """
+
+
+class ResourceManager:
+    """Manage resource extraction and packages"""
+    extraction_path = None
+
+    def __init__(self):
+        self.cached_files = {}
+
+    def resource_exists(self, package_or_requirement, resource_name):
+        """Does the named resource exist?"""
+        return get_provider(package_or_requirement).has_resource(resource_name)
+
+    def resource_isdir(self, package_or_requirement, resource_name):
+        """Is the named resource an existing directory?"""
+        return get_provider(package_or_requirement).resource_isdir(
+            resource_name
+        )
+
+    def resource_filename(self, package_or_requirement, resource_name):
+        """Return a true filesystem path for specified resource"""
+        return get_provider(package_or_requirement).get_resource_filename(
+            self, resource_name
+        )
+
+    def resource_stream(self, package_or_requirement, resource_name):
+        """Return a readable file-like object for specified resource"""
+        return get_provider(package_or_requirement).get_resource_stream(
+            self, resource_name
+        )
+
+    def resource_string(self, package_or_requirement, resource_name):
+        """Return specified resource as a string"""
+        return get_provider(package_or_requirement).get_resource_string(
+            self, resource_name
+        )
+
+    def resource_listdir(self, package_or_requirement, resource_name):
+        """List the contents of the named resource directory"""
+        return get_provider(package_or_requirement).resource_listdir(
+            resource_name
+        )
+
+    def extraction_error(self):
+        """Give an error message for problems extracting file(s)"""
+
+        old_exc = sys.exc_info()[1]
+        cache_path = self.extraction_path or get_default_cache()
+
+        tmpl = textwrap.dedent("""
+            Can't extract file(s) to egg cache
+
+            The following error occurred while trying to extract file(s)
+            to the Python egg cache:
+
+              {old_exc}
+
+            The Python egg cache directory is currently set to:
+
+              {cache_path}
+
+            Perhaps your account does not have write access to this directory?
+            You can change the cache directory by setting the PYTHON_EGG_CACHE
+            environment variable to point to an accessible directory.
+            """).lstrip()
+        err = ExtractionError(tmpl.format(**locals()))
+        err.manager = self
+        err.cache_path = cache_path
+        err.original_error = old_exc
+        raise err
+
+    def get_cache_path(self, archive_name, names=()):
+        """Return absolute location in cache for `archive_name` and `names`
+
+        The parent directory of the resulting path will be created if it does
+        not already exist.  `archive_name` should be the base filename of the
+        enclosing egg (which may not be the name of the enclosing zipfile!),
+        including its ".egg" extension.  `names`, if provided, should be a
+        sequence of path name parts "under" the egg's extraction location.
+
+        This method should only be called by resource providers that need to
+        obtain an extraction location, and only for names they intend to
+        extract, as it tracks the generated names for possible cleanup later.
+        """
+        extract_path = self.extraction_path or get_default_cache()
+        target_path = os.path.join(extract_path, archive_name + '-tmp', *names)
+        try:
+            _bypass_ensure_directory(target_path)
+        except Exception:
+            self.extraction_error()
+
+        self._warn_unsafe_extraction_path(extract_path)
+
+        self.cached_files[target_path] = 1
+        return target_path
+
+    @staticmethod
+    def _warn_unsafe_extraction_path(path):
+        """
+        If the default extraction path is overridden and set to an insecure
+        location, such as /tmp, it opens up an opportunity for an attacker to
+        replace an extracted file with an unauthorized payload. Warn the user
+        if a known insecure location is used.
+
+        See Distribute #375 for more details.
+        """
+        if os.name == 'nt' and not path.startswith(os.environ['windir']):
+            # On Windows, permissions are generally restrictive by default
+            #  and temp directories are not writable by other users, so
+            #  bypass the warning.
+            return
+        mode = os.stat(path).st_mode
+        if mode & stat.S_IWOTH or mode & stat.S_IWGRP:
+            msg = (
+                "%s is writable by group/others and vulnerable to attack "
+                "when "
+                "used with get_resource_filename. Consider a more secure "
+                "location (set with .set_extraction_path or the "
+                "PYTHON_EGG_CACHE environment variable)." % path
+            )
+            warnings.warn(msg, UserWarning)
+
+    def postprocess(self, tempname, filename):
+        """Perform any platform-specific postprocessing of `tempname`
+
+        This is where Mac header rewrites should be done; other platforms don't
+        have anything special they should do.
+
+        Resource providers should call this method ONLY after successfully
+        extracting a compressed resource.  They must NOT call it on resources
+        that are already in the filesystem.
+
+        `tempname` is the current (temporary) name of the file, and `filename`
+        is the name it will be renamed to by the caller after this routine
+        returns.
+        """
+
+        if os.name == 'posix':
+            # Make the resource executable
+            mode = ((os.stat(tempname).st_mode) | 0o555) & 0o7777
+            os.chmod(tempname, mode)
+
+    def set_extraction_path(self, path):
+        """Set the base path where resources will be extracted to, if needed.
+
+        If you do not call this routine before any extractions take place, the
+        path defaults to the return value of ``get_default_cache()``.  (Which
+        is based on the ``PYTHON_EGG_CACHE`` environment variable, with various
+        platform-specific fallbacks.  See that routine's documentation for more
+        details.)
+
+        Resources are extracted to subdirectories of this path based upon
+        information given by the ``IResourceProvider``.  You may set this to a
+        temporary directory, but then you must call ``cleanup_resources()`` to
+        delete the extracted files when done.  There is no guarantee that
+        ``cleanup_resources()`` will be able to remove all extracted files.
+
+        (Note: you may not change the extraction path for a given resource
+        manager once resources have been extracted, unless you first call
+        ``cleanup_resources()``.)
+        """
+        if self.cached_files:
+            raise ValueError(
+                "Can't change extraction path, files already extracted"
+            )
+
+        self.extraction_path = path
+
+    def cleanup_resources(self, force=False):
+        """
+        Delete all extracted resource files and directories, returning a list
+        of the file and directory names that could not be successfully removed.
+        This function does not have any concurrency protection, so it should
+        generally only be called when the extraction path is a temporary
+        directory exclusive to a single process.  This method is not
+        automatically called; you must call it explicitly or register it as an
+        ``atexit`` function if you wish to ensure cleanup of a temporary
+        directory used for extractions.
+        """
+        # XXX
+
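+
+# Illustrative sketch, not part of upstream pkg_resources: ResourceManager in
+# action. At import time this module binds a shared manager whose methods back
+# the module-level resource_* helpers; a private instance keeps the example
+# self-contained. The package and resource names are assumptions.
+def _example_resource_access(package="pkg_resources", name="__init__.py"):
+    manager = ResourceManager()
+    if not manager.resource_exists(package, name):
+        return None
+    data = manager.resource_string(package, name)      # bytes of the resource
+    # resource_filename() yields a real path, extracting to the egg cache
+    # (PYTHON_EGG_CACHE / get_default_cache()) when the package is zipped.
+    path = manager.resource_filename(package, name)
+    return len(data), path
+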
+
+def get_default_cache():
+    """
+    Return the ``PYTHON_EGG_CACHE`` environment variable
+    or a platform-relevant user cache dir for an app
+    named "Python-Eggs".
+    """
+    return (
+        os.environ.get('PYTHON_EGG_CACHE')
+        or appdirs.user_cache_dir(appname='Python-Eggs')
+    )
+
+
+def safe_name(name):
+    """Convert an arbitrary string to a standard distribution name
+
+    Any runs of non-alphanumeric/. characters are replaced with a single '-'.
+    """
+    return re.sub('[^A-Za-z0-9.]+', '-', name)
+
+
+def safe_version(version):
+    """
+    Convert an arbitrary string to a standard version string
+    """
+    try:
+        # normalize the version
+        return str(packaging.version.Version(version))
+    except packaging.version.InvalidVersion:
+        version = version.replace(' ', '.')
+        return re.sub('[^A-Za-z0-9.]+', '-', version)
+
+
+def safe_extra(extra):
+    """Convert an arbitrary string to a standard 'extra' name
+
+    Any runs of non-alphanumeric characters are replaced with a single '_',
+    and the result is always lowercased.
+    """
+    return re.sub('[^A-Za-z0-9.-]+', '_', extra).lower()
+
+
+def to_filename(name):
+    """Convert a project or version name to its filename-escaped form
+
+    Any '-' characters are currently replaced with '_'.
+    """
+    return name.replace('-', '_')
+
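+
+# Illustrative sketch, not part of upstream pkg_resources: the normalisation
+# helpers above on concrete inputs.
+def _example_name_helpers():
+    assert safe_name("pkg resources") == "pkg-resources"
+    assert to_filename("pkg-resources") == "pkg_resources"
+    # A version PEP 440 cannot parse is normalised by substitution, not rejected.
+    assert safe_version("1.0 beta") == "1.0.beta"
+    assert safe_extra("Feature One") == "feature_one"
+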
+
+def invalid_marker(text):
+    """
+    Validate text as a PEP 508 environment marker; return an exception
+    if invalid or False otherwise.
+    """
+    try:
+        evaluate_marker(text)
+    except SyntaxError as e:
+        e.filename = None
+        e.lineno = None
+        return e
+    return False
+
+
+def evaluate_marker(text, extra=None):
+    """
+    Evaluate a PEP 508 environment marker.
+    Return a boolean indicating the marker result in this environment.
+    Raise SyntaxError if marker is invalid.
+
+    This implementation uses the 'pyparsing' module.
+    """
+    try:
+        marker = packaging.markers.Marker(text)
+        return marker.evaluate()
+    except packaging.markers.InvalidMarker as e:
+        raise SyntaxError(e)
+
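+
+# Illustrative sketch, not part of upstream pkg_resources: marker evaluation
+# against the running interpreter, plus validation via invalid_marker().
+def _example_markers():
+    assert evaluate_marker('python_version > "1.0"')
+    # invalid_marker() returns False for a well-formed marker...
+    assert invalid_marker('os_name == "posix"') is False
+    # ...and returns (rather than raises) the SyntaxError for a malformed one.
+    assert isinstance(invalid_marker('this is not a marker'), SyntaxError)
+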
+
+class NullProvider:
+    """Try to implement resources and metadata for arbitrary PEP 302 loaders"""
+
+    egg_name = None
+    egg_info = None
+    loader = None
+
+    def __init__(self, module):
+        self.loader = getattr(module, '__loader__', None)
+        self.module_path = os.path.dirname(getattr(module, '__file__', ''))
+
+    def get_resource_filename(self, manager, resource_name):
+        return self._fn(self.module_path, resource_name)
+
+    def get_resource_stream(self, manager, resource_name):
+        return io.BytesIO(self.get_resource_string(manager, resource_name))
+
+    def get_resource_string(self, manager, resource_name):
+        return self._get(self._fn(self.module_path, resource_name))
+
+    def has_resource(self, resource_name):
+        return self._has(self._fn(self.module_path, resource_name))
+
+    def has_metadata(self, name):
+        return self.egg_info and self._has(self._fn(self.egg_info, name))
+
+    def get_metadata(self, name):
+        if not self.egg_info:
+            return ""
+        value = self._get(self._fn(self.egg_info, name))
+        return value.decode('utf-8') if six.PY3 else value
+
+    def get_metadata_lines(self, name):
+        return yield_lines(self.get_metadata(name))
+
+    def resource_isdir(self, resource_name):
+        return self._isdir(self._fn(self.module_path, resource_name))
+
+    def metadata_isdir(self, name):
+        return self.egg_info and self._isdir(self._fn(self.egg_info, name))
+
+    def resource_listdir(self, resource_name):
+        return self._listdir(self._fn(self.module_path, resource_name))
+
+    def metadata_listdir(self, name):
+        if self.egg_info:
+            return self._listdir(self._fn(self.egg_info, name))
+        return []
+
+    def run_script(self, script_name, namespace):
+        script = 'scripts/' + script_name
+        if not self.has_metadata(script):
+            raise ResolutionError(
+                "Script {script!r} not found in metadata at {self.egg_info!r}"
+                .format(**locals()),
+            )
+        script_text = self.get_metadata(script).replace('\r\n', '\n')
+        script_text = script_text.replace('\r', '\n')
+        script_filename = self._fn(self.egg_info, script)
+        namespace['__file__'] = script_filename
+        if os.path.exists(script_filename):
+            source = open(script_filename).read()
+            code = compile(source, script_filename, 'exec')
+            exec(code, namespace, namespace)
+        else:
+            from linecache import cache
+            cache[script_filename] = (
+                len(script_text), 0, script_text.split('\n'), script_filename
+            )
+            script_code = compile(script_text, script_filename, 'exec')
+            exec(script_code, namespace, namespace)
+
+    def _has(self, path):
+        raise NotImplementedError(
+            "Can't perform this operation for unregistered loader type"
+        )
+
+    def _isdir(self, path):
+        raise NotImplementedError(
+            "Can't perform this operation for unregistered loader type"
+        )
+
+    def _listdir(self, path):
+        raise NotImplementedError(
+            "Can't perform this operation for unregistered loader type"
+        )
+
+    def _fn(self, base, resource_name):
+        if resource_name:
+            return os.path.join(base, *resource_name.split('/'))
+        return base
+
+    def _get(self, path):
+        if hasattr(self.loader, 'get_data'):
+            return self.loader.get_data(path)
+        raise NotImplementedError(
+            "Can't perform this operation for loaders without 'get_data()'"
+        )
+
+
+register_loader_type(object, NullProvider)
+
+
+class EggProvider(NullProvider):
+    """Provider based on a virtual filesystem"""
+
+    def __init__(self, module):
+        NullProvider.__init__(self, module)
+        self._setup_prefix()
+
+    def _setup_prefix(self):
+        # we assume here that our metadata may be nested inside a "basket"
+        # of multiple eggs; that's why we use module_path instead of .archive
+        path = self.module_path
+        old = None
+        while path != old:
+            if _is_egg_path(path):
+                self.egg_name = os.path.basename(path)
+                self.egg_info = os.path.join(path, 'EGG-INFO')
+                self.egg_root = path
+                break
+            old = path
+            path, base = os.path.split(path)
+
+
+class DefaultProvider(EggProvider):
+    """Provides access to package resources in the filesystem"""
+
+    def _has(self, path):
+        return os.path.exists(path)
+
+    def _isdir(self, path):
+        return os.path.isdir(path)
+
+    def _listdir(self, path):
+        return os.listdir(path)
+
+    def get_resource_stream(self, manager, resource_name):
+        return open(self._fn(self.module_path, resource_name), 'rb')
+
+    def _get(self, path):
+        with open(path, 'rb') as stream:
+            return stream.read()
+
+    @classmethod
+    def _register(cls):
+        loader_cls = getattr(
+            importlib_machinery,
+            'SourceFileLoader',
+            type(None),
+        )
+        register_loader_type(loader_cls, cls)
+
+
+DefaultProvider._register()
+
+
+class EmptyProvider(NullProvider):
+    """Provider that returns nothing for all requests"""
+
+    module_path = None
+
+    _isdir = _has = lambda self, path: False
+
+    def _get(self, path):
+        return ''
+
+    def _listdir(self, path):
+        return []
+
+    def __init__(self):
+        pass
+
+
+empty_provider = EmptyProvider()
+
+
+class ZipManifests(dict):
+    """
+    zip manifest builder
+    """
+
+    @classmethod
+    def build(cls, path):
+        """
+        Build a dictionary similar to the zipimport directory
+        caches, except instead of tuples, store ZipInfo objects.
+
+        Use a platform-specific path separator (os.sep) for the path keys
+        for compatibility with pypy on Windows.
+        """
+        with zipfile.ZipFile(path) as zfile:
+            items = (
+                (
+                    name.replace('/', os.sep),
+                    zfile.getinfo(name),
+                )
+                for name in zfile.namelist()
+            )
+            return dict(items)
+
+    load = build
+
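+
+# Illustrative sketch, not part of upstream pkg_resources: ZipManifests.build()
+# on a throwaway archive. The temporary path and member name are invented.
+def _example_zip_manifest():
+    tmpdir = tempfile.mkdtemp()
+    archive = os.path.join(tmpdir, "sample.zip")
+    with zipfile.ZipFile(archive, "w") as zf:
+        zf.writestr("pkg/data.txt", "hello")
+    manifest = ZipManifests.build(archive)
+    # Keys use os.sep; values are ZipInfo objects rather than zipimport tuples.
+    size = manifest["pkg" + os.sep + "data.txt"].file_size
+    os.unlink(archive)
+    os.rmdir(tmpdir)
+    return size
+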
+
+class MemoizedZipManifests(ZipManifests):
+    """
+    Memoized zipfile manifests.
+    """
+    manifest_mod = collections.namedtuple('manifest_mod', 'manifest mtime')
+
+    def load(self, path):
+        """
+        Load a manifest at path or return a suitable manifest already loaded.
+        """
+        path = os.path.normpath(path)
+        mtime = os.stat(path).st_mtime
+
+        if path not in self or self[path].mtime != mtime:
+            manifest = self.build(path)
+            self[path] = self.manifest_mod(manifest, mtime)
+
+        return self[path].manifest
+
+
+class ZipProvider(EggProvider):
+    """Resource support for zips and eggs"""
+
+    eagers = None
+    _zip_manifests = MemoizedZipManifests()
+
+    def __init__(self, module):
+        EggProvider.__init__(self, module)
+        self.zip_pre = self.loader.archive + os.sep
+
+    def _zipinfo_name(self, fspath):
+        # Convert a virtual filename (full path to file) into a zipfile subpath
+        # usable with the zipimport directory cache for our target archive
+        fspath = fspath.rstrip(os.sep)
+        if fspath == self.loader.archive:
+            return ''
+        if fspath.startswith(self.zip_pre):
+            return fspath[len(self.zip_pre):]
+        raise AssertionError(
+            "%s is not a subpath of %s" % (fspath, self.zip_pre)
+        )
+
+    def _parts(self, zip_path):
+        # Convert a zipfile subpath into an egg-relative path part list.
+        # pseudo-fs path
+        fspath = self.zip_pre + zip_path
+        if fspath.startswith(self.egg_root + os.sep):
+            return fspath[len(self.egg_root) + 1:].split(os.sep)
+        raise AssertionError(
+            "%s is not a subpath of %s" % (fspath, self.egg_root)
+        )
+
+    @property
+    def zipinfo(self):
+        return self._zip_manifests.load(self.loader.archive)
+
+    def get_resource_filename(self, manager, resource_name):
+        if not self.egg_name:
+            raise NotImplementedError(
+                "resource_filename() only supported for .egg, not .zip"
+            )
+        # no need to lock for extraction, since we use temp names
+        zip_path = self._resource_to_zip(resource_name)
+        eagers = self._get_eager_resources()
+        if '/'.join(self._parts(zip_path)) in eagers:
+            for name in eagers:
+                self._extract_resource(manager, self._eager_to_zip(name))
+        return self._extract_resource(manager, zip_path)
+
+    @staticmethod
+    def _get_date_and_size(zip_stat):
+        size = zip_stat.file_size
+        # ymdhms+wday, yday, dst
+        date_time = zip_stat.date_time + (0, 0, -1)
+        # 1980 offset already done
+        timestamp = time.mktime(date_time)
+        return timestamp, size
+
+    def _extract_resource(self, manager, zip_path):
+
+        if zip_path in self._index():
+            for name in self._index()[zip_path]:
+                last = self._extract_resource(
+                    manager, os.path.join(zip_path, name)
+                )
+            # return the extracted directory name
+            return os.path.dirname(last)
+
+        timestamp, size = self._get_date_and_size(self.zipinfo[zip_path])
+
+        if not WRITE_SUPPORT:
+            raise IOError('"os.rename" and "os.unlink" are not supported '
+                          'on this platform')
+        try:
+
+            real_path = manager.get_cache_path(
+                self.egg_name, self._parts(zip_path)
+            )
+
+            if self._is_current(real_path, zip_path):
+                return real_path
+
+            outf, tmpnam = _mkstemp(
+                ".$extract",
+                dir=os.path.dirname(real_path),
+            )
+            os.write(outf, self.loader.get_data(zip_path))
+            os.close(outf)
+            utime(tmpnam, (timestamp, timestamp))
+            manager.postprocess(tmpnam, real_path)
+
+            try:
+                rename(tmpnam, real_path)
+
+            except os.error:
+                if os.path.isfile(real_path):
+                    if self._is_current(real_path, zip_path):
+                        # the file became current since it was checked above,
+                        #  so proceed.
+                        return real_path
+                    # Windows, del old file and retry
+                    elif os.name == 'nt':
+                        unlink(real_path)
+                        rename(tmpnam, real_path)
+                        return real_path
+                raise
+
+        except os.error:
+            # report a user-friendly error
+            manager.extraction_error()
+
+        return real_path
+
+    def _is_current(self, file_path, zip_path):
+        """
+        Return True if the file_path is current for this zip_path
+        """
+        timestamp, size = self._get_date_and_size(self.zipinfo[zip_path])
+        if not os.path.isfile(file_path):
+            return False
+        stat = os.stat(file_path)
+        if stat.st_size != size or stat.st_mtime != timestamp:
+            return False
+        # check that the contents match
+        zip_contents = self.loader.get_data(zip_path)
+        with open(file_path, 'rb') as f:
+            file_contents = f.read()
+        return zip_contents == file_contents
+
+    def _get_eager_resources(self):
+        if self.eagers is None:
+            eagers = []
+            for name in ('native_libs.txt', 'eager_resources.txt'):
+                if self.has_metadata(name):
+                    eagers.extend(self.get_metadata_lines(name))
+            self.eagers = eagers
+        return self.eagers
+
+    def _index(self):
+        try:
+            return self._dirindex
+        except AttributeError:
+            ind = {}
+            for path in self.zipinfo:
+                parts = path.split(os.sep)
+                while parts:
+                    parent = os.sep.join(parts[:-1])
+                    if parent in ind:
+                        ind[parent].append(parts[-1])
+                        break
+                    else:
+                        ind[parent] = [parts.pop()]
+            self._dirindex = ind
+            return ind
+
+    def _has(self, fspath):
+        zip_path = self._zipinfo_name(fspath)
+        return zip_path in self.zipinfo or zip_path in self._index()
+
+    def _isdir(self, fspath):
+        return self._zipinfo_name(fspath) in self._index()
+
+    def _listdir(self, fspath):
+        return list(self._index().get(self._zipinfo_name(fspath), ()))
+
+    def _eager_to_zip(self, resource_name):
+        return self._zipinfo_name(self._fn(self.egg_root, resource_name))
+
+    def _resource_to_zip(self, resource_name):
+        return self._zipinfo_name(self._fn(self.module_path, resource_name))
+
+
+register_loader_type(zipimport.zipimporter, ZipProvider)
+
+
+class FileMetadata(EmptyProvider):
+    """Metadata handler for standalone PKG-INFO files
+
+    Usage::
+
+        metadata = FileMetadata("/path/to/PKG-INFO")
+
+    This provider rejects all data and metadata requests except for PKG-INFO,
+    which is treated as existing, and will be the contents of the file at
+    the provided location.
+    """
+
+    def __init__(self, path):
+        self.path = path
+
+    def has_metadata(self, name):
+        return name == 'PKG-INFO' and os.path.isfile(self.path)
+
+    def get_metadata(self, name):
+        if name != 'PKG-INFO':
+            raise KeyError("No metadata except PKG-INFO is available")
+
+        with io.open(self.path, encoding='utf-8', errors="replace") as f:
+            metadata = f.read()
+        self._warn_on_replacement(metadata)
+        return metadata
+
+    def _warn_on_replacement(self, metadata):
+        # Python 2.7 compat for: replacement_char = '�'
+        replacement_char = b'\xef\xbf\xbd'.decode('utf-8')
+        if replacement_char in metadata:
+            tmpl = "{self.path} could not be properly decoded in UTF-8"
+            msg = tmpl.format(**locals())
+            warnings.warn(msg)
+
+    def get_metadata_lines(self, name):
+        return yield_lines(self.get_metadata(name))
+
+
+class PathMetadata(DefaultProvider):
+    """Metadata provider for egg directories
+
+    Usage::
+
+        # Development eggs:
+
+        egg_info = "/path/to/PackageName.egg-info"
+        base_dir = os.path.dirname(egg_info)
+        metadata = PathMetadata(base_dir, egg_info)
+        dist_name = os.path.splitext(os.path.basename(egg_info))[0]
+        dist = Distribution(basedir, project_name=dist_name, metadata=metadata)
+
+        # Unpacked egg directories:
+
+        egg_path = "/path/to/PackageName-ver-pyver-etc.egg"
+        metadata = PathMetadata(egg_path, os.path.join(egg_path,'EGG-INFO'))
+        dist = Distribution.from_filename(egg_path, metadata=metadata)
+    """
+
+    def __init__(self, path, egg_info):
+        self.module_path = path
+        self.egg_info = egg_info
+
+
+class EggMetadata(ZipProvider):
+    """Metadata provider for .egg files"""
+
+    def __init__(self, importer):
+        """Create a metadata provider from a zipimporter"""
+
+        self.zip_pre = importer.archive + os.sep
+        self.loader = importer
+        if importer.prefix:
+            self.module_path = os.path.join(importer.archive, importer.prefix)
+        else:
+            self.module_path = importer.archive
+        self._setup_prefix()
+
+
+_declare_state('dict', _distribution_finders={})
+
+
+def register_finder(importer_type, distribution_finder):
+    """Register `distribution_finder` to find distributions in sys.path items
+
+    `importer_type` is the type or class of a PEP 302 "Importer" (sys.path item
+    handler), and `distribution_finder` is a callable that, passed a path
+    item and the importer instance, yields ``Distribution`` instances found on
+    that path item.  See ``pkg_resources.find_on_path`` for an example."""
+    _distribution_finders[importer_type] = distribution_finder
+
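+
+# Illustrative sketch (not from the upstream module; names are hypothetical).
+# A finder receives the importer and the path item and yields Distribution
+# objects; ``find_nothing`` further down is the simplest real example:
+#
+#     class MyImporter(object):            # hypothetical sys.path item handler
+#         pass
+#
+#     def find_my_dists(importer, path_item, only=False):
+#         return iter(())                  # yield Distribution instances here
+#
+#     register_finder(MyImporter, find_my_dists)
+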
+
+def find_distributions(path_item, only=False):
+    """Yield distributions accessible via `path_item`"""
+    importer = get_importer(path_item)
+    finder = _find_adapter(_distribution_finders, importer)
+    return finder(importer, path_item, only)
+
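+
+# Illustrative sketch (not from the upstream module; the directory below is
+# hypothetical). Typical use of find_distributions():
+#
+#     for dist in find_distributions('/path/to/site-packages'):
+#         print(dist.project_name, dist.version)
+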
+
+def find_eggs_in_zip(importer, path_item, only=False):
+    """
+    Find eggs in zip files; possibly multiple nested eggs.
+    """
+    if importer.archive.endswith('.whl'):
+        # wheels are not supported with this finder
+        # they don't have PKG-INFO metadata, and won't ever contain eggs
+        return
+    metadata = EggMetadata(importer)
+    if metadata.has_metadata('PKG-INFO'):
+        yield Distribution.from_filename(path_item, metadata=metadata)
+    if only:
+        # don't yield nested distros
+        return
+    for subitem in metadata.resource_listdir('/'):
+        if _is_egg_path(subitem):
+            subpath = os.path.join(path_item, subitem)
+            dists = find_eggs_in_zip(zipimport.zipimporter(subpath), subpath)
+            for dist in dists:
+                yield dist
+        elif subitem.lower().endswith('.dist-info'):
+            subpath = os.path.join(path_item, subitem)
+            submeta = EggMetadata(zipimport.zipimporter(subpath))
+            submeta.egg_info = subpath
+            yield Distribution.from_location(path_item, subitem, submeta)
+
+
+register_finder(zipimport.zipimporter, find_eggs_in_zip)
+
+
+def find_nothing(importer, path_item, only=False):
+    return ()
+
+
+register_finder(object, find_nothing)
+
+
+def _by_version_descending(names):
+    """
+    Given a list of filenames, return them in descending order
+    by version number.
+
+    >>> names = 'bar', 'foo', 'Python-2.7.10.egg', 'Python-2.7.2.egg'
+    >>> _by_version_descending(names)
+    ['Python-2.7.10.egg', 'Python-2.7.2.egg', 'foo', 'bar']
+    >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.egg'
+    >>> _by_version_descending(names)
+    ['Setuptools-1.2.3.egg', 'Setuptools-1.2.3b1.egg']
+    >>> names = 'Setuptools-1.2.3b1.egg', 'Setuptools-1.2.3.post1.egg'
+    >>> _by_version_descending(names)
+    ['Setuptools-1.2.3.post1.egg', 'Setuptools-1.2.3b1.egg']
+    """
+    def _by_version(name):
+        """
+        Parse each component of the filename
+        """
+        name, ext = os.path.splitext(name)
+        parts = itertools.chain(name.split('-'), [ext])
+        return [packaging.version.parse(part) for part in parts]
+
+    return sorted(names, key=_by_version, reverse=True)
+
+
+def find_on_path(importer, path_item, only=False):
+    """Yield distributions accessible on a sys.path directory"""
+    path_item = _normalize_cached(path_item)
+
+    if _is_unpacked_egg(path_item):
+        yield Distribution.from_filename(
+            path_item, metadata=PathMetadata(
+                path_item, os.path.join(path_item, 'EGG-INFO')
+            )
+        )
+        return
+
+    entries = safe_listdir(path_item)
+
+    # for performance, before sorting by version,
+    # screen entries for only those that will yield
+    # distributions
+    filtered = (
+        entry
+        for entry in entries
+        if dist_factory(path_item, entry, only)
+    )
+
+    # scan for .egg and .egg-info in directory
+    path_item_entries = _by_version_descending(filtered)
+    for entry in path_item_entries:
+        fullpath = os.path.join(path_item, entry)
+        factory = dist_factory(path_item, entry, only)
+        for dist in factory(fullpath):
+            yield dist
+
+
+def dist_factory(path_item, entry, only):
+    """
+    Return a dist_factory for a path_item and entry
+    """
+    lower = entry.lower()
+    is_meta = any(map(lower.endswith, ('.egg-info', '.dist-info')))
+    return (
+        distributions_from_metadata
+        if is_meta else
+        find_distributions
+        if not only and _is_egg_path(entry) else
+        resolve_egg_link
+        if not only and lower.endswith('.egg-link') else
+        NoDists()
+    )
+
+
+class NoDists:
+    """
+    >>> bool(NoDists())
+    False
+
+    >>> list(NoDists()('anything'))
+    []
+    """
+    def __bool__(self):
+        return False
+    if six.PY2:
+        __nonzero__ = __bool__
+
+    def __call__(self, fullpath):
+        return iter(())
+
+
+def safe_listdir(path):
+    """
+    Attempt to list contents of path, but suppress some exceptions.
+    """
+    try:
+        return os.listdir(path)
+    except (PermissionError, NotADirectoryError):
+        pass
+    except OSError as e:
+        # Ignore the directory if does not exist, not a directory or
+        # permission denied
+        ignorable = (
+            e.errno in (errno.ENOTDIR, errno.EACCES, errno.ENOENT)
+            # Python 2 on Windows needs to be handled this way :(
+            or getattr(e, "winerror", None) == 267
+        )
+        if not ignorable:
+            raise
+    return ()
+
+
+def distributions_from_metadata(path):
+    root = os.path.dirname(path)
+    if os.path.isdir(path):
+        if len(os.listdir(path)) == 0:
+            # empty metadata dir; skip
+            return
+        metadata = PathMetadata(root, path)
+    else:
+        metadata = FileMetadata(path)
+    entry = os.path.basename(path)
+    yield Distribution.from_location(
+        root, entry, metadata, precedence=DEVELOP_DIST,
+    )
+
+
+def non_empty_lines(path):
+    """
+    Yield non-empty lines from file at path
+    """
+    with open(path) as f:
+        for line in f:
+            line = line.strip()
+            if line:
+                yield line
+
+
+def resolve_egg_link(path):
+    """
+    Given a path to an .egg-link, resolve distributions
+    present in the referenced path.
+    """
+    referenced_paths = non_empty_lines(path)
+    resolved_paths = (
+        os.path.join(os.path.dirname(path), ref)
+        for ref in referenced_paths
+    )
+    dist_groups = map(find_distributions, resolved_paths)
+    return next(dist_groups, ())
+
+
+register_finder(pkgutil.ImpImporter, find_on_path)
+
+if hasattr(importlib_machinery, 'FileFinder'):
+    register_finder(importlib_machinery.FileFinder, find_on_path)
+
+_declare_state('dict', _namespace_handlers={})
+_declare_state('dict', _namespace_packages={})
+
+
+def register_namespace_handler(importer_type, namespace_handler):
+    """Register `namespace_handler` to declare namespace packages
+
+    `importer_type` is the type or class of a PEP 302 "Importer" (sys.path item
+    handler), and `namespace_handler` is a callable like this::
+
+        def namespace_handler(importer, path_entry, moduleName, module):
+            # return a path_entry to use for child packages
+
+    Namespace handlers are only called if the importer object has already
+    agreed that it can handle the relevant path item, and they should only
+    return a subpath if the module __path__ does not already contain an
+    equivalent subpath.  For an example namespace handler, see
+    ``pkg_resources.file_ns_handler``.
+    """
+    _namespace_handlers[importer_type] = namespace_handler
+
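+
+# Illustrative sketch (not from the upstream module; names are hypothetical).
+# A namespace handler follows the callable shape documented above and returns
+# either a subpath for child packages or None; see ``file_ns_handler`` below
+# for the real filesystem/zipfile implementation:
+#
+#     def my_ns_handler(importer, path_entry, moduleName, module):
+#         subpath = os.path.join(path_entry, moduleName.split('.')[-1])
+#         return subpath                   # or None to add nothing
+#
+#     register_namespace_handler(MyImporter, my_ns_handler)  # MyImporter: hypothetical
+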
+
+def _handle_ns(packageName, path_item):
+    """Ensure that named package includes a subpath of path_item (if needed)"""
+
+    importer = get_importer(path_item)
+    if importer is None:
+        return None
+    loader = importer.find_module(packageName)
+    if loader is None:
+        return None
+    module = sys.modules.get(packageName)
+    if module is None:
+        module = sys.modules[packageName] = types.ModuleType(packageName)
+        module.__path__ = []
+        _set_parent_ns(packageName)
+    elif not hasattr(module, '__path__'):
+        raise TypeError("Not a package:", packageName)
+    handler = _find_adapter(_namespace_handlers, importer)
+    subpath = handler(importer, path_item, packageName, module)
+    if subpath is not None:
+        path = module.__path__
+        path.append(subpath)
+        loader.load_module(packageName)
+        _rebuild_mod_path(path, packageName, module)
+    return subpath
+
+
+def _rebuild_mod_path(orig_path, package_name, module):
+    """
+    Rebuild module.__path__ ensuring that all entries are ordered
+    corresponding to their sys.path order
+    """
+    sys_path = [_normalize_cached(p) for p in sys.path]
+
+    def safe_sys_path_index(entry):
+        """
+        Workaround for #520 and #513.
+        """
+        try:
+            return sys_path.index(entry)
+        except ValueError:
+            return float('inf')
+
+    def position_in_sys_path(path):
+        """
+        Return the ordinal of the path based on its position in sys.path
+        """
+        path_parts = path.split(os.sep)
+        module_parts = package_name.count('.') + 1
+        parts = path_parts[:-module_parts]
+        return safe_sys_path_index(_normalize_cached(os.sep.join(parts)))
+
+    if not isinstance(orig_path, list):
+        # Is this behavior useful when module.__path__ is not a list?
+        return
+
+    orig_path.sort(key=position_in_sys_path)
+    module.__path__[:] = [_normalize_cached(p) for p in orig_path]
+
+
+def declare_namespace(packageName):
+    """Declare that package 'packageName' is a namespace package"""
+
+    _imp.acquire_lock()
+    try:
+        if packageName in _namespace_packages:
+            return
+
+        path, parent = sys.path, None
+        if '.' in packageName:
+            parent = '.'.join(packageName.split('.')[:-1])
+            declare_namespace(parent)
+            if parent not in _namespace_packages:
+                __import__(parent)
+            try:
+                path = sys.modules[parent].__path__
+            except AttributeError:
+                raise TypeError("Not a package:", parent)
+
+        # Track what packages are namespaces, so when new path items are added,
+        # they can be updated
+        _namespace_packages.setdefault(parent, []).append(packageName)
+        _namespace_packages.setdefault(packageName, [])
+
+        for path_item in path:
+            # Ensure all the parent's path items are reflected in the child,
+            # if they apply
+            _handle_ns(packageName, path_item)
+
+    finally:
+        _imp.release_lock()
+
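+
+# Illustrative sketch (not from the upstream module; the package name is
+# hypothetical). The conventional call site for declare_namespace() is the
+# namespace package's own __init__.py, so that portions installed under
+# different sys.path entries share a single __path__:
+#
+#     # in mynamespace/__init__.py
+#     __import__('pkg_resources').declare_namespace(__name__)
+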
+
+def fixup_namespace_packages(path_item, parent=None):
+    """Ensure that previously-declared namespace packages include path_item"""
+    _imp.acquire_lock()
+    try:
+        for package in _namespace_packages.get(parent, ()):
+            subpath = _handle_ns(package, path_item)
+            if subpath:
+                fixup_namespace_packages(subpath, package)
+    finally:
+        _imp.release_lock()
+
+
+def file_ns_handler(importer, path_item, packageName, module):
+    """Compute an ns-package subpath for a filesystem or zipfile importer"""
+
+    subpath = os.path.join(path_item, packageName.split('.')[-1])
+    normalized = _normalize_cached(subpath)
+    for item in module.__path__:
+        if _normalize_cached(item) == normalized:
+            break
+    else:
+        # Only return the path if it's not already there
+        return subpath
+
+
+register_namespace_handler(pkgutil.ImpImporter, file_ns_handler)
+register_namespace_handler(zipimport.zipimporter, file_ns_handler)
+
+if hasattr(importlib_machinery, 'FileFinder'):
+    register_namespace_handler(importlib_machinery.FileFinder, file_ns_handler)
+
+
+def null_ns_handler(importer, path_item, packageName, module):
+    return None
+
+
+register_namespace_handler(object, null_ns_handler)
+
+
+def normalize_path(filename):
+    """Normalize a file/dir name for comparison purposes"""
+    return os.path.normcase(os.path.realpath(filename))
+
+
+def _normalize_cached(filename, _cache={}):
+    try:
+        return _cache[filename]
+    except KeyError:
+        _cache[filename] = result = normalize_path(filename)
+        return result
+
+
+def _is_egg_path(path):
+    """
+    Determine if given path appears to be an egg.
+    """
+    return path.lower().endswith('.egg')
+
+
+def _is_unpacked_egg(path):
+    """
+    Determine if given path appears to be an unpacked egg.
+    """
+    return (
+        _is_egg_path(path) and
+        os.path.isfile(os.path.join(path, 'EGG-INFO', 'PKG-INFO'))
+    )
+
+
+def _set_parent_ns(packageName):
+    parts = packageName.split('.')
+    name = parts.pop()
+    if parts:
+        parent = '.'.join(parts)
+        setattr(sys.modules[parent], name, sys.modules[packageName])
+
+
+def yield_lines(strs):
+    """Yield non-empty/non-comment lines of a string or sequence"""
+    if isinstance(strs, six.string_types):
+        for s in strs.splitlines():
+            s = s.strip()
+            # skip blank lines/comments
+            if s and not s.startswith('#'):
+                yield s
+    else:
+        for ss in strs:
+            for s in yield_lines(ss):
+                yield s
+
+
+MODULE = re.compile(r"\w+(\.\w+)*$").match
+EGG_NAME = re.compile(
+    r"""
+    (?P<name>[^-]+) (
+        -(?P<ver>[^-]+) (
+            -py(?P<pyver>[^-]+) (
+                -(?P<plat>.+)
+            )?
+        )?
+    )?
+    """,
+    re.VERBOSE | re.IGNORECASE,
+).match
+
+
+class EntryPoint(object):
+    """Object representing an advertised importable object"""
+
+    def __init__(self, name, module_name, attrs=(), extras=(), dist=None):
+        if not MODULE(module_name):
+            raise ValueError("Invalid module name", module_name)
+        self.name = name
+        self.module_name = module_name
+        self.attrs = tuple(attrs)
+        self.extras = tuple(extras)
+        self.dist = dist
+
+    def __str__(self):
+        s = "%s = %s" % (self.name, self.module_name)
+        if self.attrs:
+            s += ':' + '.'.join(self.attrs)
+        if self.extras:
+            s += ' [%s]' % ','.join(self.extras)
+        return s
+
+    def __repr__(self):
+        return "EntryPoint.parse(%r)" % str(self)
+
+    def load(self, require=True, *args, **kwargs):
+        """
+        Require packages for this EntryPoint, then resolve it.
+        """
+        if not require or args or kwargs:
+            warnings.warn(
+                "Parameters to load are deprecated.  Call .resolve and "
+                ".require separately.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+        if require:
+            self.require(*args, **kwargs)
+        return self.resolve()
+
+    def resolve(self):
+        """
+        Resolve the entry point from its module and attrs.
+        """
+        module = __import__(self.module_name, fromlist=['__name__'], level=0)
+        try:
+            return functools.reduce(getattr, self.attrs, module)
+        except AttributeError as exc:
+            raise ImportError(str(exc))
+
+    def require(self, env=None, installer=None):
+        if self.extras and not self.dist:
+            raise UnknownExtra("Can't require() without a distribution", self)
+
+        # Get the requirements for this entry point with all its extras and
+        # then resolve them. We have to pass `extras` along when resolving so
+        # that the working set knows what extras we want. Otherwise, for
+        # dist-info distributions, the working set will assume that the
+        # requirements for that extra are purely optional and skip over them.
+        reqs = self.dist.requires(self.extras)
+        items = working_set.resolve(reqs, env, installer, extras=self.extras)
+        list(map(working_set.add, items))
+
+    pattern = re.compile(
+        r'\s*'
+        r'(?P<name>.+?)\s*'
+        r'=\s*'
+        r'(?P<module>[\w.]+)\s*'
+        r'(:\s*(?P<attr>[\w.]+))?\s*'
+        r'(?P<extras>\[.*\])?\s*$'
+    )
+
+    @classmethod
+    def parse(cls, src, dist=None):
+        """Parse a single entry point from string `src`
+
+        Entry point syntax follows the form::
+
+            name = some.module:some.attr [extra1, extra2]
+
+        The entry name and module name are required, but the ``:attrs`` and
+        ``[extras]`` parts are optional
+        """
+        m = cls.pattern.match(src)
+        if not m:
+            msg = "EntryPoint must be in 'name=module:attrs [extras]' format"
+            raise ValueError(msg, src)
+        res = m.groupdict()
+        extras = cls._parse_extras(res['extras'])
+        attrs = res['attr'].split('.') if res['attr'] else ()
+        return cls(res['name'], res['module'], attrs, extras, dist)
+
+    @classmethod
+    def _parse_extras(cls, extras_spec):
+        if not extras_spec:
+            return ()
+        req = Requirement.parse('x' + extras_spec)
+        if req.specs:
+            raise ValueError()
+        return req.extras
+
+    @classmethod
+    def parse_group(cls, group, lines, dist=None):
+        """Parse an entry point group"""
+        if not MODULE(group):
+            raise ValueError("Invalid group name", group)
+        this = {}
+        for line in yield_lines(lines):
+            ep = cls.parse(line, dist)
+            if ep.name in this:
+                raise ValueError("Duplicate entry point", group, ep.name)
+            this[ep.name] = ep
+        return this
+
+    @classmethod
+    def parse_map(cls, data, dist=None):
+        """Parse a map of entry point groups"""
+        if isinstance(data, dict):
+            data = data.items()
+        else:
+            data = split_sections(data)
+        maps = {}
+        for group, lines in data:
+            if group is None:
+                if not lines:
+                    continue
+                raise ValueError("Entry points must be listed in groups")
+            group = group.strip()
+            if group in maps:
+                raise ValueError("Duplicate group name", group)
+            maps[group] = cls.parse_group(group, lines, dist)
+        return maps
+
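+
+# Illustrative sketch (not from the upstream module; mypkg, cli, main and
+# extra1 are hypothetical names). Parsing an advertised entry point with the
+# syntax documented in EntryPoint.parse():
+#
+#     ep = EntryPoint.parse('console = mypkg.cli:main [extra1]')
+#     ep.name          # 'console'
+#     ep.module_name   # 'mypkg.cli'
+#     ep.attrs         # ('main',)
+#     ep.extras        # ('extra1',)
+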
+
+def _remove_md5_fragment(location):
+    if not location:
+        return ''
+    parsed = urllib.parse.urlparse(location)
+    if parsed[-1].startswith('md5='):
+        return urllib.parse.urlunparse(parsed[:-1] + ('',))
+    return location
+
+
+def _version_from_file(lines):
+    """
+    Given an iterable of lines from a Metadata file, return
+    the value of the Version field, if present, or None otherwise.
+    """
+    def is_version_line(line):
+        return line.lower().startswith('version:')
+    version_lines = filter(is_version_line, lines)
+    line = next(iter(version_lines), '')
+    _, _, value = line.partition(':')
+    return safe_version(value.strip()) or None
+
+
+class Distribution(object):
+    """Wrap an actual or potential sys.path entry w/metadata"""
+    PKG_INFO = 'PKG-INFO'
+
+    def __init__(
+            self, location=None, metadata=None, project_name=None,
+            version=None, py_version=PY_MAJOR, platform=None,
+            precedence=EGG_DIST):
+        self.project_name = safe_name(project_name or 'Unknown')
+        if version is not None:
+            self._version = safe_version(version)
+        self.py_version = py_version
+        self.platform = platform
+        self.location = location
+        self.precedence = precedence
+        self._provider = metadata or empty_provider
+
+    @classmethod
+    def from_location(cls, location, basename, metadata=None, **kw):
+        project_name, version, py_version, platform = [None] * 4
+        basename, ext = os.path.splitext(basename)
+        if ext.lower() in _distributionImpl:
+            cls = _distributionImpl[ext.lower()]
+
+            match = EGG_NAME(basename)
+            if match:
+                project_name, version, py_version, platform = match.group(
+                    'name', 'ver', 'pyver', 'plat'
+                )
+        return cls(
+            location, metadata, project_name=project_name, version=version,
+            py_version=py_version, platform=platform, **kw
+        )._reload_version()
+
+    def _reload_version(self):
+        return self
+
+    @property
+    def hashcmp(self):
+        return (
+            self.parsed_version,
+            self.precedence,
+            self.key,
+            _remove_md5_fragment(self.location),
+            self.py_version or '',
+            self.platform or '',
+        )
+
+    def __hash__(self):
+        return hash(self.hashcmp)
+
+    def __lt__(self, other):
+        return self.hashcmp < other.hashcmp
+
+    def __le__(self, other):
+        return self.hashcmp <= other.hashcmp
+
+    def __gt__(self, other):
+        return self.hashcmp > other.hashcmp
+
+    def __ge__(self, other):
+        return self.hashcmp >= other.hashcmp
+
+    def __eq__(self, other):
+        if not isinstance(other, self.__class__):
+            # It's not a Distribution, so they are not equal
+            return False
+        return self.hashcmp == other.hashcmp
+
+    def __ne__(self, other):
+        return not self == other
+
+    # These properties have to be lazy so that we don't have to load any
+    # metadata until/unless it's actually needed.  (i.e., some distributions
+    # may not know their name or version without loading PKG-INFO)
+
+    @property
+    def key(self):
+        try:
+            return self._key
+        except AttributeError:
+            self._key = key = self.project_name.lower()
+            return key
+
+    @property
+    def parsed_version(self):
+        if not hasattr(self, "_parsed_version"):
+            self._parsed_version = parse_version(self.version)
+
+        return self._parsed_version
+
+    def _warn_legacy_version(self):
+        LV = packaging.version.LegacyVersion
+        is_legacy = isinstance(self._parsed_version, LV)
+        if not is_legacy:
+            return
+
+        # While an empty version is technically a legacy version and
+        # is not a valid PEP 440 version, it is unlikely to have been
+        # supplied by a user; more likely it comes from setuptools
+        # attempting to parse a filename and including it in the list.
+        # For that reason, we only emit this warning when the version
+        # is non-empty.
+        if not self.version:
+            return
+
+        tmpl = textwrap.dedent("""
+            '{project_name} ({version})' is being parsed as a legacy,
+            non PEP 440,
+            version. You may find odd behavior and sort order.
+            In particular it will be sorted as less than 0.0. It
+            is recommended to migrate to PEP 440 compatible
+            versions.
+            """).strip().replace('\n', ' ')
+
+        warnings.warn(tmpl.format(**vars(self)), PEP440Warning)
+
+    @property
+    def version(self):
+        try:
+            return self._version
+        except AttributeError:
+            version = _version_from_file(self._get_metadata(self.PKG_INFO))
+            if version is None:
+                tmpl = "Missing 'Version:' header and/or %s file"
+                raise ValueError(tmpl % self.PKG_INFO, self)
+            return version
+
+    @property
+    def _dep_map(self):
+        """
+        A map of extra to its list of (direct) requirements
+        for this distribution, including the null extra.
+        """
+        try:
+            return self.__dep_map
+        except AttributeError:
+            self.__dep_map = self._filter_extras(self._build_dep_map())
+        return self.__dep_map
+
+    @staticmethod
+    def _filter_extras(dm):
+        """
+        Given a mapping of extras to dependencies, strip off
+        environment markers and filter out any dependencies
+        not matching the markers.
+        """
+        for extra in list(filter(None, dm)):
+            new_extra = extra
+            reqs = dm.pop(extra)
+            new_extra, _, marker = extra.partition(':')
+            fails_marker = marker and (
+                invalid_marker(marker)
+                or not evaluate_marker(marker)
+            )
+            if fails_marker:
+                reqs = []
+            new_extra = safe_extra(new_extra) or None
+
+            dm.setdefault(new_extra, []).extend(reqs)
+        return dm
+
+    def _build_dep_map(self):
+        dm = {}
+        for name in 'requires.txt', 'depends.txt':
+            for extra, reqs in split_sections(self._get_metadata(name)):
+                dm.setdefault(extra, []).extend(parse_requirements(reqs))
+        return dm
+
+    def requires(self, extras=()):
+        """List of Requirements needed for this distro if `extras` are used"""
+        dm = self._dep_map
+        deps = []
+        deps.extend(dm.get(None, ()))
+        for ext in extras:
+            try:
+                deps.extend(dm[safe_extra(ext)])
+            except KeyError:
+                raise UnknownExtra(
+                    "%s has no such extra feature %r" % (self, ext)
+                )
+        return deps
+
+    def _get_metadata(self, name):
+        if self.has_metadata(name):
+            for line in self.get_metadata_lines(name):
+                yield line
+
+    def activate(self, path=None, replace=False):
+        """Ensure distribution is importable on `path` (default=sys.path)"""
+        if path is None:
+            path = sys.path
+        self.insert_on(path, replace=replace)
+        if path is sys.path:
+            fixup_namespace_packages(self.location)
+            for pkg in self._get_metadata('namespace_packages.txt'):
+                if pkg in sys.modules:
+                    declare_namespace(pkg)
+
+    def egg_name(self):
+        """Return what this distribution's standard .egg filename should be"""
+        filename = "%s-%s-py%s" % (
+            to_filename(self.project_name), to_filename(self.version),
+            self.py_version or PY_MAJOR
+        )
+
+        if self.platform:
+            filename += '-' + self.platform
+        return filename
+
+    def __repr__(self):
+        if self.location:
+            return "%s (%s)" % (self, self.location)
+        else:
+            return str(self)
+
+    def __str__(self):
+        try:
+            version = getattr(self, 'version', None)
+        except ValueError:
+            version = None
+        version = version or "[unknown version]"
+        return "%s %s" % (self.project_name, version)
+
+    def __getattr__(self, attr):
+        """Delegate all unrecognized public attributes to .metadata provider"""
+        if attr.startswith('_'):
+            raise AttributeError(attr)
+        return getattr(self._provider, attr)
+
+    @classmethod
+    def from_filename(cls, filename, metadata=None, **kw):
+        return cls.from_location(
+            _normalize_cached(filename), os.path.basename(filename), metadata,
+            **kw
+        )
+
+    def as_requirement(self):
+        """Return a ``Requirement`` that matches this distribution exactly"""
+        if isinstance(self.parsed_version, packaging.version.Version):
+            spec = "%s==%s" % (self.project_name, self.parsed_version)
+        else:
+            spec = "%s===%s" % (self.project_name, self.parsed_version)
+
+        return Requirement.parse(spec)
+
+    def load_entry_point(self, group, name):
+        """Return the `name` entry point of `group` or raise ImportError"""
+        ep = self.get_entry_info(group, name)
+        if ep is None:
+            raise ImportError("Entry point %r not found" % ((group, name),))
+        return ep.load()
+
+    def get_entry_map(self, group=None):
+        """Return the entry point map for `group`, or the full entry map"""
+        try:
+            ep_map = self._ep_map
+        except AttributeError:
+            ep_map = self._ep_map = EntryPoint.parse_map(
+                self._get_metadata('entry_points.txt'), self
+            )
+        if group is not None:
+            return ep_map.get(group, {})
+        return ep_map
+
+    def get_entry_info(self, group, name):
+        """Return the EntryPoint object for `group`+`name`, or ``None``"""
+        return self.get_entry_map(group).get(name)
+
+    def insert_on(self, path, loc=None, replace=False):
+        """Ensure self.location is on path
+
+        If replace=False (default):
+            - If location is already in path anywhere, do nothing.
+            - Else:
+              - If it's an egg and its parent directory is on path,
+                insert just ahead of the parent.
+              - Else: add to the end of path.
+        If replace=True:
+            - If location is already on path anywhere (not eggs)
+              or higher priority than its parent (eggs)
+              do nothing.
+            - Else:
+              - If it's an egg and its parent directory is on path,
+                insert just ahead of the parent,
+                removing any lower-priority entries.
+              - Else: add it to the front of path.
+        """
+
+        loc = loc or self.location
+        if not loc:
+            return
+
+        nloc = _normalize_cached(loc)
+        bdir = os.path.dirname(nloc)
+        npath = [(p and _normalize_cached(p) or p) for p in path]
+
+        for p, item in enumerate(npath):
+            if item == nloc:
+                if replace:
+                    break
+                else:
+                    # don't modify path (even removing duplicates) if
+                    # found and not replace
+                    return
+            elif item == bdir and self.precedence == EGG_DIST:
+                # if it's an .egg, give it precedence over its directory
+                # UNLESS it's already been added to sys.path and replace=False
+                if (not replace) and nloc in npath[p:]:
+                    return
+                if path is sys.path:
+                    self.check_version_conflict()
+                path.insert(p, loc)
+                npath.insert(p, nloc)
+                break
+        else:
+            if path is sys.path:
+                self.check_version_conflict()
+            if replace:
+                path.insert(0, loc)
+            else:
+                path.append(loc)
+            return
+
+        # p is the spot where we found or inserted loc; now remove duplicates
+        while True:
+            try:
+                np = npath.index(nloc, p + 1)
+            except ValueError:
+                break
+            else:
+                del npath[np], path[np]
+                # ha!
+                p = np
+
+        return
+
+    def check_version_conflict(self):
+        if self.key == 'setuptools':
+            # ignore the inevitable setuptools self-conflicts  :(
+            return
+
+        nsp = dict.fromkeys(self._get_metadata('namespace_packages.txt'))
+        loc = normalize_path(self.location)
+        for modname in self._get_metadata('top_level.txt'):
+            if (modname not in sys.modules or modname in nsp
+                    or modname in _namespace_packages):
+                continue
+            if modname in ('pkg_resources', 'setuptools', 'site'):
+                continue
+            fn = getattr(sys.modules[modname], '__file__', None)
+            if fn and (normalize_path(fn).startswith(loc) or
+                       fn.startswith(self.location)):
+                continue
+            issue_warning(
+                "Module %s was already imported from %s, but %s is being added"
+                " to sys.path" % (modname, fn, self.location),
+            )
+
+    def has_version(self):
+        try:
+            self.version
+        except ValueError:
+            issue_warning("Unbuilt egg for " + repr(self))
+            return False
+        return True
+
+    def clone(self, **kw):
+        """Copy this distribution, substituting in any changed keyword args"""
+        names = 'project_name version py_version platform location precedence'
+        for attr in names.split():
+            kw.setdefault(attr, getattr(self, attr, None))
+        kw.setdefault('metadata', self._provider)
+        return self.__class__(**kw)
+
+    @property
+    def extras(self):
+        return [dep for dep in self._dep_map if dep]
+
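+
+# Illustrative sketch (not from the upstream module; the location and filename
+# are hypothetical). How an egg filename is mapped onto Distribution metadata
+# via EGG_NAME and from_location():
+#
+#     dist = Distribution.from_location('/tmp', 'FooPkg-1.2-py3.6.egg')
+#     dist.project_name      # 'FooPkg'
+#     dist.version           # '1.2'
+#     dist.py_version        # '3.6'
+#     dist.as_requirement()  # Requirement.parse('FooPkg==1.2')
+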
+
+class EggInfoDistribution(Distribution):
+    def _reload_version(self):
+        """
+        Packages installed by distutils (e.g. numpy or scipy) use an old
+        safe_version, so their version numbers can get mangled when
+        converted to filenames (e.g., 1.11.0.dev0+2329eae to
+        1.11.0.dev0_2329eae). Such distributions will not be parsed
+        properly downstream by Distribution and safe_version, so take an
+        extra step and try to get the version number from the metadata
+        file itself instead of the filename.
+        """
+        md_version = _version_from_file(self._get_metadata(self.PKG_INFO))
+        if md_version:
+            self._version = md_version
+        return self
+
+
+class DistInfoDistribution(Distribution):
+    """
+    Wrap an actual or potential sys.path entry
+    w/metadata, .dist-info style.
+    """
+    PKG_INFO = 'METADATA'
+    EQEQ = re.compile(r"([\(,])\s*(\d.*?)\s*([,\)])")
+
+    @property
+    def _parsed_pkg_info(self):
+        """Parse and cache metadata"""
+        try:
+            return self._pkg_info
+        except AttributeError:
+            metadata = self.get_metadata(self.PKG_INFO)
+            self._pkg_info = email.parser.Parser().parsestr(metadata)
+            return self._pkg_info
+
+    @property
+    def _dep_map(self):
+        try:
+            return self.__dep_map
+        except AttributeError:
+            self.__dep_map = self._compute_dependencies()
+            return self.__dep_map
+
+    def _compute_dependencies(self):
+        """Recompute this distribution's dependencies."""
+        dm = self.__dep_map = {None: []}
+
+        reqs = []
+        # Including any condition expressions
+        for req in self._parsed_pkg_info.get_all('Requires-Dist') or []:
+            reqs.extend(parse_requirements(req))
+
+        def reqs_for_extra(extra):
+            for req in reqs:
+                if not req.marker or req.marker.evaluate({'extra': extra}):
+                    yield req
+
+        common = frozenset(reqs_for_extra(None))
+        dm[None].extend(common)
+
+        for extra in self._parsed_pkg_info.get_all('Provides-Extra') or []:
+            s_extra = safe_extra(extra.strip())
+            dm[s_extra] = list(frozenset(reqs_for_extra(extra)) - common)
+
+        return dm
+
+
+_distributionImpl = {
+    '.egg': Distribution,
+    '.egg-info': EggInfoDistribution,
+    '.dist-info': DistInfoDistribution,
+}
+
+
+def issue_warning(*args, **kw):
+    level = 1
+    g = globals()
+    try:
+        # find the first stack frame that is *not* code in
+        # the pkg_resources module, to use for the warning
+        while sys._getframe(level).f_globals is g:
+            level += 1
+    except ValueError:
+        pass
+    warnings.warn(stacklevel=level + 1, *args, **kw)
+
+
+class RequirementParseError(ValueError):
+    def __str__(self):
+        return ' '.join(self.args)
+
+
+def parse_requirements(strs):
+    """Yield ``Requirement`` objects for each specification in `strs`
+
+    `strs` must be a string, or a (possibly-nested) iterable thereof.
+    """
+    # create a steppable iterator, so we can handle \-continuations
+    lines = iter(yield_lines(strs))
+
+    for line in lines:
+        # Drop comments -- a hash without a space may be in a URL.
+        if ' #' in line:
+            line = line[:line.find(' #')]
+        # If there is a line continuation, drop it, and append the next line.
+        if line.endswith('\\'):
+            line = line[:-2].strip()
+            try:
+                line += next(lines)
+            except StopIteration:
+                return
+        yield Requirement(line)
+
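+
+# Illustrative sketch (not from the upstream module; the project names are
+# hypothetical). parse_requirements() over a small requirements string:
+#
+#     reqs = list(parse_requirements("foo>=1.0\nbar[extra]==2.0  # pinned"))
+#     [str(r) for r in reqs]   # ['foo>=1.0', 'bar[extra]==2.0']
+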
+
+class Requirement(packaging.requirements.Requirement):
+    def __init__(self, requirement_string):
+        """DO NOT CALL THIS UNDOCUMENTED METHOD; use Requirement.parse()!"""
+        try:
+            super(Requirement, self).__init__(requirement_string)
+        except packaging.requirements.InvalidRequirement as e:
+            raise RequirementParseError(str(e))
+        self.unsafe_name = self.name
+        project_name = safe_name(self.name)
+        self.project_name, self.key = project_name, project_name.lower()
+        self.specs = [
+            (spec.operator, spec.version) for spec in self.specifier]
+        self.extras = tuple(map(safe_extra, self.extras))
+        self.hashCmp = (
+            self.key,
+            self.specifier,
+            frozenset(self.extras),
+            str(self.marker) if self.marker else None,
+        )
+        self.__hash = hash(self.hashCmp)
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, Requirement) and
+            self.hashCmp == other.hashCmp
+        )
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __contains__(self, item):
+        if isinstance(item, Distribution):
+            if item.key != self.key:
+                return False
+
+            item = item.version
+
+        # Allow prereleases always in order to match the previous behavior of
+        # this method. In the future this should be smarter and follow PEP 440
+        # more accurately.
+        return self.specifier.contains(item, prereleases=True)
+
+    def __hash__(self):
+        return self.__hash
+
+    def __repr__(self):
+        return "Requirement.parse(%r)" % str(self)
+
+    @staticmethod
+    def parse(s):
+        req, = parse_requirements(s)
+        return req
+
+
+def _always_object(classes):
+    """
+    Ensure object appears in the mro even
+    for old-style classes.
+    """
+    if object not in classes:
+        return classes + (object,)
+    return classes
+
+
+def _find_adapter(registry, ob):
+    """Return an adapter factory for `ob` from `registry`"""
+    types = _always_object(inspect.getmro(getattr(ob, '__class__', type(ob))))
+    for t in types:
+        if t in registry:
+            return registry[t]
+
+
+def ensure_directory(path):
+    """Ensure that the parent directory of `path` exists"""
+    dirname = os.path.dirname(path)
+    py31compat.makedirs(dirname, exist_ok=True)
+
+
+def _bypass_ensure_directory(path):
+    """Sandbox-bypassing version of ensure_directory()"""
+    if not WRITE_SUPPORT:
+        raise IOError('"os.mkdir" not supported on this platform.')
+    dirname, filename = split(path)
+    if dirname and filename and not isdir(dirname):
+        _bypass_ensure_directory(dirname)
+        mkdir(dirname, 0o755)
+
+
+def split_sections(s):
+    """Split a string or iterable thereof into (section, content) pairs
+
+    Each ``section`` is a stripped version of the section header ("[section]")
+    and each ``content`` is a list of stripped lines excluding blank lines and
+    comment-only lines.  If there are any such lines before the first section
+    header, they're returned in a first ``section`` of ``None``.
+    """
+    section = None
+    content = []
+    for line in yield_lines(s):
+        if line.startswith("["):
+            if line.endswith("]"):
+                if section or content:
+                    yield section, content
+                section = line[1:-1].strip()
+                content = []
+            else:
+                raise ValueError("Invalid section heading", line)
+        else:
+            content.append(line)
+
+    # wrap up last segment
+    yield section, content
+
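+
+# Illustrative sketch (not from the upstream module; the section and entry
+# names are hypothetical). split_sections() on an entry_points.txt-style
+# payload:
+#
+#     text = "[console_scripts]\nfoo = mypkg.cli:main\n"
+#     list(split_sections(text))
+#     # -> [('console_scripts', ['foo = mypkg.cli:main'])]
+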
+
+def _mkstemp(*args, **kw):
+    old_open = os.open
+    try:
+        # temporarily bypass sandboxing
+        os.open = os_open
+        return tempfile.mkstemp(*args, **kw)
+    finally:
+        # and then put it back
+        os.open = old_open
+
+
+# Silence the PEP440Warning by default, so that end users don't get hit by it
+# randomly just because they use pkg_resources. We want to append the rule
+# because we want earlier uses of filterwarnings to take precedence over this
+# one.
+warnings.filterwarnings("ignore", category=PEP440Warning, append=True)
+
+
+# from jaraco.functools 1.3
+def _call_aside(f, *args, **kwargs):
+    f(*args, **kwargs)
+    return f
+
+
+@_call_aside
+def _initialize(g=globals()):
+    "Set up global resource manager (deliberately not state-saved)"
+    manager = ResourceManager()
+    g['_manager'] = manager
+    g.update(
+        (name, getattr(manager, name))
+        for name in dir(manager)
+        if not name.startswith('_')
+    )
+
+
+@_call_aside
+def _initialize_master_working_set():
+    """
+    Prepare the master working set and make the ``require()``
+    API available.
+
+    This function has explicit effects on the global state
+    of pkg_resources. It is intended to be invoked once at
+    the initialization of this module.
+
+    Invocation by other packages is unsupported and done
+    at their own risk.
+    """
+    working_set = WorkingSet._build_master()
+    _declare_state('object', working_set=working_set)
+
+    require = working_set.require
+    iter_entry_points = working_set.iter_entry_points
+    add_activation_listener = working_set.subscribe
+    run_script = working_set.run_script
+    # backward compatibility
+    run_main = run_script
+    # Activate all distributions already on sys.path with replace=False and
+    # ensure that all distributions added to the working set in the future
+    # (e.g. by calling ``require()``) will get activated as well,
+    # with higher priority (replace=True).
+    tuple(
+        dist.activate(replace=False)
+        for dist in working_set
+    )
+    add_activation_listener(
+        lambda dist: dist.activate(replace=True),
+        existing=False,
+    )
+    working_set.entries = []
+    # match order
+    list(map(working_set.add_entry, sys.path))
+    globals().update(locals())
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/appdirs.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/appdirs.py
new file mode 100644
index 0000000000000000000000000000000000000000..32e7c9f7cfa76e675e7f7ad88ef4a1872a8ff6a5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/appdirs.py
@@ -0,0 +1,552 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2005-2010 ActiveState Software Inc.
+# Copyright (c) 2013 Eddy Petrișor
+
+"""Utilities for determining application-specific dirs.
+
+See <http://github.com/ActiveState/appdirs> for details and usage.
+"""
+# Dev Notes:
+# - MSDN on where to store app data files:
+#   http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
+# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
+# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+
+__version_info__ = (1, 4, 0)
+__version__ = '.'.join(map(str, __version_info__))
+
+
+import sys
+import os
+
+PY3 = sys.version_info[0] == 3
+
+if PY3:
+    unicode = str
+
+if sys.platform.startswith('java'):
+    import platform
+    os_name = platform.java_ver()[3][0]
+    if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
+        system = 'win32'
+    elif os_name.startswith('Mac'): # "Mac OS X", etc.
+        system = 'darwin'
+    else: # "Linux", "SunOS", "FreeBSD", etc.
+        # Setting this to "linux2" is not ideal, but only Windows or Mac
+        # are actually checked for and the rest of the module expects
+        # *sys.platform* style strings.
+        system = 'linux2'
+else:
+    system = sys.platform
+
+
+
+def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
+    r"""Return full path to the user-specific data dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "roaming" (boolean, default False) can be set True to use the Windows
+            roaming appdata directory. That means that for users on a Windows
+            network setup for roaming profiles, this user data will be
+            sync'd on login. See
+            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+            for a discussion of issues.
+
+    Typical user data directories are:
+        Mac OS X:               ~/Library/Application Support/<AppName>
+        Unix:                   ~/.local/share/<AppName>    # or in $XDG_DATA_HOME, if defined
+        Win XP (not roaming):   C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
+        Win XP (roaming):       C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
+        Win 7  (not roaming):   C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
+        Win 7  (roaming):       C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
+
+    For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
+    That means, by default "~/.local/share/<AppName>".
+    """
+    if system == "win32":
+        if appauthor is None:
+            appauthor = appname
+        const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
+        path = os.path.normpath(_get_win_folder(const))
+        if appname:
+            if appauthor is not False:
+                path = os.path.join(path, appauthor, appname)
+            else:
+                path = os.path.join(path, appname)
+    elif system == 'darwin':
+        path = os.path.expanduser('~/Library/Application Support/')
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
+        if appname:
+            path = os.path.join(path, appname)
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+
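+
+# Illustrative sketch (not part of the upstream appdirs module; the app and
+# author names are hypothetical). Typical results of user_data_dir() under the
+# rules documented above:
+#
+#     user_data_dir("MyApp", "MyCompany", version="1.0")
+#     # Linux (XDG default):  ~/.local/share/MyApp/1.0
+#     # Mac OS X:             ~/Library/Application Support/MyApp/1.0
+#     # Win 7+ (not roaming): C:\Users\<user>\AppData\Local\MyCompany\MyApp\1.0
+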
+
+def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
+    r"""Return full path to the user-shared data dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "multipath" is an optional parameter only applicable to *nix
+            which indicates that the entire list of data dirs should be
+            returned. By default, the first item from XDG_DATA_DIRS is
+            returned, or '/usr/local/share/<AppName>',
+            if XDG_DATA_DIRS is not set
+
+    Typical site data directories are:
+        Mac OS X:   /Library/Application Support/<AppName>
+        Unix:       /usr/local/share/<AppName> or /usr/share/<AppName>
+        Win XP:     C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
+        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+        Win 7:      C:\ProgramData\<AppAuthor>\<AppName>   # Hidden, but writeable on Win 7.
+
+    For Unix, this is using the $XDG_DATA_DIRS[0] default.
+
+    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
+    """
+    if system == "win32":
+        if appauthor is None:
+            appauthor = appname
+        path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
+        if appname:
+            if appauthor is not False:
+                path = os.path.join(path, appauthor, appname)
+            else:
+                path = os.path.join(path, appname)
+    elif system == 'darwin':
+        path = os.path.expanduser('/Library/Application Support')
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        # XDG default for $XDG_DATA_DIRS
+        # only first, if multipath is False
+        path = os.getenv('XDG_DATA_DIRS',
+                         os.pathsep.join(['/usr/local/share', '/usr/share']))
+        pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
+        if appname:
+            if version:
+                appname = os.path.join(appname, version)
+            pathlist = [os.sep.join([x, appname]) for x in pathlist]
+
+        if multipath:
+            path = os.pathsep.join(pathlist)
+        else:
+            path = pathlist[0]
+        return path
+
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+
+
+def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
+    r"""Return full path to the user-specific config dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "roaming" (boolean, default False) can be set True to use the Windows
+            roaming appdata directory. That means that for users on a Windows
+            network setup for roaming profiles, this user data will be
+            sync'd on login. See
+            <http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
+            for a discussion of issues.
+
+    Typical user config directories are:
+        Mac OS X:               same as user_data_dir
+        Unix:                   ~/.config/<AppName>     # or in $XDG_CONFIG_HOME, if defined
+        Win *:                  same as user_data_dir
+
+    For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
+    That means, by default "~/.config/<AppName>".
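+
+    A rough illustration on Linux with XDG_CONFIG_HOME unset ("MyApp" is a
+    placeholder name):
+        user_config_dir("MyApp")                 # e.g. ~/.config/MyApp
+        user_config_dir("MyApp", version="1.0")  # e.g. ~/.config/MyApp/1.0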
+    """
+    if system in ["win32", "darwin"]:
+        path = user_data_dir(appname, appauthor, None, roaming)
+    else:
+        path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
+        if appname:
+            path = os.path.join(path, appname)
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+
+
+def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
+    r"""Return full path to the user-shared data dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "multipath" is an optional parameter only applicable to *nix
+            which indicates that the entire list of config dirs should be
+            returned. By default, the first item from XDG_CONFIG_DIRS is
+            returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
+
+    Typical site config directories are:
+        Mac OS X:   same as site_data_dir
+        Unix:       /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
+                    $XDG_CONFIG_DIRS
+        Win *:      same as site_data_dir
+        Vista:      (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
+
+    For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
+
+    WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
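+
+    A rough illustration on Linux with XDG_CONFIG_DIRS unset ("MyApp" is a
+    placeholder name):
+        site_config_dir("MyApp")                  # '/etc/xdg/MyApp'
+        site_config_dir("MyApp", multipath=True)  # also '/etc/xdg/MyApp' (single default entry)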
+    """
+    if system in ["win32", "darwin"]:
+        path = site_data_dir(appname, appauthor)
+        if appname and version:
+            path = os.path.join(path, version)
+    else:
+        # XDG default for $XDG_CONFIG_DIRS
+        # only first, if multipath is False
+        path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
+        pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
+        if appname:
+            if version:
+                appname = os.path.join(appname, version)
+            pathlist = [os.sep.join([x, appname]) for x in pathlist]
+
+        if multipath:
+            path = os.pathsep.join(pathlist)
+        else:
+            path = pathlist[0]
+    return path
+
+
+def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
+    r"""Return full path to the user-specific cache dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "opinion" (boolean) can be False to disable the appending of
+            "Cache" to the base app data dir for Windows. See
+            discussion below.
+
+    Typical user cache directories are:
+        Mac OS X:   ~/Library/Caches/<AppName>
+        Unix:       ~/.cache/<AppName> (XDG default)
+        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
+        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
+
+    On Windows the only suggestion in the MSDN docs is that local settings go in
+    the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
+    app data dir (the default returned by `user_data_dir` above). Apps typically
+    put cache data somewhere *under* the given dir here. Some examples:
+        ...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
+        ...\Acme\SuperApp\Cache\1.0
+    OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
+    This can be disabled with the `opinion=False` option.
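+
+    A rough illustration ("MyApp" and "MyCompany" are placeholder names; exact
+    paths vary by platform):
+        user_cache_dir("MyApp", "MyCompany")                 # e.g. ~/.cache/MyApp on Linux
+        user_cache_dir("MyApp", "MyCompany", opinion=False)  # same on Linux; no "Cache" suffix on Windows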
+    """
+    if system == "win32":
+        if appauthor is None:
+            appauthor = appname
+        path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
+        if appname:
+            if appauthor is not False:
+                path = os.path.join(path, appauthor, appname)
+            else:
+                path = os.path.join(path, appname)
+            if opinion:
+                path = os.path.join(path, "Cache")
+    elif system == 'darwin':
+        path = os.path.expanduser('~/Library/Caches')
+        if appname:
+            path = os.path.join(path, appname)
+    else:
+        path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
+        if appname:
+            path = os.path.join(path, appname)
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+
+
+def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
+    r"""Return full path to the user-specific log dir for this application.
+
+        "appname" is the name of application.
+            If None, just the system directory is returned.
+        "appauthor" (only used on Windows) is the name of the
+            appauthor or distributing body for this application. Typically
+            it is the owning company name. This falls back to appname. You may
+            pass False to disable it.
+        "version" is an optional version path element to append to the
+            path. You might want to use this if you want multiple versions
+            of your app to be able to run independently. If used, this
+            would typically be "<major>.<minor>".
+            Only applied when appname is present.
+        "opinion" (boolean) can be False to disable the appending of
+            "Logs" to the base app data dir for Windows, and "log" to the
+            base cache dir for Unix. See discussion below.
+
+    Typical user log directories are:
+        Mac OS X:   ~/Library/Logs/<AppName>
+        Unix:       ~/.cache/<AppName>/log  # or under $XDG_CACHE_HOME if defined
+        Win XP:     C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
+        Vista:      C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
+
+    On Windows the only suggestion in the MSDN docs is that local settings
+    go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
+    examples of what some Windows apps use for a logs dir.)
+
+    OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
+    value for Windows and appends "log" to the user cache dir for Unix.
+    This can be disabled with the `opinion=False` option.
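+
+    A rough illustration on Linux ("MyApp" is a placeholder name):
+        user_log_dir("MyApp")                 # e.g. ~/.cache/MyApp/log
+        user_log_dir("MyApp", opinion=False)  # e.g. ~/.cache/MyApp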
+    """
+    if system == "darwin":
+        path = os.path.join(
+            os.path.expanduser('~/Library/Logs'),
+            appname)
+    elif system == "win32":
+        path = user_data_dir(appname, appauthor, version)
+        version = False
+        if opinion:
+            path = os.path.join(path, "Logs")
+    else:
+        path = user_cache_dir(appname, appauthor, version)
+        version = False
+        if opinion:
+            path = os.path.join(path, "log")
+    if appname and version:
+        path = os.path.join(path, version)
+    return path
+
+
+class AppDirs(object):
+    """Convenience wrapper for getting application dirs."""
+    def __init__(self, appname, appauthor=None, version=None, roaming=False,
+                 multipath=False):
+        self.appname = appname
+        self.appauthor = appauthor
+        self.version = version
+        self.roaming = roaming
+        self.multipath = multipath
+
+    @property
+    def user_data_dir(self):
+        return user_data_dir(self.appname, self.appauthor,
+                             version=self.version, roaming=self.roaming)
+
+    @property
+    def site_data_dir(self):
+        return site_data_dir(self.appname, self.appauthor,
+                             version=self.version, multipath=self.multipath)
+
+    @property
+    def user_config_dir(self):
+        return user_config_dir(self.appname, self.appauthor,
+                               version=self.version, roaming=self.roaming)
+
+    @property
+    def site_config_dir(self):
+        return site_config_dir(self.appname, self.appauthor,
+                             version=self.version, multipath=self.multipath)
+
+    @property
+    def user_cache_dir(self):
+        return user_cache_dir(self.appname, self.appauthor,
+                              version=self.version)
+
+    @property
+    def user_log_dir(self):
+        return user_log_dir(self.appname, self.appauthor,
+                            version=self.version)
+
+
+#---- internal support stuff
+
+def _get_win_folder_from_registry(csidl_name):
+    """This is a fallback technique at best. I'm not sure if using the
+    registry for this guarantees us the correct answer for all CSIDL_*
+    names.
+    """
+    try:
+        import winreg as _winreg  # Python 3
+    except ImportError:
+        import _winreg  # Python 2
+
+    shell_folder_name = {
+        "CSIDL_APPDATA": "AppData",
+        "CSIDL_COMMON_APPDATA": "Common AppData",
+        "CSIDL_LOCAL_APPDATA": "Local AppData",
+    }[csidl_name]
+
+    key = _winreg.OpenKey(
+        _winreg.HKEY_CURRENT_USER,
+        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
+    )
+    dir, type = _winreg.QueryValueEx(key, shell_folder_name)
+    return dir
+
+
+def _get_win_folder_with_pywin32(csidl_name):
+    from win32com.shell import shellcon, shell
+    dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
+    # Try to make this a unicode path because SHGetFolderPath does
+    # not return unicode strings when there is unicode data in the
+    # path.
+    try:
+        dir = unicode(dir)
+
+        # Downgrade to short path name if it has high-bit chars. See
+        # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+        has_high_char = False
+        for c in dir:
+            if ord(c) > 255:
+                has_high_char = True
+                break
+        if has_high_char:
+            try:
+                import win32api
+                dir = win32api.GetShortPathName(dir)
+            except ImportError:
+                pass
+    except UnicodeError:
+        pass
+    return dir
+
+
+def _get_win_folder_with_ctypes(csidl_name):
+    import ctypes
+
+    csidl_const = {
+        "CSIDL_APPDATA": 26,
+        "CSIDL_COMMON_APPDATA": 35,
+        "CSIDL_LOCAL_APPDATA": 28,
+    }[csidl_name]
+
+    buf = ctypes.create_unicode_buffer(1024)
+    ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
+
+    # Downgrade to short path name if it has high-bit chars. See
+    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+    has_high_char = False
+    for c in buf:
+        if ord(c) > 255:
+            has_high_char = True
+            break
+    if has_high_char:
+        buf2 = ctypes.create_unicode_buffer(1024)
+        if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
+            buf = buf2
+
+    return buf.value
+
+def _get_win_folder_with_jna(csidl_name):
+    import array
+    from com.sun import jna
+    from com.sun.jna.platform import win32
+
+    buf_size = win32.WinDef.MAX_PATH * 2
+    buf = array.zeros('c', buf_size)
+    shell = win32.Shell32.INSTANCE
+    shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
+    dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+
+    # Downgrade to short path name if it has high-bit chars. See
+    # <http://bugs.activestate.com/show_bug.cgi?id=85099>.
+    has_high_char = False
+    for c in dir:
+        if ord(c) > 255:
+            has_high_char = True
+            break
+    if has_high_char:
+        buf = array.zeros('c', buf_size)
+        kernel = win32.Kernel32.INSTANCE
+        if kernel.GetShortPathName(dir, buf, buf_size):
+            dir = jna.Native.toString(buf.tostring()).rstrip("\0")
+
+    return dir
+
+if system == "win32":
+    try:
+        import win32com.shell
+        _get_win_folder = _get_win_folder_with_pywin32
+    except ImportError:
+        try:
+            from ctypes import windll
+            _get_win_folder = _get_win_folder_with_ctypes
+        except ImportError:
+            try:
+                import com.sun.jna
+                _get_win_folder = _get_win_folder_with_jna
+            except ImportError:
+                _get_win_folder = _get_win_folder_from_registry
+
+
+#---- self test code
+
+if __name__ == "__main__":
+    appname = "MyApp"
+    appauthor = "MyCompany"
+
+    props = ("user_data_dir", "site_data_dir",
+             "user_config_dir", "site_config_dir",
+             "user_cache_dir", "user_log_dir")
+
+    print("-- app dirs (with optional 'version')")
+    dirs = AppDirs(appname, appauthor, version="1.0")
+    for prop in props:
+        print("%s: %s" % (prop, getattr(dirs, prop)))
+
+    print("\n-- app dirs (without optional 'version')")
+    dirs = AppDirs(appname, appauthor)
+    for prop in props:
+        print("%s: %s" % (prop, getattr(dirs, prop)))
+
+    print("\n-- app dirs (without optional 'appauthor')")
+    dirs = AppDirs(appname)
+    for prop in props:
+        print("%s: %s" % (prop, getattr(dirs, prop)))
+
+    print("\n-- app dirs (with disabled 'appauthor')")
+    dirs = AppDirs(appname, appauthor=False)
+    for prop in props:
+        print("%s: %s" % (prop, getattr(dirs, prop)))
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/__about__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/__about__.py
new file mode 100644
index 0000000000000000000000000000000000000000..95d330ef823aa2e12f7846bc63c0955b25df6029
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/__about__.py
@@ -0,0 +1,21 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+__all__ = [
+    "__title__", "__summary__", "__uri__", "__version__", "__author__",
+    "__email__", "__license__", "__copyright__",
+]
+
+__title__ = "packaging"
+__summary__ = "Core utilities for Python packages"
+__uri__ = "https://github.com/pypa/packaging"
+
+__version__ = "16.8"
+
+__author__ = "Donald Stufft and individual contributors"
+__email__ = "donald@stufft.io"
+
+__license__ = "BSD or Apache License, Version 2.0"
+__copyright__ = "Copyright 2014-2016 %s" % __author__
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ee6220203e5425f900fb5a43676c24ea377c2fa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/__init__.py
@@ -0,0 +1,14 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+from .__about__ import (
+    __author__, __copyright__, __email__, __license__, __summary__, __title__,
+    __uri__, __version__
+)
+
+__all__ = [
+    "__title__", "__summary__", "__uri__", "__version__", "__author__",
+    "__email__", "__license__", "__copyright__",
+]
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/_compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/_compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..210bb80b7e7b64cb79f7e7cdf3e42819fe3471fe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/_compat.py
@@ -0,0 +1,30 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import sys
+
+
+PY2 = sys.version_info[0] == 2
+PY3 = sys.version_info[0] == 3
+
+# flake8: noqa
+
+if PY3:
+    string_types = str,
+else:
+    string_types = basestring,
+
+
+def with_metaclass(meta, *bases):
+    """
+    Create a base class with a metaclass.
+    """
+    # This requires a bit of explanation: the basic idea is to make a dummy
+    # metaclass for one level of class instantiation that replaces itself with
+    # the actual metaclass.
+    class metaclass(meta):
+        def __new__(cls, name, this_bases, d):
+            return meta(name, bases, d)
+    return type.__new__(metaclass, 'temporary_class', (), {})
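+
+
+# Illustrative usage: ``class Base(with_metaclass(abc.ABCMeta, object)): ...``
+# defines a class whose metaclass is abc.ABCMeta under both Python 2 and
+# Python 3, without relying on syntax specific to either version.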
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/_structures.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/_structures.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccc27861c3a4d9efaa3db753c77c4515a627bd98
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/_structures.py
@@ -0,0 +1,68 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+
+class Infinity(object):
+
+    def __repr__(self):
+        return "Infinity"
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __lt__(self, other):
+        return False
+
+    def __le__(self, other):
+        return False
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__)
+
+    def __ne__(self, other):
+        return not isinstance(other, self.__class__)
+
+    def __gt__(self, other):
+        return True
+
+    def __ge__(self, other):
+        return True
+
+    def __neg__(self):
+        return NegativeInfinity
+
+Infinity = Infinity()
+
+
+class NegativeInfinity(object):
+
+    def __repr__(self):
+        return "-Infinity"
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __lt__(self, other):
+        return True
+
+    def __le__(self, other):
+        return True
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__)
+
+    def __ne__(self, other):
+        return not isinstance(other, self.__class__)
+
+    def __gt__(self, other):
+        return False
+
+    def __ge__(self, other):
+        return False
+
+    def __neg__(self):
+        return Infinity
+
+NegativeInfinity = NegativeInfinity()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/markers.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/markers.py
new file mode 100644
index 0000000000000000000000000000000000000000..892e578edd4b992cc2996c31d9deb13af73d62c0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/markers.py
@@ -0,0 +1,301 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import operator
+import os
+import platform
+import sys
+
+from pkg_resources.extern.pyparsing import ParseException, ParseResults, stringStart, stringEnd
+from pkg_resources.extern.pyparsing import ZeroOrMore, Group, Forward, QuotedString
+from pkg_resources.extern.pyparsing import Literal as L  # noqa
+
+from ._compat import string_types
+from .specifiers import Specifier, InvalidSpecifier
+
+
+__all__ = [
+    "InvalidMarker", "UndefinedComparison", "UndefinedEnvironmentName",
+    "Marker", "default_environment",
+]
+
+
+class InvalidMarker(ValueError):
+    """
+    An invalid marker was found; users should refer to PEP 508.
+    """
+
+
+class UndefinedComparison(ValueError):
+    """
+    An invalid operation was attempted on a value that doesn't support it.
+    """
+
+
+class UndefinedEnvironmentName(ValueError):
+    """
+    A name was used that does not exist inside of the evaluation
+    environment.
+    """
+
+
+class Node(object):
+
+    def __init__(self, value):
+        self.value = value
+
+    def __str__(self):
+        return str(self.value)
+
+    def __repr__(self):
+        return "<{0}({1!r})>".format(self.__class__.__name__, str(self))
+
+    def serialize(self):
+        raise NotImplementedError
+
+
+class Variable(Node):
+
+    def serialize(self):
+        return str(self)
+
+
+class Value(Node):
+
+    def serialize(self):
+        return '"{0}"'.format(self)
+
+
+class Op(Node):
+
+    def serialize(self):
+        return str(self)
+
+
+VARIABLE = (
+    L("implementation_version") |
+    L("platform_python_implementation") |
+    L("implementation_name") |
+    L("python_full_version") |
+    L("platform_release") |
+    L("platform_version") |
+    L("platform_machine") |
+    L("platform_system") |
+    L("python_version") |
+    L("sys_platform") |
+    L("os_name") |
+    L("os.name") |  # PEP-345
+    L("sys.platform") |  # PEP-345
+    L("platform.version") |  # PEP-345
+    L("platform.machine") |  # PEP-345
+    L("platform.python_implementation") |  # PEP-345
+    L("python_implementation") |  # undocumented setuptools legacy
+    L("extra")
+)
+ALIASES = {
+    'os.name': 'os_name',
+    'sys.platform': 'sys_platform',
+    'platform.version': 'platform_version',
+    'platform.machine': 'platform_machine',
+    'platform.python_implementation': 'platform_python_implementation',
+    'python_implementation': 'platform_python_implementation'
+}
+VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0])))
+
+VERSION_CMP = (
+    L("===") |
+    L("==") |
+    L(">=") |
+    L("<=") |
+    L("!=") |
+    L("~=") |
+    L(">") |
+    L("<")
+)
+
+MARKER_OP = VERSION_CMP | L("not in") | L("in")
+MARKER_OP.setParseAction(lambda s, l, t: Op(t[0]))
+
+MARKER_VALUE = QuotedString("'") | QuotedString('"')
+MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))
+
+BOOLOP = L("and") | L("or")
+
+MARKER_VAR = VARIABLE | MARKER_VALUE
+
+MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
+MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0]))
+
+LPAREN = L("(").suppress()
+RPAREN = L(")").suppress()
+
+MARKER_EXPR = Forward()
+MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
+MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)
+
+MARKER = stringStart + MARKER_EXPR + stringEnd
+
+
+def _coerce_parse_result(results):
+    if isinstance(results, ParseResults):
+        return [_coerce_parse_result(i) for i in results]
+    else:
+        return results
+
+
+def _format_marker(marker, first=True):
+    assert isinstance(marker, (list, tuple, string_types))
+
+    # Sometimes we have a structure like [[...]] which is a single item list
+    # where the single item is itself its own list. In that case we want to skip
+    # the rest of this function so that we don't get extraneous () on the
+    # outside.
+    if (isinstance(marker, list) and len(marker) == 1 and
+            isinstance(marker[0], (list, tuple))):
+        return _format_marker(marker[0])
+
+    if isinstance(marker, list):
+        inner = (_format_marker(m, first=False) for m in marker)
+        if first:
+            return " ".join(inner)
+        else:
+            return "(" + " ".join(inner) + ")"
+    elif isinstance(marker, tuple):
+        return " ".join([m.serialize() for m in marker])
+    else:
+        return marker
+
+
+_operators = {
+    "in": lambda lhs, rhs: lhs in rhs,
+    "not in": lambda lhs, rhs: lhs not in rhs,
+    "<": operator.lt,
+    "<=": operator.le,
+    "==": operator.eq,
+    "!=": operator.ne,
+    ">=": operator.ge,
+    ">": operator.gt,
+}
+
+
+def _eval_op(lhs, op, rhs):
+    try:
+        spec = Specifier("".join([op.serialize(), rhs]))
+    except InvalidSpecifier:
+        pass
+    else:
+        return spec.contains(lhs)
+
+    oper = _operators.get(op.serialize())
+    if oper is None:
+        raise UndefinedComparison(
+            "Undefined {0!r} on {1!r} and {2!r}.".format(op, lhs, rhs)
+        )
+
+    return oper(lhs, rhs)
+
+
+_undefined = object()
+
+
+def _get_env(environment, name):
+    value = environment.get(name, _undefined)
+
+    if value is _undefined:
+        raise UndefinedEnvironmentName(
+            "{0!r} does not exist in evaluation environment.".format(name)
+        )
+
+    return value
+
+
+def _evaluate_markers(markers, environment):
+    groups = [[]]
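+    # Note (illustrative): "or" starts a new group below, so a flat marker list
+    # such as [A, "and", B, "or", C] becomes groups [[A, B], [C]], and the
+    # return value any(all(g) for g in groups) evaluates to (A and B) or C.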
+
+    for marker in markers:
+        assert isinstance(marker, (list, tuple, string_types))
+
+        if isinstance(marker, list):
+            groups[-1].append(_evaluate_markers(marker, environment))
+        elif isinstance(marker, tuple):
+            lhs, op, rhs = marker
+
+            if isinstance(lhs, Variable):
+                lhs_value = _get_env(environment, lhs.value)
+                rhs_value = rhs.value
+            else:
+                lhs_value = lhs.value
+                rhs_value = _get_env(environment, rhs.value)
+
+            groups[-1].append(_eval_op(lhs_value, op, rhs_value))
+        else:
+            assert marker in ["and", "or"]
+            if marker == "or":
+                groups.append([])
+
+    return any(all(item) for item in groups)
+
+
+def format_full_version(info):
+    version = '{0.major}.{0.minor}.{0.micro}'.format(info)
+    kind = info.releaselevel
+    if kind != 'final':
+        version += kind[0] + str(info.serial)
+    return version
+
+
+def default_environment():
+    if hasattr(sys, 'implementation'):
+        iver = format_full_version(sys.implementation.version)
+        implementation_name = sys.implementation.name
+    else:
+        iver = '0'
+        implementation_name = ''
+
+    return {
+        "implementation_name": implementation_name,
+        "implementation_version": iver,
+        "os_name": os.name,
+        "platform_machine": platform.machine(),
+        "platform_release": platform.release(),
+        "platform_system": platform.system(),
+        "platform_version": platform.version(),
+        "python_full_version": platform.python_version(),
+        "platform_python_implementation": platform.python_implementation(),
+        "python_version": platform.python_version()[:3],
+        "sys_platform": sys.platform,
+    }
+
+
+class Marker(object):
+
+    def __init__(self, marker):
+        try:
+            self._markers = _coerce_parse_result(MARKER.parseString(marker))
+        except ParseException as e:
+            err_str = "Invalid marker: {0!r}, parse error at {1!r}".format(
+                marker, marker[e.loc:e.loc + 8])
+            raise InvalidMarker(err_str)
+
+    def __str__(self):
+        return _format_marker(self._markers)
+
+    def __repr__(self):
+        return "<Marker({0!r})>".format(str(self))
+
+    def evaluate(self, environment=None):
+        """Evaluate a marker.
+
+        Return the boolean from evaluating the given marker against the
+        environment. environment is an optional argument to override all or
+        part of the determined environment.
+
+        The environment is determined from the current Python process.
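+
+        A rough illustration (results depend on the running interpreter unless
+        overridden by the passed environment):
+            Marker("python_version >= '2.7'").evaluate()
+            Marker("os_name == 'nt'").evaluate({"os_name": "posix"})   # False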
+        """
+        current_environment = default_environment()
+        if environment is not None:
+            current_environment.update(environment)
+
+        return _evaluate_markers(self._markers, current_environment)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/requirements.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/requirements.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c8c4a3852fd37053fd552846aa7787805c30a48
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/requirements.py
@@ -0,0 +1,127 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import string
+import re
+
+from pkg_resources.extern.pyparsing import stringStart, stringEnd, originalTextFor, ParseException
+from pkg_resources.extern.pyparsing import ZeroOrMore, Word, Optional, Regex, Combine
+from pkg_resources.extern.pyparsing import Literal as L  # noqa
+from pkg_resources.extern.six.moves.urllib import parse as urlparse
+
+from .markers import MARKER_EXPR, Marker
+from .specifiers import LegacySpecifier, Specifier, SpecifierSet
+
+
+class InvalidRequirement(ValueError):
+    """
+    An invalid requirement was found; users should refer to PEP 508.
+    """
+
+
+ALPHANUM = Word(string.ascii_letters + string.digits)
+
+LBRACKET = L("[").suppress()
+RBRACKET = L("]").suppress()
+LPAREN = L("(").suppress()
+RPAREN = L(")").suppress()
+COMMA = L(",").suppress()
+SEMICOLON = L(";").suppress()
+AT = L("@").suppress()
+
+PUNCTUATION = Word("-_.")
+IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
+IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))
+
+NAME = IDENTIFIER("name")
+EXTRA = IDENTIFIER
+
+URI = Regex(r'[^ ]+')("url")
+URL = (AT + URI)
+
+EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
+EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")
+
+VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
+VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)
+
+VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
+VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
+                       joinString=",", adjacent=False)("_raw_spec")
+_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
+_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or '')
+
+VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier")
+VERSION_SPEC.setParseAction(lambda s, l, t: t[1])
+
+MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker")
+MARKER_EXPR.setParseAction(
+    lambda s, l, t: Marker(s[t._original_start:t._original_end])
+)
+MARKER_SEPERATOR = SEMICOLON
+MARKER = MARKER_SEPERATOR + MARKER_EXPR
+
+VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER)
+URL_AND_MARKER = URL + Optional(MARKER)
+
+NAMED_REQUIREMENT = (
+    NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER)
+)
+
+REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd
+
+
+class Requirement(object):
+    """Parse a requirement.
+
+    Parse a given requirement string into its parts, such as name, specifier,
+    URL, and extras. Raises InvalidRequirement on a badly-formed requirement
+    string.
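+
+    A rough illustration (a hypothetical requirement string; comments show the
+    approximate shape of the parsed parts):
+        req = Requirement('requests[security]>=2.8.1; python_version < "2.7"')
+        req.name       # 'requests'
+        req.extras     # {'security'}
+        req.specifier  # <SpecifierSet('>=2.8.1')>
+        req.marker     # <Marker('python_version < "2.7"')>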
+    """
+
+    # TODO: Can we test whether something is contained within a requirement?
+    #       If so how do we do that? Do we need to test against the _name_ of
+    #       the thing as well as the version? What about the markers?
+    # TODO: Can we normalize the name and extra name?
+
+    def __init__(self, requirement_string):
+        try:
+            req = REQUIREMENT.parseString(requirement_string)
+        except ParseException as e:
+            raise InvalidRequirement(
+                "Invalid requirement, parse error at \"{0!r}\"".format(
+                    requirement_string[e.loc:e.loc + 8]))
+
+        self.name = req.name
+        if req.url:
+            parsed_url = urlparse.urlparse(req.url)
+            if not (parsed_url.scheme and parsed_url.netloc) or (
+                    not parsed_url.scheme and not parsed_url.netloc):
+                raise InvalidRequirement("Invalid URL given")
+            self.url = req.url
+        else:
+            self.url = None
+        self.extras = set(req.extras.asList() if req.extras else [])
+        self.specifier = SpecifierSet(req.specifier)
+        self.marker = req.marker if req.marker else None
+
+    def __str__(self):
+        parts = [self.name]
+
+        if self.extras:
+            parts.append("[{0}]".format(",".join(sorted(self.extras))))
+
+        if self.specifier:
+            parts.append(str(self.specifier))
+
+        if self.url:
+            parts.append("@ {0}".format(self.url))
+
+        if self.marker:
+            parts.append("; {0}".format(self.marker))
+
+        return "".join(parts)
+
+    def __repr__(self):
+        return "<Requirement({0!r})>".format(str(self))
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/specifiers.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/specifiers.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f5a76cfd63f47dcce29b3ea82f59d10f4e8d771
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/specifiers.py
@@ -0,0 +1,774 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import abc
+import functools
+import itertools
+import re
+
+from ._compat import string_types, with_metaclass
+from .version import Version, LegacyVersion, parse
+
+
+class InvalidSpecifier(ValueError):
+    """
+    An invalid specifier was found; users should refer to PEP 440.
+    """
+
+
+class BaseSpecifier(with_metaclass(abc.ABCMeta, object)):
+
+    @abc.abstractmethod
+    def __str__(self):
+        """
+        Returns the str representation of this Specifier like object. This
+        should be representative of the Specifier itself.
+        """
+
+    @abc.abstractmethod
+    def __hash__(self):
+        """
+        Returns a hash value for this Specifier like object.
+        """
+
+    @abc.abstractmethod
+    def __eq__(self, other):
+        """
+        Returns a boolean representing whether or not the two Specifier like
+        objects are equal.
+        """
+
+    @abc.abstractmethod
+    def __ne__(self, other):
+        """
+        Returns a boolean representing whether or not the two Specifier like
+        objects are not equal.
+        """
+
+    @abc.abstractproperty
+    def prereleases(self):
+        """
+        Returns whether or not pre-releases as a whole are allowed by this
+        specifier.
+        """
+
+    @prereleases.setter
+    def prereleases(self, value):
+        """
+        Sets whether or not pre-releases as a whole are allowed by this
+        specifier.
+        """
+
+    @abc.abstractmethod
+    def contains(self, item, prereleases=None):
+        """
+        Determines if the given item is contained within this specifier.
+        """
+
+    @abc.abstractmethod
+    def filter(self, iterable, prereleases=None):
+        """
+        Takes an iterable of items and filters them so that only items which
+        are contained within this specifier are allowed in it.
+        """
+
+
+class _IndividualSpecifier(BaseSpecifier):
+
+    _operators = {}
+
+    def __init__(self, spec="", prereleases=None):
+        match = self._regex.search(spec)
+        if not match:
+            raise InvalidSpecifier("Invalid specifier: '{0}'".format(spec))
+
+        self._spec = (
+            match.group("operator").strip(),
+            match.group("version").strip(),
+        )
+
+        # Store whether or not this Specifier should accept prereleases
+        self._prereleases = prereleases
+
+    def __repr__(self):
+        pre = (
+            ", prereleases={0!r}".format(self.prereleases)
+            if self._prereleases is not None
+            else ""
+        )
+
+        return "<{0}({1!r}{2})>".format(
+            self.__class__.__name__,
+            str(self),
+            pre,
+        )
+
+    def __str__(self):
+        return "{0}{1}".format(*self._spec)
+
+    def __hash__(self):
+        return hash(self._spec)
+
+    def __eq__(self, other):
+        if isinstance(other, string_types):
+            try:
+                other = self.__class__(other)
+            except InvalidSpecifier:
+                return NotImplemented
+        elif not isinstance(other, self.__class__):
+            return NotImplemented
+
+        return self._spec == other._spec
+
+    def __ne__(self, other):
+        if isinstance(other, string_types):
+            try:
+                other = self.__class__(other)
+            except InvalidSpecifier:
+                return NotImplemented
+        elif not isinstance(other, self.__class__):
+            return NotImplemented
+
+        return self._spec != other._spec
+
+    def _get_operator(self, op):
+        return getattr(self, "_compare_{0}".format(self._operators[op]))
+
+    def _coerce_version(self, version):
+        if not isinstance(version, (LegacyVersion, Version)):
+            version = parse(version)
+        return version
+
+    @property
+    def operator(self):
+        return self._spec[0]
+
+    @property
+    def version(self):
+        return self._spec[1]
+
+    @property
+    def prereleases(self):
+        return self._prereleases
+
+    @prereleases.setter
+    def prereleases(self, value):
+        self._prereleases = value
+
+    def __contains__(self, item):
+        return self.contains(item)
+
+    def contains(self, item, prereleases=None):
+        # Determine if prereleases are to be allowed or not.
+        if prereleases is None:
+            prereleases = self.prereleases
+
+        # Normalize item to a Version or LegacyVersion; this allows us to have
+        # a shortcut for ``"2.0" in Specifier(">=2")``.
+        item = self._coerce_version(item)
+
+        # Determine if we should be supporting prereleases in this specifier
+        # or not. If we do not support prereleases, then we can short-circuit
+        # the logic if this version is a prerelease.
+        if item.is_prerelease and not prereleases:
+            return False
+
+        # Actually do the comparison to determine if this item is contained
+        # within this Specifier or not.
+        return self._get_operator(self.operator)(item, self.version)
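+        # Illustrative examples (using the Specifier subclass defined below):
+        #     Specifier(">=1.0").contains("1.5")                       # True
+        #     Specifier(">=1.0").contains("2.0rc1")                    # False (prerelease)
+        #     Specifier(">=1.0").contains("2.0rc1", prereleases=True)  # True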
+
+    def filter(self, iterable, prereleases=None):
+        yielded = False
+        found_prereleases = []
+
+        kw = {"prereleases": prereleases if prereleases is not None else True}
+
+        # Attempt to iterate over all the values in the iterable and if any of
+        # them match, yield them.
+        for version in iterable:
+            parsed_version = self._coerce_version(version)
+
+            if self.contains(parsed_version, **kw):
+                # If our version is a prerelease, and we were not set to allow
+                # prereleases, then we'll store it for later in case nothing
+                # else matches this specifier.
+                if (parsed_version.is_prerelease and not
+                        (prereleases or self.prereleases)):
+                    found_prereleases.append(version)
+                # Either this is not a prerelease, or we should have been
+                # accepting prereleases from the beginning.
+                else:
+                    yielded = True
+                    yield version
+
+        # Now that we've iterated over everything, determine if we've yielded
+        # any values, and if we have not and we have any prereleases stored up
+        # then we will go ahead and yield the prereleases.
+        if not yielded and found_prereleases:
+            for version in found_prereleases:
+                yield version
+
+
+class LegacySpecifier(_IndividualSpecifier):
+
+    _regex_str = (
+        r"""
+        (?P<operator>(==|!=|<=|>=|<|>))
+        \s*
+        (?P<version>
+            [^,;\s)]* # Since this is a "legacy" specifier, and the version
+                      # string can be just about anything, we match everything
+                      # except for whitespace, a semi-colon for marker support,
+                      # a closing paren since versions can be enclosed in
+                      # them, and a comma since it's a version separator.
+        )
+        """
+    )
+
+    _regex = re.compile(
+        r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    _operators = {
+        "==": "equal",
+        "!=": "not_equal",
+        "<=": "less_than_equal",
+        ">=": "greater_than_equal",
+        "<": "less_than",
+        ">": "greater_than",
+    }
+
+    def _coerce_version(self, version):
+        if not isinstance(version, LegacyVersion):
+            version = LegacyVersion(str(version))
+        return version
+
+    def _compare_equal(self, prospective, spec):
+        return prospective == self._coerce_version(spec)
+
+    def _compare_not_equal(self, prospective, spec):
+        return prospective != self._coerce_version(spec)
+
+    def _compare_less_than_equal(self, prospective, spec):
+        return prospective <= self._coerce_version(spec)
+
+    def _compare_greater_than_equal(self, prospective, spec):
+        return prospective >= self._coerce_version(spec)
+
+    def _compare_less_than(self, prospective, spec):
+        return prospective < self._coerce_version(spec)
+
+    def _compare_greater_than(self, prospective, spec):
+        return prospective > self._coerce_version(spec)
+
+
+def _require_version_compare(fn):
+    @functools.wraps(fn)
+    def wrapped(self, prospective, spec):
+        if not isinstance(prospective, Version):
+            return False
+        return fn(self, prospective, spec)
+    return wrapped
+
+
+class Specifier(_IndividualSpecifier):
+
+    _regex_str = (
+        r"""
+        (?P<operator>(~=|==|!=|<=|>=|<|>|===))
+        (?P<version>
+            (?:
+                # The identity operators allow for an escape hatch that will
+                # do an exact string match of the version you wish to install.
+                # This will not be parsed by PEP 440 and we cannot determine
+                # any semantic meaning from it. This operator is discouraged
+                # but included entirely as an escape hatch.
+                (?<====)  # Only match for the identity operator
+                \s*
+                [^\s]*    # We just match everything, except for whitespace
+                          # since we are only testing for strict identity.
+            )
+            |
+            (?:
+                # The (non)equality operators allow for wild card and local
+                # versions to be specified so we have to define these two
+                # operators separately to enable that.
+                (?<===|!=)            # Only match for equals and not equals
+
+                \s*
+                v?
+                (?:[0-9]+!)?          # epoch
+                [0-9]+(?:\.[0-9]+)*   # release
+                (?:                   # pre release
+                    [-_\.]?
+                    (a|b|c|rc|alpha|beta|pre|preview)
+                    [-_\.]?
+                    [0-9]*
+                )?
+                (?:                   # post release
+                    (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
+                )?
+
+                # You cannot use a wild card and a dev or local version
+                # together so group them with a | and make them optional.
+                (?:
+                    (?:[-_\.]?dev[-_\.]?[0-9]*)?         # dev release
+                    (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local
+                    |
+                    \.\*  # Wild card syntax of .*
+                )?
+            )
+            |
+            (?:
+                # The compatible operator requires at least two digits in the
+                # release segment.
+                (?<=~=)               # Only match for the compatible operator
+
+                \s*
+                v?
+                (?:[0-9]+!)?          # epoch
+                [0-9]+(?:\.[0-9]+)+   # release  (We have a + instead of a *)
+                (?:                   # pre release
+                    [-_\.]?
+                    (a|b|c|rc|alpha|beta|pre|preview)
+                    [-_\.]?
+                    [0-9]*
+                )?
+                (?:                                   # post release
+                    (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
+                )?
+                (?:[-_\.]?dev[-_\.]?[0-9]*)?          # dev release
+            )
+            |
+            (?:
+                # All other operators only allow a subset of what the
+                # (non)equality operators do. Specifically they do not allow
+                # local versions to be specified nor do they allow the prefix
+                # matching wild cards.
+                (?<!==|!=|~=)         # We have special cases for these
+                                      # operators so we want to make sure they
+                                      # don't match here.
+
+                \s*
+                v?
+                (?:[0-9]+!)?          # epoch
+                [0-9]+(?:\.[0-9]+)*   # release
+                (?:                   # pre release
+                    [-_\.]?
+                    (a|b|c|rc|alpha|beta|pre|preview)
+                    [-_\.]?
+                    [0-9]*
+                )?
+                (?:                                   # post release
+                    (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
+                )?
+                (?:[-_\.]?dev[-_\.]?[0-9]*)?          # dev release
+            )
+        )
+        """
+    )
+
+    _regex = re.compile(
+        r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    _operators = {
+        "~=": "compatible",
+        "==": "equal",
+        "!=": "not_equal",
+        "<=": "less_than_equal",
+        ">=": "greater_than_equal",
+        "<": "less_than",
+        ">": "greater_than",
+        "===": "arbitrary",
+    }
+
+    @_require_version_compare
+    def _compare_compatible(self, prospective, spec):
+        # Compatible releases have an equivalent combination of >= and ==. That
+        # is, ~=2.2 is equivalent to >=2.2,==2.*. This allows us to
+        # implement this in terms of the other specifiers instead of
+        # implementing it ourselves. The only thing we need to do is construct
+        # the other specifiers.
+
+        # We want everything but the last item in the version, but we want to
+        # ignore post and dev releases and we want to treat the pre-release as
+        # its own separate segment.
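+        # Illustrative example: for the spec "2.2.post3" the prefix computed
+        # below is "2.*", so "~=2.2.post3" behaves like ">=2.2.post3, ==2.*".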
+        prefix = ".".join(
+            list(
+                itertools.takewhile(
+                    lambda x: (not x.startswith("post") and not
+                               x.startswith("dev")),
+                    _version_split(spec),
+                )
+            )[:-1]
+        )
+
+        # Add the prefix notation to the end of our string
+        prefix += ".*"
+
+        return (self._get_operator(">=")(prospective, spec) and
+                self._get_operator("==")(prospective, prefix))
+
+    @_require_version_compare
+    def _compare_equal(self, prospective, spec):
+        # We need special logic to handle prefix matching
+        if spec.endswith(".*"):
+            # In the case of prefix matching we want to ignore local segment.
+            prospective = Version(prospective.public)
+            # Split the spec out by dots, and pretend that there is an implicit
+            # dot in between a release segment and a pre-release segment.
+            spec = _version_split(spec[:-2])  # Remove the trailing .*
+
+            # Split the prospective version out by dots, and pretend that there
+            # is an implicit dot in between a release segment and a pre-release
+            # segment.
+            prospective = _version_split(str(prospective))
+
+            # Shorten the prospective version to be the same length as the spec
+            # so that we can determine if the specifier is a prefix of the
+            # prospective version or not.
+            prospective = prospective[:len(spec)]
+
+            # Pad out our two sides with zeros so that they both equal the same
+            # length.
+            spec, prospective = _pad_version(spec, prospective)
+        else:
+            # Convert our spec string into a Version
+            spec = Version(spec)
+
+            # If the specifier does not have a local segment, then we want to
+            # act as if the prospective version also does not have a local
+            # segment.
+            if not spec.local:
+                prospective = Version(prospective.public)
+
+        return prospective == spec
+
+    @_require_version_compare
+    def _compare_not_equal(self, prospective, spec):
+        return not self._compare_equal(prospective, spec)
+
+    @_require_version_compare
+    def _compare_less_than_equal(self, prospective, spec):
+        return prospective <= Version(spec)
+
+    @_require_version_compare
+    def _compare_greater_than_equal(self, prospective, spec):
+        return prospective >= Version(spec)
+
+    @_require_version_compare
+    def _compare_less_than(self, prospective, spec):
+        # Convert our spec to a Version instance, since we'll want to work with
+        # it as a version.
+        spec = Version(spec)
+
+        # Check to see if the prospective version is less than the spec
+        # version. If it's not we can short circuit and just return False now
+        # instead of doing extra unneeded work.
+        if not prospective < spec:
+            return False
+
+        # This special case is here so that, unless the specifier itself
+        # includes a pre-release version, we do not accept pre-release
+        # versions for the version mentioned in the specifier (e.g. <3.1 should
+        # not match 3.1.dev0, but should match 3.0.dev0).
+        if not spec.is_prerelease and prospective.is_prerelease:
+            if Version(prospective.base_version) == Version(spec.base_version):
+                return False
+
+        # If we've gotten to here, it means that prospective version is both
+        # less than the spec version *and* it's not a pre-release of the same
+        # version in the spec.
+        return True
+
+    @_require_version_compare
+    def _compare_greater_than(self, prospective, spec):
+        # Convert our spec to a Version instance, since we'll want to work with
+        # it as a version.
+        spec = Version(spec)
+
+        # Check to see if the prospective version is greater than the spec
+        # version. If it's not we can short circuit and just return False now
+        # instead of doing extra unneeded work.
+        if not prospective > spec:
+            return False
+
+        # This special case is here so that, unless the specifier itself
+        # includes a post-release version, we do not accept
+        # post-release versions for the version mentioned in the specifier
+        # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0).
+        if not spec.is_postrelease and prospective.is_postrelease:
+            if Version(prospective.base_version) == Version(spec.base_version):
+                return False
+
+        # Ensure that we do not allow a local version of the version mentioned
+        # in the specifier, which is technically greater than, to match.
+        if prospective.local is not None:
+            if Version(prospective.base_version) == Version(spec.base_version):
+                return False
+
+        # If we've gotten to here, it means that prospective version is both
+        # greater than the spec version *and* it's not a post-release or local
+        # version of the same version in the spec.
+        return True
+
+    def _compare_arbitrary(self, prospective, spec):
+        return str(prospective).lower() == str(spec).lower()
+
+    @property
+    def prereleases(self):
+        # If there is an explicit prereleases set for this, then we'll just
+        # blindly use that.
+        if self._prereleases is not None:
+            return self._prereleases
+
+        # Look at the operator of this specifier and determine whether it is
+        # an inclusive operator, and if it is, whether it includes an explicit
+        # prerelease.
+        operator, version = self._spec
+        if operator in ["==", ">=", "<=", "~=", "==="]:
+            # The == specifier can include a trailing .*; if it does, we
+            # want to remove it before parsing.
+            if operator == "==" and version.endswith(".*"):
+                version = version[:-2]
+
+            # Parse the version, and if it is a pre-release then this
+            # specifier allows pre-releases.
+            if parse(version).is_prerelease:
+                return True
+
+        return False
+
+    @prereleases.setter
+    def prereleases(self, value):
+        self._prereleases = value
+
+
+_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$")
+
+
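+# _version_split breaks a version string into its dot-separated pieces and
+# additionally splits a fused "<digits><pre-tag><digits>" segment in two;
+# e.g. (illustrative) _version_split("1.2rc1") -> ["1", "2", "rc1"].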
+def _version_split(version):
+    result = []
+    for item in version.split("."):
+        match = _prefix_regex.search(item)
+        if match:
+            result.extend(match.groups())
+        else:
+            result.append(item)
+    return result
+
+
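+# _pad_version zero-pads the shorter release segment so both sides compare at
+# equal length; e.g. (illustrative)
+# _pad_version(["1", "2"], ["1", "2", "3"]) -> (["1", "2", "0"], ["1", "2", "3"]).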
+def _pad_version(left, right):
+    left_split, right_split = [], []
+
+    # Get the release segment of our versions
+    left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left)))
+    right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right)))
+
+    # Get the rest of our versions
+    left_split.append(left[len(left_split[0]):])
+    right_split.append(right[len(right_split[0]):])
+
+    # Insert our padding
+    left_split.insert(
+        1,
+        ["0"] * max(0, len(right_split[0]) - len(left_split[0])),
+    )
+    right_split.insert(
+        1,
+        ["0"] * max(0, len(left_split[0]) - len(right_split[0])),
+    )
+
+    return (
+        list(itertools.chain(*left_split)),
+        list(itertools.chain(*right_split)),
+    )
+
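+# Illustrative sketch (doctest-style) of the helper above: the shorter release
+# segment is zero-padded so that, e.g., "1.2" and "1.2.3" compare over the same
+# number of fields:
+#
+#     >>> _pad_version(_version_split("1.2"), _version_split("1.2.3"))
+#     (['1', '2', '0'], ['1', '2', '3'])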
+
+class SpecifierSet(BaseSpecifier):
+
+    def __init__(self, specifiers="", prereleases=None):
+        # Split on , to break each individual specifier into its own item, and
+        # strip each item to remove leading/trailing whitespace.
+        specifiers = [s.strip() for s in specifiers.split(",") if s.strip()]
+
+        # Parse each individual specifier, attempting first to make it a
+        # Specifier and falling back to a LegacySpecifier.
+        parsed = set()
+        for specifier in specifiers:
+            try:
+                parsed.add(Specifier(specifier))
+            except InvalidSpecifier:
+                parsed.add(LegacySpecifier(specifier))
+
+        # Turn our parsed specifiers into a frozen set and save them for later.
+        self._specs = frozenset(parsed)
+
+        # Store our prereleases value so we can use it later to determine if
+        # we accept prereleases or not.
+        self._prereleases = prereleases
+
+    def __repr__(self):
+        pre = (
+            ", prereleases={0!r}".format(self.prereleases)
+            if self._prereleases is not None
+            else ""
+        )
+
+        return "<SpecifierSet({0!r}{1})>".format(str(self), pre)
+
+    def __str__(self):
+        return ",".join(sorted(str(s) for s in self._specs))
+
+    def __hash__(self):
+        return hash(self._specs)
+
+    def __and__(self, other):
+        if isinstance(other, string_types):
+            other = SpecifierSet(other)
+        elif not isinstance(other, SpecifierSet):
+            return NotImplemented
+
+        specifier = SpecifierSet()
+        specifier._specs = frozenset(self._specs | other._specs)
+
+        if self._prereleases is None and other._prereleases is not None:
+            specifier._prereleases = other._prereleases
+        elif self._prereleases is not None and other._prereleases is None:
+            specifier._prereleases = self._prereleases
+        elif self._prereleases == other._prereleases:
+            specifier._prereleases = self._prereleases
+        else:
+            raise ValueError(
+                "Cannot combine SpecifierSets with True and False prerelease "
+                "overrides."
+            )
+
+        return specifier
+
+    def __eq__(self, other):
+        if isinstance(other, string_types):
+            other = SpecifierSet(other)
+        elif isinstance(other, _IndividualSpecifier):
+            other = SpecifierSet(str(other))
+        elif not isinstance(other, SpecifierSet):
+            return NotImplemented
+
+        return self._specs == other._specs
+
+    def __ne__(self, other):
+        if isinstance(other, string_types):
+            other = SpecifierSet(other)
+        elif isinstance(other, _IndividualSpecifier):
+            other = SpecifierSet(str(other))
+        elif not isinstance(other, SpecifierSet):
+            return NotImplemented
+
+        return self._specs != other._specs
+
+    def __len__(self):
+        return len(self._specs)
+
+    def __iter__(self):
+        return iter(self._specs)
+
+    @property
+    def prereleases(self):
+        # If we have been given an explicit prerelease modifier, then we'll
+        # pass that through here.
+        if self._prereleases is not None:
+            return self._prereleases
+
+        # If we don't have any specifiers, and we don't have a forced value,
+        # then we'll just return None since we don't know if this should have
+        # pre-releases or not.
+        if not self._specs:
+            return None
+
+        # Otherwise we'll see if any of the given specifiers accept
+        # prereleases; if any of them do, we'll return True, otherwise False.
+        return any(s.prereleases for s in self._specs)
+
+    @prereleases.setter
+    def prereleases(self, value):
+        self._prereleases = value
+
+    def __contains__(self, item):
+        return self.contains(item)
+
+    def contains(self, item, prereleases=None):
+        # Ensure that our item is a Version or LegacyVersion instance.
+        if not isinstance(item, (LegacyVersion, Version)):
+            item = parse(item)
+
+        # Determine if we're forcing a prerelease or not; if we're not forcing
+        # one for this particular call, then we'll use whatever the
+        # SpecifierSet thinks about whether or not we should support prereleases.
+        if prereleases is None:
+            prereleases = self.prereleases
+
+        # We can determine if we're going to allow pre-releases by looking to
+        # see if any of the underlying items support them. If none of them do
+        # and this item is a pre-release then we do not allow it and we can
+        # short-circuit that here.
+        # Note: This means that 1.0.dev1 would not be contained in something
+        #       like >=1.0.devabc, however it would be in >=1.0.devabc,>0.0.dev0
+        if not prereleases and item.is_prerelease:
+            return False
+
+        # We simply dispatch to the underlying specs here to make sure that the
+        # given version is contained within all of them.
+        # Note: This use of all() here means that an empty set of specifiers
+        #       will always return True; this is an explicit design decision.
+        return all(
+            s.contains(item, prereleases=prereleases)
+            for s in self._specs
+        )
+
+    def filter(self, iterable, prereleases=None):
+        # Determine if we're forcing a prerelease or not; if we're not forcing
+        # one for this particular filter call, then we'll use whatever the
+        # SpecifierSet thinks about whether or not we should support prereleases.
+        if prereleases is None:
+            prereleases = self.prereleases
+
+        # If we have any specifiers, then we want to wrap our iterable in the
+        # filter method for each one; this will act as a logical AND across
+        # all of the specifiers.
+        if self._specs:
+            for spec in self._specs:
+                iterable = spec.filter(iterable, prereleases=bool(prereleases))
+            return iterable
+        # If we do not have any specifiers, then we need to have a rough filter
+        # which will filter out any pre-releases, unless there are no final
+        # releases, and which will filter out LegacyVersion in general.
+        else:
+            filtered = []
+            found_prereleases = []
+
+            for item in iterable:
+                # Ensure that we have some kind of Version instance for this item.
+                if not isinstance(item, (LegacyVersion, Version)):
+                    parsed_version = parse(item)
+                else:
+                    parsed_version = item
+
+                # Filter out any item which is parsed as a LegacyVersion
+                if isinstance(parsed_version, LegacyVersion):
+                    continue
+
+                # Store any item which is a pre-release for later unless we've
+                # already found a final version or we are accepting prereleases
+                if parsed_version.is_prerelease and not prereleases:
+                    if not filtered:
+                        found_prereleases.append(item)
+                else:
+                    filtered.append(item)
+
+            # If we've found no items except for pre-releases, then we'll go
+            # ahead and use the pre-releases
+            if not filtered and found_prereleases and prereleases is None:
+                return found_prereleases
+
+            return filtered
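+
+
+# Illustrative sketch only (doctest-style), assuming the Specifier and
+# LegacySpecifier classes referenced above behave as described in the
+# comments in this module:
+#
+#     >>> "1.5" in SpecifierSet(">=1.0,<2.0")
+#     True
+#     >>> "2.0" in SpecifierSet(">=1.0,<2.0")
+#     False
+#     >>> "1.5.dev1" in SpecifierSet(">=1.0,<2.0")   # pre-releases excluded by default
+#     False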
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/utils.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..942387cef5d75f299a769b1eb43b6c7679e7a3a0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/utils.py
@@ -0,0 +1,14 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import re
+
+
+_canonicalize_regex = re.compile(r"[-_.]+")
+
+
+def canonicalize_name(name):
+    # This is taken from PEP 503.
+    return _canonicalize_regex.sub("-", name).lower()
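+
+
+# Illustrative sketch (doctest-style): PEP 503 normalization collapses runs of
+# "-", "_" and "." into a single "-" and lowercases the result:
+#
+#     >>> canonicalize_name("Zope.Interface")
+#     'zope-interface'
+#     >>> canonicalize_name("django__REST_framework")
+#     'django-rest-framework'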
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/version.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..83b5ee8c5efadf22ce2f16ff08c8a8d75f1eb5df
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/packaging/version.py
@@ -0,0 +1,393 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import collections
+import itertools
+import re
+
+from ._structures import Infinity
+
+
+__all__ = [
+    "parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"
+]
+
+
+_Version = collections.namedtuple(
+    "_Version",
+    ["epoch", "release", "dev", "pre", "post", "local"],
+)
+
+
+def parse(version):
+    """
+    Parse the given version string and return either a :class:`Version` object
+    or a :class:`LegacyVersion` object depending on whether the given version
+    is a valid PEP 440 version or a legacy version.
+    """
+    try:
+        return Version(version)
+    except InvalidVersion:
+        return LegacyVersion(version)
+
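+# Illustrative sketch (doctest-style) of the dispatch above, using the reprs
+# defined by the classes below:
+#
+#     >>> parse("1.0.dev1")        # valid PEP 440 -> Version
+#     <Version('1.0.dev1')>
+#     >>> parse("2014-03-01")      # not PEP 440 -> LegacyVersion
+#     <LegacyVersion('2014-03-01')>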
+
+class InvalidVersion(ValueError):
+    """
+    An invalid version was found; users should refer to PEP 440.
+    """
+
+
+class _BaseVersion(object):
+
+    def __hash__(self):
+        return hash(self._key)
+
+    def __lt__(self, other):
+        return self._compare(other, lambda s, o: s < o)
+
+    def __le__(self, other):
+        return self._compare(other, lambda s, o: s <= o)
+
+    def __eq__(self, other):
+        return self._compare(other, lambda s, o: s == o)
+
+    def __ge__(self, other):
+        return self._compare(other, lambda s, o: s >= o)
+
+    def __gt__(self, other):
+        return self._compare(other, lambda s, o: s > o)
+
+    def __ne__(self, other):
+        return self._compare(other, lambda s, o: s != o)
+
+    def _compare(self, other, method):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return method(self._key, other._key)
+
+
+class LegacyVersion(_BaseVersion):
+
+    def __init__(self, version):
+        self._version = str(version)
+        self._key = _legacy_cmpkey(self._version)
+
+    def __str__(self):
+        return self._version
+
+    def __repr__(self):
+        return "<LegacyVersion({0})>".format(repr(str(self)))
+
+    @property
+    def public(self):
+        return self._version
+
+    @property
+    def base_version(self):
+        return self._version
+
+    @property
+    def local(self):
+        return None
+
+    @property
+    def is_prerelease(self):
+        return False
+
+    @property
+    def is_postrelease(self):
+        return False
+
+
+_legacy_version_component_re = re.compile(
+    r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE,
+)
+
+_legacy_version_replacement_map = {
+    "pre": "c", "preview": "c", "-": "final-", "rc": "c", "dev": "@",
+}
+
+
+def _parse_version_parts(s):
+    for part in _legacy_version_component_re.split(s):
+        part = _legacy_version_replacement_map.get(part, part)
+
+        if not part or part == ".":
+            continue
+
+        if part[:1] in "0123456789":
+            # pad for numeric comparison
+            yield part.zfill(8)
+        else:
+            yield "*" + part
+
+    # ensure that alpha/beta/candidate are before final
+    yield "*final"
+
+
+def _legacy_cmpkey(version):
+    # We hardcode an epoch of -1 here. A PEP 440 version can only have an epoch
+    # greater than or equal to 0. This will effectively put the LegacyVersion,
+    # which uses the de facto standard originally implemented by setuptools,
+    # before all PEP 440 versions.
+    epoch = -1
+
+    # This scheme is taken from pkg_resources.parse_version of setuptools prior
+    # to its adoption of the packaging library.
+    parts = []
+    for part in _parse_version_parts(version.lower()):
+        if part.startswith("*"):
+            # remove "-" before a prerelease tag
+            if part < "*final":
+                while parts and parts[-1] == "*final-":
+                    parts.pop()
+
+            # remove trailing zeros from each series of numeric parts
+            while parts and parts[-1] == "00000000":
+                parts.pop()
+
+        parts.append(part)
+    parts = tuple(parts)
+
+    return epoch, parts
+
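+# Illustrative sketch (doctest-style): because of the hardcoded -1 epoch above,
+# every LegacyVersion sorts before every PEP 440 Version:
+#
+#     >>> LegacyVersion("2014-03-01") < Version("0.1")
+#     True
+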
+# Deliberately not anchored to the start and end of the string, to make it
+# easier for 3rd party code to reuse
+VERSION_PATTERN = r"""
+    v?
+    (?:
+        (?:(?P<epoch>[0-9]+)!)?                           # epoch
+        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
+        (?P<pre>                                          # pre-release
+            [-_\.]?
+            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P<pre_n>[0-9]+)?
+        )?
+        (?P<post>                                         # post release
+            (?:-(?P<post_n1>[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?P<post_l>post|rev|r)
+                [-_\.]?
+                (?P<post_n2>[0-9]+)?
+            )
+        )?
+        (?P<dev>                                          # dev release
+            [-_\.]?
+            (?P<dev_l>dev)
+            [-_\.]?
+            (?P<dev_n>[0-9]+)?
+        )?
+    )
+    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+
+class Version(_BaseVersion):
+
+    _regex = re.compile(
+        r"^\s*" + VERSION_PATTERN + r"\s*$",
+        re.VERBOSE | re.IGNORECASE,
+    )
+
+    def __init__(self, version):
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion("Invalid version: '{0}'".format(version))
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(
+                match.group("pre_l"),
+                match.group("pre_n"),
+            ),
+            post=_parse_letter_version(
+                match.group("post_l"),
+                match.group("post_n1") or match.group("post_n2"),
+            ),
+            dev=_parse_letter_version(
+                match.group("dev_l"),
+                match.group("dev_n"),
+            ),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self):
+        return "<Version({0})>".format(repr(str(self)))
+
+    def __str__(self):
+        parts = []
+
+        # Epoch
+        if self._version.epoch != 0:
+            parts.append("{0}!".format(self._version.epoch))
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self._version.release))
+
+        # Pre-release
+        if self._version.pre is not None:
+            parts.append("".join(str(x) for x in self._version.pre))
+
+        # Post-release
+        if self._version.post is not None:
+            parts.append(".post{0}".format(self._version.post[1]))
+
+        # Development release
+        if self._version.dev is not None:
+            parts.append(".dev{0}".format(self._version.dev[1]))
+
+        # Local version segment
+        if self._version.local is not None:
+            parts.append(
+                "+{0}".format(".".join(str(x) for x in self._version.local))
+            )
+
+        return "".join(parts)
+
+    @property
+    def public(self):
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self):
+        parts = []
+
+        # Epoch
+        if self._version.epoch != 0:
+            parts.append("{0}!".format(self._version.epoch))
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self._version.release))
+
+        return "".join(parts)
+
+    @property
+    def local(self):
+        version_string = str(self)
+        if "+" in version_string:
+            return version_string.split("+", 1)[1]
+
+    @property
+    def is_prerelease(self):
+        return bool(self._version.dev or self._version.pre)
+
+    @property
+    def is_postrelease(self):
+        return bool(self._version.post)
+
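+# Illustrative sketch (doctest-style) of a few of the properties defined above:
+#
+#     >>> v = Version("1.0.post1+local.1")
+#     >>> v.public, v.base_version, v.local
+#     ('1.0.post1', '1.0', 'local.1')
+#     >>> v.is_postrelease, v.is_prerelease
+#     (True, False)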
+
+def _parse_letter_version(letter, number):
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume that if we are given a number but not a letter, then this
+        # is using the implicit post release syntax (e.g. 1.0-1).
+        letter = "post"
+
+        return letter, int(number)
+
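+# Illustrative sketch (doctest-style) of the normalizations performed above:
+#
+#     >>> _parse_letter_version("c", None)      # alternate spellings collapse
+#     ('rc', 0)
+#     >>> _parse_letter_version("ALPHA", "2")
+#     ('a', 2)
+#     >>> _parse_letter_version(None, "1")      # implicit post release, e.g. 1.0-1
+#     ('post', 1)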
+
+_local_version_seperators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local):
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_seperators.split(local)
+        )
+
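+# Illustrative sketch (doctest-style): numeric local segments become ints so
+# that they compare numerically in _cmpkey below:
+#
+#     >>> _parse_local_version("ubuntu-1")
+#     ('ubuntu', 1)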
+
+def _cmpkey(epoch, release, pre, post, dev, local):
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll reverse the list, drop all of the now
+    # leading zeros until we come to something non-zero, re-reverse the rest
+    # back into the correct order, and use that tuple as our sorting key.
+    release = tuple(
+        reversed(list(
+            itertools.dropwhile(
+                lambda x: x == 0,
+                reversed(release),
+            )
+        ))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        pre = -Infinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        pre = Infinity
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        post = -Infinity
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        dev = Infinity
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        local = -Infinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        local = tuple(
+            (i, "") if isinstance(i, int) else (-Infinity, i)
+            for i in local
+        )
+
+    return epoch, release, pre, post, dev, local
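+
+
+# Illustrative sketch (doctest-style) of the ordering these keys produce:
+#
+#     >>> versions = ["1.0", "1.0.post1", "1.0a1", "1.0.dev0", "1.0+local"]
+#     >>> [str(v) for v in sorted(Version(s) for s in versions)]
+#     ['1.0.dev0', '1.0a1', '1.0', '1.0+local', '1.0.post1']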
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/pyparsing.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/pyparsing.py
new file mode 100644
index 0000000000000000000000000000000000000000..d433392c711ee2e921a814870d57ef514ac260fc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/pyparsing.py
@@ -0,0 +1,5696 @@
+# module pyparsing.py
+#
+# Copyright (c) 2003-2016  Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+
+The pyparsing module is an alternative approach to creating and executing simple grammars,
+vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+provides a library of classes that you use to construct the grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form 
+C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements 
+(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
+L{Literal} expressions)::
+
+    from pyparsing import Word, alphas
+
+    # define grammar of a greeting
+    greet = Word(alphas) + "," + Word(alphas) + "!"
+
+    hello = "Hello, World!"
+    print (hello, "->", greet.parseString(hello))
+
+The program outputs the following::
+
+    Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the self-explanatory
+class names, and the use of '+', '|' and '^' operators.
+
+The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
+object with named attributes.
+
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
+ - quoted strings
+ - embedded comments
+"""
+
+__version__ = "2.1.10"
+__versionTime__ = "07 Oct 2016 01:31 UTC"
+__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+import collections
+import pprint
+import traceback
+import types
+from datetime import datetime
+
+try:
+    from _thread import RLock
+except ImportError:
+    from threading import RLock
+
+try:
+    from collections import OrderedDict as _OrderedDict
+except ImportError:
+    try:
+        from ordereddict import OrderedDict as _OrderedDict
+    except ImportError:
+        _OrderedDict = None
+
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+
+__all__ = [
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
+'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
+'CloseMatch', 'tokenMap', 'pyparsing_common',
+]
+
+system_version = tuple(sys.version_info)[:3]
+PY_3 = system_version[0] == 3
+if PY_3:
+    _MAX_INT = sys.maxsize
+    basestring = str
+    unichr = chr
+    _ustr = str
+
+    # build list of single arg builtins, that can be used as parse actions
+    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
+
+else:
+    _MAX_INT = sys.maxint
+    range = xrange
+
+    def _ustr(obj):
+        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
+           str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
+           then encodes it with the default encoding, escaping any characters that cannot be encoded.
+        """
+        if isinstance(obj,unicode):
+            return obj
+
+        try:
+            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
+            # it won't break any existing code.
+            return str(obj)
+
+        except UnicodeEncodeError:
+            # Else encode it
+            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+            xmlcharref = Regex(r'&#\d+;')
+            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+            return xmlcharref.transformString(ret)
+
+    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
+    singleArgBuiltins = []
+    import __builtin__
+    for fname in "sum len sorted reversed list tuple set any all min max".split():
+        try:
+            singleArgBuiltins.append(getattr(__builtin__,fname))
+        except AttributeError:
+            continue
+            
+_generatorType = type((y for y in range(1)))
+ 
+def _xml_escape(data):
+    """Escape &, <, >, ", ', etc. in a string of data."""
+
+    # ampersand must be replaced first
+    from_symbols = '&><"\''
+    to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
+    for from_,to_ in zip(from_symbols, to_symbols):
+        data = data.replace(from_, to_)
+    return data
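+
+# Illustrative sketch (doctest-style):
+#
+#     >>> _xml_escape('a < b & "c"')
+#     'a &lt; b &amp; &quot;c&quot;'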
+
+class _Constants(object):
+    pass
+
+alphas     = string.ascii_uppercase + string.ascii_lowercase
+nums       = "0123456789"
+hexnums    = nums + "ABCDEFabcdef"
+alphanums  = alphas + nums
+_bslash    = chr(92)
+printables = "".join(c for c in string.printable if c not in string.whitespace)
+
+class ParseBaseException(Exception):
+    """base exception class for all parsing runtime exceptions"""
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, pstr, loc=0, msg=None, elem=None ):
+        self.loc = loc
+        if msg is None:
+            self.msg = pstr
+            self.pstr = ""
+        else:
+            self.msg = msg
+            self.pstr = pstr
+        self.parserElement = elem
+        self.args = (pstr, loc, msg)
+
+    @classmethod
+    def _from_exception(cls, pe):
+        """
+        internal factory method to simplify creating one type of ParseException 
+        from another - avoids having __init__ signature conflicts among subclasses
+        """
+        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+    def __getattr__( self, aname ):
+        """supported attributes by name are:
+            - lineno - returns the line number of the exception text
+            - col - returns the column number of the exception text
+            - line - returns the line containing the exception text
+        """
+        if( aname == "lineno" ):
+            return lineno( self.loc, self.pstr )
+        elif( aname in ("col", "column") ):
+            return col( self.loc, self.pstr )
+        elif( aname == "line" ):
+            return line( self.loc, self.pstr )
+        else:
+            raise AttributeError(aname)
+
+    def __str__( self ):
+        return "%s (at char %d), (line:%d, col:%d)" % \
+                ( self.msg, self.loc, self.lineno, self.column )
+    def __repr__( self ):
+        return _ustr(self)
+    def markInputline( self, markerString = ">!<" ):
+        """Extracts the exception line from the input string, and marks
+           the location of the exception with a special symbol.
+        """
+        line_str = self.line
+        line_column = self.column - 1
+        if markerString:
+            line_str = "".join((line_str[:line_column],
+                                markerString, line_str[line_column:]))
+        return line_str.strip()
+    def __dir__(self):
+        return "lineno col line".split() + dir(type(self))
+
+class ParseException(ParseBaseException):
+    """
+    Exception thrown when parse expressions don't match the input string;
+    supported attributes by name are:
+     - lineno - returns the line number of the exception text
+     - col - returns the column number of the exception text
+     - line - returns the line containing the exception text
+        
+    Example::
+        try:
+            Word(nums).setName("integer").parseString("ABC")
+        except ParseException as pe:
+            print(pe)
+            print("column: {}".format(pe.col))
+            
+    prints::
+        Expected integer (at char 0), (line:1, col:1)
+        column: 1
+    """
+    pass
+
+class ParseFatalException(ParseBaseException):
+    """user-throwable exception thrown when inconsistent parse content
+       is found; stops all parsing immediately"""
+    pass
+
+class ParseSyntaxException(ParseFatalException):
+    """just like L{ParseFatalException}, but thrown internally when an
+       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop 
+       immediately because an unbacktrackable syntax error has been found"""
+    pass
+
+#~ class ReparseException(ParseBaseException):
+    #~ """Experimental class - parse actions can raise this exception to cause
+       #~ pyparsing to reparse the input string:
+        #~ - with a modified input string, and/or
+        #~ - with a modified start location
+       #~ Set the values of the ReparseException in the constructor, and raise the
+       #~ exception in a parse action to cause pyparsing to use the new string/location.
+       #~ Setting the values as None causes no change to be made.
+       #~ """
+    #~ def __init_( self, newstring, restartLoc ):
+        #~ self.newParseText = newstring
+        #~ self.reparseLoc = restartLoc
+
+class RecursiveGrammarException(Exception):
+    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
+    def __init__( self, parseElementList ):
+        self.parseElementTrace = parseElementList
+
+    def __str__( self ):
+        return "RecursiveGrammarException: %s" % self.parseElementTrace
+
+class _ParseResultsWithOffset(object):
+    def __init__(self,p1,p2):
+        self.tup = (p1,p2)
+    def __getitem__(self,i):
+        return self.tup[i]
+    def __repr__(self):
+        return repr(self.tup[0])
+    def setOffset(self,i):
+        self.tup = (self.tup[0],i)
+
+class ParseResults(object):
+    """
+    Structured parse results, to provide multiple means of access to the parsed data:
+       - as a list (C{len(results)})
+       - by list index (C{results[0], results[1]}, etc.)
+       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
+
+    Example::
+        integer = Word(nums)
+        date_str = (integer.setResultsName("year") + '/' 
+                        + integer.setResultsName("month") + '/' 
+                        + integer.setResultsName("day"))
+        # equivalent form:
+        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+        # parseString returns a ParseResults object
+        result = date_str.parseString("1999/12/31")
+
+        def test(s, fn=repr):
+            print("%s -> %s" % (s, fn(eval(s))))
+        test("list(result)")
+        test("result[0]")
+        test("result['month']")
+        test("result.day")
+        test("'month' in result")
+        test("'minutes' in result")
+        test("result.dump()", str)
+    prints::
+        list(result) -> ['1999', '/', '12', '/', '31']
+        result[0] -> '1999'
+        result['month'] -> '12'
+        result.day -> '31'
+        'month' in result -> True
+        'minutes' in result -> False
+        result.dump() -> ['1999', '/', '12', '/', '31']
+        - day: 31
+        - month: 12
+        - year: 1999
+    """
+    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
+        if isinstance(toklist, cls):
+            return toklist
+        retobj = object.__new__(cls)
+        retobj.__doinit = True
+        return retobj
+
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
+        if self.__doinit:
+            self.__doinit = False
+            self.__name = None
+            self.__parent = None
+            self.__accumNames = {}
+            self.__asList = asList
+            self.__modal = modal
+            if toklist is None:
+                toklist = []
+            if isinstance(toklist, list):
+                self.__toklist = toklist[:]
+            elif isinstance(toklist, _generatorType):
+                self.__toklist = list(toklist)
+            else:
+                self.__toklist = [toklist]
+            self.__tokdict = dict()
+
+        if name is not None and name:
+            if not modal:
+                self.__accumNames[name] = 0
+            if isinstance(name,int):
+                name = _ustr(name) # will always return a str, but use _ustr for consistency
+            self.__name = name
+            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
+                if isinstance(toklist,basestring):
+                    toklist = [ toklist ]
+                if asList:
+                    if isinstance(toklist,ParseResults):
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
+                    else:
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
+                    self[name].__name = name
+                else:
+                    try:
+                        self[name] = toklist[0]
+                    except (KeyError,TypeError,IndexError):
+                        self[name] = toklist
+
+    def __getitem__( self, i ):
+        if isinstance( i, (int,slice) ):
+            return self.__toklist[i]
+        else:
+            if i not in self.__accumNames:
+                return self.__tokdict[i][-1][0]
+            else:
+                return ParseResults([ v[0] for v in self.__tokdict[i] ])
+
+    def __setitem__( self, k, v, isinstance=isinstance ):
+        if isinstance(v,_ParseResultsWithOffset):
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
+            sub = v[0]
+        elif isinstance(k,(int,slice)):
+            self.__toklist[k] = v
+            sub = v
+        else:
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
+            sub = v
+        if isinstance(sub,ParseResults):
+            sub.__parent = wkref(self)
+
+    def __delitem__( self, i ):
+        if isinstance(i,(int,slice)):
+            mylen = len( self.__toklist )
+            del self.__toklist[i]
+
+            # convert int to slice
+            if isinstance(i, int):
+                if i < 0:
+                    i += mylen
+                i = slice(i, i+1)
+            # get removed indices
+            removed = list(range(*i.indices(mylen)))
+            removed.reverse()
+            # fixup indices in token dictionary
+            for name,occurrences in self.__tokdict.items():
+                for j in removed:
+                    for k, (value, position) in enumerate(occurrences):
+                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+        else:
+            del self.__tokdict[i]
+
+    def __contains__( self, k ):
+        return k in self.__tokdict
+
+    def __len__( self ): return len( self.__toklist )
+    def __bool__(self): return ( not not self.__toklist )
+    __nonzero__ = __bool__
+    def __iter__( self ): return iter( self.__toklist )
+    def __reversed__( self ): return iter( self.__toklist[::-1] )
+    def _iterkeys( self ):
+        if hasattr(self.__tokdict, "iterkeys"):
+            return self.__tokdict.iterkeys()
+        else:
+            return iter(self.__tokdict)
+
+    def _itervalues( self ):
+        return (self[k] for k in self._iterkeys())
+            
+    def _iteritems( self ):
+        return ((k, self[k]) for k in self._iterkeys())
+
+    if PY_3:
+        keys = _iterkeys       
+        """Returns an iterator of all named result keys (Python 3.x only)."""
+
+        values = _itervalues
+        """Returns an iterator of all named result values (Python 3.x only)."""
+
+        items = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
+
+    else:
+        iterkeys = _iterkeys
+        """Returns an iterator of all named result keys (Python 2.x only)."""
+
+        itervalues = _itervalues
+        """Returns an iterator of all named result values (Python 2.x only)."""
+
+        iteritems = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
+
+        def keys( self ):
+            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iterkeys())
+
+        def values( self ):
+            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.itervalues())
+                
+        def items( self ):
+            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iteritems())
+
+    def haskeys( self ):
+        """Since keys() returns an iterator, this method is helpful in bypassing
+           code that looks for the existence of any defined results names."""
+        return bool(self.__tokdict)
+        
+    def pop( self, *args, **kwargs):
+        """
+        Removes and returns item at specified index (default=C{last}).
+        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
+        argument or an integer argument, it will use C{list} semantics
+        and pop tokens from the list of parsed tokens. If passed a 
+        non-integer argument (most likely a string), it will use C{dict}
+        semantics and pop the corresponding value from any defined 
+        results names. A second default return value argument is 
+        supported, just as in C{dict.pop()}.
+
+        Example::
+            def remove_first(tokens):
+                tokens.pop(0)
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
+
+            label = Word(alphas)
+            patt = label("LABEL") + OneOrMore(Word(nums))
+            print(patt.parseString("AAB 123 321").dump())
+
+            # Use pop() in a parse action to remove named result (note that corresponding value is not
+            # removed from list form of results)
+            def remove_LABEL(tokens):
+                tokens.pop("LABEL")
+                return tokens
+            patt.addParseAction(remove_LABEL)
+            print(patt.parseString("AAB 123 321").dump())
+        prints::
+            ['AAB', '123', '321']
+            - LABEL: AAB
+
+            ['AAB', '123', '321']
+        """
+        if not args:
+            args = [-1]
+        for k,v in kwargs.items():
+            if k == 'default':
+                args = (args[0], v)
+            else:
+                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
+        if (isinstance(args[0], int) or 
+                        len(args) == 1 or 
+                        args[0] in self):
+            index = args[0]
+            ret = self[index]
+            del self[index]
+            return ret
+        else:
+            defaultvalue = args[1]
+            return defaultvalue
+
+    def get(self, key, defaultValue=None):
+        """
+        Returns named result matching the given key, or if there is no
+        such name, then returns the given C{defaultValue} or C{None} if no
+        C{defaultValue} is specified.
+
+        Similar to C{dict.get()}.
+        
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            result = date_str.parseString("1999/12/31")
+            print(result.get("year")) # -> '1999'
+            print(result.get("hour", "not specified")) # -> 'not specified'
+            print(result.get("hour")) # -> None
+        """
+        if key in self:
+            return self[key]
+        else:
+            return defaultValue
+
+    def insert( self, index, insStr ):
+        """
+        Inserts new element at location index in the list of parsed tokens.
+        
+        Similar to C{list.insert()}.
+
+        Example::
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+            # use a parse action to insert the parse location in the front of the parsed results
+            def insert_locn(locn, tokens):
+                tokens.insert(0, locn)
+            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
+        """
+        self.__toklist.insert(index, insStr)
+        # fixup indices in token dictionary
+        for name,occurrences in self.__tokdict.items():
+            for k, (value, position) in enumerate(occurrences):
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+    def append( self, item ):
+        """
+        Add single element to end of ParseResults list of elements.
+
+        Example::
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+            
+            # use a parse action to compute the sum of the parsed integers, and add it to the end
+            def append_sum(tokens):
+                tokens.append(sum(map(int, tokens)))
+            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
+        """
+        self.__toklist.append(item)
+
+    def extend( self, itemseq ):
+        """
+        Add sequence of elements to end of ParseResults list of elements.
+
+        Example::
+            patt = OneOrMore(Word(alphas))
+            
+            # use a parse action to append the reverse of the matched strings, to make a palindrome
+            def make_palindrome(tokens):
+                tokens.extend(reversed([t[::-1] for t in tokens]))
+                return ''.join(tokens)
+            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
+        """
+        if isinstance(itemseq, ParseResults):
+            self += itemseq
+        else:
+            self.__toklist.extend(itemseq)
+
+    def clear( self ):
+        """
+        Clear all elements and results names.
+        """
+        del self.__toklist[:]
+        self.__tokdict.clear()
+
+    def __getattr__( self, name ):
+        try:
+            return self[name]
+        except KeyError:
+            return ""
+            
+        if name in self.__tokdict:
+            if name not in self.__accumNames:
+                return self.__tokdict[name][-1][0]
+            else:
+                return ParseResults([ v[0] for v in self.__tokdict[name] ])
+        else:
+            return ""
+
+    def __add__( self, other ):
+        ret = self.copy()
+        ret += other
+        return ret
+
+    def __iadd__( self, other ):
+        if other.__tokdict:
+            offset = len(self.__toklist)
+            addoffset = lambda a: offset if a<0 else a+offset
+            otheritems = other.__tokdict.items()
+            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
+                                for (k,vlist) in otheritems for v in vlist]
+            for k,v in otherdictitems:
+                self[k] = v
+                if isinstance(v[0],ParseResults):
+                    v[0].__parent = wkref(self)
+            
+        self.__toklist += other.__toklist
+        self.__accumNames.update( other.__accumNames )
+        return self
+
+    def __radd__(self, other):
+        if isinstance(other,int) and other == 0:
+            # useful for merging many ParseResults using sum() builtin
+            return self.copy()
+        else:
+            # this may raise a TypeError - so be it
+            return other + self
+        
+    def __repr__( self ):
+        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
+
+    def __str__( self ):
+        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
+
+    def _asStringList( self, sep='' ):
+        out = []
+        for item in self.__toklist:
+            if out and sep:
+                out.append(sep)
+            if isinstance( item, ParseResults ):
+                out += item._asStringList()
+            else:
+                out.append( _ustr(item) )
+        return out
+
+    def asList( self ):
+        """
+        Returns the parse results as a nested list of matching tokens, all converted to strings.
+
+        Example::
+            patt = OneOrMore(Word(alphas))
+            result = patt.parseString("sldkj lsdkj sldkj")
+            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
+            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
+            
+            # Use asList() to create an actual list
+            result_list = result.asList()
+            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
+        """
+        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
+
+    def asDict( self ):
+        """
+        Returns the named parse results as a nested dictionary.
+
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+            
+            result = date_str.parseString('12/31/1999')
+            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
+            
+            result_dict = result.asDict()
+            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
+
+            # even though a ParseResults supports dict-like access, sometimes you just need to have a dict
+            import json
+            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
+            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
+        """
+        if PY_3:
+            item_fn = self.items
+        else:
+            item_fn = self.iteritems
+            
+        def toItem(obj):
+            if isinstance(obj, ParseResults):
+                if obj.haskeys():
+                    return obj.asDict()
+                else:
+                    return [toItem(v) for v in obj]
+            else:
+                return obj
+                
+        return dict((k,toItem(v)) for k,v in item_fn())
+
+    def copy( self ):
+        """
+        Returns a new copy of a C{ParseResults} object.
+        """
+        ret = ParseResults( self.__toklist )
+        ret.__tokdict = self.__tokdict.copy()
+        ret.__parent = self.__parent
+        ret.__accumNames.update( self.__accumNames )
+        ret.__name = self.__name
+        return ret
+
+    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
+        """
+        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
+        """
+        nl = "\n"
+        out = []
+        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
+                                                            for v in vlist)
+        nextLevelIndent = indent + "  "
+
+        # collapse out indents if formatting is not desired
+        if not formatted:
+            indent = ""
+            nextLevelIndent = ""
+            nl = ""
+
+        selfTag = None
+        if doctag is not None:
+            selfTag = doctag
+        else:
+            if self.__name:
+                selfTag = self.__name
+
+        if not selfTag:
+            if namedItemsOnly:
+                return ""
+            else:
+                selfTag = "ITEM"
+
+        out += [ nl, indent, "<", selfTag, ">" ]
+
+        for i,res in enumerate(self.__toklist):
+            if isinstance(res,ParseResults):
+                if i in namedItems:
+                    out += [ res.asXML(namedItems[i],
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+                else:
+                    out += [ res.asXML(None,
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+            else:
+                # individual token, see if there is a name for it
+                resTag = None
+                if i in namedItems:
+                    resTag = namedItems[i]
+                if not resTag:
+                    if namedItemsOnly:
+                        continue
+                    else:
+                        resTag = "ITEM"
+                xmlBodyText = _xml_escape(_ustr(res))
+                out += [ nl, nextLevelIndent, "<", resTag, ">",
+                                                xmlBodyText,
+                                                "</", resTag, ">" ]
+
+        out += [ nl, indent, "</", selfTag, ">" ]
+        return "".join(out)
+
+    def __lookup(self,sub):
+        for k,vlist in self.__tokdict.items():
+            for v,loc in vlist:
+                if sub is v:
+                    return k
+        return None
+
+    def getName(self):
+        r"""
+        Returns the results name for this token expression. Useful when several 
+        different expressions might match at a particular location.
+
+        Example::
+            integer = Word(nums)
+            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
+            house_number_expr = Suppress('#') + Word(nums, alphanums)
+            user_data = (Group(house_number_expr)("house_number") 
+                        | Group(ssn_expr)("ssn")
+                        | Group(integer)("age"))
+            user_info = OneOrMore(user_data)
+            
+            result = user_info.parseString("22 111-22-3333 #221B")
+            for item in result:
+                print(item.getName(), ':', item[0])
+        prints::
+            age : 22
+            ssn : 111-22-3333
+            house_number : 221B
+        """
+        if self.__name:
+            return self.__name
+        elif self.__parent:
+            par = self.__parent()
+            if par:
+                return par.__lookup(self)
+            else:
+                return None
+        elif (len(self) == 1 and
+               len(self.__tokdict) == 1 and
+               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
+            return next(iter(self.__tokdict.keys()))
+        else:
+            return None
+
+    def dump(self, indent='', depth=0, full=True):
+        """
+        Diagnostic method for listing out the contents of a C{ParseResults}.
+        Accepts an optional C{indent} argument so that this string can be embedded
+        in a nested display of other data.
+
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+            
+            result = date_str.parseString('12/31/1999')
+            print(result.dump())
+        prints::
+            ['12', '/', '31', '/', '1999']
+            - day: 1999
+            - month: 31
+            - year: 12
+        """
+        out = []
+        NL = '\n'
+        out.append( indent+_ustr(self.asList()) )
+        if full:
+            if self.haskeys():
+                items = sorted((str(k), v) for k,v in self.items())
+                for k,v in items:
+                    if out:
+                        out.append(NL)
+                    out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
+                    if isinstance(v,ParseResults):
+                        if v:
+                            out.append( v.dump(indent,depth+1) )
+                        else:
+                            out.append(_ustr(v))
+                    else:
+                        out.append(repr(v))
+            elif any(isinstance(vv,ParseResults) for vv in self):
+                v = self
+                for i,vv in enumerate(v):
+                    if isinstance(vv,ParseResults):
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
+                    else:
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
+            
+        return "".join(out)
+
+    def pprint(self, *args, **kwargs):
+        """
+        Pretty-printer for parsed results as a list, using the C{pprint} module.
+        Accepts additional positional or keyword args as defined for the 
+        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
+
+        Example::
+            ident = Word(alphas, alphanums)
+            num = Word(nums)
+            func = Forward()
+            term = ident | num | Group('(' + func + ')')
+            func <<= ident + Group(Optional(delimitedList(term)))
+            result = func.parseString("fna a,b,(fnb c,d,200),100")
+            result.pprint(width=40)
+        prints::
+            ['fna',
+             ['a',
+              'b',
+              ['(', 'fnb', ['c', 'd', '200'], ')'],
+              '100']]
+        """
+        pprint.pprint(self.asList(), *args, **kwargs)
+
+    # add support for pickle protocol
+    def __getstate__(self):
+        return ( self.__toklist,
+                 ( self.__tokdict.copy(),
+                   self.__parent is not None and self.__parent() or None,
+                   self.__accumNames,
+                   self.__name ) )
+
+    def __setstate__(self,state):
+        self.__toklist = state[0]
+        (self.__tokdict,
+         par,
+         inAccumNames,
+         self.__name) = state[1]
+        self.__accumNames = {}
+        self.__accumNames.update(inAccumNames)
+        if par is not None:
+            self.__parent = wkref(par)
+        else:
+            self.__parent = None
+
+    def __getnewargs__(self):
+        return self.__toklist, self.__name, self.__asList, self.__modal
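+
+    # Illustrative pickling sketch (hypothetical tokens; shown as comments only): the
+    # pickle-protocol methods above are intended to let a ParseResults round-trip
+    # through the pickle module, preserving its results names.
+    #
+    #   import pickle
+    #   result = (Word(nums)("num") + Word(alphas)("word")).parseString("123 abc")
+    #   restored = pickle.loads(pickle.dumps(result))
+    #   restored["num"]   # -> '123'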
+
+    def __dir__(self):
+        return (dir(type(self)) + list(self.keys()))
+
+collections.MutableMapping.register(ParseResults)
+
+def col (loc,strg):
+    """Returns current column within a string, counting newlines as line separators.
+   The first column is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    s = strg
+    return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
+
+def lineno(loc,strg):
+    """Returns current line number within a string, counting newlines as line separators.
+   The first line is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    return strg.count("\n",0,loc) + 1
+
+def line( loc, strg ):
+    """Returns the line of text containing loc within a string, counting newlines as line separators.
+       """
+    lastCR = strg.rfind("\n", 0, loc)
+    nextCR = strg.find("\n", loc)
+    if nextCR >= 0:
+        return strg[lastCR+1:nextCR]
+    else:
+        return strg[lastCR+1:]
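+
+# A quick sketch of the three location helpers above (values follow from this sample string):
+#
+#   s = "abc\ndef"
+#   lineno(5, s), col(5, s), line(5, s)   # -> (2, 2, 'def')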
+
+def _defaultStartDebugAction( instring, loc, expr ):
+    print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
+
+def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+    print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
+
+def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+    print ("Exception raised:" + _ustr(exc))
+
+def nullDebugAction(*args):
+    """'Do-nothing' debug action, to suppress debugging output during parsing."""
+    pass
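+
+# Sketch of mixing custom and default debug actions (hypothetical expression): pass
+# nullDebugAction to silence just the "Match ..." announcements, and None to keep the
+# default success/exception messages (see ParserElement.setDebugActions below).
+#
+#   term = Word(alphas).setName("term").setDebugActions(nullDebugAction, None, None)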
+
+# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
+#~ 'decorator to trim function calls to match the arity of the target'
+#~ def _trim_arity(func, maxargs=3):
+    #~ if func in singleArgBuiltins:
+        #~ return lambda s,l,t: func(t)
+    #~ limit = 0
+    #~ foundArity = False
+    #~ def wrapper(*args):
+        #~ nonlocal limit,foundArity
+        #~ while 1:
+            #~ try:
+                #~ ret = func(*args[limit:])
+                #~ foundArity = True
+                #~ return ret
+            #~ except TypeError:
+                #~ if limit == maxargs or foundArity:
+                    #~ raise
+                #~ limit += 1
+                #~ continue
+    #~ return wrapper
+
+# this version is Python 2.x-3.x cross-compatible
+'decorator to trim function calls to match the arity of the target'
+def _trim_arity(func, maxargs=2):
+    if func in singleArgBuiltins:
+        return lambda s,l,t: func(t)
+    limit = [0]
+    foundArity = [False]
+    
+    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
+    if system_version[:2] >= (3,5):
+        def extract_stack(limit=0):
+            # special handling for Python 3.5.0 - extra deep call stack by 1
+            offset = -3 if system_version == (3,5,0) else -2
+            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
+            return [(frame_summary.filename, frame_summary.lineno)]
+        def extract_tb(tb, limit=0):
+            frames = traceback.extract_tb(tb, limit=limit)
+            frame_summary = frames[-1]
+            return [(frame_summary.filename, frame_summary.lineno)]
+    else:
+        extract_stack = traceback.extract_stack
+        extract_tb = traceback.extract_tb
+    
+    # synthesize what would be returned by traceback.extract_stack at the call to 
+    # user's parse action 'func', so that we don't incur call penalty at parse time
+    
+    LINE_DIFF = 6
+    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND 
+    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
+    this_line = extract_stack(limit=2)[-1]
+    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
+
+    def wrapper(*args):
+        while 1:
+            try:
+                ret = func(*args[limit[0]:])
+                foundArity[0] = True
+                return ret
+            except TypeError:
+                # re-raise TypeErrors if they did not come from our arity testing
+                if foundArity[0]:
+                    raise
+                else:
+                    try:
+                        tb = sys.exc_info()[-1]
+                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
+                            raise
+                    finally:
+                        del tb
+
+                if limit[0] <= maxargs:
+                    limit[0] += 1
+                    continue
+                raise
+
+    # copy func name to wrapper for sensible debug output
+    func_name = "<parse action>"
+    try:
+        func_name = getattr(func, '__name__', 
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    wrapper.__name__ = func_name
+
+    return wrapper
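+
+# Arity-trimming sketch (hypothetical parse actions): any of the signatures below is
+# acceptable once wrapped by _trim_arity, which retries the call while dropping
+# leading arguments until the callable accepts them.
+#
+#   integer = Word(nums)
+#   integer.setParseAction(lambda: None)                  # no arguments
+#   integer.setParseAction(lambda toks: int(toks[0]))     # tokens only
+#   integer.setParseAction(lambda loc, toks: toks)        # location and tokens
+#   integer.setParseAction(lambda s, loc, toks: toks)     # full signature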
+
+class ParserElement(object):
+    """Abstract base level parser element class."""
+    DEFAULT_WHITE_CHARS = " \n\t\r"
+    verbose_stacktrace = False
+
+    @staticmethod
+    def setDefaultWhitespaceChars( chars ):
+        r"""
+        Overrides the default whitespace chars
+
+        Example::
+            # default whitespace chars are space, <TAB> and newline
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
+            
+            # change to just treat newline as significant
+            ParserElement.setDefaultWhitespaceChars(" \t")
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
+        """
+        ParserElement.DEFAULT_WHITE_CHARS = chars
+
+    @staticmethod
+    def inlineLiteralsUsing(cls):
+        """
+        Set class to be used for inclusion of string literals into a parser.
+        
+        Example::
+            # default literal class used is Literal
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+
+            # change to Suppress
+            ParserElement.inlineLiteralsUsing(Suppress)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
+        """
+        ParserElement._literalStringClass = cls
+
+    def __init__( self, savelist=False ):
+        self.parseAction = list()
+        self.failAction = None
+        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
+        self.strRepr = None
+        self.resultsName = None
+        self.saveAsList = savelist
+        self.skipWhitespace = True
+        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        self.copyDefaultWhiteChars = True
+        self.mayReturnEmpty = False # used when checking for left-recursion
+        self.keepTabs = False
+        self.ignoreExprs = list()
+        self.debug = False
+        self.streamlined = False
+        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+        self.errmsg = ""
+        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+        self.debugActions = ( None, None, None ) #custom debug actions
+        self.re = None
+        self.callPreparse = True # used to avoid redundant calls to preParse
+        self.callDuringTry = False
+
+    def copy( self ):
+        """
+        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
+        for the same parsing pattern, using copies of the original parse element.
+        
+        Example::
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
+            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+            
+            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
+        prints::
+            [5120, 100, 655360, 268435456]
+        Equivalent form of C{expr.copy()} is just C{expr()}::
+            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+        """
+        cpy = copy.copy( self )
+        cpy.parseAction = self.parseAction[:]
+        cpy.ignoreExprs = self.ignoreExprs[:]
+        if self.copyDefaultWhiteChars:
+            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        return cpy
+
+    def setName( self, name ):
+        """
+        Define a name for this expression, to make debugging and exception messages clearer.
+        
+        Example::
+            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
+            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
+        """
+        self.name = name
+        self.errmsg = "Expected " + self.name
+        if hasattr(self,"exception"):
+            self.exception.msg = self.errmsg
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """
+        Define name for referencing matching tokens as a nested attribute
+        of the returned parse results.
+        NOTE: this returns a *copy* of the original C{ParserElement} object;
+        this is so that the client can define a basic element, such as an
+        integer, and reference it in multiple places with different names.
+
+        You can also set results names using the abbreviated syntax,
+        C{expr("name")} in place of C{expr.setResultsName("name")} - 
+        see L{I{__call__}<__call__>}.
+
+        Example::
+            date_str = (integer.setResultsName("year") + '/' 
+                        + integer.setResultsName("month") + '/' 
+                        + integer.setResultsName("day"))
+
+            # equivalent form:
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+        """
+        newself = self.copy()
+        if name.endswith("*"):
+            name = name[:-1]
+            listAllMatches=True
+        newself.resultsName = name
+        newself.modalResults = not listAllMatches
+        return newself
+
+    def setBreak(self,breakFlag = True):
+        """Method to invoke the Python pdb debugger when this element is
+           about to be parsed. Set C{breakFlag} to True to enable, False to
+           disable.
+        """
+        if breakFlag:
+            _parseMethod = self._parse
+            def breaker(instring, loc, doActions=True, callPreParse=True):
+                import pdb
+                pdb.set_trace()
+                return _parseMethod( instring, loc, doActions, callPreParse )
+            breaker._originalParseMethod = _parseMethod
+            self._parse = breaker
+        else:
+            if hasattr(self._parse,"_originalParseMethod"):
+                self._parse = self._parse._originalParseMethod
+        return self
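+
+    # setBreak sketch (hypothetical expression; requires an interactive console): pdb
+    # stops execution just before 'integer' is matched.
+    #
+    #   integer = Word(nums).setBreak()
+    #   (Word(alphas) + integer).parseString("abc 123")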
+
+    def setParseAction( self, *fns, **kwargs ):
+        """
+        Define action to perform when successfully matching parse element definition.
+        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+        C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+         - s   = the original string being parsed (see note below)
+         - loc = the location of the matching substring
+         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+        If the functions in fns modify the tokens, they can return them as the return
+        value from fn, and the modified list of tokens will replace the original.
+        Otherwise, fn does not need to return any value.
+
+        Optional keyword arguments:
+         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
+
+        Note: the default parsing behavior is to expand tabs in the input string
+        before starting the parsing process.  See L{I{parseString}<parseString>} for more information
+        on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+        consistent view of the parsed string, the parse location, and line and column
+        positions within the parsed string.
+        
+        Example::
+            integer = Word(nums)
+            date_str = integer + '/' + integer + '/' + integer
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+            # use parse action to convert to ints at parse time
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            date_str = integer + '/' + integer + '/' + integer
+
+            # note that integer fields are now ints, not strings
+            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
+        """
+        self.parseAction = list(map(_trim_arity, list(fns)))
+        self.callDuringTry = kwargs.get("callDuringTry", False)
+        return self
+
+    def addParseAction( self, *fns, **kwargs ):
+        """
+        Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
+        
+        See examples in L{I{copy}<copy>}.
+        """
+        self.parseAction += list(map(_trim_arity, list(fns)))
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def addCondition(self, *fns, **kwargs):
+        """Add a boolean predicate function to expression's list of parse actions. See 
+        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction}, 
+        functions passed to C{addCondition} need to return boolean success/fail of the condition.
+
+        Optional keyword arguments:
+         - message = define a custom message to be used in the raised exception
+         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
+         
+        Example::
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            year_int = integer.copy()
+            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
+            date_str = year_int + '/' + integer + '/' + integer
+
+            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
+        """
+        msg = kwargs.get("message", "failed user-defined condition")
+        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
+        for fn in fns:
+            fn = _trim_arity(fn)
+            def pa(s,l,t,fn=fn):
+                # bind fn as a default argument so each condition keeps its own function
+                # (otherwise all conditions would share the loop variable's final value)
+                if not bool(fn(s,l,t)):
+                    raise exc_type(s,l,msg)
+            self.parseAction.append(pa)
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def setFailAction( self, fn ):
+        """Define action to perform if parsing fails at this expression.
+           Fail action fn is a callable function that takes the arguments
+           C{fn(s,loc,expr,err)} where:
+            - s = string being parsed
+            - loc = location where expression match was attempted and failed
+            - expr = the parse expression that failed
+            - err = the exception thrown
+           The function returns no value.  It may throw C{L{ParseFatalException}}
+           if it is desired to stop parsing immediately."""
+        self.failAction = fn
+        return self
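+
+    # Fail action sketch (hypothetical names): report where the failed match was attempted.
+    #
+    #   def report_failure(s, loc, expr, err):
+    #       print("no match for %s at line %d, col %d" % (expr, lineno(loc, s), col(loc, s)))
+    #   body = Keyword("begin").setFailAction(report_failure)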
+
+    def _skipIgnorables( self, instring, loc ):
+        exprsFound = True
+        while exprsFound:
+            exprsFound = False
+            for e in self.ignoreExprs:
+                try:
+                    while 1:
+                        loc,dummy = e._parse( instring, loc )
+                        exprsFound = True
+                except ParseException:
+                    pass
+        return loc
+
+    def preParse( self, instring, loc ):
+        if self.ignoreExprs:
+            loc = self._skipIgnorables( instring, loc )
+
+        if self.skipWhitespace:
+            wt = self.whiteChars
+            instrlen = len(instring)
+            while loc < instrlen and instring[loc] in wt:
+                loc += 1
+
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        return loc, []
+
+    def postParse( self, instring, loc, tokenlist ):
+        return tokenlist
+
+    #~ @profile
+    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+        debugging = ( self.debug ) #and doActions )
+
+        if debugging or self.failAction:
+            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+            if (self.debugActions[0] ):
+                self.debugActions[0]( instring, loc, self )
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = preloc
+            try:
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            except ParseBaseException as err:
+                #~ print ("Exception raised:", err)
+                if self.debugActions[2]:
+                    self.debugActions[2]( instring, tokensStart, self, err )
+                if self.failAction:
+                    self.failAction( instring, tokensStart, self, err )
+                raise
+        else:
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = preloc
+            if self.mayIndexError or loc >= len(instring):
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            else:
+                loc,tokens = self.parseImpl( instring, preloc, doActions )
+
+        tokens = self.postParse( instring, loc, tokens )
+
+        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
+        if self.parseAction and (doActions or self.callDuringTry):
+            if debugging:
+                try:
+                    for fn in self.parseAction:
+                        tokens = fn( instring, tokensStart, retTokens )
+                        if tokens is not None:
+                            retTokens = ParseResults( tokens,
+                                                      self.resultsName,
+                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                      modal=self.modalResults )
+                except ParseBaseException as err:
+                    #~ print "Exception raised in user parse action:", err
+                    if (self.debugActions[2] ):
+                        self.debugActions[2]( instring, tokensStart, self, err )
+                    raise
+            else:
+                for fn in self.parseAction:
+                    tokens = fn( instring, tokensStart, retTokens )
+                    if tokens is not None:
+                        retTokens = ParseResults( tokens,
+                                                  self.resultsName,
+                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                  modal=self.modalResults )
+
+        if debugging:
+            #~ print ("Matched",self,"->",retTokens.asList())
+            if (self.debugActions[1] ):
+                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+
+        return loc, retTokens
+
+    def tryParse( self, instring, loc ):
+        try:
+            return self._parse( instring, loc, doActions=False )[0]
+        except ParseFatalException:
+            raise ParseException( instring, loc, self.errmsg, self)
+    
+    def canParseNext(self, instring, loc):
+        try:
+            self.tryParse(instring, loc)
+        except (ParseException, IndexError):
+            return False
+        else:
+            return True
+
+    class _UnboundedCache(object):
+        def __init__(self):
+            cache = {}
+            self.not_in_cache = not_in_cache = object()
+
+            def get(self, key):
+                return cache.get(key, not_in_cache)
+
+            def set(self, key, value):
+                cache[key] = value
+
+            def clear(self):
+                cache.clear()
+
+            self.get = types.MethodType(get, self)
+            self.set = types.MethodType(set, self)
+            self.clear = types.MethodType(clear, self)
+
+    if _OrderedDict is not None:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = _OrderedDict()
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.popitem(False)
+
+                def clear(self):
+                    cache.clear()
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
+    else:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = {}
+                key_fifo = collections.deque([], size)
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.pop(key_fifo.popleft(), None)
+                    key_fifo.append(key)
+
+                def clear(self):
+                    cache.clear()
+                    key_fifo.clear()
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
+    packrat_cache_lock = RLock()
+    packrat_cache_stats = [0, 0]
+
+    # this method gets repeatedly called during backtracking with the same arguments -
+    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
+        HIT, MISS = 0, 1
+        lookup = (self, instring, loc, callPreParse, doActions)
+        with ParserElement.packrat_cache_lock:
+            cache = ParserElement.packrat_cache
+            value = cache.get(lookup)
+            if value is cache.not_in_cache:
+                ParserElement.packrat_cache_stats[MISS] += 1
+                try:
+                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
+                except ParseBaseException as pe:
+                    # cache a copy of the exception, without the traceback
+                    cache.set(lookup, pe.__class__(*pe.args))
+                    raise
+                else:
+                    cache.set(lookup, (value[0], value[1].copy()))
+                    return value
+            else:
+                ParserElement.packrat_cache_stats[HIT] += 1
+                if isinstance(value, Exception):
+                    raise value
+                return (value[0], value[1].copy())
+
+    _parse = _parseNoCache
+
+    @staticmethod
+    def resetCache():
+        ParserElement.packrat_cache.clear()
+        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
+
+    _packratEnabled = False
+    @staticmethod
+    def enablePackrat(cache_size_limit=128):
+        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
+           Repeated parse attempts at the same string location (which happens
+           often in many complex grammars) can immediately return a cached value,
+           instead of re-executing parsing/validating code.  Memoizing is done for
+           both valid results and parsing exceptions.
+           
+           Parameters:
+            - cache_size_limit - (default=C{128}) - if an integer value is provided
+              will limit the size of the packrat cache; if None is passed, then
+              the cache size will be unbounded; if 0 is passed, the cache will
+              be effectively disabled.
+            
+           This speedup may break existing programs that use parse actions that
+           have side-effects.  For this reason, packrat parsing is disabled when
+           you first import pyparsing.  To activate the packrat feature, your
+           program must call the class method C{ParserElement.enablePackrat()}.  If
+           your program uses C{psyco} to "compile as you go", you must call
+           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
+           Python will crash.  For best results, call C{enablePackrat()} immediately
+           after importing pyparsing.
+           
+           Example::
+               import pyparsing
+               pyparsing.ParserElement.enablePackrat()
+        """
+        if not ParserElement._packratEnabled:
+            ParserElement._packratEnabled = True
+            if cache_size_limit is None:
+                ParserElement.packrat_cache = ParserElement._UnboundedCache()
+            else:
+                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
+            ParserElement._parse = ParserElement._parseCache
+
+    def parseString( self, instring, parseAll=False ):
+        """
+        Execute the parse expression with the given string.
+        This is the main interface to the client code, once the complete
+        expression has been built.
+
+        If you want the grammar to require that the entire input string be
+        successfully parsed, then set C{parseAll} to True (equivalent to ending
+        the grammar with C{L{StringEnd()}}).
+
+        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
+        in order to report proper column numbers in parse actions.
+        If the input string contains tabs and
+        the grammar uses parse actions that use the C{loc} argument to index into the
+        string being parsed, you can ensure you have a consistent view of the input
+        string by:
+         - calling C{parseWithTabs} on your grammar before calling C{parseString}
+           (see L{I{parseWithTabs}<parseWithTabs>})
+         - define your parse action using the full C{(s,loc,toks)} signature, and
+           reference the input string using the parse action's C{s} argument
+         - explicitly expand the tabs in your input string before calling
+           C{parseString}
+        
+        Example::
+            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
+            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
+        """
+        ParserElement.resetCache()
+        if not self.streamlined:
+            self.streamline()
+            #~ self.saveAsList = True
+        for e in self.ignoreExprs:
+            e.streamline()
+        if not self.keepTabs:
+            instring = instring.expandtabs()
+        try:
+            loc, tokens = self._parse( instring, 0 )
+            if parseAll:
+                loc = self.preParse( instring, loc )
+                se = Empty() + StringEnd()
+                se._parse( instring, loc )
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+        else:
+            return tokens
+
+    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
+        """
+        Scan the input string for expression matches.  Each match will return the
+        matching tokens, start location, and end location.  May be called with optional
+        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
+        C{overlap} is specified, then overlapping matches will be reported.
+
+        Note that the start and end locations are reported relative to the string
+        being parsed.  See L{I{parseString}<parseString>} for more information on parsing
+        strings with embedded tabs.
+
+        Example::
+            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
+            print(source)
+            for tokens,start,end in Word(alphas).scanString(source):
+                print(' '*start + '^'*(end-start))
+                print(' '*start + tokens[0])
+        
+        prints::
+        
+            sldjf123lsdjjkf345sldkjf879lkjsfd987
+            ^^^^^
+            sldjf
+                    ^^^^^^^
+                    lsdjjkf
+                              ^^^^^^
+                              sldkjf
+                                       ^^^^^^
+                                       lkjsfd
+        """
+        if not self.streamlined:
+            self.streamline()
+        for e in self.ignoreExprs:
+            e.streamline()
+
+        if not self.keepTabs:
+            instring = _ustr(instring).expandtabs()
+        instrlen = len(instring)
+        loc = 0
+        preparseFn = self.preParse
+        parseFn = self._parse
+        ParserElement.resetCache()
+        matches = 0
+        try:
+            while loc <= instrlen and matches < maxMatches:
+                try:
+                    preloc = preparseFn( instring, loc )
+                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
+                except ParseException:
+                    loc = preloc+1
+                else:
+                    if nextLoc > loc:
+                        matches += 1
+                        yield tokens, preloc, nextLoc
+                        if overlap:
+                            nextloc = preparseFn( instring, loc )
+                            if nextloc > loc:
+                                loc = nextLoc
+                            else:
+                                loc += 1
+                        else:
+                            loc = nextLoc
+                    else:
+                        loc = preloc+1
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def transformString( self, instring ):
+        """
+        Extension to C{L{scanString}}, to modify matching text with modified tokens that may
+        be returned from a parse action.  To use C{transformString}, define a grammar and
+        attach a parse action to it that modifies the returned token list.
+        Invoking C{transformString()} on a target string will then scan for matches,
+        and replace the matched text patterns according to the logic in the parse
+        action.  C{transformString()} returns the resulting transformed string.
+        
+        Example::
+            wd = Word(alphas)
+            wd.setParseAction(lambda toks: toks[0].title())
+            
+            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
+        Prints::
+            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
+        """
+        out = []
+        lastE = 0
+        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
+        # keep string locs straight between transformString and scanString
+        self.keepTabs = True
+        try:
+            for t,s,e in self.scanString( instring ):
+                out.append( instring[lastE:s] )
+                if t:
+                    if isinstance(t,ParseResults):
+                        out += t.asList()
+                    elif isinstance(t,list):
+                        out += t
+                    else:
+                        out.append(t)
+                lastE = e
+            out.append(instring[lastE:])
+            out = [o for o in out if o]
+            return "".join(map(_ustr,_flatten(out)))
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def searchString( self, instring, maxMatches=_MAX_INT ):
+        """
+        Another extension to C{L{scanString}}, simplifying the access to the tokens found
+        to match the given parse expression.  May be called with optional
+        C{maxMatches} argument, to clip searching after 'n' matches are found.
+        
+        Example::
+            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
+            cap_word = Word(alphas.upper(), alphas.lower())
+            
+            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
+        prints::
+            ['More', 'Iron', 'Lead', 'Gold', 'I']
+        """
+        try:
+            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
+        """
+        Generator method to split a string using the given expression as a separator.
+        May be called with optional C{maxsplit} argument, to limit the number of splits;
+        and the optional C{includeSeparators} argument (default=C{False}), to indicate whether
+        the matched separator text should be included in the split results.
+        
+        Example::        
+            punc = oneOf(list(".,;:/-!?"))
+            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
+        prints::
+            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
+        """
+        splits = 0
+        last = 0
+        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
+            yield instring[last:s]
+            if includeSeparators:
+                yield t[0]
+            last = e
+        yield instring[last:]
+
+    def __add__(self, other ):
+        """
+        Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
+        converts them to L{Literal}s by default.
+        
+        Example::
+            greet = Word(alphas) + "," + Word(alphas) + "!"
+            hello = "Hello, World!"
+            print (hello, "->", greet.parseString(hello))
+        Prints::
+            Hello, World! -> ['Hello', ',', 'World', '!']
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return And( [ self, other ] )
+
+    def __radd__(self, other ):
+        """
+        Implementation of + operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other + self
+
+    def __sub__(self, other):
+        """
+        Implementation of - operator, returns C{L{And}} with error stop
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return And( [ self, And._ErrorStop(), other ] )
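+
+    # '-' operator sketch (hypothetical grammar): once "if" has matched, a missing condition
+    # is reported as an error at that point rather than allowing backtracking to alternatives.
+    #
+    #   if_stmt = Keyword("if") - Word(alphas)("condition")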
+
+    def __rsub__(self, other ):
+        """
+        Implementation of - operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other - self
+
+    def __mul__(self,other):
+        """
+        Implementation of * operator, allows use of C{expr * 3} in place of
+        C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
+        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
+        may also include C{None} as in:
+         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+              to C{expr*n + L{ZeroOrMore}(expr)}
+              (read as "at least n instances of C{expr}")
+         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+              (read as "0 to n instances of C{expr}")
+         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+        Note that C{expr*(None,n)} does not raise an exception if
+        more than n exprs exist in the input stream; that is,
+        C{expr*(None,n)} does not enforce a maximum number of expr
+        occurrences.  If this behavior is desired, then write
+        C{expr*(None,n) + ~expr}
+        """
+        if isinstance(other,int):
+            minElements, optElements = other,0
+        elif isinstance(other,tuple):
+            other = (other + (None, None))[:2]
+            if other[0] is None:
+                other = (0, other[1])
+            if isinstance(other[0],int) and other[1] is None:
+                if other[0] == 0:
+                    return ZeroOrMore(self)
+                if other[0] == 1:
+                    return OneOrMore(self)
+                else:
+                    return self*other[0] + ZeroOrMore(self)
+            elif isinstance(other[0],int) and isinstance(other[1],int):
+                minElements, optElements = other
+                optElements -= minElements
+            else:
+                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+        else:
+            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
+
+        if minElements < 0:
+            raise ValueError("cannot multiply ParserElement by negative value")
+        if optElements < 0:
+            raise ValueError("second tuple value must be greater or equal to first tuple value")
+        if minElements == optElements == 0:
+            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
+
+        if (optElements):
+            def makeOptionalList(n):
+                if n>1:
+                    return Optional(self + makeOptionalList(n-1))
+                else:
+                    return Optional(self)
+            if minElements:
+                if minElements == 1:
+                    ret = self + makeOptionalList(optElements)
+                else:
+                    ret = And([self]*minElements) + makeOptionalList(optElements)
+            else:
+                ret = makeOptionalList(optElements)
+        else:
+            if minElements == 1:
+                ret = self
+            else:
+                ret = And([self]*minElements)
+        return ret
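+
+    # Multiplication sketch (hypothetical expressions):
+    #
+    #   Word(nums) * 3          # same as Word(nums) + Word(nums) + Word(nums)
+    #   Word(nums) * (2, 4)     # 2 to 4 occurrences
+    #   Word(nums) * (1, None)  # same as OneOrMore(Word(nums))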
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __or__(self, other ):
+        """
+        Implementation of | operator - returns C{L{MatchFirst}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return MatchFirst( [ self, other ] )
+
+    def __ror__(self, other ):
+        """
+        Implementation of | operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other | self
+
+    def __xor__(self, other ):
+        """
+        Implementation of ^ operator - returns C{L{Or}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Or( [ self, other ] )
+
+    def __rxor__(self, other ):
+        """
+        Implementation of ^ operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other ^ self
+
+    def __and__(self, other ):
+        """
+        Implementation of & operator - returns C{L{Each}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Each( [ self, other ] )
+
+    def __rand__(self, other ):
+        """
+        Implementation of & operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other & self
+
+    def __invert__( self ):
+        """
+        Implementation of ~ operator - returns C{L{NotAny}}
+        """
+        return NotAny( self )
+
+    def __call__(self, name=None):
+        """
+        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
+        
+        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+        passed as C{True}.
+           
+        If C{name} is omitted, same as calling C{L{copy}}.
+
+        Example::
+            # these are equivalent
+            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")             
+        """
+        if name is not None:
+            return self.setResultsName(name)
+        else:
+            return self.copy()
+
+    def suppress( self ):
+        """
+        Suppresses the output of this C{ParserElement}; useful to keep punctuation from
+        cluttering up returned output.
+        """
+        return Suppress( self )
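+
+    # Suppression sketch (hypothetical grammar): the commas match but are dropped from the results.
+    #
+    #   csv_row = Word(alphas) + ZeroOrMore(Literal(",").suppress() + Word(alphas))
+    #   csv_row.parseString("a,b,c")   # -> ['a', 'b', 'c']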
+
+    def leaveWhitespace( self ):
+        """
+        Disables the skipping of whitespace before matching the characters in the
+        C{ParserElement}'s defined pattern.  This is normally only used internally by
+        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
+        """
+        self.skipWhitespace = False
+        return self
+
+    def setWhitespaceChars( self, chars ):
+        """
+        Overrides the default whitespace chars
+        """
+        self.skipWhitespace = True
+        self.whiteChars = chars
+        self.copyDefaultWhiteChars = False
+        return self
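+
+    # Whitespace sketch (hypothetical expression): skip only spaces and tabs for this one
+    # expression, leaving newlines significant, without touching the class-wide default.
+    #
+    #   value = Word(alphas).setWhitespaceChars(" \t")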
+
+    def parseWithTabs( self ):
+        """
+        Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
+        Must be called before C{parseString} when the input grammar contains elements that
+        match C{<TAB>} characters.
+        """
+        self.keepTabs = True
+        return self
+
+    def ignore( self, other ):
+        """
+        Define expression to be ignored (e.g., comments) while doing pattern
+        matching; may be called repeatedly, to define multiple comment or other
+        ignorable patterns.
+        
+        Example::
+            patt = OneOrMore(Word(alphas))
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
+            
+            patt.ignore(cStyleComment)
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
+        """
+        if isinstance(other, basestring):
+            other = Suppress(other)
+
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                self.ignoreExprs.append(other)
+        else:
+            self.ignoreExprs.append( Suppress( other.copy() ) )
+        return self
+
+    def setDebugActions( self, startAction, successAction, exceptionAction ):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        """
+        self.debugActions = (startAction or _defaultStartDebugAction,
+                             successAction or _defaultSuccessDebugAction,
+                             exceptionAction or _defaultExceptionDebugAction)
+        self.debug = True
+        return self
+
+    def setDebug( self, flag=True ):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        Set C{flag} to True to enable, False to disable.
+
+        Example::
+            wd = Word(alphas).setName("alphaword")
+            integer = Word(nums).setName("numword")
+            term = wd | integer
+            
+            # turn on debugging for wd
+            wd.setDebug()
+
+            OneOrMore(term).parseString("abc 123 xyz 890")
+        
+        prints::
+            Match alphaword at loc 0(1,1)
+            Matched alphaword -> ['abc']
+            Match alphaword at loc 3(1,4)
+            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
+            Match alphaword at loc 7(1,8)
+            Matched alphaword -> ['xyz']
+            Match alphaword at loc 11(1,12)
+            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
+            Match alphaword at loc 15(1,16)
+            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
+
+        The output shown is that produced by the default debug actions - custom debug actions can be
+        specified using L{setDebugActions}. Prior to attempting
+        to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
+        is shown. If the parse succeeds, a C{"Matched"} message is shown; if it fails, an C{"Exception raised"}
+        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
+        which makes debugging and exception messages easier to understand - for instance, the default
+        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
+        """
+        if flag:
+            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
+        else:
+            self.debug = False
+        return self
+
+    def __str__( self ):
+        return self.name
+
+    def __repr__( self ):
+        return _ustr(self)
+
+    def streamline( self ):
+        self.streamlined = True
+        self.strRepr = None
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        pass
+
+    def validate( self, validateTrace=[] ):
+        """
+        Check defined expressions for valid structure, check for infinite recursive definitions.
+        """
+        self.checkRecursion( [] )
+
+    def parseFile( self, file_or_filename, parseAll=False ):
+        """
+        Execute the parse expression on the given file or filename.
+        If a filename is specified (instead of a file object),
+        the entire file is opened, read, and closed before parsing.
+        """
+        try:
+            file_contents = file_or_filename.read()
+        except AttributeError:
+            with open(file_or_filename, "r") as f:
+                file_contents = f.read()
+        try:
+            return self.parseString(file_contents, parseAll)
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
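+
+    # parseFile sketch (hypothetical file name and grammar):
+    #
+    #   grammar = OneOrMore(Word(alphas))
+    #   words = grammar.parseFile("corpus.txt", parseAll=True)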
+
+    def __eq__(self,other):
+        if isinstance(other, ParserElement):
+            return self is other or vars(self) == vars(other)
+        elif isinstance(other, basestring):
+            return self.matches(other)
+        else:
+            return super(ParserElement,self)==other
+
+    def __ne__(self,other):
+        return not (self == other)
+
+    def __hash__(self):
+        return hash(id(self))
+
+    def __req__(self,other):
+        return self == other
+
+    def __rne__(self,other):
+        return not (self == other)
+
+    def matches(self, testString, parseAll=True):
+        """
+        Method for quick testing of a parser against a test string. Good for simple 
+        inline microtests of sub expressions while building up a larger parser.
+           
+        Parameters:
+         - testString - to test against this expression for a match
+         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
+            
+        Example::
+            expr = Word(nums)
+            assert expr.matches("100")
+        """
+        try:
+            self.parseString(_ustr(testString), parseAll=parseAll)
+            return True
+        except ParseBaseException:
+            return False
+                
+    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
+        """
+        Execute the parse expression on a series of test strings, showing each
+        test, the parsed results or where the parse failed. Quick and easy way to
+        run a parse expression against a list of sample strings.
+           
+        Parameters:
+         - tests - a list of separate test strings, or a multiline string of test strings
+         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests           
+         - comment - (default=C{'#'}) - expression for indicating embedded comments in the test 
+              string; pass None to disable comment filtering
+         - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
+              if False, only dump nested list
+         - printResults - (default=C{True}) prints test output to stdout
+         - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
+
+        Returns: a (success, results) tuple, where success indicates that all tests succeeded
+        (or failed if C{failureTests} is True), and the results contain a list of lines of each 
+        test's output
+        
+        Example::
+            number_expr = pyparsing_common.number.copy()
+
+            result = number_expr.runTests('''
+                # unsigned integer
+                100
+                # negative integer
+                -100
+                # float with scientific notation
+                6.02e23
+                # integer with scientific notation
+                1e-12
+                ''')
+            print("Success" if result[0] else "Failed!")
+
+            result = number_expr.runTests('''
+                # stray character
+                100Z
+                # missing leading digit before '.'
+                -.100
+                # too many '.'
+                3.14.159
+                ''', failureTests=True)
+            print("Success" if result[0] else "Failed!")
+        prints::
+            # unsigned integer
+            100
+            [100]
+
+            # negative integer
+            -100
+            [-100]
+
+            # float with scientific notation
+            6.02e23
+            [6.02e+23]
+
+            # integer with scientific notation
+            1e-12
+            [1e-12]
+
+            Success
+            
+            # stray character
+            100Z
+               ^
+            FAIL: Expected end of text (at char 3), (line:1, col:4)
+
+            # missing leading digit before '.'
+            -.100
+            ^
+            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
+
+            # too many '.'
+            3.14.159
+                ^
+            FAIL: Expected end of text (at char 4), (line:1, col:5)
+
+            Success
+
+        Each test string must be on a single line. If you want to test a string that spans multiple
+        lines, create a test like this::
+
+            expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")
+
+        (Note that this is a raw string literal; you must include the leading 'r'.)
+        """
+        if isinstance(tests, basestring):
+            tests = list(map(str.strip, tests.rstrip().splitlines()))
+        if isinstance(comment, basestring):
+            comment = Literal(comment)
+        allResults = []
+        comments = []
+        success = True
+        for t in tests:
+            if comment is not None and comment.matches(t, False) or comments and not t:
+                comments.append(t)
+                continue
+            if not t:
+                continue
+            out = ['\n'.join(comments), t]
+            comments = []
+            try:
+                t = t.replace(r'\n','\n')
+                result = self.parseString(t, parseAll=parseAll)
+                out.append(result.dump(full=fullDump))
+                success = success and not failureTests
+            except ParseBaseException as pe:
+                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
+                if '\n' in t:
+                    out.append(line(pe.loc, t))
+                    out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
+                else:
+                    out.append(' '*pe.loc + '^' + fatal)
+                out.append("FAIL: " + str(pe))
+                success = success and failureTests
+                result = pe
+            except Exception as exc:
+                out.append("FAIL-EXCEPTION: " + str(exc))
+                success = success and failureTests
+                result = exc
+
+            if printResults:
+                if fullDump:
+                    out.append('')
+                print('\n'.join(out))
+
+            allResults.append((t, result))
+        
+        return success, allResults
+
+        
+class Token(ParserElement):
+    """
+    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
+    """
+    def __init__( self ):
+        super(Token,self).__init__( savelist=False )
+
+
+class Empty(Token):
+    """
+    An empty token, will always match.
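+
+    Example (an illustrative sketch added for clarity)::
+        # Empty() consumes no input and adds no tokens to the result
+        print((Empty() + Word(alphas)).parseString("hello"))  # -> ['hello']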
+    """
+    def __init__( self ):
+        super(Empty,self).__init__()
+        self.name = "Empty"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+
+class NoMatch(Token):
+    """
+    A token that will never match.
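+
+    Example (an illustrative sketch added for clarity)::
+        # NoMatch() can serve as a placeholder alternative that always fails
+        expr = Word(nums) | NoMatch()
+        expr.parseString("abc")  # -> raises ParseException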
+    """
+    def __init__( self ):
+        super(NoMatch,self).__init__()
+        self.name = "NoMatch"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.errmsg = "Unmatchable token"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Literal(Token):
+    """
+    Token to exactly match a specified string.
+    
+    Example::
+        Literal('blah').parseString('blah')  # -> ['blah']
+        Literal('blah').parseString('blahfooblah')  # -> ['blah']
+        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"
+    
+    For case-insensitive matching, use L{CaselessLiteral}.
+    
+    For keyword matching (force word break before and after the matched string),
+    use L{Keyword} or L{CaselessKeyword}.
+    """
+    def __init__( self, matchString ):
+        super(Literal,self).__init__()
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Literal; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+            self.__class__ = Empty
+        self.name = '"%s"' % _ustr(self.match)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+
+    # Performance tuning: this routine gets called a *lot*.
+    # If this is a single-character match string and the first character matches,
+    # short-circuit as quickly as possible, and avoid calling startswith.
+    #~ @profile
+    def parseImpl( self, instring, loc, doActions=True ):
+        if (instring[loc] == self.firstMatchChar and
+            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
+            return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+_L = Literal
+ParserElement._literalStringClass = Literal
+
+class Keyword(Token):
+    """
+    Token to exactly match a specified string as a keyword, that is, it must be
+    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
+     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+    Accepts two optional constructor arguments in addition to the keyword string:
+     - C{identChars} is a string of characters that would be valid identifier characters,
+          defaulting to all alphanumerics + "_" and "$"
+     - C{caseless} allows case-insensitive matching, default is C{False}.
+       
+    Example::
+        Keyword("start").parseString("start")  # -> ['start']
+        Keyword("start").parseString("starting")  # -> Exception
+
+    For case-insensitive matching, use L{CaselessKeyword}.
+    """
+    DEFAULT_KEYWORD_CHARS = alphanums+"_$"
+
+    def __init__( self, matchString, identChars=None, caseless=False ):
+        super(Keyword,self).__init__()
+        if identChars is None:
+            identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Keyword; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+        self.name = '"%s"' % self.match
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+        self.caseless = caseless
+        if caseless:
+            self.caselessmatch = matchString.upper()
+            identChars = identChars.upper()
+        self.identChars = set(identChars)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.caseless:
+            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
+                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        else:
+            if (instring[loc] == self.firstMatchChar and
+                (self.matchLen==1 or instring.startswith(self.match,loc)) and
+                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
+                (loc == 0 or instring[loc-1] not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+    def copy(self):
+        c = super(Keyword,self).copy()
+        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        return c
+
+    @staticmethod
+    def setDefaultKeywordChars( chars ):
+        """Overrides the default Keyword chars
+        """
+        Keyword.DEFAULT_KEYWORD_CHARS = chars
+
+class CaselessLiteral(Literal):
+    """
+    Token to match a specified string, ignoring case of letters.
+    Note: the matched results will always be in the case of the given
+    match string, NOT the case of the input text.
+
+    Example::
+        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
+        
+    (Contrast with example for L{CaselessKeyword}.)
+    """
+    def __init__( self, matchString ):
+        super(CaselessLiteral,self).__init__( matchString.upper() )
+        # Preserve the defining literal.
+        self.returnString = matchString
+        self.name = "'%s'" % self.returnString
+        self.errmsg = "Expected " + self.name
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if instring[ loc:loc+self.matchLen ].upper() == self.match:
+            return loc+self.matchLen, self.returnString
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class CaselessKeyword(Keyword):
+    """
+    Caseless version of L{Keyword}.
+
+    Example::
+        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
+        
+    (Contrast with example for L{CaselessLiteral}.)
+    """
+    def __init__( self, matchString, identChars=None ):
+        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
+            return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class CloseMatch(Token):
+    """
+    A variation on L{Literal} which matches "close" matches, that is, 
+    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
+     - C{match_string} - string to be matched
+     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
+    
+    The results from a successful parse will contain the matched text from the input string and the following named results:
+     - C{mismatches} - a list of the positions within the match_string where mismatches were found
+     - C{original} - the original match_string used to compare against the input string
+    
+    If C{mismatches} is an empty list, then the match was an exact match.
+    
+    Example::
+        patt = CloseMatch("ATCATCGAATGGA")
+        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
+        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
+
+        # exact match
+        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
+
+        # close match allowing up to 2 mismatches
+        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
+        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
+    """
+    def __init__(self, match_string, maxMismatches=1):
+        super(CloseMatch,self).__init__()
+        self.name = match_string
+        self.match_string = match_string
+        self.maxMismatches = maxMismatches
+        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
+        self.mayIndexError = False
+        self.mayReturnEmpty = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        start = loc
+        instrlen = len(instring)
+        maxloc = start + len(self.match_string)
+
+        if maxloc <= instrlen:
+            match_string = self.match_string
+            match_stringloc = 0
+            mismatches = []
+            maxMismatches = self.maxMismatches
+
+            for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
+                src,mat = s_m
+                if src != mat:
+                    mismatches.append(match_stringloc)
+                    if len(mismatches) > maxMismatches:
+                        break
+            else:
+                loc = match_stringloc + 1
+                results = ParseResults([instring[start:loc]])
+                results['original'] = self.match_string
+                results['mismatches'] = mismatches
+                return loc, results
+
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Word(Token):
+    """
+    Token for matching words composed of allowed character sets.
+    Defined with string containing all allowed initial characters,
+    an optional string containing allowed body characters (if omitted,
+    defaults to the initial character set), and an optional minimum,
+    maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction. An optional
+    C{excludeChars} parameter can list characters that might be found in 
+    the input C{bodyChars} string; useful to define a word of all printables
+    except for one or two characters, for instance.
+    
+    L{srange} is useful for defining custom character set strings for defining 
+    C{Word} expressions, using range notation from regular expression character sets.
+    
+    A common mistake is to use C{Word} to match a specific literal string, as in 
+    C{Word("Address")}. Remember that C{Word} uses the string argument to define
+    I{sets} of matchable characters. This expression would match "Add", "AAA",
+    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
+    To match an exact literal string, use L{Literal} or L{Keyword}.
+
+    pyparsing includes helper strings for building Words:
+     - L{alphas}
+     - L{nums}
+     - L{alphanums}
+     - L{hexnums}
+     - L{alphas8bit} (alphabetic characters in the Latin-1 range 128-255 - accented, tilded, umlauted, etc.)
+     - L{punc8bit} (non-alphabetic characters in the Latin-1 range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
+     - L{printables} (any non-whitespace character)
+
+    Example::
+        # a word composed of digits
+        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
+        
+        # a word with a leading capital, and zero or more lowercase
+        capital_word = Word(alphas.upper(), alphas.lower())
+
+        # hostnames are alphanumeric, with leading alpha, and '-'
+        hostname = Word(alphas, alphanums+'-')
+        
+        # roman numeral (not a strict parser, accepts invalid mix of characters)
+        roman = Word("IVXLCDM")
+        
+        # any string of non-whitespace characters, except for ','
+        csv_value = Word(printables, excludeChars=",")
+    """
+    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
+        super(Word,self).__init__()
+        if excludeChars:
+            initChars = ''.join(c for c in initChars if c not in excludeChars)
+            if bodyChars:
+                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
+        self.initCharsOrig = initChars
+        self.initChars = set(initChars)
+        if bodyChars :
+            self.bodyCharsOrig = bodyChars
+            self.bodyChars = set(bodyChars)
+        else:
+            self.bodyCharsOrig = initChars
+            self.bodyChars = set(initChars)
+
+        self.maxSpecified = max > 0
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.asKeyword = asKeyword
+
+        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
+            if self.bodyCharsOrig == self.initCharsOrig:
+                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
+            elif len(self.initCharsOrig) == 1:
+                self.reString = "%s[%s]*" % \
+                                      (re.escape(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            else:
+                self.reString = "[%s][%s]*" % \
+                                      (_escapeRegexRangeChars(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            if self.asKeyword:
+                self.reString = r"\b"+self.reString+r"\b"
+            try:
+                self.re = re.compile( self.reString )
+            except Exception:
+                self.re = None
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.re:
+            result = self.re.match(instring,loc)
+            if not result:
+                raise ParseException(instring, loc, self.errmsg, self)
+
+            loc = result.end()
+            return loc, result.group()
+
+        if not(instring[ loc ] in self.initChars):
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        instrlen = len(instring)
+        bodychars = self.bodyChars
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, instrlen )
+        while loc < maxloc and instring[loc] in bodychars:
+            loc += 1
+
+        throwException = False
+        if loc - start < self.minLen:
+            throwException = True
+        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
+            throwException = True
+        if self.asKeyword:
+            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
+                throwException = True
+
+        if throwException:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        try:
+            return super(Word,self).__str__()
+        except Exception:
+            pass
+
+
+        if self.strRepr is None:
+
+            def charsAsStr(s):
+                if len(s)>4:
+                    return s[:4]+"..."
+                else:
+                    return s
+
+            if ( self.initCharsOrig != self.bodyCharsOrig ):
+                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
+            else:
+                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
+
+        return self.strRepr
+
+
+class Regex(Token):
+    r"""
+    Token for matching strings that match a given regular expression.
+    Defined with a string specifying the regular expression, in a form recognized by the built-in Python re module.
+    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as 
+    named parse results.
+
+    Example::
+        realnum = Regex(r"[+-]?\d+\.\d*")
+        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
+        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
+        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
+    """
+    compiledREtype = type(re.compile("[A-Z]"))
+    def __init__( self, pattern, flags=0):
+        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
+        super(Regex,self).__init__()
+
+        if isinstance(pattern, basestring):
+            if not pattern:
+                warnings.warn("null string passed to Regex; use Empty() instead",
+                        SyntaxWarning, stacklevel=2)
+
+            self.pattern = pattern
+            self.flags = flags
+
+            try:
+                self.re = re.compile(self.pattern, self.flags)
+                self.reString = self.pattern
+            except sre_constants.error:
+                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+                    SyntaxWarning, stacklevel=2)
+                raise
+
+        elif isinstance(pattern, Regex.compiledREtype):
+            self.re = pattern
+            self.pattern = \
+            self.reString = str(pattern)
+            self.flags = flags
+            
+        else:
+            raise ValueError("Regex may only be constructed with a string or a compiled RE object")
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        result = self.re.match(instring,loc)
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        d = result.groupdict()
+        ret = ParseResults(result.group())
+        if d:
+            for k in d:
+                ret[k] = d[k]
+        return loc,ret
+
+    def __str__( self ):
+        try:
+            return super(Regex,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "Re:(%s)" % repr(self.pattern)
+
+        return self.strRepr
+
+
+class QuotedString(Token):
+    r"""
+    Token for matching strings that are delimited by quoting characters.
+    
+    Defined with the following parameters:
+        - quoteChar - string of one or more characters defining the quote delimiting string
+        - escChar - character to escape quotes, typically backslash (default=C{None})
+        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
+        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
+
+    Example::
+        qs = QuotedString('"')
+        print(qs.searchString('lsjdf "This is the quote" sldjf'))
+        complex_qs = QuotedString('{{', endQuoteChar='}}')
+        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
+        sql_qs = QuotedString('"', escQuote='""')
+        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
+    prints::
+        [['This is the quote']]
+        [['This is the "quote"']]
+        [['This is the quote with "embedded" quotes']]
+    """
+    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
+        super(QuotedString,self).__init__()
+
+        # remove white space from quote chars - won't work anyway
+        quoteChar = quoteChar.strip()
+        if not quoteChar:
+            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+            raise SyntaxError()
+
+        if endQuoteChar is None:
+            endQuoteChar = quoteChar
+        else:
+            endQuoteChar = endQuoteChar.strip()
+            if not endQuoteChar:
+                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+                raise SyntaxError()
+
+        self.quoteChar = quoteChar
+        self.quoteCharLen = len(quoteChar)
+        self.firstQuoteChar = quoteChar[0]
+        self.endQuoteChar = endQuoteChar
+        self.endQuoteCharLen = len(endQuoteChar)
+        self.escChar = escChar
+        self.escQuote = escQuote
+        self.unquoteResults = unquoteResults
+        self.convertWhitespaceEscapes = convertWhitespaceEscapes
+
+        if multiline:
+            self.flags = re.MULTILINE | re.DOTALL
+            self.pattern = r'%s(?:[^%s%s]' % \
+                ( re.escape(self.quoteChar),
+                  _escapeRegexRangeChars(self.endQuoteChar[0]),
+                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+        else:
+            self.flags = 0
+            self.pattern = r'%s(?:[^%s\n\r%s]' % \
+                ( re.escape(self.quoteChar),
+                  _escapeRegexRangeChars(self.endQuoteChar[0]),
+                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+        if len(self.endQuoteChar) > 1:
+            self.pattern += (
+                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
+                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
+                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
+                )
+        if escQuote:
+            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
+        if escChar:
+            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
+            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
+        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
+
+        try:
+            self.re = re.compile(self.pattern, self.flags)
+            self.reString = self.pattern
+        except sre_constants.error:
+            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
+                SyntaxWarning, stacklevel=2)
+            raise
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        ret = result.group()
+
+        if self.unquoteResults:
+
+            # strip off quotes
+            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
+
+            if isinstance(ret,basestring):
+                # replace escaped whitespace
+                if '\\' in ret and self.convertWhitespaceEscapes:
+                    ws_map = {
+                        r'\t' : '\t',
+                        r'\n' : '\n',
+                        r'\f' : '\f',
+                        r'\r' : '\r',
+                    }
+                    for wslit,wschar in ws_map.items():
+                        ret = ret.replace(wslit, wschar)
+
+                # replace escaped characters
+                if self.escChar:
+                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)
+
+                # replace escaped quotes
+                if self.escQuote:
+                    ret = ret.replace(self.escQuote, self.endQuoteChar)
+
+        return loc, ret
+
+    def __str__( self ):
+        try:
+            return super(QuotedString,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
+
+        return self.strRepr
+
+
+class CharsNotIn(Token):
+    """
+    Token for matching words composed of characters I{not} in a given set (will
+    include whitespace in matched characters if not listed in the provided exclusion set - see example).
+    Defined with string containing all disallowed characters, and an optional
+    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction.
+
+    Example::
+        # define a comma-separated-value as anything that is not a ','
+        csv_value = CharsNotIn(',')
+        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
+    prints::
+        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
+    """
+    def __init__( self, notChars, min=1, max=0, exact=0 ):
+        super(CharsNotIn,self).__init__()
+        self.skipWhitespace = False
+        self.notChars = notChars
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = ( self.minLen == 0 )
+        self.mayIndexError = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if instring[loc] in self.notChars:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        notchars = self.notChars
+        maxlen = min( start+self.maxLen, len(instring) )
+        while loc < maxlen and \
+              (instring[loc] not in notchars):
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        try:
+            return super(CharsNotIn, self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            if len(self.notChars) > 4:
+                self.strRepr = "!W:(%s...)" % self.notChars[:4]
+            else:
+                self.strRepr = "!W:(%s)" % self.notChars
+
+        return self.strRepr
+
+class White(Token):
+    """
+    Special matching class for matching whitespace.  Normally, whitespace is ignored
+    by pyparsing grammars.  This class is included when some whitespace structures
+    are significant.  Define with a string containing the whitespace characters to be
+    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
+    as defined for the C{L{Word}} class.
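+
+    Example (an illustrative sketch added for clarity)::
+        # match runs of spaces/tabs explicitly instead of silently skipping them
+        ws = White(" \\t")
+        print(ws.parseString("   hello"))  # -> ['   ']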
+    """
+    whiteStrs = {
+        " " : "<SPC>",
+        "\t": "<TAB>",
+        "\n": "<LF>",
+        "\r": "<CR>",
+        "\f": "<FF>",
+        }
+    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
+        super(White,self).__init__()
+        self.matchWhite = ws
+        self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
+        #~ self.leaveWhitespace()
+        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
+        self.mayReturnEmpty = True
+        self.errmsg = "Expected " + self.name
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if not(instring[ loc ] in self.matchWhite):
+            raise ParseException(instring, loc, self.errmsg, self)
+        start = loc
+        loc += 1
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, len(instring) )
+        while loc < maxloc and instring[loc] in self.matchWhite:
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+
+class _PositionToken(Token):
+    def __init__( self ):
+        super(_PositionToken,self).__init__()
+        self.name=self.__class__.__name__
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+class GoToColumn(_PositionToken):
+    """
+    Token to advance to a specific column of input text; useful for tabular report scraping.
+    """
+    def __init__( self, colno ):
+        super(GoToColumn,self).__init__()
+        self.col = colno
+
+    def preParse( self, instring, loc ):
+        if col(loc,instring) != self.col:
+            instrlen = len(instring)
+            if self.ignoreExprs:
+                loc = self._skipIgnorables( instring, loc )
+            while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
+                loc += 1
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        thiscol = col( loc, instring )
+        if thiscol > self.col:
+            raise ParseException( instring, loc, "Text not in expected column", self )
+        newloc = loc + self.col - thiscol
+        ret = instring[ loc: newloc ]
+        return newloc, ret
+
+
+class LineStart(_PositionToken):
+    """
+    Matches if current position is at the beginning of a line within the parse string
+    
+    Example::
+    
+        test = '''\
+        AAA this line
+        AAA and this line
+          AAA but not this one
+        B AAA and definitely not this one
+        '''
+
+        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
+            print(t)
+    
+    Prints::
+        ['AAA', ' this line']
+        ['AAA', ' and this line']    
+
+    """
+    def __init__( self ):
+        super(LineStart,self).__init__()
+        self.errmsg = "Expected start of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if col(loc, instring) == 1:
+            return loc, []
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class LineEnd(_PositionToken):
+    """
+    Matches if current position is at the end of a line within the parse string
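+
+    Example (an illustrative sketch added for clarity)::
+        # anchor a comment to the end of its line
+        comment = '#' + restOfLine + LineEnd()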
+    """
+    def __init__( self ):
+        super(LineEnd,self).__init__()
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
+        self.errmsg = "Expected end of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if loc<len(instring):
+            if instring[loc] == "\n":
+                return loc+1, "\n"
+            else:
+                raise ParseException(instring, loc, self.errmsg, self)
+        elif loc == len(instring):
+            return loc+1, []
+        else:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+class StringStart(_PositionToken):
+    """
+    Matches if current position is at the beginning of the parse string
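+
+    Example (an illustrative sketch added for clarity)::
+        # searchString normally finds matches anywhere; StringStart pins the
+        # match to the start of the text (leading whitespace is still skipped)
+        anchored = StringStart() + Word(alphas)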
+    """
+    def __init__( self ):
+        super(StringStart,self).__init__()
+        self.errmsg = "Expected start of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if loc != 0:
+            # see if the entire string up to here is just whitespace and ignorable expressions
+            if loc != self.preParse( instring, 0 ):
+                raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class StringEnd(_PositionToken):
+    """
+    Matches if current position is at the end of the parse string
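+
+    Example (an illustrative sketch added for clarity)::
+        # accept a number only when nothing but trailing whitespace follows it
+        last_number = Word(nums) + StringEnd()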
+    """
+    def __init__( self ):
+        super(StringEnd,self).__init__()
+        self.errmsg = "Expected end of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if loc < len(instring):
+            raise ParseException(instring, loc, self.errmsg, self)
+        elif loc == len(instring):
+            return loc+1, []
+        elif loc > len(instring):
+            return loc, []
+        else:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+class WordStart(_PositionToken):
+    """
+    Matches if the current position is at the beginning of a Word, and
+    is not preceded by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+    the string being parsed, or at the beginning of a line.
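+
+    Example (an illustrative sketch added for clarity)::
+        # match 'cat' only at the start of a word (a regex-style leading word boundary)
+        cat_word = WordStart(alphanums) + Literal("cat")
+        print(cat_word.searchString("cat concat catalog"))
+        # -> [['cat'], ['cat']]   (the standalone 'cat' and the start of 'catalog')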
+    """
+    def __init__(self, wordChars = printables):
+        super(WordStart,self).__init__()
+        self.wordChars = set(wordChars)
+        self.errmsg = "Not at the start of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        if loc != 0:
+            if (instring[loc-1] in self.wordChars or
+                instring[loc] not in self.wordChars):
+                raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class WordEnd(_PositionToken):
+    """
+    Matches if the current position is at the end of a Word, and
+    is not followed by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+    the string being parsed, or at the end of a line.
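+
+    Example (an illustrative sketch added for clarity)::
+        # match 'end' only where it finishes a word (a regex-style trailing word boundary)
+        end_word = Literal("end") + WordEnd(alphanums)
+        print(end_word.searchString("weekend trend ends"))
+        # -> [['end'], ['end']]   (inside 'weekend' and 'trend', but not 'ends')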
+    """
+    def __init__(self, wordChars = printables):
+        super(WordEnd,self).__init__()
+        self.wordChars = set(wordChars)
+        self.skipWhitespace = False
+        self.errmsg = "Not at the end of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        instrlen = len(instring)
+        if instrlen>0 and loc<instrlen:
+            if (instring[loc] in self.wordChars or
+                instring[loc-1] not in self.wordChars):
+                raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+
+class ParseExpression(ParserElement):
+    """
+    Abstract subclass of C{ParserElement}, for combining and post-processing the parsed tokens of multiple contained expressions.
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(ParseExpression,self).__init__(savelist)
+        if isinstance( exprs, _generatorType ):
+            exprs = list(exprs)
+
+        if isinstance( exprs, basestring ):
+            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
+        elif isinstance( exprs, collections.Iterable ):
+            exprs = list(exprs)
+            # if sequence of strings provided, wrap with Literal
+            if all(isinstance(expr, basestring) for expr in exprs):
+                exprs = map(ParserElement._literalStringClass, exprs)
+            self.exprs = list(exprs)
+        else:
+            try:
+                self.exprs = list( exprs )
+            except TypeError:
+                self.exprs = [ exprs ]
+        self.callPreparse = False
+
+    def __getitem__( self, i ):
+        return self.exprs[i]
+
+    def append( self, other ):
+        self.exprs.append( other )
+        self.strRepr = None
+        return self
+
+    def leaveWhitespace( self ):
+        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
+           all contained expressions."""
+        self.skipWhitespace = False
+        self.exprs = [ e.copy() for e in self.exprs ]
+        for e in self.exprs:
+            e.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseExpression, self).ignore( other )
+                for e in self.exprs:
+                    e.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseExpression, self).ignore( other )
+            for e in self.exprs:
+                e.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def __str__( self ):
+        try:
+            return super(ParseExpression,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
+        return self.strRepr
+
+    def streamline( self ):
+        super(ParseExpression,self).streamline()
+
+        for e in self.exprs:
+            e.streamline()
+
+        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
+        # but only if there are no parse actions or resultsNames on the nested And's
+        # (likewise for Or's and MatchFirst's)
+        if ( len(self.exprs) == 2 ):
+            other = self.exprs[0]
+            if ( isinstance( other, self.__class__ ) and
+                  not(other.parseAction) and
+                  other.resultsName is None and
+                  not other.debug ):
+                self.exprs = other.exprs[:] + [ self.exprs[1] ]
+                self.strRepr = None
+                self.mayReturnEmpty |= other.mayReturnEmpty
+                self.mayIndexError  |= other.mayIndexError
+
+            other = self.exprs[-1]
+            if ( isinstance( other, self.__class__ ) and
+                  not(other.parseAction) and
+                  other.resultsName is None and
+                  not other.debug ):
+                self.exprs = self.exprs[:-1] + other.exprs[:]
+                self.strRepr = None
+                self.mayReturnEmpty |= other.mayReturnEmpty
+                self.mayIndexError  |= other.mayIndexError
+
+        self.errmsg = "Expected " + _ustr(self)
+        
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
+        return ret
+
+    def validate( self, validateTrace=[] ):
+        tmp = validateTrace[:]+[self]
+        for e in self.exprs:
+            e.validate(tmp)
+        self.checkRecursion( [] )
+        
+    def copy(self):
+        ret = super(ParseExpression,self).copy()
+        ret.exprs = [e.copy() for e in self.exprs]
+        return ret
+
+class And(ParseExpression):
+    """
+    Requires all given C{ParseExpression}s to be found in the given order.
+    Expressions may be separated by whitespace.
+    May be constructed using the C{'+'} operator.
+    May also be constructed using the C{'-'} operator, which will suppress backtracking.
+
+    Example::
+        integer = Word(nums)
+        name_expr = OneOrMore(Word(alphas))
+
+        expr = And([integer("id"),name_expr("name"),integer("age")])
+        # more easily written as:
+        expr = integer("id") + name_expr("name") + integer("age")
+    """
+
+    class _ErrorStop(Empty):
+        def __init__(self, *args, **kwargs):
+            super(And._ErrorStop,self).__init__(*args, **kwargs)
+            self.name = '-'
+            self.leaveWhitespace()
+
+    def __init__( self, exprs, savelist = True ):
+        super(And,self).__init__(exprs, savelist)
+        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
+        self.setWhitespaceChars( self.exprs[0].whiteChars )
+        self.skipWhitespace = self.exprs[0].skipWhitespace
+        self.callPreparse = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        # pass False as last arg to _parse for first element, since we already
+        # pre-parsed the string as part of our And pre-parsing
+        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
+        errorStop = False
+        for e in self.exprs[1:]:
+            if isinstance(e, And._ErrorStop):
+                errorStop = True
+                continue
+            if errorStop:
+                try:
+                    loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
+                except ParseBaseException as pe:
+                    pe.__traceback__ = None
+                    raise ParseSyntaxException._from_exception(pe)
+                except IndexError:
+                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
+            else:
+                loc, exprtokens = e._parse( instring, loc, doActions )
+            if exprtokens or exprtokens.haskeys():
+                resultlist += exprtokens
+        return loc, resultlist
+
+    def __iadd__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #And( [ self, other ] )
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+            if not e.mayReturnEmpty:
+                break
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+
+class Or(ParseExpression):
+    """
+    Requires that at least one C{ParseExpression} is found.
+    If two expressions match, the expression that matches the longest string will be used.
+    May be constructed using the C{'^'} operator.
+
+    Example::
+        # construct Or using '^' operator
+        
+        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
+        print(number.searchString("123 3.1416 789"))
+    prints::
+        [['123'], ['3.1416'], ['789']]
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(Or,self).__init__(exprs, savelist)
+        if self.exprs:
+            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+        else:
+            self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        maxExcLoc = -1
+        maxException = None
+        matches = []
+        for e in self.exprs:
+            try:
+                loc2 = e.tryParse( instring, loc )
+            except ParseException as err:
+                err.__traceback__ = None
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+            else:
+                # save match among all matches, to retry longest to shortest
+                matches.append((loc2, e))
+
+        if matches:
+            matches.sort(key=lambda x: -x[0])
+            for _,e in matches:
+                try:
+                    return e._parse( instring, loc, doActions )
+                except ParseException as err:
+                    err.__traceback__ = None
+                    if err.loc > maxExcLoc:
+                        maxException = err
+                        maxExcLoc = err.loc
+
+        if maxException is not None:
+            maxException.msg = self.errmsg
+            raise maxException
+        else:
+            raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+
+    def __ixor__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #Or( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class MatchFirst(ParseExpression):
+    """
+    Requires that at least one C{ParseExpression} is found.
+    If two expressions match, the first one listed is the one that will match.
+    May be constructed using the C{'|'} operator.
+
+    Example::
+        # construct MatchFirst using '|' operator
+        
+        # watch the order of expressions to match
+        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
+        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]
+
+        # put more selective expression first
+        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
+        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(MatchFirst,self).__init__(exprs, savelist)
+        if self.exprs:
+            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+        else:
+            self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        maxExcLoc = -1
+        maxException = None
+        for e in self.exprs:
+            try:
+                ret = e._parse( instring, loc, doActions )
+                return ret
+            except ParseException as err:
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+
+        # only get here if no expression matched; raise exception for the match that made it the furthest
+        else:
+            if maxException is not None:
+                maxException.msg = self.errmsg
+                raise maxException
+            else:
+                raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+    def __ior__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #MatchFirst( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class Each(ParseExpression):
+    """
+    Requires all given C{ParseExpression}s to be found, but in any order.
+    Expressions may be separated by whitespace.
+    May be constructed using the C{'&'} operator.
+
+    Example::
+        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
+        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
+        integer = Word(nums)
+        shape_attr = "shape:" + shape_type("shape")
+        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
+        color_attr = "color:" + color("color")
+        size_attr = "size:" + integer("size")
+
+        # use Each (using operator '&') to accept attributes in any order 
+        # (shape and posn are required, color and size are optional)
+        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
+
+        shape_spec.runTests('''
+            shape: SQUARE color: BLACK posn: 100, 120
+            shape: CIRCLE size: 50 color: BLUE posn: 50,80
+            color:GREEN size:20 shape:TRIANGLE posn:20,40
+            '''
+            )
+    prints::
+        shape: SQUARE color: BLACK posn: 100, 120
+        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
+        - color: BLACK
+        - posn: ['100', ',', '120']
+          - x: 100
+          - y: 120
+        - shape: SQUARE
+
+
+        shape: CIRCLE size: 50 color: BLUE posn: 50,80
+        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
+        - color: BLUE
+        - posn: ['50', ',', '80']
+          - x: 50
+          - y: 80
+        - shape: CIRCLE
+        - size: 50
+
+
+        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
+        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
+        - color: GREEN
+        - posn: ['20', ',', '40']
+          - x: 20
+          - y: 40
+        - shape: TRIANGLE
+        - size: 20
+    """
+    def __init__( self, exprs, savelist = True ):
+        super(Each,self).__init__(exprs, savelist)
+        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
+        self.skipWhitespace = True
+        self.initExprGroups = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.initExprGroups:
+            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
+            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
+            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
+            self.optionals = opt1 + opt2
+            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
+            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
+            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
+            self.required += self.multirequired
+            self.initExprGroups = False
+        tmpLoc = loc
+        tmpReqd = self.required[:]
+        tmpOpt  = self.optionals[:]
+        matchOrder = []
+
+        keepMatching = True
+        while keepMatching:
+            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
+            failed = []
+            for e in tmpExprs:
+                try:
+                    tmpLoc = e.tryParse( instring, tmpLoc )
+                except ParseException:
+                    failed.append(e)
+                else:
+                    matchOrder.append(self.opt1map.get(id(e),e))
+                    if e in tmpReqd:
+                        tmpReqd.remove(e)
+                    elif e in tmpOpt:
+                        tmpOpt.remove(e)
+            if len(failed) == len(tmpExprs):
+                keepMatching = False
+
+        if tmpReqd:
+            missing = ", ".join(_ustr(e) for e in tmpReqd)
+            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
+
+        # add any unmatched Optionals, in case they have default values defined
+        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
+
+        resultlist = []
+        for e in matchOrder:
+            loc,results = e._parse(instring,loc,doActions)
+            resultlist.append(results)
+
+        finalResults = sum(resultlist, ParseResults([]))
+        return loc, finalResults
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class ParseElementEnhance(ParserElement):
+    """
+    Abstract subclass of C{ParserElement}, for wrapping and post-processing a single contained expression.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(ParseElementEnhance,self).__init__(savelist)
+        if isinstance( expr, basestring ):
+            if issubclass(ParserElement._literalStringClass, Token):
+                expr = ParserElement._literalStringClass(expr)
+            else:
+                expr = ParserElement._literalStringClass(Literal(expr))
+        self.expr = expr
+        self.strRepr = None
+        if expr is not None:
+            self.mayIndexError = expr.mayIndexError
+            self.mayReturnEmpty = expr.mayReturnEmpty
+            self.setWhitespaceChars( expr.whiteChars )
+            self.skipWhitespace = expr.skipWhitespace
+            self.saveAsList = expr.saveAsList
+            self.callPreparse = expr.callPreparse
+            self.ignoreExprs.extend(expr.ignoreExprs)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.expr is not None:
+            return self.expr._parse( instring, loc, doActions, callPreParse=False )
+        else:
+            raise ParseException("",loc,self.errmsg,self)
+
+    def leaveWhitespace( self ):
+        self.skipWhitespace = False
+        if self.expr is not None:
+            # copy the contained expression before modifying it, so that other
+            # references to the same expression are unaffected
+            self.expr = self.expr.copy()
+            self.expr.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseElementEnhance, self).ignore( other )
+                if self.expr is not None:
+                    self.expr.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseElementEnhance, self).ignore( other )
+            if self.expr is not None:
+                self.expr.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def streamline( self ):
+        super(ParseElementEnhance,self).streamline()
+        if self.expr is not None:
+            self.expr.streamline()
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        if self in parseElementList:
+            raise RecursiveGrammarException( parseElementList+[self] )
+        subRecCheckList = parseElementList[:] + [ self ]
+        if self.expr is not None:
+            self.expr.checkRecursion( subRecCheckList )
+
+    def validate( self, validateTrace=[] ):
+        tmp = validateTrace[:]+[self]
+        if self.expr is not None:
+            self.expr.validate(tmp)
+        self.checkRecursion( [] )
+
+    def __str__( self ):
+        try:
+            return super(ParseElementEnhance,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None and self.expr is not None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
+        return self.strRepr
+
+
+class FollowedBy(ParseElementEnhance):
+    """
+    Lookahead matching of the given parse expression.  C{FollowedBy}
+    does I{not} advance the parsing position within the input string, it only
+    verifies that the specified parse expression matches at the current
+    position.  C{FollowedBy} always returns a null token list.
+
+    Example::
+        # use FollowedBy to match a label only if it is followed by a ':'
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        
+        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
+    prints::
+        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
+    """
+    def __init__( self, expr ):
+        super(FollowedBy,self).__init__(expr)
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        self.expr.tryParse( instring, loc )
+        return loc, []
+
+
+class NotAny(ParseElementEnhance):
+    """
+    Lookahead to disallow matching with the given parse expression.  C{NotAny}
+    does I{not} advance the parsing position within the input string, it only
+    verifies that the specified parse expression does I{not} match at the current
+    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
+    always returns a null token list.  May be constructed using the '~' operator.
+
+    Example::
+        
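+        # illustrative sketch: use NotAny to keep keywords from being
+        # mistaken for ordinary identifiers
+        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())
+        ident = NotAny(AND | OR | NOT) + Word(alphas)
+
+        # the '~' operator is shorthand for NotAny
+        ident = ~(AND | OR | NOT) + Word(alphas)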
+    """
+    def __init__( self, expr ):
+        super(NotAny,self).__init__(expr)
+        #~ self.leaveWhitespace()
+        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
+        self.mayReturnEmpty = True
+        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.expr.canParseNext(instring, loc):
+            raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "~{" + _ustr(self.expr) + "}"
+
+        return self.strRepr
+
+class _MultipleMatch(ParseElementEnhance):
+    def __init__( self, expr, stopOn=None):
+        super(_MultipleMatch, self).__init__(expr)
+        self.saveAsList = True
+        ender = stopOn
+        if isinstance(ender, basestring):
+            ender = ParserElement._literalStringClass(ender)
+        self.not_ender = ~ender if ender is not None else None
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        self_expr_parse = self.expr._parse
+        self_skip_ignorables = self._skipIgnorables
+        check_ender = self.not_ender is not None
+        if check_ender:
+            try_not_ender = self.not_ender.tryParse
+        
+        # must be at least one (but first see if we are the stopOn sentinel;
+        # if so, fail)
+        if check_ender:
+            try_not_ender(instring, loc)
+        loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
+        try:
+            hasIgnoreExprs = (not not self.ignoreExprs)
+            while 1:
+                if check_ender:
+                    try_not_ender(instring, loc)
+                if hasIgnoreExprs:
+                    preloc = self_skip_ignorables( instring, loc )
+                else:
+                    preloc = loc
+                loc, tmptokens = self_expr_parse( instring, preloc, doActions )
+                if tmptokens or tmptokens.haskeys():
+                    tokens += tmptokens
+        except (ParseException,IndexError):
+            pass
+
+        return loc, tokens
+        
+class OneOrMore(_MultipleMatch):
+    """
+    Repetition of one or more of the given expression.
+    
+    Parameters:
+     - expr - expression that must match one or more times
+     - stopOn - (default=C{None}) - expression for a terminating sentinel
+          (only required if the sentinel would ordinarily match the repetition 
+          expression)          
+
+    Example::
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+        text = "shape: SQUARE posn: upper left color: BLACK"
+        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! reads 'posn' as data instead of as the next label -> [['shape', 'SQUARE posn']]
+
+        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
+        
+        # could also be written as
+        (attr_expr * (1,)).parseString(text).pprint()
+    """
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + _ustr(self.expr) + "}..."
+
+        return self.strRepr
+
+class ZeroOrMore(_MultipleMatch):
+    """
+    Optional repetition of zero or more of the given expression.
+    
+    Parameters:
+     - expr - expression that must match zero or more times
+     - stopOn - (default=C{None}) - expression for a terminating sentinel
+          (only required if the sentinel would ordinarily match the repetition 
+          expression)          
+
+    Example: similar to L{OneOrMore}
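+
+    A minimal illustrative sketch::
+        greeting = Word(alphas) + ZeroOrMore(Suppress(',') + Word(alphas))
+        print(greeting.parseString("Hello, World, Again"))  # -> ['Hello', 'World', 'Again']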
+    """
+    def __init__( self, expr, stopOn=None):
+        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
+        self.mayReturnEmpty = True
+        
+    def parseImpl( self, instring, loc, doActions=True ):
+        try:
+            return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
+        except (ParseException,IndexError):
+            return loc, []
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]..."
+
+        return self.strRepr
+
+class _NullToken(object):
+    def __bool__(self):
+        return False
+    __nonzero__ = __bool__
+    def __str__(self):
+        return ""
+
+_optionalNotMatched = _NullToken()
+class Optional(ParseElementEnhance):
+    """
+    Optional matching of the given expression.
+
+    Parameters:
+     - expr - expression that may match zero or one time
+     - default (optional) - value to be returned if the optional expression is not found.
+
+    Example::
+        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
+        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
+        zip.runTests('''
+            # traditional ZIP code
+            12345
+            
+            # ZIP+4 form
+            12101-0001
+            
+            # invalid ZIP
+            98765-
+            ''')
+    prints::
+        # traditional ZIP code
+        12345
+        ['12345']
+
+        # ZIP+4 form
+        12101-0001
+        ['12101-0001']
+
+        # invalid ZIP
+        98765-
+             ^
+        FAIL: Expected end of text (at char 5), (line:1, col:6)
+    """
+    def __init__( self, expr, default=_optionalNotMatched ):
+        super(Optional,self).__init__( expr, savelist=False )
+        self.saveAsList = self.expr.saveAsList
+        self.defaultValue = default
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        try:
+            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+        except (ParseException,IndexError):
+            if self.defaultValue is not _optionalNotMatched:
+                if self.expr.resultsName:
+                    tokens = ParseResults([ self.defaultValue ])
+                    tokens[self.expr.resultsName] = self.defaultValue
+                else:
+                    tokens = [ self.defaultValue ]
+            else:
+                tokens = []
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]"
+
+        return self.strRepr
+
+class SkipTo(ParseElementEnhance):
+    """
+    Token for skipping over all undefined text until the matched expression is found.
+
+    Parameters:
+     - expr - target expression marking the end of the data to be skipped
+     - include - (default=C{False}) if True, the target expression is also parsed 
+          (the skipped text and target expression are returned as a 2-element list).
+     - ignore - (default=C{None}) used to define grammars (typically quoted strings and 
+          comments) that might contain false matches to the target expression
+     - failOn - (default=C{None}) define expressions that are not allowed to be 
+          included in the skipped text; if found before the target expression is found, 
+          the SkipTo is not a match
+
+    Example::
+        report = '''
+            Outstanding Issues Report - 1 Jan 2000
+
+               # | Severity | Description                               |  Days Open
+            -----+----------+-------------------------------------------+-----------
+             101 | Critical | Intermittent system crash                 |          6
+              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
+              79 | Minor    | System slow when running too many reports |         47
+            '''
+        integer = Word(nums)
+        SEP = Suppress('|')
+        # use SkipTo to simply match everything up until the next SEP
+        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
+        # - parse action will call token.strip() for each matched token, i.e., the description body
+        string_data = SkipTo(SEP, ignore=quotedString)
+        string_data.setParseAction(tokenMap(str.strip))
+        ticket_expr = (integer("issue_num") + SEP 
+                      + string_data("sev") + SEP 
+                      + string_data("desc") + SEP 
+                      + integer("days_open"))
+        
+        for tkt in ticket_expr.searchString(report):
+            print(tkt.dump())
+    prints::
+        ['101', 'Critical', 'Intermittent system crash', '6']
+        - days_open: 6
+        - desc: Intermittent system crash
+        - issue_num: 101
+        - sev: Critical
+        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
+        - days_open: 14
+        - desc: Spelling error on Login ('log|n')
+        - issue_num: 94
+        - sev: Cosmetic
+        ['79', 'Minor', 'System slow when running too many reports', '47']
+        - days_open: 47
+        - desc: System slow when running too many reports
+        - issue_num: 79
+        - sev: Minor
+    """
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
+        super( SkipTo, self ).__init__( other )
+        self.ignoreExpr = ignore
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.includeMatch = include
+        self.asList = False
+        if isinstance(failOn, basestring):
+            self.failOn = ParserElement._literalStringClass(failOn)
+        else:
+            self.failOn = failOn
+        self.errmsg = "No match found for "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        startloc = loc
+        instrlen = len(instring)
+        expr = self.expr
+        expr_parse = self.expr._parse
+        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
+        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
+        
+        tmploc = loc
+        while tmploc <= instrlen:
+            if self_failOn_canParseNext is not None:
+                # break if failOn expression matches
+                if self_failOn_canParseNext(instring, tmploc):
+                    break
+                    
+            if self_ignoreExpr_tryParse is not None:
+                # advance past ignore expressions
+                while 1:
+                    try:
+                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
+                    except ParseBaseException:
+                        break
+            
+            try:
+                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
+            except (ParseException, IndexError):
+                # no match, advance loc in string
+                tmploc += 1
+            else:
+                # matched skipto expr, done
+                break
+
+        else:
+            # ran off the end of the input string without matching skipto expr, fail
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        # build up return values
+        loc = tmploc
+        skiptext = instring[startloc:loc]
+        skipresult = ParseResults(skiptext)
+        
+        if self.includeMatch:
+            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
+            skipresult += mat
+
+        return loc, skipresult
+
+class Forward(ParseElementEnhance):
+    """
+    Forward declaration of an expression to be defined later -
+    used for recursive grammars, such as algebraic infix notation.
+    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+    Specifically, '|' has a lower precedence than '<<', so that::
+        fwdExpr << a | b | c
+    will actually be evaluated as::
+        (fwdExpr << a) | b | c
+    thereby leaving b and c out as parseable alternatives.  It is recommended that you
+    explicitly group the values inserted into the C{Forward}::
+        fwdExpr << (a | b | c)
+    Converting to use the '<<=' operator instead will avoid this problem.
+
+    See L{ParseResults.pprint} for an example of a recursive parser created using
+    C{Forward}.
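+
+    A minimal illustrative sketch::
+        # recursive grammar for nested, parenthesized lists of integers
+        LPAR, RPAR = map(Suppress, "()")
+        integer = Word(nums)
+        nested = Forward()
+        nested <<= Group(LPAR + ZeroOrMore(integer | nested) + RPAR)
+        print(nested.parseString("(1 (2 3) 4)"))  # -> [['1', ['2', '3'], '4']]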
+    """
+    def __init__( self, other=None ):
+        super(Forward,self).__init__( other, savelist=False )
+
+    def __lshift__( self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass(other)
+        self.expr = other
+        self.strRepr = None
+        self.mayIndexError = self.expr.mayIndexError
+        self.mayReturnEmpty = self.expr.mayReturnEmpty
+        self.setWhitespaceChars( self.expr.whiteChars )
+        self.skipWhitespace = self.expr.skipWhitespace
+        self.saveAsList = self.expr.saveAsList
+        self.ignoreExprs.extend(self.expr.ignoreExprs)
+        return self
+        
+    def __ilshift__(self, other):
+        return self << other
+    
+    def leaveWhitespace( self ):
+        self.skipWhitespace = False
+        return self
+
+    def streamline( self ):
+        if not self.streamlined:
+            self.streamlined = True
+            if self.expr is not None:
+                self.expr.streamline()
+        return self
+
+    def validate( self, validateTrace=[] ):
+        if self not in validateTrace:
+            tmp = validateTrace[:]+[self]
+            if self.expr is not None:
+                self.expr.validate(tmp)
+        self.checkRecursion([])
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+        return self.__class__.__name__ + ": ..."
+
+        # stubbed out for now - creates awful memory and perf issues
+        self._revertClass = self.__class__
+        self.__class__ = _ForwardNoRecurse
+        try:
+            if self.expr is not None:
+                retString = _ustr(self.expr)
+            else:
+                retString = "None"
+        finally:
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString
+
+    def copy(self):
+        if self.expr is not None:
+            return super(Forward,self).copy()
+        else:
+            ret = Forward()
+            ret <<= self
+            return ret
+
+class _ForwardNoRecurse(Forward):
+    def __str__( self ):
+        return "..."
+
+class TokenConverter(ParseElementEnhance):
+    """
+    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(TokenConverter,self).__init__( expr )#, savelist )
+        self.saveAsList = False
+
+class Combine(TokenConverter):
+    """
+    Converter to concatenate all matching tokens to a single string.
+    By default, the matching patterns must also be contiguous in the input string;
+    this can be disabled by specifying C{'adjacent=False'} in the constructor.
+
+    Example::
+        real = Word(nums) + '.' + Word(nums)
+        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
+        # will also erroneously match the following
+        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
+
+        real = Combine(Word(nums) + '.' + Word(nums))
+        print(real.parseString('3.1416')) # -> ['3.1416']
+        # no match when there are internal spaces
+        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
+    """
+    def __init__( self, expr, joinString="", adjacent=True ):
+        super(Combine,self).__init__( expr )
+        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
+        if adjacent:
+            self.leaveWhitespace()
+        self.adjacent = adjacent
+        self.skipWhitespace = True
+        self.joinString = joinString
+        self.callPreparse = True
+
+    def ignore( self, other ):
+        if self.adjacent:
+            ParserElement.ignore(self, other)
+        else:
+            super( Combine, self).ignore( other )
+        return self
+
+    def postParse( self, instring, loc, tokenlist ):
+        retToks = tokenlist.copy()
+        del retToks[:]
+        retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
+
+        if self.resultsName and retToks.haskeys():
+            return [ retToks ]
+        else:
+            return retToks
+
+class Group(TokenConverter):
+    """
+    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
+
+    Example::
+        ident = Word(alphas)
+        num = Word(nums)
+        term = ident | num
+        func = ident + Optional(delimitedList(term))
+        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']
+
+        func = ident + Group(Optional(delimitedList(term)))
+        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
+    """
+    def __init__( self, expr ):
+        super(Group,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        return [ tokenlist ]
+
+class Dict(TokenConverter):
+    """
+    Converter to return a repetitive expression as a list, but also as a dictionary.
+    Each element can also be referenced using the first token in the expression as its key.
+    Useful for tabular report scraping when the first column can be used as an item key.
+
+    Example::
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        
+        # print attributes as plain groups
+        print(OneOrMore(attr_expr).parseString(text).dump())
+        
+        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
+        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
+        print(result.dump())
+        
+        # access named fields as dict entries, or output as dict
+        print(result['shape'])        
+        print(result.asDict())
+    prints::
+        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
+
+        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+        - color: light blue
+        - posn: upper left
+        - shape: SQUARE
+        - texture: burlap
+        SQUARE
+        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
+    See more examples at L{ParseResults} of accessing fields by results name.
+    """
+    def __init__( self, expr ):
+        super(Dict,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        for i,tok in enumerate(tokenlist):
+            if len(tok) == 0:
+                continue
+            ikey = tok[0]
+            if isinstance(ikey,int):
+                ikey = _ustr(tok[0]).strip()
+            if len(tok)==1:
+                tokenlist[ikey] = _ParseResultsWithOffset("",i)
+            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
+                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
+            else:
+                dictvalue = tok.copy() #ParseResults(i)
+                del dictvalue[0]
+                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
+                else:
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
+
+        if self.resultsName:
+            return [ tokenlist ]
+        else:
+            return tokenlist
+
+
+class Suppress(TokenConverter):
+    """
+    Converter for ignoring the results of a parsed expression.
+
+    Example::
+        source = "a, b, c,d"
+        wd = Word(alphas)
+        wd_list1 = wd + ZeroOrMore(',' + wd)
+        print(wd_list1.parseString(source))
+
+        # often, delimiters that are useful during parsing are just in the
+        # way afterward - use Suppress to keep them out of the parsed output
+        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
+        print(wd_list2.parseString(source))
+    prints::
+        ['a', ',', 'b', ',', 'c', ',', 'd']
+        ['a', 'b', 'c', 'd']
+    (See also L{delimitedList}.)
+    """
+    def postParse( self, instring, loc, tokenlist ):
+        return []
+
+    def suppress( self ):
+        return self
+
+
+class OnlyOnce(object):
+    """
+    Wrapper for parse actions, to ensure they are only called once.
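+
+    A minimal illustrative sketch::
+        def mark_token(s, l, t):
+            return ['FIRST-' + t[0]]
+        pa = OnlyOnce(mark_token)
+        wd = Word(alphas).setParseAction(pa)
+        print(wd.parseString("hello"))  # -> ['FIRST-hello']
+        # a second parse raises ParseException until pa.reset() is called
+        pa.reset()
+        print(wd.parseString("again"))  # -> ['FIRST-again']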
+    """
+    def __init__(self, methodCall):
+        self.callable = _trim_arity(methodCall)
+        self.called = False
+    def __call__(self,s,l,t):
+        if not self.called:
+            results = self.callable(s,l,t)
+            self.called = True
+            return results
+        raise ParseException(s,l,"")
+    def reset(self):
+        self.called = False
+
+def traceParseAction(f):
+    """
+    Decorator for debugging parse actions. 
+    
+    When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
+    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
+
+    Example::
+        wd = Word(alphas)
+
+        @traceParseAction
+        def remove_duplicate_chars(tokens):
+            return ''.join(sorted(set(''.join(tokens))))
+
+        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
+        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
+    prints::
+        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
+        <<leaving remove_duplicate_chars (ret: 'dfjkls')
+        ['dfjkls']
+    """
+    f = _trim_arity(f)
+    def z(*paArgs):
+        thisFunc = f.__name__
+        s,l,t = paArgs[-3:]
+        if len(paArgs)>3:
+            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
+        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
+        try:
+            ret = f(*paArgs)
+        except Exception as exc:
+            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
+            raise
+        sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
+        return ret
+    try:
+        z.__name__ = f.__name__
+    except AttributeError:
+        pass
+    return z
+
+#
+# global helpers
+#
+def delimitedList( expr, delim=",", combine=False ):
+    """
+    Helper to define a delimited list of expressions - the delimiter defaults to ','.
+    By default, the list elements and delimiters can have intervening whitespace and
+    comments, but this can be overridden by passing C{combine=True} in the constructor.
+    If C{combine} is set to C{True}, the matching tokens are returned as a single token
+    string, with the delimiters included; otherwise, the matching tokens are returned
+    as a list of tokens, with the delimiters suppressed.
+
+    Example::
+        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
+        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
+    """
+    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
+    if combine:
+        return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
+    else:
+        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
+
+def countedArray( expr, intExpr=None ):
+    """
+    Helper to define a counted list of expressions.
+    This helper defines a pattern of the form::
+        integer expr expr expr...
+    where the leading integer tells how many expr expressions follow.
+    The matched tokens are returned as a list of expr tokens - the leading count token is suppressed.
+    
+    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
+
+    Example::
+        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']
+
+        # in this parser, the leading integer value is given in binary,
+        # '10' indicating that 2 values are in the array
+        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
+        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
+    """
+    arrayExpr = Forward()
+    def countFieldParseAction(s,l,t):
+        n = t[0]
+        arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
+        return []
+    if intExpr is None:
+        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
+    else:
+        intExpr = intExpr.copy()
+    intExpr.setName("arrayLen")
+    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
+    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
+
+def _flatten(L):
+    ret = []
+    for i in L:
+        if isinstance(i,list):
+            ret.extend(_flatten(i))
+        else:
+            ret.append(i)
+    return ret
+
+def matchPreviousLiteral(expr):
+    """
+    Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousLiteral(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
+    previous literal, it will also match the leading C{"1:1"} in C{"1:10"}.
+    If this is not desired, use C{matchPreviousExpr}.
+    Do I{not} use with packrat parsing enabled.
+    """
+    rep = Forward()
+    def copyTokenToRepeater(s,l,t):
+        if t:
+            if len(t) == 1:
+                rep << t[0]
+            else:
+                # flatten t tokens
+                tflat = _flatten(t.asList())
+                rep << And(Literal(tt) for tt in tflat)
+        else:
+            rep << Empty()
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    rep.setName('(prev) ' + _ustr(expr))
+    return rep
+
+def matchPreviousExpr(expr):
+    """
+    Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousExpr(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
+    expressions, it will I{not} match the leading C{"1:1"} in C{"1:10"};
+    the expressions are evaluated first, and then compared, so
+    C{"1"} is compared with C{"10"}.
+    Do I{not} use with packrat parsing enabled.
+    """
+    rep = Forward()
+    e2 = expr.copy()
+    rep <<= e2
+    def copyTokenToRepeater(s,l,t):
+        matchTokens = _flatten(t.asList())
+        def mustMatchTheseTokens(s,l,t):
+            theseTokens = _flatten(t.asList())
+            if  theseTokens != matchTokens:
+                raise ParseException("",0,"")
+        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    rep.setName('(prev) ' + _ustr(expr))
+    return rep
+
+def _escapeRegexRangeChars(s):
+    #~  escape these chars: ^-]
+    for c in r"\^-]":
+        s = s.replace(c,_bslash+c)
+    s = s.replace("\n",r"\n")
+    s = s.replace("\t",r"\t")
+    return _ustr(s)
+
+def oneOf( strs, caseless=False, useRegex=True ):
+    """
+    Helper to quickly define a set of alternative Literals, making sure to do
+    longest-first testing when there is a conflict, regardless of the input order;
+    returns a C{L{MatchFirst}} for best performance.
+
+    Parameters:
+     - strs - a string of space-delimited literals, or a collection of string literals
+     - caseless - (default=C{False}) - treat all literals as caseless
+     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
+          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
+          if creating a C{Regex} raises an exception)
+
+    Example::
+        comp_oper = oneOf("< = > <= >= !=")
+        var = Word(alphas)
+        number = Word(nums)
+        term = var | number
+        comparison_expr = term + comp_oper + term
+        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
+    prints::
+        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
+    """
+    if caseless:
+        isequal = ( lambda a,b: a.upper() == b.upper() )
+        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
+        parseElementClass = CaselessLiteral
+    else:
+        isequal = ( lambda a,b: a == b )
+        masks = ( lambda a,b: b.startswith(a) )
+        parseElementClass = Literal
+
+    symbols = []
+    if isinstance(strs,basestring):
+        symbols = strs.split()
+    elif isinstance(strs, collections.Iterable):
+        symbols = list(strs)
+    else:
+        warnings.warn("Invalid argument to oneOf, expected string or iterable",
+                SyntaxWarning, stacklevel=2)
+    if not symbols:
+        return NoMatch()
+
+    i = 0
+    while i < len(symbols)-1:
+        cur = symbols[i]
+        for j,other in enumerate(symbols[i+1:]):
+            if ( isequal(other, cur) ):
+                del symbols[i+j+1]
+                break
+            elif ( masks(cur, other) ):
+                del symbols[i+j+1]
+                symbols.insert(i,other)
+                cur = other
+                break
+        else:
+            i += 1
+
+    if not caseless and useRegex:
+        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
+        try:
+            if len(symbols)==len("".join(symbols)):
+                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
+            else:
+                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
+        except Exception:
+            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
+                    SyntaxWarning, stacklevel=2)
+
+
+    # last resort, just use MatchFirst
+    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
+
+def dictOf( key, value ):
+    """
+    Helper to easily and clearly define a dictionary by specifying the respective patterns
+    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
+    in the proper order.  The key pattern can include delimiting markers or punctuation,
+    as long as they are suppressed, thereby leaving the significant key text.  The value
+    pattern can include named results, so that the C{Dict} results can include named token
+    fields.
+
+    Example::
+        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        print(OneOrMore(attr_expr).parseString(text).dump())
+        
+        attr_label = label
+        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
+
+        # similar to Dict, but simpler call format
+        result = dictOf(attr_label, attr_value).parseString(text)
+        print(result.dump())
+        print(result['shape'])
+        print(result.shape)  # object attribute access works too
+        print(result.asDict())
+    prints::
+        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+        - color: light blue
+        - posn: upper left
+        - shape: SQUARE
+        - texture: burlap
+        SQUARE
+        SQUARE
+        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
+    """
+    return Dict( ZeroOrMore( Group ( key + value ) ) )
+
+def originalTextFor(expr, asString=True):
+    """
+    Helper to return the original, untokenized text for a given expression.  Useful to
+    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+    revert separate tokens with intervening whitespace back to the original matching
+    input text. By default, returns a string containing the original parsed text.
+       
+    If the optional C{asString} argument is passed as C{False}, then the return value is a 
+    C{L{ParseResults}} containing any results names that were originally matched, and a 
+    single token containing the original matched text from the input string.  So if 
+    the expression passed to C{L{originalTextFor}} contains expressions with defined
+    results names, you must set C{asString} to C{False} if you want to preserve those
+    results name values.
+
+    Example::
+        src = "this is test <b> bold <i>text</i> </b> normal text "
+        for tag in ("b","i"):
+            opener,closer = makeHTMLTags(tag)
+            patt = originalTextFor(opener + SkipTo(closer) + closer)
+            print(patt.searchString(src)[0])
+    prints::
+        ['<b> bold <i>text</i> </b>']
+        ['<i>text</i>']
+    """
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    endlocMarker = locMarker.copy()
+    endlocMarker.callPreparse = False
+    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
+    if asString:
+        extractText = lambda s,l,t: s[t._original_start:t._original_end]
+    else:
+        def extractText(s,l,t):
+            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
+    matchExpr.setParseAction(extractText)
+    matchExpr.ignoreExprs = expr.ignoreExprs
+    return matchExpr
+
+def ungroup(expr): 
+    """
+    Helper to undo pyparsing's default grouping of And expressions, even
+    if all but one are non-empty.
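+
+    A minimal illustrative sketch::
+        grouped = Group(Word(alphas) + Word(nums))
+        print(grouped.parseString("abc 123"))           # -> [['abc', '123']]
+        print(ungroup(grouped).parseString("abc 123"))  # -> ['abc', '123']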
+    """
+    return TokenConverter(expr).setParseAction(lambda t:t[0])
+
+def locatedExpr(expr):
+    """
+    Helper to decorate a returned token with its starting and ending locations in the input string.
+    This helper adds the following results names:
+     - locn_start = location where matched expression begins
+     - locn_end = location where matched expression ends
+     - value = the actual parsed results
+
+    Be careful if the input text contains C{<TAB>} characters; you may want to call
+    C{L{ParserElement.parseWithTabs}}
+
+    Example::
+        wd = Word(alphas)
+        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+            print(match)
+    prints::
+        [[0, 'ljsdf', 5]]
+        [[8, 'lksdjjf', 15]]
+        [[18, 'lkkjj', 23]]
+    """
+    locator = Empty().setParseAction(lambda s,l,t: l)
+    return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
+
+
+# convenience constants for positional expressions
+empty       = Empty().setName("empty")
+lineStart   = LineStart().setName("lineStart")
+lineEnd     = LineEnd().setName("lineEnd")
+stringStart = StringStart().setName("stringStart")
+stringEnd   = StringEnd().setName("stringEnd")
+
+_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
+_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
+_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
+_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
+_charRange = Group(_singleChar + Suppress("-") + _singleChar)
+_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
+
+def srange(s):
+    r"""
+    Helper to easily define string ranges for use in Word construction.  Borrows
+    syntax from regexp '[]' string range definitions::
+        srange("[0-9]")   -> "0123456789"
+        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
+        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
+    The input string must be enclosed in []'s, and the returned string is the expanded
+    character set joined into a single string.
+    The values enclosed in the []'s may be:
+     - a single character
+     - an escaped character with a leading backslash (such as C{\-} or C{\]})
+     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) 
+         (C{\0x##} is also supported for backwards compatibility) 
+     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
+     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
+     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
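+
+    A minimal illustrative sketch::
+        # define a Word using srange-generated character sets
+        identifier = Word(srange("[a-zA-Z_]"), srange("[a-zA-Z0-9_]"))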
+    """
+    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
+    try:
+        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
+    except Exception:
+        return ""
+
+def matchOnlyAtCol(n):
+    """
+    Helper method for defining parse actions that require matching at a specific
+    column in the input text.
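+
+    A minimal illustrative sketch::
+        # only accept a number that starts in column 5 of its line
+        num_at_col5 = Word(nums).setParseAction(matchOnlyAtCol(5))
+        num_at_col5.parseString("    123")  # -> ['123'] (starts at column 5)
+        num_at_col5.parseString("  123")    # raises ParseException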
+    """
+    def verifyCol(strg,locn,toks):
+        if col(locn,strg) != n:
+            raise ParseException(strg,locn,"matched token not at column %d" % n)
+    return verifyCol
+
+def replaceWith(replStr):
+    """
+    Helper method for common parse actions that simply return a literal value.  Especially
+    useful when used with C{L{transformString<ParserElement.transformString>}()}.
+
+    Example::
+        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
+        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
+        term = na | num
+        
+        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
+    """
+    return lambda s,l,t: [replStr]
+
+def removeQuotes(s,l,t):
+    """
+    Helper parse action for removing quotation marks from parsed quoted strings.
+
+    Example::
+        # by default, quotation marks are included in parsed results
+        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
+
+        # use removeQuotes to strip quotation marks from parsed results
+        quotedString.setParseAction(removeQuotes)
+        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
+    """
+    return t[0][1:-1]
+
+def tokenMap(func, *args):
+    """
+    Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
+    args are passed, they are forwarded to the given function as additional arguments after
+    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
+    parsed data to an integer using base 16.
+
+    Example (compare the last example to the one in L{ParserElement.transformString})::
+        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
+        hex_ints.runTests('''
+            00 11 22 aa FF 0a 0d 1a
+            ''')
+        
+        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
+        OneOrMore(upperword).runTests('''
+            my kingdom for a horse
+            ''')
+
+        wd = Word(alphas).setParseAction(tokenMap(str.title))
+        OneOrMore(wd).setParseAction(' '.join).runTests('''
+            now is the winter of our discontent made glorious summer by this sun of york
+            ''')
+    prints::
+        00 11 22 aa FF 0a 0d 1a
+        [0, 17, 34, 170, 255, 10, 13, 26]
+
+        my kingdom for a horse
+        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
+
+        now is the winter of our discontent made glorious summer by this sun of york
+        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
+    """
+    def pa(s,l,t):
+        return [func(tokn, *args) for tokn in t]
+
+    try:
+        func_name = getattr(func, '__name__', 
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    pa.__name__ = func_name
+
+    return pa
+
+upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
+"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""
+
+downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
+"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
+    
+def _makeTags(tagStr, xml):
+    """Internal helper to construct opening and closing tag expressions, given a tag name"""
+    if isinstance(tagStr,basestring):
+        resname = tagStr
+        tagStr = Keyword(tagStr, caseless=not xml)
+    else:
+        resname = tagStr.name
+
+    tagAttrName = Word(alphas,alphanums+"_-:")
+    if (xml):
+        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    else:
+        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
+        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
+                Optional( Suppress("=") + tagAttrValue ) ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    closeTag = Combine(_L("</") + tagStr + ">")
+
+    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
+    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
+    openTag.tag = resname
+    closeTag.tag = resname
+    return openTag, closeTag
+
+def makeHTMLTags(tagStr):
+    """
+    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
+    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
+
+    Example::
+        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
+        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
+        a,a_end = makeHTMLTags("A")
+        link_expr = a + SkipTo(a_end)("link_text") + a_end
+        
+        for link in link_expr.searchString(text):
+            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
+            print(link.link_text, '->', link.href)
+    prints::
+        pyparsing -> http://pyparsing.wikispaces.com
+    """
+    return _makeTags( tagStr, False )
+
+def makeXMLTags(tagStr):
+    """
+    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
+    tags only in the given upper/lower case.
+
+    Example: similar to L{makeHTMLTags}
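+
+    A minimal illustrative sketch::
+        body, body_end = makeXMLTags("body")
+        text_expr = body + SkipTo(body_end)("text") + body_end
+        print(text_expr.parseString("<body>Some text</body>").text)  # -> Some text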
+    """
+    return _makeTags( tagStr, True )
+
+def withAttribute(*args,**attrDict):
+    """
+    Helper to create a validating parse action to be used with start tags created
+    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
+    with a required attribute value, to avoid false matches on common tags such as
+    C{<TD>} or C{<DIV>}.
+
+    Call C{withAttribute} with a series of attribute names and values. Specify the list
+    of filter attribute names and values as:
+     - keyword arguments, as in C{(align="right")}, or
+     - as an explicit dict with C{**} operator, when an attribute name is also a Python
+          reserved word, as in C{**{"class":"Customer", "align":"right"}}
+     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
+    For attribute names with a namespace prefix, you must use the second form.  Attribute
+    names are matched insensitive to upper/lower case.
+       
+    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
+
+    To verify that the attribute exists, but without specifying a value, pass
+    C{withAttribute.ANY_VALUE} as the value.
+
+    Example::
+        html = '''
+            <div>
+            Some text
+            <div type="grid">1 4 0 1 0</div>
+            <div type="graph">1,3 2,3 1,1</div>
+            <div>this has no type</div>
+            </div>
+                
+        '''
+        div,div_end = makeHTMLTags("div")
+
+        # only match div tag having a type attribute with value "grid"
+        div_grid = div().setParseAction(withAttribute(type="grid"))
+        grid_expr = div_grid + SkipTo(div | div_end)("body")
+        for grid_header in grid_expr.searchString(html):
+            print(grid_header.body)
+        
+        # construct a match with any div tag having a type attribute, regardless of the value
+        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
+        div_expr = div_any_type + SkipTo(div | div_end)("body")
+        for div_header in div_expr.searchString(html):
+            print(div_header.body)
+    prints::
+        1 4 0 1 0
+
+        1 4 0 1 0
+        1,3 2,3 1,1
+    """
+    if args:
+        attrs = args[:]
+    else:
+        attrs = attrDict.items()
+    attrs = [(k,v) for k,v in attrs]
+    def pa(s,l,tokens):
+        for attrName,attrValue in attrs:
+            if attrName not in tokens:
+                raise ParseException(s,l,"no matching attribute " + attrName)
+            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
+                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
+                                            (attrName, tokens[attrName], attrValue))
+    return pa
+withAttribute.ANY_VALUE = object()
+
+def withClass(classname, namespace=''):
+    """
+    Simplified version of C{L{withAttribute}} when matching on a div class - made
+    difficult because C{class} is a reserved word in Python.
+
+    Example::
+        html = '''
+            <div>
+            Some text
+            <div class="grid">1 4 0 1 0</div>
+            <div class="graph">1,3 2,3 1,1</div>
+            <div>this &lt;div&gt; has no class</div>
+            </div>
+                
+        '''
+        div,div_end = makeHTMLTags("div")
+        div_grid = div().setParseAction(withClass("grid"))
+        
+        grid_expr = div_grid + SkipTo(div | div_end)("body")
+        for grid_header in grid_expr.searchString(html):
+            print(grid_header.body)
+        
+        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
+        div_expr = div_any_type + SkipTo(div | div_end)("body")
+        for div_header in div_expr.searchString(html):
+            print(div_header.body)
+    prints::
+        1 4 0 1 0
+
+        1 4 0 1 0
+        1,3 2,3 1,1
+    """
+    classattr = "%s:class" % namespace if namespace else "class"
+    return withAttribute(**{classattr : classname})        
+
+opAssoc = _Constants()
+opAssoc.LEFT = object()
+opAssoc.RIGHT = object()
+
+def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
+    """
+    Helper method for constructing grammars of expressions made up of
+    operators working in a precedence hierarchy.  Operators may be unary or
+    binary, left- or right-associative.  Parse actions can also be attached
+    to operator expressions. The generated parser will also recognize the use 
+    of parentheses to override operator precedences (see example below).
+    
+    Note: if you define a deep operator list, you may see performance issues
+    when using infixNotation. See L{ParserElement.enablePackrat} for a
+    mechanism to potentially improve your parser performance.
+
+    Parameters:
+     - baseExpr - expression representing the most basic element of the nested expression
+     - opList - list of tuples, one for each operator precedence level in the
+      expression grammar; each tuple is of the form
+      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
+       - opExpr is the pyparsing expression for the operator;
+          may also be a string, which will be converted to a Literal;
+          if numTerms is 3, opExpr is a tuple of two expressions, for the
+          two operators separating the 3 terms
+       - numTerms is the number of terms for this operator (must
+          be 1, 2, or 3)
+       - rightLeftAssoc is the indicator whether the operator is
+          right or left associative, using the pyparsing-defined
+          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
+       - parseAction is the parse action to be associated with
+          expressions matching this operator expression (the
+          parse action tuple member may be omitted)
+     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
+     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
+
+    Example::
+        # simple example of four-function arithmetic with ints and variable names
+        integer = pyparsing_common.signed_integer
+        varname = pyparsing_common.identifier 
+        
+        arith_expr = infixNotation(integer | varname,
+            [
+            ('-', 1, opAssoc.RIGHT),
+            (oneOf('* /'), 2, opAssoc.LEFT),
+            (oneOf('+ -'), 2, opAssoc.LEFT),
+            ])
+        
+        arith_expr.runTests('''
+            5+3*6
+            (5+3)*6
+            -2--11
+            ''', fullDump=False)
+    prints::
+        5+3*6
+        [[5, '+', [3, '*', 6]]]
+
+        (5+3)*6
+        [[[5, '+', 3], '*', 6]]
+
+        -2--11
+        [[['-', 2], '-', ['-', 11]]]
+    """
+    ret = Forward()
+    lastExpr = baseExpr | ( lpar + ret + rpar )
+    for i,operDef in enumerate(opList):
+        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
+        termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
+        if arity == 3:
+            if opExpr is None or len(opExpr) != 2:
+                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
+            opExpr1, opExpr2 = opExpr
+        thisExpr = Forward().setName(termName)
+        if rightLeftAssoc == opAssoc.LEFT:
+            if arity == 1:
+                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
+            elif arity == 2:
+                if opExpr is not None:
+                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
+                else:
+                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
+            elif arity == 3:
+                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
+                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
+            else:
+                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+        elif rightLeftAssoc == opAssoc.RIGHT:
+            if arity == 1:
+                # try to avoid LR with this extra test
+                if not isinstance(opExpr, Optional):
+                    opExpr = Optional(opExpr)
+                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
+            elif arity == 2:
+                if opExpr is not None:
+                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
+                else:
+                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
+            elif arity == 3:
+                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
+                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
+            else:
+                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+        else:
+            raise ValueError("operator must indicate right or left associativity")
+        if pa:
+            matchExpr.setParseAction( pa )
+        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
+        lastExpr = thisExpr
+    ret <<= lastExpr
+    return ret
+
+operatorPrecedence = infixNotation
+"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
+
+dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
+sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
+quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
+                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
+unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
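+
+# Illustrative usage sketch: the predefined quoted-string expressions match the
+# enclosing quotes as part of the token; adding the removeQuotes parse action
+# strips them.  The helper name below is hypothetical and is never called.
+def _quoted_string_example():
+    quoted = quotedString.copy().setParseAction(removeQuotes)
+    return quoted.parseString('"hello world"')[0]   # -> 'hello world'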
+
+def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
+    """
+    Helper method for defining nested lists enclosed in opening and closing
+    delimiters ("(" and ")" are the default).
+
+    Parameters:
+     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
+     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
+     - content - expression for items within the nested lists (default=C{None})
+     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
+
+    If an expression is not provided for the content argument, the nested
+    expression will capture all whitespace-delimited content between delimiters
+    as a list of separate values.
+
+    Use the C{ignoreExpr} argument to define expressions that may contain
+    opening or closing characters that should not be treated as opening
+    or closing characters for nesting, such as quotedString or a comment
+    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
+    The default is L{quotedString}, but if no expressions are to be ignored,
+    then pass C{None} for this argument.
+
+    Example::
+        data_type = oneOf("void int short long char float double")
+        decl_data_type = Combine(data_type + Optional(Word('*')))
+        ident = Word(alphas+'_', alphanums+'_')
+        number = pyparsing_common.number
+        arg = Group(decl_data_type + ident)
+        LPAR,RPAR = map(Suppress, "()")
+
+        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
+
+        c_function = (decl_data_type("type") 
+                      + ident("name")
+                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR 
+                      + code_body("body"))
+        c_function.ignore(cStyleComment)
+        
+        source_code = '''
+            int is_odd(int x) { 
+                return (x%2); 
+            }
+                
+            int dec_to_hex(char hchar) { 
+                if (hchar >= '0' && hchar <= '9') { 
+                    return (ord(hchar)-ord('0')); 
+                } else { 
+                    return (10+ord(hchar)-ord('A'));
+                } 
+            }
+        '''
+        for func in c_function.searchString(source_code):
+            print("%(name)s (%(type)s) args: %(args)s" % func)
+
+    prints::
+        is_odd (int) args: [['int', 'x']]
+        dec_to_hex (int) args: [['char', 'hchar']]
+    """
+    if opener == closer:
+        raise ValueError("opening and closing strings cannot be the same")
+    if content is None:
+        if isinstance(opener,basestring) and isinstance(closer,basestring):
+            if len(opener) == 1 and len(closer)==1:
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr +
+                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+                                ).setParseAction(lambda t:t[0].strip()))
+            else:
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr + 
+                                    ~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+        else:
+            raise ValueError("opening and closing arguments must be strings if no content expression is given")
+    ret = Forward()
+    if ignoreExpr is not None:
+        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
+    else:
+        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) )
+    ret.setName('nested %s%s expression' % (opener,closer))
+    return ret
+
+def indentedBlock(blockStatementExpr, indentStack, indent=True):
+    """
+    Helper method for defining space-delimited indentation blocks, such as
+    those used to define block statements in Python source code.
+
+    Parameters:
+     - blockStatementExpr - expression defining syntax of statement that
+            is repeated within the indented block
+     - indentStack - list created by caller to manage indentation stack
+            (multiple statementWithIndentedBlock expressions within a single grammar
+            should share a common indentStack)
+     - indent - boolean indicating whether the block must be indented beyond
+            the current level; set to False for a block of left-most statements
+            (default=C{True})
+
+    A valid block must contain at least one C{blockStatement}.
+
+    Example::
+        data = '''
+        def A(z):
+          A1
+          B = 100
+          G = A2
+          A2
+          A3
+        B
+        def BB(a,b,c):
+          BB1
+          def BBA():
+            bba1
+            bba2
+            bba3
+        C
+        D
+        def spam(x,y):
+             def eggs(z):
+                 pass
+        '''
+
+
+        indentStack = [1]
+        stmt = Forward()
+
+        identifier = Word(alphas, alphanums)
+        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
+        func_body = indentedBlock(stmt, indentStack)
+        funcDef = Group( funcDecl + func_body )
+
+        rvalue = Forward()
+        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
+        rvalue << (funcCall | identifier | Word(nums))
+        assignment = Group(identifier + "=" + rvalue)
+        stmt << ( funcDef | assignment | identifier )
+
+        module_body = OneOrMore(stmt)
+
+        parseTree = module_body.parseString(data)
+        parseTree.pprint()
+    prints::
+        [['def',
+          'A',
+          ['(', 'z', ')'],
+          ':',
+          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
+         'B',
+         ['def',
+          'BB',
+          ['(', 'a', 'b', 'c', ')'],
+          ':',
+          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
+         'C',
+         'D',
+         ['def',
+          'spam',
+          ['(', 'x', 'y', ')'],
+          ':',
+          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] 
+    """
+    def checkPeerIndent(s,l,t):
+        if l >= len(s): return
+        curCol = col(l,s)
+        if curCol != indentStack[-1]:
+            if curCol > indentStack[-1]:
+                raise ParseFatalException(s,l,"illegal nesting")
+            raise ParseException(s,l,"not a peer entry")
+
+    def checkSubIndent(s,l,t):
+        curCol = col(l,s)
+        if curCol > indentStack[-1]:
+            indentStack.append( curCol )
+        else:
+            raise ParseException(s,l,"not a subentry")
+
+    def checkUnindent(s,l,t):
+        if l >= len(s): return
+        curCol = col(l,s)
+        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
+            raise ParseException(s,l,"not an unindent")
+        indentStack.pop()
+
+    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
+    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
+    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
+    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
+    if indent:
+        smExpr = Group( Optional(NL) +
+            #~ FollowedBy(blockStatementExpr) +
+            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
+    else:
+        smExpr = Group( Optional(NL) +
+            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
+    blockStatementExpr.ignore(_bslash + LineEnd())
+    return smExpr.setName('indented block')
+
+alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
+punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
+
+anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
+commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
+def replaceHTMLEntity(t):
+    """Helper parser action to replace common HTML entities with their special characters"""
+    return _htmlEntityMap.get(t.entity)
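+
+# Illustrative usage sketch: attach replaceHTMLEntity to a copy of
+# commonHTMLEntity and use transformString to decode entities in place.
+# The helper name below is hypothetical and is never called.
+def _replace_html_entity_example():
+    markup = "if a &lt; b &amp;&amp; b &lt; c"
+    decoder = commonHTMLEntity.copy().setParseAction(replaceHTMLEntity)
+    return decoder.transformString(markup)   # -> 'if a < b && b < c'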
+
+# it's easy to get these comment structures wrong - they're very common, so may as well make them available
+cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
+"Comment of the form C{/* ... */}"
+
+htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
+"Comment of the form C{<!-- ... -->}"
+
+restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
+dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
+"Comment of the form C{// ... (to end of line)}"
+
+cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
+"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
+
+javaStyleComment = cppStyleComment
+"Same as C{L{cppStyleComment}}"
+
+pythonStyleComment = Regex(r"#.*").setName("Python style comment")
+"Comment of the form C{# ... (to end of line)}"
+
+_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
+                                  Optional( Word(" \t") +
+                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
+commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
+"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
+   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
+
+# some other useful expressions - using lower-case class name since we are really using this as a namespace
+class pyparsing_common:
+    """
+    Here are some common low-level expressions that may be useful in jump-starting parser development:
+     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
+     - common L{programming identifiers<identifier>}
+     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
+     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
+     - L{UUID<uuid>}
+     - L{comma-separated list<comma_separated_list>}
+    Parse actions:
+     - C{L{convertToInteger}}
+     - C{L{convertToFloat}}
+     - C{L{convertToDate}}
+     - C{L{convertToDatetime}}
+     - C{L{stripHTMLTags}}
+     - C{L{upcaseTokens}}
+     - C{L{downcaseTokens}}
+
+    Example::
+        pyparsing_common.number.runTests('''
+            # any int or real number, returned as the appropriate type
+            100
+            -100
+            +100
+            3.14159
+            6.02e23
+            1e-12
+            ''')
+
+        pyparsing_common.fnumber.runTests('''
+            # any int or real number, returned as float
+            100
+            -100
+            +100
+            3.14159
+            6.02e23
+            1e-12
+            ''')
+
+        pyparsing_common.hex_integer.runTests('''
+            # hex numbers
+            100
+            FF
+            ''')
+
+        pyparsing_common.fraction.runTests('''
+            # fractions
+            1/2
+            -3/4
+            ''')
+
+        pyparsing_common.mixed_integer.runTests('''
+            # mixed fractions
+            1
+            1/2
+            -3/4
+            1-3/4
+            ''')
+
+        import uuid
+        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
+        pyparsing_common.uuid.runTests('''
+            # uuid
+            12345678-1234-5678-1234-567812345678
+            ''')
+    prints::
+        # any int or real number, returned as the appropriate type
+        100
+        [100]
+
+        -100
+        [-100]
+
+        +100
+        [100]
+
+        3.14159
+        [3.14159]
+
+        6.02e23
+        [6.02e+23]
+
+        1e-12
+        [1e-12]
+
+        # any int or real number, returned as float
+        100
+        [100.0]
+
+        -100
+        [-100.0]
+
+        +100
+        [100.0]
+
+        3.14159
+        [3.14159]
+
+        6.02e23
+        [6.02e+23]
+
+        1e-12
+        [1e-12]
+
+        # hex numbers
+        100
+        [256]
+
+        FF
+        [255]
+
+        # fractions
+        1/2
+        [0.5]
+
+        -3/4
+        [-0.75]
+
+        # mixed fractions
+        1
+        [1]
+
+        1/2
+        [0.5]
+
+        -3/4
+        [-0.75]
+
+        1-3/4
+        [1.75]
+
+        # uuid
+        12345678-1234-5678-1234-567812345678
+        [UUID('12345678-1234-5678-1234-567812345678')]
+    """
+
+    convertToInteger = tokenMap(int)
+    """
+    Parse action for converting parsed integers to Python int
+    """
+
+    convertToFloat = tokenMap(float)
+    """
+    Parse action for converting parsed numbers to Python float
+    """
+
+    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
+    """expression that parses an unsigned integer, returns an int"""
+
+    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
+    """expression that parses a hexadecimal integer, returns an int"""
+
+    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
+    """expression that parses an integer with optional leading sign, returns an int"""
+
+    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
+    """fractional expression of an integer divided by an integer, returns a float"""
+    fraction.addParseAction(lambda t: t[0]/t[-1])
+
+    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
+    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
+    mixed_integer.addParseAction(sum)
+
+    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
+    """expression that parses a floating point number and returns a float"""
+
+    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
+    """expression that parses a floating point number with optional scientific notation and returns a float"""
+
+    # streamlining this expression makes the docs nicer-looking
+    number = (sci_real | real | signed_integer).streamline()
+    """any numeric expression, returns the corresponding Python type"""
+
+    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
+    """any int or real number, returned as float"""
+    
+    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
+    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
+    
+    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
+    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"
+
+    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
+    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
+    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
+    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
+    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
+    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
+    "IPv6 address (long, short, or mixed form)"
+    
+    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
+    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
+
+    @staticmethod
+    def convertToDate(fmt="%Y-%m-%d"):
+        """
+        Helper to create a parse action for converting parsed date string to Python datetime.date
+
+        Params -
+         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
+
+        Example::
+            date_expr = pyparsing_common.iso8601_date.copy()
+            date_expr.setParseAction(pyparsing_common.convertToDate())
+            print(date_expr.parseString("1999-12-31"))
+        prints::
+            [datetime.date(1999, 12, 31)]
+        """
+        def cvt_fn(s,l,t):
+            try:
+                return datetime.strptime(t[0], fmt).date()
+            except ValueError as ve:
+                raise ParseException(s, l, str(ve))
+        return cvt_fn
+
+    @staticmethod
+    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
+        """
+        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime
+
+        Params -
+         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
+
+        Example::
+            dt_expr = pyparsing_common.iso8601_datetime.copy()
+            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
+            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
+        prints::
+            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
+        """
+        def cvt_fn(s,l,t):
+            try:
+                return datetime.strptime(t[0], fmt)
+            except ValueError as ve:
+                raise ParseException(s, l, str(ve))
+        return cvt_fn
+
+    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
+    "ISO8601 date (C{yyyy-mm-dd})"
+
+    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
+    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
+
+    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
+    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
+
+    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
+    @staticmethod
+    def stripHTMLTags(s, l, tokens):
+        """
+        Parse action to remove HTML tags from web page HTML source
+
+        Example::
+            # strip HTML links from normal text 
+            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
+            td,td_end = makeHTMLTags("TD")
+            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
+            
+            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
+        """
+        return pyparsing_common._html_stripper.transformString(tokens[0])
+
+    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') 
+                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
+    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
+    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
+
+    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
+    """Parse action to convert tokens to upper case."""
+
+    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
+    """Parse action to convert tokens to lower case."""
+
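+# Illustrative usage sketch: upcaseTokens/downcaseTokens are ready-made parse
+# actions for case normalization.  The helper name below is hypothetical and
+# is never called.
+def _case_parse_action_example():
+    name = pyparsing_common.identifier.copy().setParseAction(pyparsing_common.upcaseTokens)
+    return name.parseString("sparkContext")[0]   # -> 'SPARKCONTEXT'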
+
+if __name__ == "__main__":
+
+    selectToken    = CaselessLiteral("select")
+    fromToken      = CaselessLiteral("from")
+
+    ident          = Word(alphas, alphanums + "_$")
+
+    columnName     = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
+    columnNameList = Group(delimitedList(columnName)).setName("columns")
+    columnSpec     = ('*' | columnNameList)
+
+    tableName      = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
+    tableNameList  = Group(delimitedList(tableName)).setName("tables")
+    
+    simpleSQL      = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
+
+    # demo runTests method, including embedded comments in test string
+    simpleSQL.runTests("""
+        # '*' as column list and dotted table name
+        select * from SYS.XYZZY
+
+        # caseless match on "SELECT", and casts back to "select"
+        SELECT * from XYZZY, ABC
+
+        # list of column names, and mixed case SELECT keyword
+        Select AA,BB,CC from Sys.dual
+
+        # multiple tables
+        Select A, B, C from Sys.dual, Table2
+
+        # invalid SELECT keyword - should fail
+        Xelect A, B, C from Sys.dual
+
+        # incomplete command - should fail
+        Select
+
+        # invalid column name - should fail
+        Select ^^^ frox Sys.dual
+
+        """)
+
+    pyparsing_common.number.runTests("""
+        100
+        -100
+        +100
+        3.14159
+        6.02e23
+        1e-12
+        """)
+
+    # any int or real number, returned as float
+    pyparsing_common.fnumber.runTests("""
+        100
+        -100
+        +100
+        3.14159
+        6.02e23
+        1e-12
+        """)
+
+    pyparsing_common.hex_integer.runTests("""
+        100
+        FF
+        """)
+
+    import uuid
+    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
+    pyparsing_common.uuid.runTests("""
+        12345678-1234-5678-1234-567812345678
+        """)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/six.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/six.py
new file mode 100644
index 0000000000000000000000000000000000000000..190c0239cd7d7af82a6e0cbc8d68053fa2e3dfaf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/_vendor/six.py
@@ -0,0 +1,868 @@
+"""Utilities for writing code that runs on Python 2 and 3"""
+
+# Copyright (c) 2010-2015 Benjamin Peterson
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+
+import functools
+import itertools
+import operator
+import sys
+import types
+
+__author__ = "Benjamin Peterson <benjamin@python.org>"
+__version__ = "1.10.0"
+
+
+# Useful for very coarse version differentiation.
+PY2 = sys.version_info[0] == 2
+PY3 = sys.version_info[0] == 3
+PY34 = sys.version_info[0:2] >= (3, 4)
+
+if PY3:
+    string_types = str,
+    integer_types = int,
+    class_types = type,
+    text_type = str
+    binary_type = bytes
+
+    MAXSIZE = sys.maxsize
+else:
+    string_types = basestring,
+    integer_types = (int, long)
+    class_types = (type, types.ClassType)
+    text_type = unicode
+    binary_type = str
+
+    if sys.platform.startswith("java"):
+        # Jython always uses 32 bits.
+        MAXSIZE = int((1 << 31) - 1)
+    else:
+        # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
+        class X(object):
+
+            def __len__(self):
+                return 1 << 31
+        try:
+            len(X())
+        except OverflowError:
+            # 32-bit
+            MAXSIZE = int((1 << 31) - 1)
+        else:
+            # 64-bit
+            MAXSIZE = int((1 << 63) - 1)
+        del X
+
+
+def _add_doc(func, doc):
+    """Add documentation to a function."""
+    func.__doc__ = doc
+
+
+def _import_module(name):
+    """Import module, returning the module after the last dot."""
+    __import__(name)
+    return sys.modules[name]
+
+
+class _LazyDescr(object):
+
+    def __init__(self, name):
+        self.name = name
+
+    def __get__(self, obj, tp):
+        result = self._resolve()
+        setattr(obj, self.name, result)  # Invokes __set__.
+        try:
+            # This is a bit ugly, but it avoids running this again by
+            # removing this descriptor.
+            delattr(obj.__class__, self.name)
+        except AttributeError:
+            pass
+        return result
+
+
+class MovedModule(_LazyDescr):
+
+    def __init__(self, name, old, new=None):
+        super(MovedModule, self).__init__(name)
+        if PY3:
+            if new is None:
+                new = name
+            self.mod = new
+        else:
+            self.mod = old
+
+    def _resolve(self):
+        return _import_module(self.mod)
+
+    def __getattr__(self, attr):
+        _module = self._resolve()
+        value = getattr(_module, attr)
+        setattr(self, attr, value)
+        return value
+
+
+class _LazyModule(types.ModuleType):
+
+    def __init__(self, name):
+        super(_LazyModule, self).__init__(name)
+        self.__doc__ = self.__class__.__doc__
+
+    def __dir__(self):
+        attrs = ["__doc__", "__name__"]
+        attrs += [attr.name for attr in self._moved_attributes]
+        return attrs
+
+    # Subclasses should override this
+    _moved_attributes = []
+
+
+class MovedAttribute(_LazyDescr):
+
+    def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
+        super(MovedAttribute, self).__init__(name)
+        if PY3:
+            if new_mod is None:
+                new_mod = name
+            self.mod = new_mod
+            if new_attr is None:
+                if old_attr is None:
+                    new_attr = name
+                else:
+                    new_attr = old_attr
+            self.attr = new_attr
+        else:
+            self.mod = old_mod
+            if old_attr is None:
+                old_attr = name
+            self.attr = old_attr
+
+    def _resolve(self):
+        module = _import_module(self.mod)
+        return getattr(module, self.attr)
+
+
+class _SixMetaPathImporter(object):
+
+    """
+    A meta path importer to import six.moves and its submodules.
+
+    This class implements a PEP302 finder and loader. It should be compatible
+    with Python 2.5 and all existing versions of Python 3.
+    """
+
+    def __init__(self, six_module_name):
+        self.name = six_module_name
+        self.known_modules = {}
+
+    def _add_module(self, mod, *fullnames):
+        for fullname in fullnames:
+            self.known_modules[self.name + "." + fullname] = mod
+
+    def _get_module(self, fullname):
+        return self.known_modules[self.name + "." + fullname]
+
+    def find_module(self, fullname, path=None):
+        if fullname in self.known_modules:
+            return self
+        return None
+
+    def __get_module(self, fullname):
+        try:
+            return self.known_modules[fullname]
+        except KeyError:
+            raise ImportError("This loader does not know module " + fullname)
+
+    def load_module(self, fullname):
+        try:
+            # in case of a reload
+            return sys.modules[fullname]
+        except KeyError:
+            pass
+        mod = self.__get_module(fullname)
+        if isinstance(mod, MovedModule):
+            mod = mod._resolve()
+        else:
+            mod.__loader__ = self
+        sys.modules[fullname] = mod
+        return mod
+
+    def is_package(self, fullname):
+        """
+        Return true, if the named module is a package.
+
+        We need this method to get correct spec objects with
+        Python 3.4 (see PEP451)
+        """
+        return hasattr(self.__get_module(fullname), "__path__")
+
+    def get_code(self, fullname):
+        """Return None
+
+        Required, if is_package is implemented"""
+        self.__get_module(fullname)  # eventually raises ImportError
+        return None
+    get_source = get_code  # same as get_code
+
+_importer = _SixMetaPathImporter(__name__)
+
+
+class _MovedItems(_LazyModule):
+
+    """Lazy loading of moved objects"""
+    __path__ = []  # mark as package
+
+
+_moved_attributes = [
+    MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
+    MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
+    MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
+    MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
+    MovedAttribute("intern", "__builtin__", "sys"),
+    MovedAttribute("map", "itertools", "builtins", "imap", "map"),
+    MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"),
+    MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"),
+    MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
+    MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"),
+    MovedAttribute("reduce", "__builtin__", "functools"),
+    MovedAttribute("shlex_quote", "pipes", "shlex", "quote"),
+    MovedAttribute("StringIO", "StringIO", "io"),
+    MovedAttribute("UserDict", "UserDict", "collections"),
+    MovedAttribute("UserList", "UserList", "collections"),
+    MovedAttribute("UserString", "UserString", "collections"),
+    MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
+    MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
+    MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
+    MovedModule("builtins", "__builtin__"),
+    MovedModule("configparser", "ConfigParser"),
+    MovedModule("copyreg", "copy_reg"),
+    MovedModule("dbm_gnu", "gdbm", "dbm.gnu"),
+    MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"),
+    MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
+    MovedModule("http_cookies", "Cookie", "http.cookies"),
+    MovedModule("html_entities", "htmlentitydefs", "html.entities"),
+    MovedModule("html_parser", "HTMLParser", "html.parser"),
+    MovedModule("http_client", "httplib", "http.client"),
+    MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
+    MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"),
+    MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
+    MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
+    MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
+    MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
+    MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
+    MovedModule("cPickle", "cPickle", "pickle"),
+    MovedModule("queue", "Queue"),
+    MovedModule("reprlib", "repr"),
+    MovedModule("socketserver", "SocketServer"),
+    MovedModule("_thread", "thread", "_thread"),
+    MovedModule("tkinter", "Tkinter"),
+    MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
+    MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
+    MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
+    MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
+    MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
+    MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"),
+    MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
+    MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
+    MovedModule("tkinter_colorchooser", "tkColorChooser",
+                "tkinter.colorchooser"),
+    MovedModule("tkinter_commondialog", "tkCommonDialog",
+                "tkinter.commondialog"),
+    MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
+    MovedModule("tkinter_font", "tkFont", "tkinter.font"),
+    MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
+    MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
+                "tkinter.simpledialog"),
+    MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"),
+    MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"),
+    MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
+    MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
+    MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"),
+    MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"),
+]
+# Add windows specific modules.
+if sys.platform == "win32":
+    _moved_attributes += [
+        MovedModule("winreg", "_winreg"),
+    ]
+
+for attr in _moved_attributes:
+    setattr(_MovedItems, attr.name, attr)
+    if isinstance(attr, MovedModule):
+        _importer._add_module(attr, "moves." + attr.name)
+del attr
+
+_MovedItems._moved_attributes = _moved_attributes
+
+moves = _MovedItems(__name__ + ".moves")
+_importer._add_module(moves, "moves")
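+
+
+# Illustrative usage sketch: names registered above resolve lazily through the
+# ``moves`` pseudo-module.  The helper name below is hypothetical and is never
+# called at import time.
+def _six_moves_example():
+    # moves.range is xrange on Python 2 and range on Python 3
+    return sum(moves.range(5))   # -> 10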
+
+
+class Module_six_moves_urllib_parse(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_parse"""
+
+
+_urllib_parse_moved_attributes = [
+    MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
+    MovedAttribute("SplitResult", "urlparse", "urllib.parse"),
+    MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
+    MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
+    MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
+    MovedAttribute("urljoin", "urlparse", "urllib.parse"),
+    MovedAttribute("urlparse", "urlparse", "urllib.parse"),
+    MovedAttribute("urlsplit", "urlparse", "urllib.parse"),
+    MovedAttribute("urlunparse", "urlparse", "urllib.parse"),
+    MovedAttribute("urlunsplit", "urlparse", "urllib.parse"),
+    MovedAttribute("quote", "urllib", "urllib.parse"),
+    MovedAttribute("quote_plus", "urllib", "urllib.parse"),
+    MovedAttribute("unquote", "urllib", "urllib.parse"),
+    MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
+    MovedAttribute("urlencode", "urllib", "urllib.parse"),
+    MovedAttribute("splitquery", "urllib", "urllib.parse"),
+    MovedAttribute("splittag", "urllib", "urllib.parse"),
+    MovedAttribute("splituser", "urllib", "urllib.parse"),
+    MovedAttribute("uses_fragment", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_netloc", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_params", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_query", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_relative", "urlparse", "urllib.parse"),
+]
+for attr in _urllib_parse_moved_attributes:
+    setattr(Module_six_moves_urllib_parse, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"),
+                      "moves.urllib_parse", "moves.urllib.parse")
+
+
+class Module_six_moves_urllib_error(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_error"""
+
+
+_urllib_error_moved_attributes = [
+    MovedAttribute("URLError", "urllib2", "urllib.error"),
+    MovedAttribute("HTTPError", "urllib2", "urllib.error"),
+    MovedAttribute("ContentTooShortError", "urllib", "urllib.error"),
+]
+for attr in _urllib_error_moved_attributes:
+    setattr(Module_six_moves_urllib_error, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"),
+                      "moves.urllib_error", "moves.urllib.error")
+
+
+class Module_six_moves_urllib_request(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_request"""
+
+
+_urllib_request_moved_attributes = [
+    MovedAttribute("urlopen", "urllib2", "urllib.request"),
+    MovedAttribute("install_opener", "urllib2", "urllib.request"),
+    MovedAttribute("build_opener", "urllib2", "urllib.request"),
+    MovedAttribute("pathname2url", "urllib", "urllib.request"),
+    MovedAttribute("url2pathname", "urllib", "urllib.request"),
+    MovedAttribute("getproxies", "urllib", "urllib.request"),
+    MovedAttribute("Request", "urllib2", "urllib.request"),
+    MovedAttribute("OpenerDirector", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"),
+    MovedAttribute("ProxyHandler", "urllib2", "urllib.request"),
+    MovedAttribute("BaseHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"),
+    MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"),
+    MovedAttribute("FileHandler", "urllib2", "urllib.request"),
+    MovedAttribute("FTPHandler", "urllib2", "urllib.request"),
+    MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"),
+    MovedAttribute("UnknownHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"),
+    MovedAttribute("urlretrieve", "urllib", "urllib.request"),
+    MovedAttribute("urlcleanup", "urllib", "urllib.request"),
+    MovedAttribute("URLopener", "urllib", "urllib.request"),
+    MovedAttribute("FancyURLopener", "urllib", "urllib.request"),
+    MovedAttribute("proxy_bypass", "urllib", "urllib.request"),
+]
+for attr in _urllib_request_moved_attributes:
+    setattr(Module_six_moves_urllib_request, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"),
+                      "moves.urllib_request", "moves.urllib.request")
+
+
+class Module_six_moves_urllib_response(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_response"""
+
+
+_urllib_response_moved_attributes = [
+    MovedAttribute("addbase", "urllib", "urllib.response"),
+    MovedAttribute("addclosehook", "urllib", "urllib.response"),
+    MovedAttribute("addinfo", "urllib", "urllib.response"),
+    MovedAttribute("addinfourl", "urllib", "urllib.response"),
+]
+for attr in _urllib_response_moved_attributes:
+    setattr(Module_six_moves_urllib_response, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"),
+                      "moves.urllib_response", "moves.urllib.response")
+
+
+class Module_six_moves_urllib_robotparser(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_robotparser"""
+
+
+_urllib_robotparser_moved_attributes = [
+    MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"),
+]
+for attr in _urllib_robotparser_moved_attributes:
+    setattr(Module_six_moves_urllib_robotparser, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"),
+                      "moves.urllib_robotparser", "moves.urllib.robotparser")
+
+
+class Module_six_moves_urllib(types.ModuleType):
+
+    """Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
+    __path__ = []  # mark as package
+    parse = _importer._get_module("moves.urllib_parse")
+    error = _importer._get_module("moves.urllib_error")
+    request = _importer._get_module("moves.urllib_request")
+    response = _importer._get_module("moves.urllib_response")
+    robotparser = _importer._get_module("moves.urllib_robotparser")
+
+    def __dir__(self):
+        return ['parse', 'error', 'request', 'response', 'robotparser']
+
+_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"),
+                      "moves.urllib")
+
+
+def add_move(move):
+    """Add an item to six.moves."""
+    setattr(_MovedItems, move.name, move)
+
+
+def remove_move(name):
+    """Remove item from six.moves."""
+    try:
+        delattr(_MovedItems, name)
+    except AttributeError:
+        try:
+            del moves.__dict__[name]
+        except KeyError:
+            raise AttributeError("no such move, %r" % (name,))
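+
+
+# Illustrative usage sketch: extra moves can be registered and removed at
+# runtime.  The names below are hypothetical and the helper is never called.
+def _add_move_example():
+    add_move(MovedAttribute("example_reduce", "__builtin__", "functools",
+                            "reduce", "reduce"))
+    try:
+        return moves.example_reduce(operator.add, [1, 2, 3])   # -> 6
+    finally:
+        remove_move("example_reduce")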
+
+
+if PY3:
+    _meth_func = "__func__"
+    _meth_self = "__self__"
+
+    _func_closure = "__closure__"
+    _func_code = "__code__"
+    _func_defaults = "__defaults__"
+    _func_globals = "__globals__"
+else:
+    _meth_func = "im_func"
+    _meth_self = "im_self"
+
+    _func_closure = "func_closure"
+    _func_code = "func_code"
+    _func_defaults = "func_defaults"
+    _func_globals = "func_globals"
+
+
+try:
+    advance_iterator = next
+except NameError:
+    def advance_iterator(it):
+        return it.next()
+next = advance_iterator
+
+
+try:
+    callable = callable
+except NameError:
+    def callable(obj):
+        return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
+
+
+if PY3:
+    def get_unbound_function(unbound):
+        return unbound
+
+    create_bound_method = types.MethodType
+
+    def create_unbound_method(func, cls):
+        return func
+
+    Iterator = object
+else:
+    def get_unbound_function(unbound):
+        return unbound.im_func
+
+    def create_bound_method(func, obj):
+        return types.MethodType(func, obj, obj.__class__)
+
+    def create_unbound_method(func, cls):
+        return types.MethodType(func, None, cls)
+
+    class Iterator(object):
+
+        def next(self):
+            return type(self).__next__(self)
+
+    callable = callable
+_add_doc(get_unbound_function,
+         """Get the function out of a possibly unbound function""")
+
+
+get_method_function = operator.attrgetter(_meth_func)
+get_method_self = operator.attrgetter(_meth_self)
+get_function_closure = operator.attrgetter(_func_closure)
+get_function_code = operator.attrgetter(_func_code)
+get_function_defaults = operator.attrgetter(_func_defaults)
+get_function_globals = operator.attrgetter(_func_globals)
+
+
+if PY3:
+    def iterkeys(d, **kw):
+        return iter(d.keys(**kw))
+
+    def itervalues(d, **kw):
+        return iter(d.values(**kw))
+
+    def iteritems(d, **kw):
+        return iter(d.items(**kw))
+
+    def iterlists(d, **kw):
+        return iter(d.lists(**kw))
+
+    viewkeys = operator.methodcaller("keys")
+
+    viewvalues = operator.methodcaller("values")
+
+    viewitems = operator.methodcaller("items")
+else:
+    def iterkeys(d, **kw):
+        return d.iterkeys(**kw)
+
+    def itervalues(d, **kw):
+        return d.itervalues(**kw)
+
+    def iteritems(d, **kw):
+        return d.iteritems(**kw)
+
+    def iterlists(d, **kw):
+        return d.iterlists(**kw)
+
+    viewkeys = operator.methodcaller("viewkeys")
+
+    viewvalues = operator.methodcaller("viewvalues")
+
+    viewitems = operator.methodcaller("viewitems")
+
+_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.")
+_add_doc(itervalues, "Return an iterator over the values of a dictionary.")
+_add_doc(iteritems,
+         "Return an iterator over the (key, value) pairs of a dictionary.")
+_add_doc(iterlists,
+         "Return an iterator over the (key, [values]) pairs of a dictionary.")
+
+
+if PY3:
+    def b(s):
+        return s.encode("latin-1")
+
+    def u(s):
+        return s
+    unichr = chr
+    import struct
+    int2byte = struct.Struct(">B").pack
+    del struct
+    byte2int = operator.itemgetter(0)
+    indexbytes = operator.getitem
+    iterbytes = iter
+    import io
+    StringIO = io.StringIO
+    BytesIO = io.BytesIO
+    _assertCountEqual = "assertCountEqual"
+    if sys.version_info[1] <= 1:
+        _assertRaisesRegex = "assertRaisesRegexp"
+        _assertRegex = "assertRegexpMatches"
+    else:
+        _assertRaisesRegex = "assertRaisesRegex"
+        _assertRegex = "assertRegex"
+else:
+    def b(s):
+        return s
+    # Workaround for standalone backslash
+
+    def u(s):
+        return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape")
+    unichr = unichr
+    int2byte = chr
+
+    def byte2int(bs):
+        return ord(bs[0])
+
+    def indexbytes(buf, i):
+        return ord(buf[i])
+    iterbytes = functools.partial(itertools.imap, ord)
+    import StringIO
+    StringIO = BytesIO = StringIO.StringIO
+    _assertCountEqual = "assertItemsEqual"
+    _assertRaisesRegex = "assertRaisesRegexp"
+    _assertRegex = "assertRegexpMatches"
+_add_doc(b, """Byte literal""")
+_add_doc(u, """Text literal""")
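+
+
+# Illustrative usage sketch: b() and u() give byte and text literals with the
+# same spelling on both Python lines.  The helper name below is hypothetical
+# and is never called.
+def _b_u_example():
+    data = b("raw bytes")   # bytes on Python 3, str on Python 2
+    text = u("some text")   # str on Python 3, unicode on Python 2
+    return isinstance(data, binary_type) and isinstance(text, text_type)   # -> True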
+
+
+def assertCountEqual(self, *args, **kwargs):
+    return getattr(self, _assertCountEqual)(*args, **kwargs)
+
+
+def assertRaisesRegex(self, *args, **kwargs):
+    return getattr(self, _assertRaisesRegex)(*args, **kwargs)
+
+
+def assertRegex(self, *args, **kwargs):
+    return getattr(self, _assertRegex)(*args, **kwargs)
+
+
+if PY3:
+    exec_ = getattr(moves.builtins, "exec")
+
+    def reraise(tp, value, tb=None):
+        if value is None:
+            value = tp()
+        if value.__traceback__ is not tb:
+            raise value.with_traceback(tb)
+        raise value
+
+else:
+    def exec_(_code_, _globs_=None, _locs_=None):
+        """Execute code in a namespace."""
+        if _globs_ is None:
+            frame = sys._getframe(1)
+            _globs_ = frame.f_globals
+            if _locs_ is None:
+                _locs_ = frame.f_locals
+            del frame
+        elif _locs_ is None:
+            _locs_ = _globs_
+        exec("""exec _code_ in _globs_, _locs_""")
+
+    exec_("""def reraise(tp, value, tb=None):
+    raise tp, value, tb
+""")
+
+
+if sys.version_info[:2] == (3, 2):
+    exec_("""def raise_from(value, from_value):
+    if from_value is None:
+        raise value
+    raise value from from_value
+""")
+elif sys.version_info[:2] > (3, 2):
+    exec_("""def raise_from(value, from_value):
+    raise value from from_value
+""")
+else:
+    def raise_from(value, from_value):
+        raise value
+
+
+print_ = getattr(moves.builtins, "print", None)
+if print_ is None:
+    def print_(*args, **kwargs):
+        """The new-style print function for Python 2.4 and 2.5."""
+        fp = kwargs.pop("file", sys.stdout)
+        if fp is None:
+            return
+
+        def write(data):
+            if not isinstance(data, basestring):
+                data = str(data)
+            # If the file has an encoding, encode unicode with it.
+            if (isinstance(fp, file) and
+                    isinstance(data, unicode) and
+                    fp.encoding is not None):
+                errors = getattr(fp, "errors", None)
+                if errors is None:
+                    errors = "strict"
+                data = data.encode(fp.encoding, errors)
+            fp.write(data)
+        want_unicode = False
+        sep = kwargs.pop("sep", None)
+        if sep is not None:
+            if isinstance(sep, unicode):
+                want_unicode = True
+            elif not isinstance(sep, str):
+                raise TypeError("sep must be None or a string")
+        end = kwargs.pop("end", None)
+        if end is not None:
+            if isinstance(end, unicode):
+                want_unicode = True
+            elif not isinstance(end, str):
+                raise TypeError("end must be None or a string")
+        if kwargs:
+            raise TypeError("invalid keyword arguments to print()")
+        if not want_unicode:
+            for arg in args:
+                if isinstance(arg, unicode):
+                    want_unicode = True
+                    break
+        if want_unicode:
+            newline = unicode("\n")
+            space = unicode(" ")
+        else:
+            newline = "\n"
+            space = " "
+        if sep is None:
+            sep = space
+        if end is None:
+            end = newline
+        for i, arg in enumerate(args):
+            if i:
+                write(sep)
+            write(arg)
+        write(end)
+if sys.version_info[:2] < (3, 3):
+    _print = print_
+
+    def print_(*args, **kwargs):
+        fp = kwargs.get("file", sys.stdout)
+        flush = kwargs.pop("flush", False)
+        _print(*args, **kwargs)
+        if flush and fp is not None:
+            fp.flush()
+
+_add_doc(reraise, """Reraise an exception.""")
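+
+
+# Illustrative usage sketch: reraise() re-raises the active exception while
+# preserving its traceback on both Python 2 and 3.  The helper name below is
+# hypothetical and is never called.
+def _reraise_example():
+    try:
+        raise ValueError("boom")
+    except ValueError:
+        reraise(*sys.exc_info())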
+
+if sys.version_info[0:2] < (3, 4):
+    def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
+              updated=functools.WRAPPER_UPDATES):
+        def wrapper(f):
+            f = functools.wraps(wrapped, assigned, updated)(f)
+            f.__wrapped__ = wrapped
+            return f
+        return wrapper
+else:
+    wraps = functools.wraps
+
+
+def with_metaclass(meta, *bases):
+    """Create a base class with a metaclass."""
+    # This requires a bit of explanation: the basic idea is to make a dummy
+    # metaclass for one level of class instantiation that replaces itself with
+    # the actual metaclass.
+    class metaclass(meta):
+
+        def __new__(cls, name, this_bases, d):
+            return meta(name, bases, d)
+    return type.__new__(metaclass, 'temporary_class', (), {})
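+
+
+# Illustrative usage sketch for with_metaclass(); the class names below are
+# hypothetical and the helper is never called.
+def _with_metaclass_example():
+    class Meta(type):
+        pass
+
+    class Base(object):
+        pass
+
+    class Klass(with_metaclass(Meta, Base)):
+        pass
+
+    return type(Klass) is Meta   # -> True on Python 2 and 3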
+
+
+def add_metaclass(metaclass):
+    """Class decorator for creating a class with a metaclass."""
+    def wrapper(cls):
+        orig_vars = cls.__dict__.copy()
+        slots = orig_vars.get('__slots__')
+        if slots is not None:
+            if isinstance(slots, str):
+                slots = [slots]
+            for slots_var in slots:
+                orig_vars.pop(slots_var)
+        orig_vars.pop('__dict__', None)
+        orig_vars.pop('__weakref__', None)
+        return metaclass(cls.__name__, cls.__bases__, orig_vars)
+    return wrapper
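+
+
+# Illustrative usage sketch for the add_metaclass() decorator; the class names
+# below are hypothetical and the helper is never called.
+def _add_metaclass_example():
+    class Meta(type):
+        pass
+
+    @add_metaclass(Meta)
+    class Klass(object):
+        pass
+
+    return type(Klass) is Meta   # -> True on Python 2 and 3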
+
+
+def python_2_unicode_compatible(klass):
+    """
+    A decorator that defines __unicode__ and __str__ methods under Python 2.
+    Under Python 3 it does nothing.
+
+    To support Python 2 and 3 with a single code base, define a __str__ method
+    returning text and apply this decorator to the class.
+    """
+    if PY2:
+        if '__str__' not in klass.__dict__:
+            raise ValueError("@python_2_unicode_compatible cannot be applied "
+                             "to %s because it doesn't define __str__()." %
+                             klass.__name__)
+        klass.__unicode__ = klass.__str__
+        klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
+    return klass
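+
+
+# Illustrative usage sketch: define __str__ returning text and decorate the
+# class; Python 2 then gets a matching __unicode__ and a UTF-8-encoding
+# __str__.  The class name below is hypothetical and the helper is never called.
+def _python_2_unicode_compatible_example():
+    @python_2_unicode_compatible
+    class Greeting(object):
+        def __str__(self):
+            return u("hello")   # text on both Python 2 and 3
+
+    return str(Greeting())   # -> 'hello'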
+
+
+# Complete the moves implementation.
+# This code is at the end of this module to speed up module loading.
+# Turn this module into a package.
+__path__ = []  # required for PEP 302 and PEP 451
+__package__ = __name__  # see PEP 366 @ReservedAssignment
+if globals().get("__spec__") is not None:
+    __spec__.submodule_search_locations = []  # PEP 451 @UndefinedVariable
+# Remove other six meta path importers, since they cause problems. This can
+# happen if six is removed from sys.modules and then reloaded. (Setuptools does
+# this for some reason.)
+if sys.meta_path:
+    for i, importer in enumerate(sys.meta_path):
+        # Here's some real nastiness: Another "instance" of the six module might
+        # be floating around. Therefore, we can't use isinstance() to check for
+        # the six meta path importer, since the other six instance will have
+        # inserted an importer with different class.
+        if (type(importer).__name__ == "_SixMetaPathImporter" and
+                importer.name == __name__):
+            del sys.meta_path[i]
+            break
+    del i, importer
+# Finally, add the importer to the meta path import hook.
+sys.meta_path.append(_importer)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/extern/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/extern/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4156fec2021c9057665df4464a58a1faa836723
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/extern/__init__.py
@@ -0,0 +1,73 @@
+import sys
+
+
+class VendorImporter:
+    """
+    A PEP 302 meta path importer for finding optionally-vendored
+    or otherwise naturally-installed packages from root_name.
+    """
+
+    def __init__(self, root_name, vendored_names=(), vendor_pkg=None):
+        self.root_name = root_name
+        self.vendored_names = set(vendored_names)
+        self.vendor_pkg = vendor_pkg or root_name.replace('extern', '_vendor')
+
+    @property
+    def search_path(self):
+        """
+        Search first the vendor package, then fall back to the naturally-installed package.
+        """
+        yield self.vendor_pkg + '.'
+        yield ''
+
+    def find_module(self, fullname, path=None):
+        """
+        Return self when fullname starts with root_name and the
+        target module is one vendored through this importer.
+        """
+        root, base, target = fullname.partition(self.root_name + '.')
+        if root:
+            return
+        if not any(map(target.startswith, self.vendored_names)):
+            return
+        return self
+
+    def load_module(self, fullname):
+        """
+        Iterate over the search path to locate and load fullname.
+        """
+        root, base, target = fullname.partition(self.root_name + '.')
+        for prefix in self.search_path:
+            try:
+                extant = prefix + target
+                __import__(extant)
+                mod = sys.modules[extant]
+                sys.modules[fullname] = mod
+                # mysterious hack:
+                # Remove the reference to the extant package/module
+                # on later Python versions to cause relative imports
+                # in the vendor package to resolve the same modules
+                # as those going through this importer.
+                if sys.version_info > (3, 3):
+                    del sys.modules[extant]
+                return mod
+            except ImportError:
+                pass
+        else:
+            raise ImportError(
+                "The '{target}' package is required; "
+                "normally this is bundled with this package so if you get "
+                "this warning, consult the packager of your "
+                "distribution.".format(**locals())
+            )
+
+    def install(self):
+        """
+        Install this importer into sys.meta_path if not already present.
+        """
+        if self not in sys.meta_path:
+            sys.meta_path.append(self)
+
+
+names = 'packaging', 'pyparsing', 'six', 'appdirs'
+VendorImporter(__name__, names).install()
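+# Illustrative sketch of the effect (not upstream code): with the importer
+# installed, an import such as
+#
+#     >>> from pkg_resources.extern import packaging
+#
+# is resolved against the search path above, so the vendored copy
+# (pkg_resources._vendor.packaging) is tried first and a normally installed
+# top-level `packaging` package is used only as a fallback.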
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/py31compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/py31compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..331a51bb0fb208f4049d3a404e55f5fef517b63c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/pkg_resources/py31compat.py
@@ -0,0 +1,22 @@
+import os
+import errno
+import sys
+
+
+def _makedirs_31(path, exist_ok=False):
+    try:
+        os.makedirs(path)
+    except OSError as exc:
+        if not exist_ok or exc.errno != errno.EEXIST:
+            raise
+
+
+# rely on compatibility behavior until mode considerations
+# and exist_ok considerations are disentangled.
+# See https://github.com/pypa/setuptools/pull/1083#issuecomment-315168663
+needs_makedirs = (
+    sys.version_info < (3, 2, 5) or
+    (3, 3) <= sys.version_info < (3, 3, 6) or
+    (3, 4) <= sys.version_info < (3, 4, 1)
+)
+makedirs = _makedirs_31 if needs_makedirs else os.makedirs
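+# Illustrative usage sketch (not upstream code; the path below is
+# hypothetical). Callers use the shim exactly like os.makedirs:
+#
+#     >>> makedirs('/tmp/example/nested/dir', exist_ok=True)
+#
+# On the interpreter versions selected by `needs_makedirs`, the fallback
+# suppresses only the EEXIST error when exist_ok is True and re-raises any
+# other OSError.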
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/DESCRIPTION.rst b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/DESCRIPTION.rst
new file mode 100644
index 0000000000000000000000000000000000000000..ba3a46bc659dbaec3378c2315dc90a0ecb6459ca
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/DESCRIPTION.rst
@@ -0,0 +1,36 @@
+.. image:: https://img.shields.io/pypi/v/setuptools.svg
+   :target: https://pypi.org/project/setuptools
+
+.. image:: https://readthedocs.org/projects/setuptools/badge/?version=latest
+    :target: https://setuptools.readthedocs.io
+
+.. image:: https://img.shields.io/travis/pypa/setuptools/master.svg?label=Linux%20build%20%40%20Travis%20CI
+   :target: https://travis-ci.org/pypa/setuptools
+
+.. image:: https://img.shields.io/appveyor/ci/jaraco/setuptools/master.svg?label=Windows%20build%20%40%20Appveyor
+   :target: https://ci.appveyor.com/project/jaraco/setuptools/branch/master
+
+.. image:: https://img.shields.io/pypi/pyversions/setuptools.svg
+
+See the `Installation Instructions
+<https://packaging.python.org/installing/>`_ in the Python Packaging
+User's Guide for instructions on installing, upgrading, and uninstalling
+Setuptools.
+
+The project is `maintained at GitHub <https://github.com/pypa/setuptools>`_.
+
+Questions and comments should be directed to the `distutils-sig
+mailing list <http://mail.python.org/pipermail/distutils-sig/>`_.
+Bug reports and especially tested patches may be
+submitted directly to the `bug tracker
+<https://github.com/pypa/setuptools/issues>`_.
+
+
+Code of Conduct
+---------------
+
+Everyone interacting in the setuptools project's codebases, issue trackers,
+chat rooms, and mailing lists is expected to follow the
+`PyPA Code of Conduct <https://www.pypa.io/en/latest/code-of-conduct/>`_.
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/INSTALLER b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/INSTALLER
new file mode 100644
index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/METADATA b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/METADATA
new file mode 100644
index 0000000000000000000000000000000000000000..d80cdcfaaa822a061a7adbb166b5dd6d972b8544
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/METADATA
@@ -0,0 +1,67 @@
+Metadata-Version: 2.0
+Name: setuptools
+Version: 39.0.1
+Summary: Easily download, build, install, upgrade, and uninstall Python packages
+Home-page: https://github.com/pypa/setuptools
+Author: Python Packaging Authority
+Author-email: distutils-sig@python.org
+License: UNKNOWN
+Project-URL: Documentation, https://setuptools.readthedocs.io/
+Keywords: CPAN PyPI distutils eggs package management
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: System :: Archiving :: Packaging
+Classifier: Topic :: System :: Systems Administration
+Classifier: Topic :: Utilities
+Requires-Python: >=2.7,!=3.0.*,!=3.1.*,!=3.2.*
+Description-Content-Type: text/x-rst; charset=UTF-8
+Provides-Extra: certs
+Provides-Extra: ssl
+
+.. image:: https://img.shields.io/pypi/v/setuptools.svg
+   :target: https://pypi.org/project/setuptools
+
+.. image:: https://readthedocs.org/projects/setuptools/badge/?version=latest
+    :target: https://setuptools.readthedocs.io
+
+.. image:: https://img.shields.io/travis/pypa/setuptools/master.svg?label=Linux%20build%20%40%20Travis%20CI
+   :target: https://travis-ci.org/pypa/setuptools
+
+.. image:: https://img.shields.io/appveyor/ci/jaraco/setuptools/master.svg?label=Windows%20build%20%40%20Appveyor
+   :target: https://ci.appveyor.com/project/jaraco/setuptools/branch/master
+
+.. image:: https://img.shields.io/pypi/pyversions/setuptools.svg
+
+See the `Installation Instructions
+<https://packaging.python.org/installing/>`_ in the Python Packaging
+User's Guide for instructions on installing, upgrading, and uninstalling
+Setuptools.
+
+The project is `maintained at GitHub <https://github.com/pypa/setuptools>`_.
+
+Questions and comments should be directed to the `distutils-sig
+mailing list <http://mail.python.org/pipermail/distutils-sig/>`_.
+Bug reports and especially tested patches may be
+submitted directly to the `bug tracker
+<https://github.com/pypa/setuptools/issues>`_.
+
+
+Code of Conduct
+---------------
+
+Everyone interacting in the setuptools project's codebases, issue trackers,
+chat rooms, and mailing lists is expected to follow the
+`PyPA Code of Conduct <https://www.pypa.io/en/latest/code-of-conduct/>`_.
+
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/RECORD b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/RECORD
new file mode 100644
index 0000000000000000000000000000000000000000..b6248353f70677b5c6a6e5ca5479cad29cc3f03e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/RECORD
@@ -0,0 +1,155 @@
+easy_install.py,sha256=MDC9vt5AxDsXX5qcKlBz2TnW6Tpuv_AobnfhCJ9X3PM,126
+setuptools/__init__.py,sha256=WWIdCbFJnZ9fZoaWDN_x1vDA_Rkm-Sc15iKvPtIYKFs,5700
+setuptools/archive_util.py,sha256=kw8Ib_lKjCcnPKNbS7h8HztRVK0d5RacU3r_KRdVnmM,6592
+setuptools/build_meta.py,sha256=FllaKTr1vSJyiUeRjVJEZmeEaRzhYueNlimtcwaJba8,5671
+setuptools/cli-32.exe,sha256=dfEuovMNnA2HLa3jRfMPVi5tk4R7alCbpTvuxtCyw0Y,65536
+setuptools/cli-64.exe,sha256=KLABu5pyrnokJCv6skjXZ6GsXeyYHGcqOUT3oHI3Xpo,74752
+setuptools/cli.exe,sha256=dfEuovMNnA2HLa3jRfMPVi5tk4R7alCbpTvuxtCyw0Y,65536
+setuptools/config.py,sha256=tVYBM3w1U_uBRRTOZydflxyZ_IrTJT5odlZz3cbuhSw,16381
+setuptools/dep_util.py,sha256=fgixvC1R7sH3r13ktyf7N0FALoqEXL1cBarmNpSEoWg,935
+setuptools/depends.py,sha256=hC8QIDcM3VDpRXvRVA6OfL9AaQfxvhxHcN_w6sAyNq8,5837
+setuptools/dist.py,sha256=1GpYnnbv9Bk6uRIvwYS5TRVTSMf89tGM4PAla0WkUek,42522
+setuptools/extension.py,sha256=uc6nHI-MxwmNCNPbUiBnybSyqhpJqjbhvOQ-emdvt_E,1729
+setuptools/glibc.py,sha256=X64VvGPL2AbURKwYRsWJOXXGAYOiF_v2qixeTkAULuU,3146
+setuptools/glob.py,sha256=Y-fpv8wdHZzv9DPCaGACpMSBWJ6amq_1e0R_i8_el4w,5207
+setuptools/gui-32.exe,sha256=XBr0bHMA6Hpz2s9s9Bzjl-PwXfa9nH4ie0rFn4V2kWA,65536
+setuptools/gui-64.exe,sha256=aYKMhX1IJLn4ULHgWX0sE0yREUt6B3TEHf_jOw6yNyE,75264
+setuptools/gui.exe,sha256=XBr0bHMA6Hpz2s9s9Bzjl-PwXfa9nH4ie0rFn4V2kWA,65536
+setuptools/launch.py,sha256=sd7ejwhBocCDx_wG9rIs0OaZ8HtmmFU8ZC6IR_S0Lvg,787
+setuptools/lib2to3_ex.py,sha256=t5e12hbR2pi9V4ezWDTB4JM-AISUnGOkmcnYHek3xjg,2013
+setuptools/monkey.py,sha256=zZGTH7p0xeXQKLmEwJTPIE4m5m7fJeHoAsxyv5M8e_E,5789
+setuptools/msvc.py,sha256=8EiV9ypb3EQJQssPcH1HZbdNsbRvqsFnJ7wPFEGwFIo,40877
+setuptools/namespaces.py,sha256=F0Nrbv8KCT2OrO7rwa03om4N4GZKAlnce-rr-cgDQa8,3199
+setuptools/package_index.py,sha256=NEsrNXnt_9gGP-nCCYzV-0gk15lXAGO7RghRxpfqLqE,40142
+setuptools/pep425tags.py,sha256=NuGMx1gGif7x6iYemh0LfgBr_FZF5GFORIbgmMdU8J4,10882
+setuptools/py27compat.py,sha256=3mwxRMDk5Q5O1rSXOERbQDXhFqwDJhhUitfMW_qpUCo,536
+setuptools/py31compat.py,sha256=XuU1HCsGE_3zGvBRIhYw2iB-IhCFK4-Pxw_jMiqdNVk,1192
+setuptools/py33compat.py,sha256=NKS84nl4LjLIoad6OQfgmygZn4mMvrok_b1N1tzebew,1182
+setuptools/py36compat.py,sha256=VUDWxmu5rt4QHlGTRtAFu6W5jvfL6WBjeDAzeoBy0OM,2891
+setuptools/sandbox.py,sha256=9UbwfEL5QY436oMI1LtFWohhoZ-UzwHvGyZjUH_qhkw,14276
+setuptools/script (dev).tmpl,sha256=f7MR17dTkzaqkCMSVseyOCMVrPVSMdmTQsaB8cZzfuI,201
+setuptools/script.tmpl,sha256=WGTt5piezO27c-Dbx6l5Q4T3Ff20A5z7872hv3aAhYY,138
+setuptools/site-patch.py,sha256=BVt6yIrDMXJoflA5J6DJIcsJUfW_XEeVhOzelTTFDP4,2307
+setuptools/ssl_support.py,sha256=YBDJsCZjSp62CWjxmSkke9kn9rhHHj25Cus6zhJRW3c,8492
+setuptools/unicode_utils.py,sha256=NOiZ_5hD72A6w-4wVj8awHFM3n51Kmw1Ic_vx15XFqw,996
+setuptools/version.py,sha256=og_cuZQb0QI6ukKZFfZWPlr1HgJBPPn2vO2m_bI9ZTE,144
+setuptools/wheel.py,sha256=yF9usxMvpwnymV-oOo5mfDiv3E8jrKkbDEItT7_kjBs,7230
+setuptools/windows_support.py,sha256=5GrfqSP2-dLGJoZTq2g6dCKkyQxxa2n5IQiXlJCoYEE,714
+setuptools/_vendor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+setuptools/_vendor/pyparsing.py,sha256=PifeLY3-WhIcBVzLtv0U4T_pwDtPruBhBCkg5vLqa28,229867
+setuptools/_vendor/six.py,sha256=A6hdJZVjI3t_geebZ9BzUvwRrIXo0lfwzQlM2LcKyas,30098
+setuptools/_vendor/packaging/__about__.py,sha256=zkcCPTN_6TcLW0Nrlg0176-R1QQ_WVPTm8sz1R4-HjM,720
+setuptools/_vendor/packaging/__init__.py,sha256=_vNac5TrzwsrzbOFIbF-5cHqc_Y2aPT2D7zrIR06BOo,513
+setuptools/_vendor/packaging/_compat.py,sha256=Vi_A0rAQeHbU-a9X0tt1yQm9RqkgQbDSxzRw8WlU9kA,860
+setuptools/_vendor/packaging/_structures.py,sha256=RImECJ4c_wTlaTYYwZYLHEiebDMaAJmK1oPARhw1T5o,1416
+setuptools/_vendor/packaging/markers.py,sha256=Gvpk9EY20yKaMTiKgQZ8yFEEpodqVgVYtfekoic1Yts,8239
+setuptools/_vendor/packaging/requirements.py,sha256=t44M2HVWtr8phIz2OhnILzuGT3rTATaovctV1dpnVIg,4343
+setuptools/_vendor/packaging/specifiers.py,sha256=SAMRerzO3fK2IkFZCaZkuwZaL_EGqHNOz4pni4vhnN0,28025
+setuptools/_vendor/packaging/utils.py,sha256=3m6WvPm6NNxE8rkTGmn0r75B_GZSGg7ikafxHsBN1WA,421
+setuptools/_vendor/packaging/version.py,sha256=OwGnxYfr2ghNzYx59qWIBkrK3SnB6n-Zfd1XaLpnnM0,11556
+setuptools/command/__init__.py,sha256=NWzJ0A1BEengZpVeqUyWLNm2bk4P3F4iL5QUErHy7kA,594
+setuptools/command/alias.py,sha256=KjpE0sz_SDIHv3fpZcIQK-sCkJz-SrC6Gmug6b9Nkc8,2426
+setuptools/command/bdist_egg.py,sha256=RQ9h8BmSVpXKJQST3i_b_sm093Z-aCXbfMBEM2IrI-Q,18185
+setuptools/command/bdist_rpm.py,sha256=B7l0TnzCGb-0nLlm6rS00jWLkojASwVmdhW2w5Qz_Ak,1508
+setuptools/command/bdist_wininst.py,sha256=_6dz3lpB1tY200LxKPLM7qgwTCceOMgaWFF-jW2-pm0,637
+setuptools/command/build_clib.py,sha256=bQ9aBr-5ZSO-9fGsGsDLz0mnnFteHUZnftVLkhvHDq0,4484
+setuptools/command/build_ext.py,sha256=PCRAZ2xYnqyEof7EFNtpKYl0sZzT0qdKUNTH3sUdPqk,13173
+setuptools/command/build_py.py,sha256=yWyYaaS9F3o9JbIczn064A5g1C5_UiKRDxGaTqYbtLE,9596
+setuptools/command/develop.py,sha256=wKbOw2_qUvcDti2lZmtxbDmYb54yAAibExzXIvToz-A,8046
+setuptools/command/dist_info.py,sha256=5t6kOfrdgALT-P3ogss6PF9k-Leyesueycuk3dUyZnI,960
+setuptools/command/easy_install.py,sha256=vRnKfAJ2hgTyt4OOLSna4BFEySfj9-KD4gGIAzfC6i8,89413
+setuptools/command/egg_info.py,sha256=BFs9e2mpws2YAFYtxxekoJaFj5X9N7o5LxpQeMdbyY4,24808
+setuptools/command/install.py,sha256=a0EZpL_A866KEdhicTGbuyD_TYl1sykfzdrri-zazT4,4683
+setuptools/command/install_egg_info.py,sha256=4zq_Ad3jE-EffParuyDEnvxU6efB-Xhrzdr8aB6Ln_8,3195
+setuptools/command/install_lib.py,sha256=n2iLR8f1MlYeGHtV2oFxDpUiL-wyLaQgwSAFX-YIEv4,5012
+setuptools/command/install_scripts.py,sha256=UD0rEZ6861mTYhIdzcsqKnUl8PozocXWl9VBQ1VTWnc,2439
+setuptools/command/launcher manifest.xml,sha256=xlLbjWrB01tKC0-hlVkOKkiSPbzMml2eOPtJ_ucCnbE,628
+setuptools/command/py36compat.py,sha256=SzjZcOxF7zdFUT47Zv2n7AM3H8koDys_0OpS-n9gIfc,4986
+setuptools/command/register.py,sha256=bHlMm1qmBbSdahTOT8w6UhA-EgeQIz7p6cD-qOauaiI,270
+setuptools/command/rotate.py,sha256=co5C1EkI7P0GGT6Tqz-T2SIj2LBJTZXYELpmao6d4KQ,2164
+setuptools/command/saveopts.py,sha256=za7QCBcQimKKriWcoCcbhxPjUz30gSB74zuTL47xpP4,658
+setuptools/command/sdist.py,sha256=obDTe2BmWt2PlnFPZZh7e0LWvemEsbCCO9MzhrTZjm8,6711
+setuptools/command/setopt.py,sha256=NTWDyx-gjDF-txf4dO577s7LOzHVoKR0Mq33rFxaRr8,5085
+setuptools/command/test.py,sha256=MeBAcXUePGjPKqjz4zvTrHatLvNsjlPFcagt3XnFYdk,9214
+setuptools/command/upload.py,sha256=i1gfItZ3nQOn5FKXb8tLC2Kd7eKC8lWO4bdE6NqGpE4,1172
+setuptools/command/upload_docs.py,sha256=oXiGplM_cUKLwE4CWWw98RzCufAu8tBhMC97GegFcms,7311
+setuptools/extern/__init__.py,sha256=2eKMsBMwsZqolIcYBtLZU3t96s6xSTP4PTaNfM5P-I0,2499
+setuptools-39.0.1.dist-info/DESCRIPTION.rst,sha256=It3a3GRjT5701mqhrpMcLyW_YS2Dokv-X8zWoTaMRe0,1422
+setuptools-39.0.1.dist-info/METADATA,sha256=whsT1qR2TE8wd0dsBKl8ZwCU_sfHFOGaHLWsDuaMqkQ,2728
+setuptools-39.0.1.dist-info/RECORD,,
+setuptools-39.0.1.dist-info/WHEEL,sha256=kdsN-5OJAZIiHN-iO4Rhl82KyS0bDWf4uBwMbkNafr8,110
+setuptools-39.0.1.dist-info/dependency_links.txt,sha256=HlkCFkoK5TbZ5EMLbLKYhLcY_E31kBWD8TqW2EgmatQ,239
+setuptools-39.0.1.dist-info/entry_points.txt,sha256=s4ibTr5_v_-uWueemgrdzLUIL_ageOMqsgCAKZDkY2E,2934
+setuptools-39.0.1.dist-info/metadata.json,sha256=kbzF0VTLzT0y_Hlf-90rhO8kH_DP02x9lA-6jkrIGsQ,4650
+setuptools-39.0.1.dist-info/top_level.txt,sha256=2HUXVVwA4Pff1xgTFr3GsTXXKaPaO6vlG6oNJ_4u4Tg,38
+setuptools-39.0.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
+../../../bin/easy_install,sha256=0d_9Bd3Fwb7aLh5VsasrW7P-ZHVya-qD9fqT1KjMhl0,316
+../../../bin/easy_install-3.6,sha256=0d_9Bd3Fwb7aLh5VsasrW7P-ZHVya-qD9fqT1KjMhl0,316
+setuptools-39.0.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+__pycache__/easy_install.cpython-36.pyc,,
+setuptools/_vendor/__pycache__/pyparsing.cpython-36.pyc,,
+setuptools/_vendor/__pycache__/six.cpython-36.pyc,,
+setuptools/_vendor/__pycache__/__init__.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/specifiers.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/_compat.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/_structures.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/requirements.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/markers.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/__about__.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/__init__.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/utils.cpython-36.pyc,,
+setuptools/_vendor/packaging/__pycache__/version.cpython-36.pyc,,
+setuptools/__pycache__/monkey.cpython-36.pyc,,
+setuptools/__pycache__/glibc.cpython-36.pyc,,
+setuptools/__pycache__/unicode_utils.cpython-36.pyc,,
+setuptools/__pycache__/sandbox.cpython-36.pyc,,
+setuptools/__pycache__/py27compat.cpython-36.pyc,,
+setuptools/__pycache__/launch.cpython-36.pyc,,
+setuptools/__pycache__/py36compat.cpython-36.pyc,,
+setuptools/__pycache__/archive_util.cpython-36.pyc,,
+setuptools/__pycache__/dist.cpython-36.pyc,,
+setuptools/__pycache__/depends.cpython-36.pyc,,
+setuptools/__pycache__/msvc.cpython-36.pyc,,
+setuptools/__pycache__/package_index.cpython-36.pyc,,
+setuptools/__pycache__/py31compat.cpython-36.pyc,,
+setuptools/__pycache__/ssl_support.cpython-36.pyc,,
+setuptools/__pycache__/config.cpython-36.pyc,,
+setuptools/__pycache__/py33compat.cpython-36.pyc,,
+setuptools/__pycache__/dep_util.cpython-36.pyc,,
+setuptools/__pycache__/lib2to3_ex.cpython-36.pyc,,
+setuptools/__pycache__/namespaces.cpython-36.pyc,,
+setuptools/__pycache__/site-patch.cpython-36.pyc,,
+setuptools/__pycache__/extension.cpython-36.pyc,,
+setuptools/__pycache__/__init__.cpython-36.pyc,,
+setuptools/__pycache__/build_meta.cpython-36.pyc,,
+setuptools/__pycache__/glob.cpython-36.pyc,,
+setuptools/__pycache__/windows_support.cpython-36.pyc,,
+setuptools/__pycache__/pep425tags.cpython-36.pyc,,
+setuptools/__pycache__/wheel.cpython-36.pyc,,
+setuptools/__pycache__/version.cpython-36.pyc,,
+setuptools/command/__pycache__/alias.cpython-36.pyc,,
+setuptools/command/__pycache__/bdist_egg.cpython-36.pyc,,
+setuptools/command/__pycache__/install_lib.cpython-36.pyc,,
+setuptools/command/__pycache__/egg_info.cpython-36.pyc,,
+setuptools/command/__pycache__/upload.cpython-36.pyc,,
+setuptools/command/__pycache__/py36compat.cpython-36.pyc,,
+setuptools/command/__pycache__/build_py.cpython-36.pyc,,
+setuptools/command/__pycache__/easy_install.cpython-36.pyc,,
+setuptools/command/__pycache__/build_clib.cpython-36.pyc,,
+setuptools/command/__pycache__/install_scripts.cpython-36.pyc,,
+setuptools/command/__pycache__/bdist_rpm.cpython-36.pyc,,
+setuptools/command/__pycache__/sdist.cpython-36.pyc,,
+setuptools/command/__pycache__/install_egg_info.cpython-36.pyc,,
+setuptools/command/__pycache__/setopt.cpython-36.pyc,,
+setuptools/command/__pycache__/test.cpython-36.pyc,,
+setuptools/command/__pycache__/develop.cpython-36.pyc,,
+setuptools/command/__pycache__/install.cpython-36.pyc,,
+setuptools/command/__pycache__/__init__.cpython-36.pyc,,
+setuptools/command/__pycache__/dist_info.cpython-36.pyc,,
+setuptools/command/__pycache__/bdist_wininst.cpython-36.pyc,,
+setuptools/command/__pycache__/saveopts.cpython-36.pyc,,
+setuptools/command/__pycache__/rotate.cpython-36.pyc,,
+setuptools/command/__pycache__/build_ext.cpython-36.pyc,,
+setuptools/command/__pycache__/upload_docs.cpython-36.pyc,,
+setuptools/command/__pycache__/register.cpython-36.pyc,,
+setuptools/extern/__pycache__/__init__.cpython-36.pyc,,
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/WHEEL b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/WHEEL
new file mode 100644
index 0000000000000000000000000000000000000000..7332a419cda6903b61439f3bac93492b0747e6e7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.30.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/dependency_links.txt b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/dependency_links.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e87d02103ede91545d70783dd59653d183424b68
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/dependency_links.txt
@@ -0,0 +1,2 @@
+https://files.pythonhosted.org/packages/source/c/certifi/certifi-2016.9.26.tar.gz#md5=baa81e951a29958563689d868ef1064d
+https://files.pythonhosted.org/packages/source/w/wincertstore/wincertstore-0.2.zip#md5=ae728f2f007185648d0c7a8679b361e2
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/entry_points.txt b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/entry_points.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b429e524ec46ebcfb0c18eecbc48d62eab35a9e0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/entry_points.txt
@@ -0,0 +1,64 @@
+[console_scripts]
+easy_install = setuptools.command.easy_install:main
+
+[distutils.commands]
+alias = setuptools.command.alias:alias
+bdist_egg = setuptools.command.bdist_egg:bdist_egg
+bdist_rpm = setuptools.command.bdist_rpm:bdist_rpm
+bdist_wininst = setuptools.command.bdist_wininst:bdist_wininst
+build_clib = setuptools.command.build_clib:build_clib
+build_ext = setuptools.command.build_ext:build_ext
+build_py = setuptools.command.build_py:build_py
+develop = setuptools.command.develop:develop
+dist_info = setuptools.command.dist_info:dist_info
+easy_install = setuptools.command.easy_install:easy_install
+egg_info = setuptools.command.egg_info:egg_info
+install = setuptools.command.install:install
+install_egg_info = setuptools.command.install_egg_info:install_egg_info
+install_lib = setuptools.command.install_lib:install_lib
+install_scripts = setuptools.command.install_scripts:install_scripts
+register = setuptools.command.register:register
+rotate = setuptools.command.rotate:rotate
+saveopts = setuptools.command.saveopts:saveopts
+sdist = setuptools.command.sdist:sdist
+setopt = setuptools.command.setopt:setopt
+test = setuptools.command.test:test
+upload = setuptools.command.upload:upload
+upload_docs = setuptools.command.upload_docs:upload_docs
+
+[distutils.setup_keywords]
+convert_2to3_doctests = setuptools.dist:assert_string_list
+dependency_links = setuptools.dist:assert_string_list
+eager_resources = setuptools.dist:assert_string_list
+entry_points = setuptools.dist:check_entry_points
+exclude_package_data = setuptools.dist:check_package_data
+extras_require = setuptools.dist:check_extras
+include_package_data = setuptools.dist:assert_bool
+install_requires = setuptools.dist:check_requirements
+namespace_packages = setuptools.dist:check_nsp
+package_data = setuptools.dist:check_package_data
+packages = setuptools.dist:check_packages
+python_requires = setuptools.dist:check_specifier
+setup_requires = setuptools.dist:check_requirements
+test_loader = setuptools.dist:check_importable
+test_runner = setuptools.dist:check_importable
+test_suite = setuptools.dist:check_test_suite
+tests_require = setuptools.dist:check_requirements
+use_2to3 = setuptools.dist:assert_bool
+use_2to3_exclude_fixers = setuptools.dist:assert_string_list
+use_2to3_fixers = setuptools.dist:assert_string_list
+zip_safe = setuptools.dist:assert_bool
+
+[egg_info.writers]
+PKG-INFO = setuptools.command.egg_info:write_pkg_info
+dependency_links.txt = setuptools.command.egg_info:overwrite_arg
+depends.txt = setuptools.command.egg_info:warn_depends_obsolete
+eager_resources.txt = setuptools.command.egg_info:overwrite_arg
+entry_points.txt = setuptools.command.egg_info:write_entries
+namespace_packages.txt = setuptools.command.egg_info:overwrite_arg
+requires.txt = setuptools.command.egg_info:write_requirements
+top_level.txt = setuptools.command.egg_info:write_toplevel_names
+
+[setuptools.installation]
+eggsecutable = setuptools.command.easy_install:bootstrap
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/metadata.json b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..494b205f8776f86c52cc48f0aa2626d061bba4d0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/metadata.json
@@ -0,0 +1 @@
+{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: System :: Archiving :: Packaging", "Topic :: System :: Systems Administration", "Topic :: Utilities"], "description_content_type": "text/x-rst; charset=UTF-8", "extensions": {"python.commands": {"wrap_console": {"easy_install": "setuptools.command.easy_install:main"}}, "python.details": {"contacts": [{"email": "distutils-sig@python.org", "name": "Python Packaging Authority", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "https://github.com/pypa/setuptools"}}, "python.exports": {"console_scripts": {"easy_install": "setuptools.command.easy_install:main"}, "distutils.commands": {"alias": "setuptools.command.alias:alias", "bdist_egg": "setuptools.command.bdist_egg:bdist_egg", "bdist_rpm": "setuptools.command.bdist_rpm:bdist_rpm", "bdist_wininst": "setuptools.command.bdist_wininst:bdist_wininst", "build_clib": "setuptools.command.build_clib:build_clib", "build_ext": "setuptools.command.build_ext:build_ext", "build_py": "setuptools.command.build_py:build_py", "develop": "setuptools.command.develop:develop", "dist_info": "setuptools.command.dist_info:dist_info", "easy_install": "setuptools.command.easy_install:easy_install", "egg_info": "setuptools.command.egg_info:egg_info", "install": "setuptools.command.install:install", "install_egg_info": "setuptools.command.install_egg_info:install_egg_info", "install_lib": "setuptools.command.install_lib:install_lib", "install_scripts": "setuptools.command.install_scripts:install_scripts", "register": "setuptools.command.register:register", "rotate": "setuptools.command.rotate:rotate", "saveopts": "setuptools.command.saveopts:saveopts", "sdist": "setuptools.command.sdist:sdist", "setopt": "setuptools.command.setopt:setopt", "test": "setuptools.command.test:test", "upload": "setuptools.command.upload:upload", "upload_docs": "setuptools.command.upload_docs:upload_docs"}, "distutils.setup_keywords": {"convert_2to3_doctests": "setuptools.dist:assert_string_list", "dependency_links": "setuptools.dist:assert_string_list", "eager_resources": "setuptools.dist:assert_string_list", "entry_points": "setuptools.dist:check_entry_points", "exclude_package_data": "setuptools.dist:check_package_data", "extras_require": "setuptools.dist:check_extras", "include_package_data": "setuptools.dist:assert_bool", "install_requires": "setuptools.dist:check_requirements", "namespace_packages": "setuptools.dist:check_nsp", "package_data": "setuptools.dist:check_package_data", "packages": "setuptools.dist:check_packages", "python_requires": "setuptools.dist:check_specifier", "setup_requires": "setuptools.dist:check_requirements", "test_loader": "setuptools.dist:check_importable", "test_runner": "setuptools.dist:check_importable", "test_suite": "setuptools.dist:check_test_suite", "tests_require": "setuptools.dist:check_requirements", "use_2to3": "setuptools.dist:assert_bool", "use_2to3_exclude_fixers": "setuptools.dist:assert_string_list", "use_2to3_fixers": "setuptools.dist:assert_string_list", "zip_safe": 
"setuptools.dist:assert_bool"}, "egg_info.writers": {"PKG-INFO": "setuptools.command.egg_info:write_pkg_info", "dependency_links.txt": "setuptools.command.egg_info:overwrite_arg", "depends.txt": "setuptools.command.egg_info:warn_depends_obsolete", "eager_resources.txt": "setuptools.command.egg_info:overwrite_arg", "entry_points.txt": "setuptools.command.egg_info:write_entries", "namespace_packages.txt": "setuptools.command.egg_info:overwrite_arg", "requires.txt": "setuptools.command.egg_info:write_requirements", "top_level.txt": "setuptools.command.egg_info:write_toplevel_names"}, "setuptools.installation": {"eggsecutable": "setuptools.command.easy_install:bootstrap"}}}, "extras": ["certs", "ssl"], "generator": "bdist_wheel (0.30.0)", "keywords": ["CPAN", "PyPI", "distutils", "eggs", "package", "management"], "metadata_version": "2.0", "name": "setuptools", "project_url": "Documentation, https://setuptools.readthedocs.io/", "requires_python": ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*", "summary": "Easily download, build, install, upgrade, and uninstall Python packages", "version": "39.0.1"}
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/top_level.txt b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4577c6a795e510bf7578236665f582c3770fb42e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/top_level.txt
@@ -0,0 +1,3 @@
+easy_install
+pkg_resources
+setuptools
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/zip-safe b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/zip-safe
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools-39.0.1.dist-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7da47fbed5e5fa614f814c038b400360439144a5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/__init__.py
@@ -0,0 +1,180 @@
+"""Extensions to the 'distutils' for large or complex distributions"""
+
+import os
+import functools
+import distutils.core
+import distutils.filelist
+from distutils.util import convert_path
+from fnmatch import fnmatchcase
+
+from setuptools.extern.six.moves import filter, map
+
+import setuptools.version
+from setuptools.extension import Extension
+from setuptools.dist import Distribution, Feature
+from setuptools.depends import Require
+from . import monkey
+
+__all__ = [
+    'setup', 'Distribution', 'Feature', 'Command', 'Extension', 'Require',
+    'find_packages',
+]
+
+__version__ = setuptools.version.__version__
+
+bootstrap_install_from = None
+
+# If we run 2to3 on .py files, should we also convert docstrings?
+# Default: yes; assume that we can detect doctests reliably
+run_2to3_on_doctests = True
+# Standard package names for fixer packages
+lib2to3_fixer_packages = ['lib2to3.fixes']
+
+
+class PackageFinder(object):
+    """
+    Generate a list of all Python packages found within a directory
+    """
+
+    @classmethod
+    def find(cls, where='.', exclude=(), include=('*',)):
+        """Return a list all Python packages found within directory 'where'
+
+        'where' is the root directory which will be searched for packages.  It
+        should be supplied as a "cross-platform" (i.e. URL-style) path; it will
+        be converted to the appropriate local path syntax.
+
+        'exclude' is a sequence of package names to exclude; '*' can be used
+        as a wildcard in the names, such that 'foo.*' will exclude all
+        subpackages of 'foo' (but not 'foo' itself).
+
+        'include' is a sequence of package names to include.  If it's
+        specified, only the named packages will be included.  If it's not
+        specified, all found packages will be included.  'include' can contain
+        shell style wildcard patterns just like 'exclude'.
+        """
+
+        return list(cls._find_packages_iter(
+            convert_path(where),
+            cls._build_filter('ez_setup', '*__pycache__', *exclude),
+            cls._build_filter(*include)))
+
+    @classmethod
+    def _find_packages_iter(cls, where, exclude, include):
+        """
+        All the packages found in 'where' that pass the 'include' filter, but
+        not the 'exclude' filter.
+        """
+        for root, dirs, files in os.walk(where, followlinks=True):
+            # Copy dirs to iterate over it, then empty dirs.
+            all_dirs = dirs[:]
+            dirs[:] = []
+
+            for dir in all_dirs:
+                full_path = os.path.join(root, dir)
+                rel_path = os.path.relpath(full_path, where)
+                package = rel_path.replace(os.path.sep, '.')
+
+                # Skip directory trees that are not valid packages
+                if ('.' in dir or not cls._looks_like_package(full_path)):
+                    continue
+
+                # Should this package be included?
+                if include(package) and not exclude(package):
+                    yield package
+
+                # Keep searching subdirectories, as there may be more packages
+                # down there, even if the parent was excluded.
+                dirs.append(dir)
+
+    @staticmethod
+    def _looks_like_package(path):
+        """Does a directory look like a package?"""
+        return os.path.isfile(os.path.join(path, '__init__.py'))
+
+    @staticmethod
+    def _build_filter(*patterns):
+        """
+        Given a list of patterns, return a callable that will be true only if
+        the input matches at least one of the patterns.
+        """
+        return lambda name: any(fnmatchcase(name, pat=pat) for pat in patterns)
+
+
+class PEP420PackageFinder(PackageFinder):
+    @staticmethod
+    def _looks_like_package(path):
+        return True
+
+
+find_packages = PackageFinder.find
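+# Illustrative sketch (not upstream code; the directory layout below is
+# hypothetical). Given a tree such as
+#
+#     src/foo/__init__.py
+#     src/foo/bar/__init__.py
+#     src/foo/tests/__init__.py
+#
+# the alias defined above behaves like
+#
+#     >>> find_packages(where='src', exclude=('foo.tests',))
+#     ['foo', 'foo.bar']
+#
+# returning dotted package names relative to 'where' after applying the
+# include/exclude wildcard filters.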
+
+
+def _install_setup_requires(attrs):
+    # Note: do not use `setuptools.Distribution` directly, as
+    # our PEP 517 backend patches `distutils.core.Distribution`.
+    dist = distutils.core.Distribution(dict(
+        (k, v) for k, v in attrs.items()
+        if k in ('dependency_links', 'setup_requires')
+    ))
+    # Honor setup.cfg's options.
+    dist.parse_config_files(ignore_option_errors=True)
+    if dist.setup_requires:
+        dist.fetch_build_eggs(dist.setup_requires)
+
+
+def setup(**attrs):
+    # Make sure we have any requirements needed to interpret 'attrs'.
+    _install_setup_requires(attrs)
+    return distutils.core.setup(**attrs)
+
+setup.__doc__ = distutils.core.setup.__doc__
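+# Hypothetical setup.py sketch (not upstream code) showing what the wrapper
+# above enables: anything listed in setup_requires is fetched by
+# _install_setup_requires() before the real distutils.core.setup() call runs.
+#
+#     setup(
+#         name='example-project',             # hypothetical project name
+#         version='0.1',
+#         setup_requires=['setuptools_scm'],  # fetched before the build starts
+#     )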
+
+
+_Command = monkey.get_unpatched(distutils.core.Command)
+
+
+class Command(_Command):
+    __doc__ = _Command.__doc__
+
+    command_consumes_arguments = False
+
+    def __init__(self, dist, **kw):
+        """
+        Construct the command for dist, updating
+        vars(self) with any keyword parameters.
+        """
+        _Command.__init__(self, dist)
+        vars(self).update(kw)
+
+    def reinitialize_command(self, command, reinit_subcommands=0, **kw):
+        cmd = _Command.reinitialize_command(self, command, reinit_subcommands)
+        vars(cmd).update(kw)
+        return cmd
+
+
+def _find_all_simple(path):
+    """
+    Find all files under 'path'
+    """
+    results = (
+        os.path.join(base, file)
+        for base, dirs, files in os.walk(path, followlinks=True)
+        for file in files
+    )
+    return filter(os.path.isfile, results)
+
+
+def findall(dir=os.curdir):
+    """
+    Find all files under 'dir' and return the list of full filenames.
+    Unless dir is '.', return full filenames with dir prepended.
+    """
+    files = _find_all_simple(dir)
+    if dir == os.curdir:
+        make_rel = functools.partial(os.path.relpath, start=dir)
+        files = map(make_rel, files)
+    return list(files)
+
+
+monkey.patch_all()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/__about__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/__about__.py
new file mode 100644
index 0000000000000000000000000000000000000000..95d330ef823aa2e12f7846bc63c0955b25df6029
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/__about__.py
@@ -0,0 +1,21 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+__all__ = [
+    "__title__", "__summary__", "__uri__", "__version__", "__author__",
+    "__email__", "__license__", "__copyright__",
+]
+
+__title__ = "packaging"
+__summary__ = "Core utilities for Python packages"
+__uri__ = "https://github.com/pypa/packaging"
+
+__version__ = "16.8"
+
+__author__ = "Donald Stufft and individual contributors"
+__email__ = "donald@stufft.io"
+
+__license__ = "BSD or Apache License, Version 2.0"
+__copyright__ = "Copyright 2014-2016 %s" % __author__
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ee6220203e5425f900fb5a43676c24ea377c2fa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/__init__.py
@@ -0,0 +1,14 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+from .__about__ import (
+    __author__, __copyright__, __email__, __license__, __summary__, __title__,
+    __uri__, __version__
+)
+
+__all__ = [
+    "__title__", "__summary__", "__uri__", "__version__", "__author__",
+    "__email__", "__license__", "__copyright__",
+]
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/_compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/_compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..210bb80b7e7b64cb79f7e7cdf3e42819fe3471fe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/_compat.py
@@ -0,0 +1,30 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import sys
+
+
+PY2 = sys.version_info[0] == 2
+PY3 = sys.version_info[0] == 3
+
+# flake8: noqa
+
+if PY3:
+    string_types = str,
+else:
+    string_types = basestring,
+
+
+def with_metaclass(meta, *bases):
+    """
+    Create a base class with a metaclass.
+    """
+    # This requires a bit of explanation: the basic idea is to make a dummy
+    # metaclass for one level of class instantiation that replaces itself with
+    # the actual metaclass.
+    class metaclass(meta):
+        def __new__(cls, name, this_bases, d):
+            return meta(name, bases, d)
+    return type.__new__(metaclass, 'temporary_class', (), {})
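+# A minimal usage sketch (not upstream code; the names `Meta` and `Base`
+# below are hypothetical):
+#
+#     >>> class Meta(type):
+#     ...     pass
+#     >>> class Base(object):
+#     ...     pass
+#     >>> class Concrete(with_metaclass(Meta, Base)):
+#     ...     pass
+#     >>> type(Concrete) is Meta, Concrete.__bases__ == (Base,)
+#     (True, True)
+#
+# The temporary class exists only for the single instantiation of `Concrete`;
+# its metaclass immediately re-creates the class through `Meta` with the
+# intended bases.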
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/_structures.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/_structures.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccc27861c3a4d9efaa3db753c77c4515a627bd98
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/_structures.py
@@ -0,0 +1,68 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+
+class Infinity(object):
+
+    def __repr__(self):
+        return "Infinity"
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __lt__(self, other):
+        return False
+
+    def __le__(self, other):
+        return False
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__)
+
+    def __ne__(self, other):
+        return not isinstance(other, self.__class__)
+
+    def __gt__(self, other):
+        return True
+
+    def __ge__(self, other):
+        return True
+
+    def __neg__(self):
+        return NegativeInfinity
+
+Infinity = Infinity()
+
+
+class NegativeInfinity(object):
+
+    def __repr__(self):
+        return "-Infinity"
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __lt__(self, other):
+        return True
+
+    def __le__(self, other):
+        return True
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__)
+
+    def __ne__(self, other):
+        return not isinstance(other, self.__class__)
+
+    def __gt__(self, other):
+        return False
+
+    def __ge__(self, other):
+        return False
+
+    def __neg__(self):
+        return Infinity
+
+NegativeInfinity = NegativeInfinity()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/markers.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/markers.py
new file mode 100644
index 0000000000000000000000000000000000000000..031332a3058f4bc47eadd320e63a2cdc6b76df25
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/markers.py
@@ -0,0 +1,301 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import operator
+import os
+import platform
+import sys
+
+from setuptools.extern.pyparsing import ParseException, ParseResults, stringStart, stringEnd
+from setuptools.extern.pyparsing import ZeroOrMore, Group, Forward, QuotedString
+from setuptools.extern.pyparsing import Literal as L  # noqa
+
+from ._compat import string_types
+from .specifiers import Specifier, InvalidSpecifier
+
+
+__all__ = [
+    "InvalidMarker", "UndefinedComparison", "UndefinedEnvironmentName",
+    "Marker", "default_environment",
+]
+
+
+class InvalidMarker(ValueError):
+    """
+    An invalid marker was found; users should refer to PEP 508.
+    """
+
+
+class UndefinedComparison(ValueError):
+    """
+    An invalid operation was attempted on a value that doesn't support it.
+    """
+
+
+class UndefinedEnvironmentName(ValueError):
+    """
+    A name was attempted to be used that does not exist inside of the
+    environment.
+    """
+
+
+class Node(object):
+
+    def __init__(self, value):
+        self.value = value
+
+    def __str__(self):
+        return str(self.value)
+
+    def __repr__(self):
+        return "<{0}({1!r})>".format(self.__class__.__name__, str(self))
+
+    def serialize(self):
+        raise NotImplementedError
+
+
+class Variable(Node):
+
+    def serialize(self):
+        return str(self)
+
+
+class Value(Node):
+
+    def serialize(self):
+        return '"{0}"'.format(self)
+
+
+class Op(Node):
+
+    def serialize(self):
+        return str(self)
+
+
+VARIABLE = (
+    L("implementation_version") |
+    L("platform_python_implementation") |
+    L("implementation_name") |
+    L("python_full_version") |
+    L("platform_release") |
+    L("platform_version") |
+    L("platform_machine") |
+    L("platform_system") |
+    L("python_version") |
+    L("sys_platform") |
+    L("os_name") |
+    L("os.name") |  # PEP-345
+    L("sys.platform") |  # PEP-345
+    L("platform.version") |  # PEP-345
+    L("platform.machine") |  # PEP-345
+    L("platform.python_implementation") |  # PEP-345
+    L("python_implementation") |  # undocumented setuptools legacy
+    L("extra")
+)
+ALIASES = {
+    'os.name': 'os_name',
+    'sys.platform': 'sys_platform',
+    'platform.version': 'platform_version',
+    'platform.machine': 'platform_machine',
+    'platform.python_implementation': 'platform_python_implementation',
+    'python_implementation': 'platform_python_implementation'
+}
+VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0])))
+
+VERSION_CMP = (
+    L("===") |
+    L("==") |
+    L(">=") |
+    L("<=") |
+    L("!=") |
+    L("~=") |
+    L(">") |
+    L("<")
+)
+
+MARKER_OP = VERSION_CMP | L("not in") | L("in")
+MARKER_OP.setParseAction(lambda s, l, t: Op(t[0]))
+
+MARKER_VALUE = QuotedString("'") | QuotedString('"')
+MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0]))
+
+BOOLOP = L("and") | L("or")
+
+MARKER_VAR = VARIABLE | MARKER_VALUE
+
+MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR)
+MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0]))
+
+LPAREN = L("(").suppress()
+RPAREN = L(")").suppress()
+
+MARKER_EXPR = Forward()
+MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN)
+MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR)
+
+MARKER = stringStart + MARKER_EXPR + stringEnd
+
+
+def _coerce_parse_result(results):
+    if isinstance(results, ParseResults):
+        return [_coerce_parse_result(i) for i in results]
+    else:
+        return results
+
+
+def _format_marker(marker, first=True):
+    assert isinstance(marker, (list, tuple, string_types))
+
+    # Sometimes we have a structure like [[...]], which is a single-item list
+    # where the single item is itself a list. In that case we want to skip
+    # the rest of this function so that we don't get extraneous () on the
+    # outside.
+    if (isinstance(marker, list) and len(marker) == 1 and
+            isinstance(marker[0], (list, tuple))):
+        return _format_marker(marker[0])
+
+    if isinstance(marker, list):
+        inner = (_format_marker(m, first=False) for m in marker)
+        if first:
+            return " ".join(inner)
+        else:
+            return "(" + " ".join(inner) + ")"
+    elif isinstance(marker, tuple):
+        return " ".join([m.serialize() for m in marker])
+    else:
+        return marker
+
+
+_operators = {
+    "in": lambda lhs, rhs: lhs in rhs,
+    "not in": lambda lhs, rhs: lhs not in rhs,
+    "<": operator.lt,
+    "<=": operator.le,
+    "==": operator.eq,
+    "!=": operator.ne,
+    ">=": operator.ge,
+    ">": operator.gt,
+}
+
+
+def _eval_op(lhs, op, rhs):
+    try:
+        spec = Specifier("".join([op.serialize(), rhs]))
+    except InvalidSpecifier:
+        pass
+    else:
+        return spec.contains(lhs)
+
+    oper = _operators.get(op.serialize())
+    if oper is None:
+        raise UndefinedComparison(
+            "Undefined {0!r} on {1!r} and {2!r}.".format(op, lhs, rhs)
+        )
+
+    return oper(lhs, rhs)
+
+
+_undefined = object()
+
+
+def _get_env(environment, name):
+    value = environment.get(name, _undefined)
+
+    if value is _undefined:
+        raise UndefinedEnvironmentName(
+            "{0!r} does not exist in evaluation environment.".format(name)
+        )
+
+    return value
+
+
+def _evaluate_markers(markers, environment):
+    groups = [[]]
+
+    for marker in markers:
+        assert isinstance(marker, (list, tuple, string_types))
+
+        if isinstance(marker, list):
+            groups[-1].append(_evaluate_markers(marker, environment))
+        elif isinstance(marker, tuple):
+            lhs, op, rhs = marker
+
+            if isinstance(lhs, Variable):
+                lhs_value = _get_env(environment, lhs.value)
+                rhs_value = rhs.value
+            else:
+                lhs_value = lhs.value
+                rhs_value = _get_env(environment, rhs.value)
+
+            groups[-1].append(_eval_op(lhs_value, op, rhs_value))
+        else:
+            assert marker in ["and", "or"]
+            if marker == "or":
+                groups.append([])
+
+    return any(all(item) for item in groups)
+
+
+def format_full_version(info):
+    version = '{0.major}.{0.minor}.{0.micro}'.format(info)
+    kind = info.releaselevel
+    if kind != 'final':
+        version += kind[0] + str(info.serial)
+    return version
+
+
+def default_environment():
+    if hasattr(sys, 'implementation'):
+        iver = format_full_version(sys.implementation.version)
+        implementation_name = sys.implementation.name
+    else:
+        iver = '0'
+        implementation_name = ''
+
+    return {
+        "implementation_name": implementation_name,
+        "implementation_version": iver,
+        "os_name": os.name,
+        "platform_machine": platform.machine(),
+        "platform_release": platform.release(),
+        "platform_system": platform.system(),
+        "platform_version": platform.version(),
+        "python_full_version": platform.python_version(),
+        "platform_python_implementation": platform.python_implementation(),
+        "python_version": platform.python_version()[:3],
+        "sys_platform": sys.platform,
+    }
+
+
+class Marker(object):
+
+    def __init__(self, marker):
+        try:
+            self._markers = _coerce_parse_result(MARKER.parseString(marker))
+        except ParseException as e:
+            err_str = "Invalid marker: {0!r}, parse error at {1!r}".format(
+                marker, marker[e.loc:e.loc + 8])
+            raise InvalidMarker(err_str)
+
+    def __str__(self):
+        return _format_marker(self._markers)
+
+    def __repr__(self):
+        return "<Marker({0!r})>".format(str(self))
+
+    def evaluate(self, environment=None):
+        """Evaluate a marker.
+
+        Return the boolean from evaluating the given marker against the
+        environment. environment is an optional argument to override all or
+        part of the determined environment.
+
+        The environment is determined from the current Python process.
+        """
+        current_environment = default_environment()
+        if environment is not None:
+            current_environment.update(environment)
+
+        return _evaluate_markers(self._markers, current_environment)
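+# Illustrative sketch (not upstream code), assuming the vendored module is
+# importable as setuptools.extern.packaging.markers, the path used elsewhere
+# in this tree:
+#
+#     >>> from setuptools.extern.packaging.markers import Marker
+#     >>> m = Marker('python_version >= "2.7" and os_name == "posix"')
+#     >>> m.evaluate({"python_version": "3.6", "os_name": "posix"})
+#     True
+#
+# Without an argument, evaluate() uses default_environment(), i.e. the values
+# reported by the currently running interpreter.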
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/requirements.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/requirements.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b493416f215081b46c901d76c3a4b4648a32296
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/requirements.py
@@ -0,0 +1,127 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import string
+import re
+
+from setuptools.extern.pyparsing import stringStart, stringEnd, originalTextFor, ParseException
+from setuptools.extern.pyparsing import ZeroOrMore, Word, Optional, Regex, Combine
+from setuptools.extern.pyparsing import Literal as L  # noqa
+from setuptools.extern.six.moves.urllib import parse as urlparse
+
+from .markers import MARKER_EXPR, Marker
+from .specifiers import LegacySpecifier, Specifier, SpecifierSet
+
+
+class InvalidRequirement(ValueError):
+    """
+    An invalid requirement was found; users should refer to PEP 508.
+    """
+
+
+ALPHANUM = Word(string.ascii_letters + string.digits)
+
+LBRACKET = L("[").suppress()
+RBRACKET = L("]").suppress()
+LPAREN = L("(").suppress()
+RPAREN = L(")").suppress()
+COMMA = L(",").suppress()
+SEMICOLON = L(";").suppress()
+AT = L("@").suppress()
+
+PUNCTUATION = Word("-_.")
+IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM)
+IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END))
+
+NAME = IDENTIFIER("name")
+EXTRA = IDENTIFIER
+
+URI = Regex(r'[^ ]+')("url")
+URL = (AT + URI)
+
+EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA)
+EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras")
+
+VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE)
+VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE)
+
+VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY
+VERSION_MANY = Combine(VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE),
+                       joinString=",", adjacent=False)("_raw_spec")
+_VERSION_SPEC = Optional(((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY))
+_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or '')
+
+VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier")
+VERSION_SPEC.setParseAction(lambda s, l, t: t[1])
+
+MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker")
+MARKER_EXPR.setParseAction(
+    lambda s, l, t: Marker(s[t._original_start:t._original_end])
+)
+MARKER_SEPERATOR = SEMICOLON
+MARKER = MARKER_SEPERATOR + MARKER_EXPR
+
+VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER)
+URL_AND_MARKER = URL + Optional(MARKER)
+
+NAMED_REQUIREMENT = \
+    NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER)
+
+REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd
+
+
+class Requirement(object):
+    """Parse a requirement.
+
+    Parse a given requirement string into its parts, such as name, specifier,
+    URL, and extras. Raises InvalidRequirement on a badly-formed requirement
+    string.
+    """
+
+    # TODO: Can we test whether something is contained within a requirement?
+    #       If so how do we do that? Do we need to test against the _name_ of
+    #       the thing as well as the version? What about the markers?
+    # TODO: Can we normalize the name and extra name?
+
+    def __init__(self, requirement_string):
+        try:
+            req = REQUIREMENT.parseString(requirement_string)
+        except ParseException as e:
+            raise InvalidRequirement(
+                "Invalid requirement, parse error at \"{0!r}\"".format(
+                    requirement_string[e.loc:e.loc + 8]))
+
+        self.name = req.name
+        if req.url:
+            parsed_url = urlparse.urlparse(req.url)
+            if not (parsed_url.scheme and parsed_url.netloc) or (
+                    not parsed_url.scheme and not parsed_url.netloc):
+                raise InvalidRequirement("Invalid URL given")
+            self.url = req.url
+        else:
+            self.url = None
+        self.extras = set(req.extras.asList() if req.extras else [])
+        self.specifier = SpecifierSet(req.specifier)
+        self.marker = req.marker if req.marker else None
+
+    def __str__(self):
+        parts = [self.name]
+
+        if self.extras:
+            parts.append("[{0}]".format(",".join(sorted(self.extras))))
+
+        if self.specifier:
+            parts.append(str(self.specifier))
+
+        if self.url:
+            parts.append("@ {0}".format(self.url))
+
+        if self.marker:
+            parts.append("; {0}".format(self.marker))
+
+        return "".join(parts)
+
+    def __repr__(self):
+        return "<Requirement({0!r})>".format(str(self))
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/specifiers.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/specifiers.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f5a76cfd63f47dcce29b3ea82f59d10f4e8d771
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/specifiers.py
@@ -0,0 +1,774 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import abc
+import functools
+import itertools
+import re
+
+from ._compat import string_types, with_metaclass
+from .version import Version, LegacyVersion, parse
+
+
+class InvalidSpecifier(ValueError):
+    """
+    An invalid specifier was found; users should refer to PEP 440.
+    """
+
+
+class BaseSpecifier(with_metaclass(abc.ABCMeta, object)):
+
+    @abc.abstractmethod
+    def __str__(self):
+        """
+        Returns the str representation of this Specifier like object. This
+        should be representative of the Specifier itself.
+        """
+
+    @abc.abstractmethod
+    def __hash__(self):
+        """
+        Returns a hash value for this Specifier like object.
+        """
+
+    @abc.abstractmethod
+    def __eq__(self, other):
+        """
+        Returns a boolean representing whether or not the two Specifier like
+        objects are equal.
+        """
+
+    @abc.abstractmethod
+    def __ne__(self, other):
+        """
+        Returns a boolean representing whether or not the two Specifier like
+        objects are not equal.
+        """
+
+    @abc.abstractproperty
+    def prereleases(self):
+        """
+        Returns whether or not pre-releases as a whole are allowed by this
+        specifier.
+        """
+
+    @prereleases.setter
+    def prereleases(self, value):
+        """
+        Sets whether or not pre-releases as a whole are allowed by this
+        specifier.
+        """
+
+    @abc.abstractmethod
+    def contains(self, item, prereleases=None):
+        """
+        Determines if the given item is contained within this specifier.
+        """
+
+    @abc.abstractmethod
+    def filter(self, iterable, prereleases=None):
+        """
+        Takes an iterable of items and filters them so that only items which
+        are contained within this specifier are allowed in it.
+        """
+
+
+class _IndividualSpecifier(BaseSpecifier):
+
+    _operators = {}
+
+    def __init__(self, spec="", prereleases=None):
+        match = self._regex.search(spec)
+        if not match:
+            raise InvalidSpecifier("Invalid specifier: '{0}'".format(spec))
+
+        self._spec = (
+            match.group("operator").strip(),
+            match.group("version").strip(),
+        )
+
+        # Store whether or not this Specifier should accept prereleases
+        self._prereleases = prereleases
+
+    def __repr__(self):
+        pre = (
+            ", prereleases={0!r}".format(self.prereleases)
+            if self._prereleases is not None
+            else ""
+        )
+
+        return "<{0}({1!r}{2})>".format(
+            self.__class__.__name__,
+            str(self),
+            pre,
+        )
+
+    def __str__(self):
+        return "{0}{1}".format(*self._spec)
+
+    def __hash__(self):
+        return hash(self._spec)
+
+    def __eq__(self, other):
+        if isinstance(other, string_types):
+            try:
+                other = self.__class__(other)
+            except InvalidSpecifier:
+                return NotImplemented
+        elif not isinstance(other, self.__class__):
+            return NotImplemented
+
+        return self._spec == other._spec
+
+    def __ne__(self, other):
+        if isinstance(other, string_types):
+            try:
+                other = self.__class__(other)
+            except InvalidSpecifier:
+                return NotImplemented
+        elif not isinstance(other, self.__class__):
+            return NotImplemented
+
+        return self._spec != other._spec
+
+    def _get_operator(self, op):
+        return getattr(self, "_compare_{0}".format(self._operators[op]))
+
+    def _coerce_version(self, version):
+        if not isinstance(version, (LegacyVersion, Version)):
+            version = parse(version)
+        return version
+
+    @property
+    def operator(self):
+        return self._spec[0]
+
+    @property
+    def version(self):
+        return self._spec[1]
+
+    @property
+    def prereleases(self):
+        return self._prereleases
+
+    @prereleases.setter
+    def prereleases(self, value):
+        self._prereleases = value
+
+    def __contains__(self, item):
+        return self.contains(item)
+
+    def contains(self, item, prereleases=None):
+        # Determine if prereleases are to be allowed or not.
+        if prereleases is None:
+            prereleases = self.prereleases
+
+        # Normalize item to a Version or LegacyVersion; this allows us to have
+        # a shortcut for ``"2.0" in Specifier(">=2")``.
+        item = self._coerce_version(item)
+
+        # Determine if we should be supporting prereleases in this specifier
+        # or not. If we do not support prereleases, then we can short-circuit
+        # the logic if this version is a prerelease.
+        if item.is_prerelease and not prereleases:
+            return False
+
+        # Actually do the comparison to determine if this item is contained
+        # within this Specifier or not.
+        return self._get_operator(self.operator)(item, self.version)
+
+    def filter(self, iterable, prereleases=None):
+        yielded = False
+        found_prereleases = []
+
+        kw = {"prereleases": prereleases if prereleases is not None else True}
+
+        # Attempt to iterate over all the values in the iterable and if any of
+        # them match, yield them.
+        for version in iterable:
+            parsed_version = self._coerce_version(version)
+
+            if self.contains(parsed_version, **kw):
+                # If our version is a prerelease, and we were not set to allow
+                # prereleases, then we'll store it for later in case nothing
+                # else matches this specifier.
+                if (parsed_version.is_prerelease and not
+                        (prereleases or self.prereleases)):
+                    found_prereleases.append(version)
+                # Either this is not a prerelease, or we should have been
+                # accepting prereleases from the beginning.
+                else:
+                    yielded = True
+                    yield version
+
+        # Now that we've iterated over everything, determine if we've yielded
+        # any values, and if we have not and we have any prereleases stored up
+        # then we will go ahead and yield the prereleases.
+        if not yielded and found_prereleases:
+            for version in found_prereleases:
+                yield version
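+    # For example (illustrative): pre-releases that match are only used as a
+    # fallback when nothing else satisfies the specifier:
+    #
+    #     list(Specifier(">=1.0").filter(["1.1.dev1", "1.2"])) -> ["1.2"]
+    #     list(Specifier(">=1.0").filter(["1.1.dev1"]))        -> ["1.1.dev1"]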
+
+
+class LegacySpecifier(_IndividualSpecifier):
+
+    _regex_str = (
+        r"""
+        (?P<operator>(==|!=|<=|>=|<|>))
+        \s*
+        (?P<version>
+            [^,;\s)]* # Since this is a "legacy" specifier, and the version
+                      # string can be just about anything, we match everything
+                      # except for whitespace, a semi-colon for marker support,
+                      # a closing paren since versions can be enclosed in
+                      # them, and a comma since it's a version separator.
+        )
+        """
+    )
+
+    _regex = re.compile(
+        r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    _operators = {
+        "==": "equal",
+        "!=": "not_equal",
+        "<=": "less_than_equal",
+        ">=": "greater_than_equal",
+        "<": "less_than",
+        ">": "greater_than",
+    }
+
+    def _coerce_version(self, version):
+        if not isinstance(version, LegacyVersion):
+            version = LegacyVersion(str(version))
+        return version
+
+    def _compare_equal(self, prospective, spec):
+        return prospective == self._coerce_version(spec)
+
+    def _compare_not_equal(self, prospective, spec):
+        return prospective != self._coerce_version(spec)
+
+    def _compare_less_than_equal(self, prospective, spec):
+        return prospective <= self._coerce_version(spec)
+
+    def _compare_greater_than_equal(self, prospective, spec):
+        return prospective >= self._coerce_version(spec)
+
+    def _compare_less_than(self, prospective, spec):
+        return prospective < self._coerce_version(spec)
+
+    def _compare_greater_than(self, prospective, spec):
+        return prospective > self._coerce_version(spec)
+
+
+def _require_version_compare(fn):
+    @functools.wraps(fn)
+    def wrapped(self, prospective, spec):
+        if not isinstance(prospective, Version):
+            return False
+        return fn(self, prospective, spec)
+    return wrapped
+
+
+class Specifier(_IndividualSpecifier):
+
+    _regex_str = (
+        r"""
+        (?P<operator>(~=|==|!=|<=|>=|<|>|===))
+        (?P<version>
+            (?:
+                # The identity operators allow for an escape hatch that will
+                # do an exact string match of the version you wish to install.
+                # This will not be parsed by PEP 440 and we cannot determine
+                # any semantic meaning from it. This operator is discouraged
+                # but included entirely as an escape hatch.
+                (?<====)  # Only match for the identity operator
+                \s*
+                [^\s]*    # We just match everything, except for whitespace
+                          # since we are only testing for strict identity.
+            )
+            |
+            (?:
+                # The (non)equality operators allow for wild card and local
+                # versions to be specified so we have to define these two
+                # operators separately to enable that.
+                (?<===|!=)            # Only match for equals and not equals
+
+                \s*
+                v?
+                (?:[0-9]+!)?          # epoch
+                [0-9]+(?:\.[0-9]+)*   # release
+                (?:                   # pre release
+                    [-_\.]?
+                    (a|b|c|rc|alpha|beta|pre|preview)
+                    [-_\.]?
+                    [0-9]*
+                )?
+                (?:                   # post release
+                    (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
+                )?
+
+                # You cannot use a wild card and a dev or local version
+                # together so group them with a | and make them optional.
+                (?:
+                    (?:[-_\.]?dev[-_\.]?[0-9]*)?         # dev release
+                    (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local
+                    |
+                    \.\*  # Wild card syntax of .*
+                )?
+            )
+            |
+            (?:
+                # The compatible operator requires at least two digits in the
+                # release segment.
+                (?<=~=)               # Only match for the compatible operator
+
+                \s*
+                v?
+                (?:[0-9]+!)?          # epoch
+                [0-9]+(?:\.[0-9]+)+   # release  (We have a + instead of a *)
+                (?:                   # pre release
+                    [-_\.]?
+                    (a|b|c|rc|alpha|beta|pre|preview)
+                    [-_\.]?
+                    [0-9]*
+                )?
+                (?:                                   # post release
+                    (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
+                )?
+                (?:[-_\.]?dev[-_\.]?[0-9]*)?          # dev release
+            )
+            |
+            (?:
+                # All other operators only allow a sub set of what the
+                # (non)equality operators do. Specifically they do not allow
+                # local versions to be specified nor do they allow the prefix
+                # matching wild cards.
+                (?<!==|!=|~=)         # We have special cases for these
+                                      # operators so we want to make sure they
+                                      # don't match here.
+
+                \s*
+                v?
+                (?:[0-9]+!)?          # epoch
+                [0-9]+(?:\.[0-9]+)*   # release
+                (?:                   # pre release
+                    [-_\.]?
+                    (a|b|c|rc|alpha|beta|pre|preview)
+                    [-_\.]?
+                    [0-9]*
+                )?
+                (?:                                   # post release
+                    (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*)
+                )?
+                (?:[-_\.]?dev[-_\.]?[0-9]*)?          # dev release
+            )
+        )
+        """
+    )
+
+    _regex = re.compile(
+        r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE)
+
+    _operators = {
+        "~=": "compatible",
+        "==": "equal",
+        "!=": "not_equal",
+        "<=": "less_than_equal",
+        ">=": "greater_than_equal",
+        "<": "less_than",
+        ">": "greater_than",
+        "===": "arbitrary",
+    }
+
+    @_require_version_compare
+    def _compare_compatible(self, prospective, spec):
+        # Compatible releases have an equivalent combination of >= and ==. That
+        # is, ~=2.2 is equivalent to >=2.2,==2.*. This allows us to
+        # implement this in terms of the other specifiers instead of
+        # implementing it ourselves. The only thing we need to do is construct
+        # the other specifiers.
+
+        # We want everything but the last item in the version, but we want to
+        # ignore post and dev releases and we want to treat the pre-release as
+        # its own separate segment.
+        prefix = ".".join(
+            list(
+                itertools.takewhile(
+                    lambda x: (not x.startswith("post") and not
+                               x.startswith("dev")),
+                    _version_split(spec),
+                )
+            )[:-1]
+        )
+
+        # Add the prefix notation to the end of our string
+        prefix += ".*"
+
+        return (self._get_operator(">=")(prospective, spec) and
+                self._get_operator("==")(prospective, prefix))
+
+    @_require_version_compare
+    def _compare_equal(self, prospective, spec):
+        # We need special logic to handle prefix matching
+        if spec.endswith(".*"):
+            # In the case of prefix matching we want to ignore the local segment.
+            prospective = Version(prospective.public)
+            # Split the spec out by dots, and pretend that there is an implicit
+            # dot in between a release segment and a pre-release segment.
+            spec = _version_split(spec[:-2])  # Remove the trailing .*
+
+            # Split the prospective version out by dots, and pretend that there
+            # is an implicit dot in between a release segment and a pre-release
+            # segment.
+            prospective = _version_split(str(prospective))
+
+            # Shorten the prospective version to be the same length as the spec
+            # so that we can determine if the specifier is a prefix of the
+            # prospective version or not.
+            prospective = prospective[:len(spec)]
+
+            # Pad out our two sides with zeros so that they both equal the same
+            # length.
+            spec, prospective = _pad_version(spec, prospective)
+        else:
+            # Convert our spec string into a Version
+            spec = Version(spec)
+
+            # If the specifier does not have a local segment, then we want to
+            # act as if the prospective version also does not have a local
+            # segment.
+            if not spec.local:
+                prospective = Version(prospective.public)
+
+        return prospective == spec
+
+    @_require_version_compare
+    def _compare_not_equal(self, prospective, spec):
+        return not self._compare_equal(prospective, spec)
+
+    @_require_version_compare
+    def _compare_less_than_equal(self, prospective, spec):
+        return prospective <= Version(spec)
+
+    @_require_version_compare
+    def _compare_greater_than_equal(self, prospective, spec):
+        return prospective >= Version(spec)
+
+    @_require_version_compare
+    def _compare_less_than(self, prospective, spec):
+        # Convert our spec to a Version instance, since we'll want to work with
+        # it as a version.
+        spec = Version(spec)
+
+        # Check to see if the prospective version is less than the spec
+        # version. If it's not we can short circuit and just return False now
+        # instead of doing extra unneeded work.
+        if not prospective < spec:
+            return False
+
+        # This special case is here so that, unless the specifier itself
+        # includes a pre-release version, we do not accept pre-release
+        # versions for the version mentioned in the specifier (e.g. <3.1 should
+        # not match 3.1.dev0, but should match 3.0.dev0).
+        if not spec.is_prerelease and prospective.is_prerelease:
+            if Version(prospective.base_version) == Version(spec.base_version):
+                return False
+
+        # If we've gotten to here, it means that prospective version is both
+        # less than the spec version *and* it's not a pre-release of the same
+        # version in the spec.
+        return True
+
+    @_require_version_compare
+    def _compare_greater_than(self, prospective, spec):
+        # Convert our spec to a Version instance, since we'll want to work with
+        # it as a version.
+        spec = Version(spec)
+
+        # Check to see if the prospective version is greater than the spec
+        # version. If it's not we can short circuit and just return False now
+        # instead of doing extra unneeded work.
+        if not prospective > spec:
+            return False
+
+        # This special case is here so that, unless the specifier itself
+        # includes a post-release version, we do not accept
+        # post-release versions for the version mentioned in the specifier
+        # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0).
+        if not spec.is_postrelease and prospective.is_postrelease:
+            if Version(prospective.base_version) == Version(spec.base_version):
+                return False
+
+        # Ensure that we do not allow a local version of the version mentioned
+        # in the specifier, which is technically greater than, to match.
+        if prospective.local is not None:
+            if Version(prospective.base_version) == Version(spec.base_version):
+                return False
+
+        # If we've gotten to here, it means that the prospective version is
+        # greater than the spec version *and* it's not a post-release or local
+        # version of the same base version in the spec.
+        return True
+
+    def _compare_arbitrary(self, prospective, spec):
+        return str(prospective).lower() == str(spec).lower()
+
+    @property
+    def prereleases(self):
+        # If there is an explicit prereleases set for this, then we'll just
+        # blindly use that.
+        if self._prereleases is not None:
+            return self._prereleases
+
+        # Look at this specifier and determine if it uses an inclusive
+        # operator, and if it does, whether it includes an explicit
+        # prerelease.
+        operator, version = self._spec
+        if operator in ["==", ">=", "<=", "~=", "==="]:
+            # The == specifier can include a trailing .*; if it does, we
+            # want to remove it before parsing.
+            if operator == "==" and version.endswith(".*"):
+                version = version[:-2]
+
+            # Parse the version, and if it is a pre-release then this
+            # specifier allows pre-releases.
+            if parse(version).is_prerelease:
+                return True
+
+        return False
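+    # For example (illustrative): only the inclusive operators can opt a
+    # single specifier into pre-releases:
+    #
+    #     Specifier(">=1.0.dev1").prereleases -> True
+    #     Specifier(">1.0.dev1").prereleases  -> False
+    #     Specifier(">=1.0").prereleases      -> False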
+
+    @prereleases.setter
+    def prereleases(self, value):
+        self._prereleases = value
+
+
+_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$")
+
+
+def _version_split(version):
+    result = []
+    for item in version.split("."):
+        match = _prefix_regex.search(item)
+        if match:
+            result.extend(match.groups())
+        else:
+            result.append(item)
+    return result
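+# For example (illustrative): a pre-release fused onto a release component is
+# split out into its own pseudo-segment:
+#
+#     _version_split("1.2rc1")    -> ["1", "2", "rc1"]
+#     _version_split("2.0.post1") -> ["2", "0", "post1"]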
+
+
+def _pad_version(left, right):
+    left_split, right_split = [], []
+
+    # Get the release segment of our versions
+    left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left)))
+    right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right)))
+
+    # Get the rest of our versions
+    left_split.append(left[len(left_split[0]):])
+    right_split.append(right[len(right_split[0]):])
+
+    # Insert our padding
+    left_split.insert(
+        1,
+        ["0"] * max(0, len(right_split[0]) - len(left_split[0])),
+    )
+    right_split.insert(
+        1,
+        ["0"] * max(0, len(left_split[0]) - len(right_split[0])),
+    )
+
+    return (
+        list(itertools.chain(*left_split)),
+        list(itertools.chain(*right_split)),
+    )
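+# For example (illustrative): the shorter release segment is zero-padded so
+# that both sides can be compared element for element:
+#
+#     _pad_version(["1", "2"], ["1", "2", "3", "rc1"])
+#         -> (["1", "2", "0"], ["1", "2", "3", "rc1"])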
+
+
+class SpecifierSet(BaseSpecifier):
+
+    def __init__(self, specifiers="", prereleases=None):
+        # Split on , to break each individual specifier into its own item, and
+        # strip each item to remove leading/trailing whitespace.
+        specifiers = [s.strip() for s in specifiers.split(",") if s.strip()]
+
+        # Parse each individual specifier, attempting first to make it a
+        # Specifier and falling back to a LegacySpecifier.
+        parsed = set()
+        for specifier in specifiers:
+            try:
+                parsed.add(Specifier(specifier))
+            except InvalidSpecifier:
+                parsed.add(LegacySpecifier(specifier))
+
+        # Turn our parsed specifiers into a frozen set and save them for later.
+        self._specs = frozenset(parsed)
+
+        # Store our prereleases value so we can use it later to determine if
+        # we accept prereleases or not.
+        self._prereleases = prereleases
+
+    def __repr__(self):
+        pre = (
+            ", prereleases={0!r}".format(self.prereleases)
+            if self._prereleases is not None
+            else ""
+        )
+
+        return "<SpecifierSet({0!r}{1})>".format(str(self), pre)
+
+    def __str__(self):
+        return ",".join(sorted(str(s) for s in self._specs))
+
+    def __hash__(self):
+        return hash(self._specs)
+
+    def __and__(self, other):
+        if isinstance(other, string_types):
+            other = SpecifierSet(other)
+        elif not isinstance(other, SpecifierSet):
+            return NotImplemented
+
+        specifier = SpecifierSet()
+        specifier._specs = frozenset(self._specs | other._specs)
+
+        if self._prereleases is None and other._prereleases is not None:
+            specifier._prereleases = other._prereleases
+        elif self._prereleases is not None and other._prereleases is None:
+            specifier._prereleases = self._prereleases
+        elif self._prereleases == other._prereleases:
+            specifier._prereleases = self._prereleases
+        else:
+            raise ValueError(
+                "Cannot combine SpecifierSets with True and False prerelease "
+                "overrides."
+            )
+
+        return specifier
+
+    def __eq__(self, other):
+        if isinstance(other, string_types):
+            other = SpecifierSet(other)
+        elif isinstance(other, _IndividualSpecifier):
+            other = SpecifierSet(str(other))
+        elif not isinstance(other, SpecifierSet):
+            return NotImplemented
+
+        return self._specs == other._specs
+
+    def __ne__(self, other):
+        if isinstance(other, string_types):
+            other = SpecifierSet(other)
+        elif isinstance(other, _IndividualSpecifier):
+            other = SpecifierSet(str(other))
+        elif not isinstance(other, SpecifierSet):
+            return NotImplemented
+
+        return self._specs != other._specs
+
+    def __len__(self):
+        return len(self._specs)
+
+    def __iter__(self):
+        return iter(self._specs)
+
+    @property
+    def prereleases(self):
+        # If we have been given an explicit prerelease modifier, then we'll
+        # pass that through here.
+        if self._prereleases is not None:
+            return self._prereleases
+
+        # If we don't have any specifiers, and we don't have a forced value,
+        # then we'll just return None since we don't know if this should have
+        # pre-releases or not.
+        if not self._specs:
+            return None
+
+        # Otherwise we'll see if any of the given specifiers accept
+        # prereleases; if any of them do, we'll return True, otherwise False.
+        return any(s.prereleases for s in self._specs)
+
+    @prereleases.setter
+    def prereleases(self, value):
+        self._prereleases = value
+
+    def __contains__(self, item):
+        return self.contains(item)
+
+    def contains(self, item, prereleases=None):
+        # Ensure that our item is a Version or LegacyVersion instance.
+        if not isinstance(item, (LegacyVersion, Version)):
+            item = parse(item)
+
+        # Determine if we're forcing a prerelease or not. If we're not forcing
+        # one for this particular call, then we'll use whatever the
+        # SpecifierSet thinks about whether or not we should support prereleases.
+        if prereleases is None:
+            prereleases = self.prereleases
+
+        # We can determine if we're going to allow pre-releases by looking to
+        # see if any of the underlying items supports them. If none of them do
+        # and this item is a pre-release then we do not allow it and we can
+        # short circuit that here.
+        # Note: This means that 1.0.dev1 would not be contained in something
+        #       like >=1.0.devabc; however, it would be in >=1.0.devabc,>0.0.dev0
+        if not prereleases and item.is_prerelease:
+            return False
+
+        # We simply dispatch to the underlying specs here to make sure that the
+        # given version is contained within all of them.
+        # Note: This use of all() here means that an empty set of specifiers
+        #       will always return True, this is an explicit design decision.
+        return all(
+            s.contains(item, prereleases=prereleases)
+            for s in self._specs
+        )
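+    # For example (illustrative):
+    #
+    #     "1.4" in SpecifierSet(">=1.0,<2.0")                       -> True
+    #     SpecifierSet(">=1.0").contains("1.5a1")                   -> False
+    #     SpecifierSet(">=1.0").contains("1.5a1", prereleases=True) -> True
+    #     SpecifierSet("").contains("anything at all")              -> True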
+
+    def filter(self, iterable, prereleases=None):
+        # Determine if we're forcing a prerelease or not. If we're not forcing
+        # one for this particular filter call, then we'll use whatever the
+        # SpecifierSet thinks about whether or not we should support prereleases.
+        if prereleases is None:
+            prereleases = self.prereleases
+
+        # If we have any specifiers, then we want to wrap our iterable in the
+        # filter method for each one; this will act as a logical AND amongst
+        # the specifiers.
+        if self._specs:
+            for spec in self._specs:
+                iterable = spec.filter(iterable, prereleases=bool(prereleases))
+            return iterable
+        # If we do not have any specifiers, then we need to have a rough filter
+        # which will filter out any pre-releases, unless there are no final
+        # releases, and which will filter out LegacyVersion in general.
+        else:
+            filtered = []
+            found_prereleases = []
+
+            for item in iterable:
+                # Ensure that we have some kind of Version class for this item.
+                if not isinstance(item, (LegacyVersion, Version)):
+                    parsed_version = parse(item)
+                else:
+                    parsed_version = item
+
+                # Filter out any item which is parsed as a LegacyVersion
+                if isinstance(parsed_version, LegacyVersion):
+                    continue
+
+                # Store any item which is a pre-release for later unless we've
+                # already found a final version or we are accepting prereleases
+                if parsed_version.is_prerelease and not prereleases:
+                    if not filtered:
+                        found_prereleases.append(item)
+                else:
+                    filtered.append(item)
+
+            # If we've found no items except for pre-releases, then we'll go
+            # ahead and use the pre-releases
+            if not filtered and found_prereleases and prereleases is None:
+                return found_prereleases
+
+            return filtered
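+    # For example (illustrative): with no specifiers at all, filter() still
+    # drops LegacyVersion-style strings and, where possible, pre-releases:
+    #
+    #     list(SpecifierSet("").filter(["1.0", "2.0a1", "not a version"]))
+    #         -> ["1.0"]
+    #     list(SpecifierSet("").filter(["2.0a1"]))
+    #         -> ["2.0a1"]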
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/utils.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..942387cef5d75f299a769b1eb43b6c7679e7a3a0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/utils.py
@@ -0,0 +1,14 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import re
+
+
+_canonicalize_regex = re.compile(r"[-_.]+")
+
+
+def canonicalize_name(name):
+    # This is taken from PEP 503.
+    return _canonicalize_regex.sub("-", name).lower()
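+# For example (illustrative): runs of ".", "-" and "_" collapse to a single
+# "-" and the result is lower-cased:
+#
+#     canonicalize_name("Foo__Bar.baz") -> "foo-bar-baz"
+#     canonicalize_name("requests")     -> "requests"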
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/version.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..83b5ee8c5efadf22ce2f16ff08c8a8d75f1eb5df
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/packaging/version.py
@@ -0,0 +1,393 @@
+# This file is dual licensed under the terms of the Apache License, Version
+# 2.0, and the BSD License. See the LICENSE file in the root of this repository
+# for complete details.
+from __future__ import absolute_import, division, print_function
+
+import collections
+import itertools
+import re
+
+from ._structures import Infinity
+
+
+__all__ = [
+    "parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"
+]
+
+
+_Version = collections.namedtuple(
+    "_Version",
+    ["epoch", "release", "dev", "pre", "post", "local"],
+)
+
+
+def parse(version):
+    """
+    Parse the given version string and return either a :class:`Version` object
+    or a :class:`LegacyVersion` object depending on if the given version is
+    a valid PEP 440 version or a legacy version.
+    """
+    try:
+        return Version(version)
+    except InvalidVersion:
+        return LegacyVersion(version)
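+# For example (illustrative): a PEP 440 compliant string yields a Version,
+# anything else falls back to a LegacyVersion:
+#
+#     parse("1.0.post1")    -> <Version('1.0.post1')>
+#     parse("french toast") -> <LegacyVersion('french toast')>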
+
+
+class InvalidVersion(ValueError):
+    """
+    An invalid version was found; users should refer to PEP 440.
+    """
+
+
+class _BaseVersion(object):
+
+    def __hash__(self):
+        return hash(self._key)
+
+    def __lt__(self, other):
+        return self._compare(other, lambda s, o: s < o)
+
+    def __le__(self, other):
+        return self._compare(other, lambda s, o: s <= o)
+
+    def __eq__(self, other):
+        return self._compare(other, lambda s, o: s == o)
+
+    def __ge__(self, other):
+        return self._compare(other, lambda s, o: s >= o)
+
+    def __gt__(self, other):
+        return self._compare(other, lambda s, o: s > o)
+
+    def __ne__(self, other):
+        return self._compare(other, lambda s, o: s != o)
+
+    def _compare(self, other, method):
+        if not isinstance(other, _BaseVersion):
+            return NotImplemented
+
+        return method(self._key, other._key)
+
+
+class LegacyVersion(_BaseVersion):
+
+    def __init__(self, version):
+        self._version = str(version)
+        self._key = _legacy_cmpkey(self._version)
+
+    def __str__(self):
+        return self._version
+
+    def __repr__(self):
+        return "<LegacyVersion({0})>".format(repr(str(self)))
+
+    @property
+    def public(self):
+        return self._version
+
+    @property
+    def base_version(self):
+        return self._version
+
+    @property
+    def local(self):
+        return None
+
+    @property
+    def is_prerelease(self):
+        return False
+
+    @property
+    def is_postrelease(self):
+        return False
+
+
+_legacy_version_component_re = re.compile(
+    r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE,
+)
+
+_legacy_version_replacement_map = {
+    "pre": "c", "preview": "c", "-": "final-", "rc": "c", "dev": "@",
+}
+
+
+def _parse_version_parts(s):
+    for part in _legacy_version_component_re.split(s):
+        part = _legacy_version_replacement_map.get(part, part)
+
+        if not part or part == ".":
+            continue
+
+        if part[:1] in "0123456789":
+            # pad for numeric comparison
+            yield part.zfill(8)
+        else:
+            yield "*" + part
+
+    # ensure that alpha/beta/candidate are before final
+    yield "*final"
+
+
+def _legacy_cmpkey(version):
+    # We hardcode an epoch of -1 here. A PEP 440 version can only have an epoch
+    # greater than or equal to 0. This will effectively sort the LegacyVersion,
+    # which uses the de facto standard originally implemented by setuptools,
+    # before all PEP 440 versions.
+    epoch = -1
+
+    # This scheme is taken from pkg_resources.parse_version of setuptools prior
+    # to its adoption of the packaging library.
+    parts = []
+    for part in _parse_version_parts(version.lower()):
+        if part.startswith("*"):
+            # remove "-" before a prerelease tag
+            if part < "*final":
+                while parts and parts[-1] == "*final-":
+                    parts.pop()
+
+            # remove trailing zeros from each series of numeric parts
+            while parts and parts[-1] == "00000000":
+                parts.pop()
+
+        parts.append(part)
+    parts = tuple(parts)
+
+    return epoch, parts
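+# For example (illustrative): the hardcoded -1 epoch puts every legacy version
+# before every PEP 440 version, and trailing zeros are ignored:
+#
+#     parse("french toast") < parse("0.0.1.dev0") -> True
+#     LegacyVersion("2.0") == LegacyVersion("2")  -> True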
+
+# Deliberately not anchored to the start and end of the string, to make it
+# easier for 3rd party code to reuse
+VERSION_PATTERN = r"""
+    v?
+    (?:
+        (?:(?P<epoch>[0-9]+)!)?                           # epoch
+        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
+        (?P<pre>                                          # pre-release
+            [-_\.]?
+            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P<pre_n>[0-9]+)?
+        )?
+        (?P<post>                                         # post release
+            (?:-(?P<post_n1>[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?P<post_l>post|rev|r)
+                [-_\.]?
+                (?P<post_n2>[0-9]+)?
+            )
+        )?
+        (?P<dev>                                          # dev release
+            [-_\.]?
+            (?P<dev_l>dev)
+            [-_\.]?
+            (?P<dev_n>[0-9]+)?
+        )?
+    )
+    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+
+class Version(_BaseVersion):
+
+    _regex = re.compile(
+        r"^\s*" + VERSION_PATTERN + r"\s*$",
+        re.VERBOSE | re.IGNORECASE,
+    )
+
+    def __init__(self, version):
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion("Invalid version: '{0}'".format(version))
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(
+                match.group("pre_l"),
+                match.group("pre_n"),
+            ),
+            post=_parse_letter_version(
+                match.group("post_l"),
+                match.group("post_n1") or match.group("post_n2"),
+            ),
+            dev=_parse_letter_version(
+                match.group("dev_l"),
+                match.group("dev_n"),
+            ),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self):
+        return "<Version({0})>".format(repr(str(self)))
+
+    def __str__(self):
+        parts = []
+
+        # Epoch
+        if self._version.epoch != 0:
+            parts.append("{0}!".format(self._version.epoch))
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self._version.release))
+
+        # Pre-release
+        if self._version.pre is not None:
+            parts.append("".join(str(x) for x in self._version.pre))
+
+        # Post-release
+        if self._version.post is not None:
+            parts.append(".post{0}".format(self._version.post[1]))
+
+        # Development release
+        if self._version.dev is not None:
+            parts.append(".dev{0}".format(self._version.dev[1]))
+
+        # Local version segment
+        if self._version.local is not None:
+            parts.append(
+                "+{0}".format(".".join(str(x) for x in self._version.local))
+            )
+
+        return "".join(parts)
+
+    @property
+    def public(self):
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self):
+        parts = []
+
+        # Epoch
+        if self._version.epoch != 0:
+            parts.append("{0}!".format(self._version.epoch))
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self._version.release))
+
+        return "".join(parts)
+
+    @property
+    def local(self):
+        version_string = str(self)
+        if "+" in version_string:
+            return version_string.split("+", 1)[1]
+
+    @property
+    def is_prerelease(self):
+        return bool(self._version.dev or self._version.pre)
+
+    @property
+    def is_postrelease(self):
+        return bool(self._version.post)
+
+
+def _parse_letter_version(letter, number):
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume that if we are given a number but not a letter, then this
+        # is using the implicit post release syntax (e.g. 1.0-1).
+        letter = "post"
+
+        return letter, int(number)
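+# For example (illustrative): alternate spellings are normalized and a bare
+# number is treated as an implicit post release:
+#
+#     _parse_letter_version("alpha", None) -> ("a", 0)
+#     _parse_letter_version("rev", "2")    -> ("post", 2)
+#     _parse_letter_version(None, "1")     -> ("post", 1)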
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local):
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+
+
+def _cmpkey(epoch, release, pre, post, dev, local):
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll reverse the list, drop all the now
+    # leading zeros until we come to something non-zero, then re-reverse the
+    # rest back into the correct order, make it a tuple, and use that as our
+    # sorting key.
+    release = tuple(
+        reversed(list(
+            itertools.dropwhile(
+                lambda x: x == 0,
+                reversed(release),
+            )
+        ))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        pre = -Infinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        pre = Infinity
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        post = -Infinity
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        dev = Infinity
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        local = -Infinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP 440.
+        # - Alphanumeric segments sort before numeric segments
+        # - Alphanumeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        local = tuple(
+            (i, "") if isinstance(i, int) else (-Infinity, i)
+            for i in local
+        )
+
+    return epoch, release, pre, post, dev, local
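+# For example (illustrative): the resulting keys order versions as PEP 440
+# prescribes:
+#
+#     sorted(Version(v) for v in ["1.0.post1", "1.0", "1.0a1", "1.0.dev0"])
+#         -> [1.0.dev0, 1.0a1, 1.0, 1.0.post1]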
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/pyparsing.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/pyparsing.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb46d411ab9212b54eaab6163e0feb3b39609e7c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/pyparsing.py
@@ -0,0 +1,5696 @@
+# module pyparsing.py
+#
+# Copyright (c) 2003-2016  Paul T. McGuire
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+__doc__ = \
+"""
+pyparsing module - Classes and methods to define and execute parsing grammars
+
+The pyparsing module is an alternative approach to creating and executing simple grammars,
+vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
+don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+provides a library of classes that you use to construct the grammar directly in Python.
+
+Here is a program to parse "Hello, World!" (or any greeting of the form 
+C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements 
+(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
+L{Literal} expressions)::
+
+    from pyparsing import Word, alphas
+
+    # define grammar of a greeting
+    greet = Word(alphas) + "," + Word(alphas) + "!"
+
+    hello = "Hello, World!"
+    print (hello, "->", greet.parseString(hello))
+
+The program outputs the following::
+
+    Hello, World! -> ['Hello', ',', 'World', '!']
+
+The Python representation of the grammar is quite readable, owing to the self-explanatory
+class names, and the use of '+', '|' and '^' operators.
+
+The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
+object with named attributes.
+
+The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
+ - quoted strings
+ - embedded comments
+"""
+
+__version__ = "2.1.10"
+__versionTime__ = "07 Oct 2016 01:31 UTC"
+__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
+
+import string
+from weakref import ref as wkref
+import copy
+import sys
+import warnings
+import re
+import sre_constants
+import collections
+import pprint
+import traceback
+import types
+from datetime import datetime
+
+try:
+    from _thread import RLock
+except ImportError:
+    from threading import RLock
+
+try:
+    from collections import OrderedDict as _OrderedDict
+except ImportError:
+    try:
+        from ordereddict import OrderedDict as _OrderedDict
+    except ImportError:
+        _OrderedDict = None
+
+#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+
+__all__ = [
+'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
+'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
+'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
+'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
+'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
+'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 
+'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
+'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
+'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
+'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
+'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
+'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
+'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
+'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 
+'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
+'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
+'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
+'CloseMatch', 'tokenMap', 'pyparsing_common',
+]
+
+system_version = tuple(sys.version_info)[:3]
+PY_3 = system_version[0] == 3
+if PY_3:
+    _MAX_INT = sys.maxsize
+    basestring = str
+    unichr = chr
+    _ustr = str
+
+    # build list of single arg builtins, that can be used as parse actions
+    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
+
+else:
+    _MAX_INT = sys.maxint
+    range = xrange
+
+    def _ustr(obj):
+        """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
+           str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
+           then < returns the unicode object | encodes it with the default encoding | ... >.
+        """
+        if isinstance(obj,unicode):
+            return obj
+
+        try:
+            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
+            # it won't break any existing code.
+            return str(obj)
+
+        except UnicodeEncodeError:
+            # Else encode it
+            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
+            xmlcharref = Regex('&#\d+;')
+            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
+            return xmlcharref.transformString(ret)
+
+    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
+    singleArgBuiltins = []
+    import __builtin__
+    for fname in "sum len sorted reversed list tuple set any all min max".split():
+        try:
+            singleArgBuiltins.append(getattr(__builtin__,fname))
+        except AttributeError:
+            continue
+            
+_generatorType = type((y for y in range(1)))
+ 
+def _xml_escape(data):
+    """Escape &, <, >, ", ', etc. in a string of data."""
+
+    # ampersand must be replaced first
+    from_symbols = '&><"\''
+    to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
+    for from_,to_ in zip(from_symbols, to_symbols):
+        data = data.replace(from_, to_)
+    return data
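+# For example (illustrative):
+#
+#     _xml_escape('a < b & "c"') -> 'a &lt; b &amp; &quot;c&quot;'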
+
+class _Constants(object):
+    pass
+
+alphas     = string.ascii_uppercase + string.ascii_lowercase
+nums       = "0123456789"
+hexnums    = nums + "ABCDEFabcdef"
+alphanums  = alphas + nums
+_bslash    = chr(92)
+printables = "".join(c for c in string.printable if c not in string.whitespace)
+
+class ParseBaseException(Exception):
+    """base exception class for all parsing runtime exceptions"""
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, pstr, loc=0, msg=None, elem=None ):
+        self.loc = loc
+        if msg is None:
+            self.msg = pstr
+            self.pstr = ""
+        else:
+            self.msg = msg
+            self.pstr = pstr
+        self.parserElement = elem
+        self.args = (pstr, loc, msg)
+
+    @classmethod
+    def _from_exception(cls, pe):
+        """
+        internal factory method to simplify creating one type of ParseException 
+        from another - avoids having __init__ signature conflicts among subclasses
+        """
+        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
+
+    def __getattr__( self, aname ):
+        """supported attributes by name are:
+            - lineno - returns the line number of the exception text
+            - col - returns the column number of the exception text
+            - line - returns the line containing the exception text
+        """
+        if( aname == "lineno" ):
+            return lineno( self.loc, self.pstr )
+        elif( aname in ("col", "column") ):
+            return col( self.loc, self.pstr )
+        elif( aname == "line" ):
+            return line( self.loc, self.pstr )
+        else:
+            raise AttributeError(aname)
+
+    def __str__( self ):
+        return "%s (at char %d), (line:%d, col:%d)" % \
+                ( self.msg, self.loc, self.lineno, self.column )
+    def __repr__( self ):
+        return _ustr(self)
+    def markInputline( self, markerString = ">!<" ):
+        """Extracts the exception line from the input string, and marks
+           the location of the exception with a special symbol.
+        """
+        line_str = self.line
+        line_column = self.column - 1
+        if markerString:
+            line_str = "".join((line_str[:line_column],
+                                markerString, line_str[line_column:]))
+        return line_str.strip()
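+    # For example (illustrative): if parsing "ABC" with Word(nums) raised pe,
+    # then pe.markInputline() would return '>!<ABC', while
+    # pe.markInputline("") would return just 'ABC'.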
+    def __dir__(self):
+        return "lineno col line".split() + dir(type(self))
+
+class ParseException(ParseBaseException):
+    """
+    Exception thrown when parse expressions don't match the input string;
+    supported attributes by name are:
+     - lineno - returns the line number of the exception text
+     - col - returns the column number of the exception text
+     - line - returns the line containing the exception text
+        
+    Example::
+        try:
+            Word(nums).setName("integer").parseString("ABC")
+        except ParseException as pe:
+            print(pe)
+            print("column: {}".format(pe.col))
+            
+    prints::
+        Expected integer (at char 0), (line:1, col:1)
+        column: 1
+    """
+    pass
+
+class ParseFatalException(ParseBaseException):
+    """user-throwable exception thrown when inconsistent parse content
+       is found; stops all parsing immediately"""
+    pass
+
+class ParseSyntaxException(ParseFatalException):
+    """just like L{ParseFatalException}, but thrown internally when an
+       L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop 
+       immediately because an unbacktrackable syntax error has been found"""
+    pass
+
+#~ class ReparseException(ParseBaseException):
+    #~ """Experimental class - parse actions can raise this exception to cause
+       #~ pyparsing to reparse the input string:
+        #~ - with a modified input string, and/or
+        #~ - with a modified start location
+       #~ Set the values of the ReparseException in the constructor, and raise the
+       #~ exception in a parse action to cause pyparsing to use the new string/location.
+       #~ Setting the values as None causes no change to be made.
+       #~ """
+    #~ def __init_( self, newstring, restartLoc ):
+        #~ self.newParseText = newstring
+        #~ self.reparseLoc = restartLoc
+
+class RecursiveGrammarException(Exception):
+    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
+    def __init__( self, parseElementList ):
+        self.parseElementTrace = parseElementList
+
+    def __str__( self ):
+        return "RecursiveGrammarException: %s" % self.parseElementTrace
+
+class _ParseResultsWithOffset(object):
+    def __init__(self,p1,p2):
+        self.tup = (p1,p2)
+    def __getitem__(self,i):
+        return self.tup[i]
+    def __repr__(self):
+        return repr(self.tup[0])
+    def setOffset(self,i):
+        self.tup = (self.tup[0],i)
+
+class ParseResults(object):
+    """
+    Structured parse results, to provide multiple means of access to the parsed data:
+       - as a list (C{len(results)})
+       - by list index (C{results[0], results[1]}, etc.)
+       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
+
+    Example::
+        integer = Word(nums)
+        date_str = (integer.setResultsName("year") + '/' 
+                        + integer.setResultsName("month") + '/' 
+                        + integer.setResultsName("day"))
+        # equivalent form:
+        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+
+        # parseString returns a ParseResults object
+        result = date_str.parseString("1999/12/31")
+
+        def test(s, fn=repr):
+            print("%s -> %s" % (s, fn(eval(s))))
+        test("list(result)")
+        test("result[0]")
+        test("result['month']")
+        test("result.day")
+        test("'month' in result")
+        test("'minutes' in result")
+        test("result.dump()", str)
+    prints::
+        list(result) -> ['1999', '/', '12', '/', '31']
+        result[0] -> '1999'
+        result['month'] -> '12'
+        result.day -> '31'
+        'month' in result -> True
+        'minutes' in result -> False
+        result.dump() -> ['1999', '/', '12', '/', '31']
+        - day: 31
+        - month: 12
+        - year: 1999
+    """
+    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
+        if isinstance(toklist, cls):
+            return toklist
+        retobj = object.__new__(cls)
+        retobj.__doinit = True
+        return retobj
+
+    # Performance tuning: we construct a *lot* of these, so keep this
+    # constructor as small and fast as possible
+    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
+        if self.__doinit:
+            self.__doinit = False
+            self.__name = None
+            self.__parent = None
+            self.__accumNames = {}
+            self.__asList = asList
+            self.__modal = modal
+            if toklist is None:
+                toklist = []
+            if isinstance(toklist, list):
+                self.__toklist = toklist[:]
+            elif isinstance(toklist, _generatorType):
+                self.__toklist = list(toklist)
+            else:
+                self.__toklist = [toklist]
+            self.__tokdict = dict()
+
+        if name is not None and name:
+            if not modal:
+                self.__accumNames[name] = 0
+            if isinstance(name,int):
+                name = _ustr(name) # will always return a str, but use _ustr for consistency
+            self.__name = name
+            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
+                if isinstance(toklist,basestring):
+                    toklist = [ toklist ]
+                if asList:
+                    if isinstance(toklist,ParseResults):
+                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
+                    else:
+                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
+                    self[name].__name = name
+                else:
+                    try:
+                        self[name] = toklist[0]
+                    except (KeyError,TypeError,IndexError):
+                        self[name] = toklist
+
+    def __getitem__( self, i ):
+        if isinstance( i, (int,slice) ):
+            return self.__toklist[i]
+        else:
+            if i not in self.__accumNames:
+                return self.__tokdict[i][-1][0]
+            else:
+                return ParseResults([ v[0] for v in self.__tokdict[i] ])
+
+    def __setitem__( self, k, v, isinstance=isinstance ):
+        if isinstance(v,_ParseResultsWithOffset):
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
+            sub = v[0]
+        elif isinstance(k,(int,slice)):
+            self.__toklist[k] = v
+            sub = v
+        else:
+            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
+            sub = v
+        if isinstance(sub,ParseResults):
+            sub.__parent = wkref(self)
+
+    def __delitem__( self, i ):
+        if isinstance(i,(int,slice)):
+            mylen = len( self.__toklist )
+            del self.__toklist[i]
+
+            # convert int to slice
+            if isinstance(i, int):
+                if i < 0:
+                    i += mylen
+                i = slice(i, i+1)
+            # get removed indices
+            removed = list(range(*i.indices(mylen)))
+            removed.reverse()
+            # fixup indices in token dictionary
+            for name,occurrences in self.__tokdict.items():
+                for j in removed:
+                    for k, (value, position) in enumerate(occurrences):
+                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
+        else:
+            del self.__tokdict[i]
+
+    def __contains__( self, k ):
+        return k in self.__tokdict
+
+    def __len__( self ): return len( self.__toklist )
+    def __bool__(self): return ( not not self.__toklist )
+    __nonzero__ = __bool__
+    def __iter__( self ): return iter( self.__toklist )
+    def __reversed__( self ): return iter( self.__toklist[::-1] )
+    def _iterkeys( self ):
+        if hasattr(self.__tokdict, "iterkeys"):
+            return self.__tokdict.iterkeys()
+        else:
+            return iter(self.__tokdict)
+
+    def _itervalues( self ):
+        return (self[k] for k in self._iterkeys())
+            
+    def _iteritems( self ):
+        return ((k, self[k]) for k in self._iterkeys())
+
+    if PY_3:
+        keys = _iterkeys       
+        """Returns an iterator of all named result keys (Python 3.x only)."""
+
+        values = _itervalues
+        """Returns an iterator of all named result values (Python 3.x only)."""
+
+        items = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
+
+    else:
+        iterkeys = _iterkeys
+        """Returns an iterator of all named result keys (Python 2.x only)."""
+
+        itervalues = _itervalues
+        """Returns an iterator of all named result values (Python 2.x only)."""
+
+        iteritems = _iteritems
+        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
+
+        def keys( self ):
+            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iterkeys())
+
+        def values( self ):
+            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.itervalues())
+                
+        def items( self ):
+            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
+            return list(self.iteritems())
+
+    def haskeys( self ):
+        """Since keys() returns an iterator, this method is helpful in bypassing
+           code that looks for the existence of any defined results names."""
+        return bool(self.__tokdict)
+        
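+    # For example (a sketch; Word and nums come from later in this module):
+    #~ Word(nums)("num").parseString("42").haskeys()   # -> True
+    #~ Word(nums).parseString("42").haskeys()          # -> False
+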
+    def pop( self, *args, **kwargs):
+        """
+        Removes and returns item at specified index (default=C{last}).
+        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
+        argument or an integer argument, it will use C{list} semantics
+        and pop tokens from the list of parsed tokens. If passed a 
+        non-integer argument (most likely a string), it will use C{dict}
+        semantics and pop the corresponding value from any defined 
+        results names. A second default return value argument is 
+        supported, just as in C{dict.pop()}.
+
+        Example::
+            def remove_first(tokens):
+                tokens.pop(0)
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
+
+            label = Word(alphas)
+            patt = label("LABEL") + OneOrMore(Word(nums))
+            print(patt.parseString("AAB 123 321").dump())
+
+            # Use pop() in a parse action to remove named result (note that corresponding value is not
+            # removed from list form of results)
+            def remove_LABEL(tokens):
+                tokens.pop("LABEL")
+                return tokens
+            patt.addParseAction(remove_LABEL)
+            print(patt.parseString("AAB 123 321").dump())
+        prints::
+            ['AAB', '123', '321']
+            - LABEL: AAB
+
+            ['AAB', '123', '321']
+        """
+        if not args:
+            args = [-1]
+        for k,v in kwargs.items():
+            if k == 'default':
+                args = (args[0], v)
+            else:
+                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
+        if (isinstance(args[0], int) or 
+                        len(args) == 1 or 
+                        args[0] in self):
+            index = args[0]
+            ret = self[index]
+            del self[index]
+            return ret
+        else:
+            defaultvalue = args[1]
+            return defaultvalue
+
+    def get(self, key, defaultValue=None):
+        """
+        Returns named result matching the given key, or if there is no
+        such name, then returns the given C{defaultValue} or C{None} if no
+        C{defaultValue} is specified.
+
+        Similar to C{dict.get()}.
+        
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            result = date_str.parseString("1999/12/31")
+            print(result.get("year")) # -> '1999'
+            print(result.get("hour", "not specified")) # -> 'not specified'
+            print(result.get("hour")) # -> None
+        """
+        if key in self:
+            return self[key]
+        else:
+            return defaultValue
+
+    def insert( self, index, insStr ):
+        """
+        Inserts new element at location index in the list of parsed tokens.
+        
+        Similar to C{list.insert()}.
+
+        Example::
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+
+            # use a parse action to insert the parse location in the front of the parsed results
+            def insert_locn(locn, tokens):
+                tokens.insert(0, locn)
+            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
+        """
+        self.__toklist.insert(index, insStr)
+        # fixup indices in token dictionary
+        for name,occurrences in self.__tokdict.items():
+            for k, (value, position) in enumerate(occurrences):
+                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
+
+    def append( self, item ):
+        """
+        Add single element to end of ParseResults list of elements.
+
+        Example::
+            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
+            
+            # use a parse action to compute the sum of the parsed integers, and add it to the end
+            def append_sum(tokens):
+                tokens.append(sum(map(int, tokens)))
+            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
+        """
+        self.__toklist.append(item)
+
+    def extend( self, itemseq ):
+        """
+        Add sequence of elements to end of ParseResults list of elements.
+
+        Example::
+            patt = OneOrMore(Word(alphas))
+            
+            # use a parse action to append the reverse of the matched strings, to make a palindrome
+            def make_palindrome(tokens):
+                tokens.extend(reversed([t[::-1] for t in tokens]))
+                return ''.join(tokens)
+            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
+        """
+        if isinstance(itemseq, ParseResults):
+            self += itemseq
+        else:
+            self.__toklist.extend(itemseq)
+
+    def clear( self ):
+        """
+        Clear all elements and results names.
+        """
+        del self.__toklist[:]
+        self.__tokdict.clear()
+
+    def __getattr__( self, name ):
+        try:
+            return self[name]
+        except KeyError:
+            return ""
+
+    def __add__( self, other ):
+        ret = self.copy()
+        ret += other
+        return ret
+
+    def __iadd__( self, other ):
+        if other.__tokdict:
+            offset = len(self.__toklist)
+            addoffset = lambda a: offset if a<0 else a+offset
+            otheritems = other.__tokdict.items()
+            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
+                                for (k,vlist) in otheritems for v in vlist]
+            for k,v in otherdictitems:
+                self[k] = v
+                if isinstance(v[0],ParseResults):
+                    v[0].__parent = wkref(self)
+            
+        self.__toklist += other.__toklist
+        self.__accumNames.update( other.__accumNames )
+        return self
+
+    def __radd__(self, other):
+        if isinstance(other,int) and other == 0:
+            # useful for merging many ParseResults using sum() builtin
+            return self.copy()
+        else:
+            # this may raise a TypeError - so be it
+            return other + self
+        
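+    # Because of __radd__, the sum() builtin can merge a sequence of ParseResults,
+    # e.g. (a sketch assuming Word and nums from this module):
+    #~ parts = [Word(nums).parseString(s) for s in ("1", "2", "3")]
+    #~ print(sum(parts))    # expected to print ['1', '2', '3']
+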
+    def __repr__( self ):
+        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
+
+    def __str__( self ):
+        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
+
+    def _asStringList( self, sep='' ):
+        out = []
+        for item in self.__toklist:
+            if out and sep:
+                out.append(sep)
+            if isinstance( item, ParseResults ):
+                out += item._asStringList()
+            else:
+                out.append( _ustr(item) )
+        return out
+
+    def asList( self ):
+        """
+        Returns the parse results as a nested list of matching tokens, all converted to strings.
+
+        Example::
+            patt = OneOrMore(Word(alphas))
+            result = patt.parseString("sldkj lsdkj sldkj")
+            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
+            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
+            
+            # Use asList() to create an actual list
+            result_list = result.asList()
+            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
+        """
+        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
+
+    def asDict( self ):
+        """
+        Returns the named parse results as a nested dictionary.
+
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+            
+            result = date_str.parseString('12/31/1999')
+            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
+            
+            result_dict = result.asDict()
+            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
+
+            # even though a ParseResults supports dict-like access, sometimes you just need to have a dict
+            import json
+            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
+            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
+        """
+        if PY_3:
+            item_fn = self.items
+        else:
+            item_fn = self.iteritems
+            
+        def toItem(obj):
+            if isinstance(obj, ParseResults):
+                if obj.haskeys():
+                    return obj.asDict()
+                else:
+                    return [toItem(v) for v in obj]
+            else:
+                return obj
+                
+        return dict((k,toItem(v)) for k,v in item_fn())
+
+    def copy( self ):
+        """
+        Returns a new copy of a C{ParseResults} object.
+        """
+        ret = ParseResults( self.__toklist )
+        ret.__tokdict = self.__tokdict.copy()
+        ret.__parent = self.__parent
+        ret.__accumNames.update( self.__accumNames )
+        ret.__name = self.__name
+        return ret
+
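+    # copy() produces an independent ParseResults; mutating the copy leaves the
+    # original untouched (illustrative sketch):
+    #~ r = Word(nums)("num").parseString("42")
+    #~ r2 = r.copy()
+    #~ r2.append("extra")
+    #~ print(r2)    # -> ['42', 'extra']
+    #~ print(r)     # -> ['42']  (the original is unchanged)
+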
+    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
+        """
+        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
+        """
+        nl = "\n"
+        out = []
+        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
+                                                            for v in vlist)
+        nextLevelIndent = indent + "  "
+
+        # collapse out indents if formatting is not desired
+        if not formatted:
+            indent = ""
+            nextLevelIndent = ""
+            nl = ""
+
+        selfTag = None
+        if doctag is not None:
+            selfTag = doctag
+        else:
+            if self.__name:
+                selfTag = self.__name
+
+        if not selfTag:
+            if namedItemsOnly:
+                return ""
+            else:
+                selfTag = "ITEM"
+
+        out += [ nl, indent, "<", selfTag, ">" ]
+
+        for i,res in enumerate(self.__toklist):
+            if isinstance(res,ParseResults):
+                if i in namedItems:
+                    out += [ res.asXML(namedItems[i],
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+                else:
+                    out += [ res.asXML(None,
+                                        namedItemsOnly and doctag is None,
+                                        nextLevelIndent,
+                                        formatted)]
+            else:
+                # individual token, see if there is a name for it
+                resTag = None
+                if i in namedItems:
+                    resTag = namedItems[i]
+                if not resTag:
+                    if namedItemsOnly:
+                        continue
+                    else:
+                        resTag = "ITEM"
+                xmlBodyText = _xml_escape(_ustr(res))
+                out += [ nl, nextLevelIndent, "<", resTag, ">",
+                                                xmlBodyText,
+                                                "</", resTag, ">" ]
+
+        out += [ nl, indent, "</", selfTag, ">" ]
+        return "".join(out)
+
+    def __lookup(self,sub):
+        for k,vlist in self.__tokdict.items():
+            for v,loc in vlist:
+                if sub is v:
+                    return k
+        return None
+
+    def getName(self):
+        """
+        Returns the results name for this token expression. Useful when several 
+        different expressions might match at a particular location.
+
+        Example::
+            integer = Word(nums)
+            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
+            house_number_expr = Suppress('#') + Word(nums, alphanums)
+            user_data = (Group(house_number_expr)("house_number") 
+                        | Group(ssn_expr)("ssn")
+                        | Group(integer)("age"))
+            user_info = OneOrMore(user_data)
+            
+            result = user_info.parseString("22 111-22-3333 #221B")
+            for item in result:
+                print(item.getName(), ':', item[0])
+        prints::
+            age : 22
+            ssn : 111-22-3333
+            house_number : 221B
+        """
+        if self.__name:
+            return self.__name
+        elif self.__parent:
+            par = self.__parent()
+            if par:
+                return par.__lookup(self)
+            else:
+                return None
+        elif (len(self) == 1 and
+               len(self.__tokdict) == 1 and
+               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
+            return next(iter(self.__tokdict.keys()))
+        else:
+            return None
+
+    def dump(self, indent='', depth=0, full=True):
+        """
+        Diagnostic method for listing out the contents of a C{ParseResults}.
+        Accepts an optional C{indent} argument so that this string can be embedded
+        in a nested display of other data.
+
+        Example::
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+            
+            result = date_str.parseString('12/31/1999')
+            print(result.dump())
+        prints::
+            ['12', '/', '31', '/', '1999']
+            - day: 1999
+            - month: 31
+            - year: 12
+        """
+        out = []
+        NL = '\n'
+        out.append( indent+_ustr(self.asList()) )
+        if full:
+            if self.haskeys():
+                items = sorted((str(k), v) for k,v in self.items())
+                for k,v in items:
+                    if out:
+                        out.append(NL)
+                    out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
+                    if isinstance(v,ParseResults):
+                        if v:
+                            out.append( v.dump(indent,depth+1) )
+                        else:
+                            out.append(_ustr(v))
+                    else:
+                        out.append(repr(v))
+            elif any(isinstance(vv,ParseResults) for vv in self):
+                v = self
+                for i,vv in enumerate(v):
+                    if isinstance(vv,ParseResults):
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
+                    else:
+                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
+            
+        return "".join(out)
+
+    def pprint(self, *args, **kwargs):
+        """
+        Pretty-printer for parsed results as a list, using the C{pprint} module.
+        Accepts additional positional or keyword args as defined for the 
+        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
+
+        Example::
+            ident = Word(alphas, alphanums)
+            num = Word(nums)
+            func = Forward()
+            term = ident | num | Group('(' + func + ')')
+            func <<= ident + Group(Optional(delimitedList(term)))
+            result = func.parseString("fna a,b,(fnb c,d,200),100")
+            result.pprint(width=40)
+        prints::
+            ['fna',
+             ['a',
+              'b',
+              ['(', 'fnb', ['c', 'd', '200'], ')'],
+              '100']]
+        """
+        pprint.pprint(self.asList(), *args, **kwargs)
+
+    # add support for pickle protocol
+    def __getstate__(self):
+        return ( self.__toklist,
+                 ( self.__tokdict.copy(),
+                   self.__parent is not None and self.__parent() or None,
+                   self.__accumNames,
+                   self.__name ) )
+
+    def __setstate__(self,state):
+        self.__toklist = state[0]
+        (self.__tokdict,
+         par,
+         inAccumNames,
+         self.__name) = state[1]
+        self.__accumNames = {}
+        self.__accumNames.update(inAccumNames)
+        if par is not None:
+            self.__parent = wkref(par)
+        else:
+            self.__parent = None
+
+    def __getnewargs__(self):
+        return self.__toklist, self.__name, self.__asList, self.__modal
+
+    def __dir__(self):
+        return (dir(type(self)) + list(self.keys()))
+
+collections.MutableMapping.register(ParseResults)
+
+def col (loc,strg):
+    """Returns current column within a string, counting newlines as line separators.
+   The first column is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    s = strg
+    return 1 if 0<loc<len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc)
+
+def lineno(loc,strg):
+    """Returns current line number within a string, counting newlines as line separators.
+   The first line is number 1.
+
+   Note: the default parsing behavior is to expand tabs in the input string
+   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
+   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+   consistent view of the parsed string, the parse location, and line and column
+   positions within the parsed string.
+   """
+    return strg.count("\n",0,loc) + 1
+
+def line( loc, strg ):
+    """Returns the line of text containing loc within a string, counting newlines as line separators.
+       """
+    lastCR = strg.rfind("\n", 0, loc)
+    nextCR = strg.find("\n", loc)
+    if nextCR >= 0:
+        return strg[lastCR+1:nextCR]
+    else:
+        return strg[lastCR+1:]
+
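+# A small sketch of how col, lineno, and line relate for a given parse location
+# (the sample string and index below are illustrative only):
+#~ s = "abc\ndef"
+#~ lineno(5, s), col(5, s)    # -> (2, 2)   (loc 5 is the 'e' in "def")
+#~ line(5, s)                 # -> 'def'
+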
+def _defaultStartDebugAction( instring, loc, expr ):
+    print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
+
+def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+    print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
+
+def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+    print ("Exception raised:" + _ustr(exc))
+
+def nullDebugAction(*args):
+    """'Do-nothing' debug action, to suppress debugging output during parsing."""
+    pass
+
+# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
+#~ 'decorator to trim function calls to match the arity of the target'
+#~ def _trim_arity(func, maxargs=3):
+    #~ if func in singleArgBuiltins:
+        #~ return lambda s,l,t: func(t)
+    #~ limit = 0
+    #~ foundArity = False
+    #~ def wrapper(*args):
+        #~ nonlocal limit,foundArity
+        #~ while 1:
+            #~ try:
+                #~ ret = func(*args[limit:])
+                #~ foundArity = True
+                #~ return ret
+            #~ except TypeError:
+                #~ if limit == maxargs or foundArity:
+                    #~ raise
+                #~ limit += 1
+                #~ continue
+    #~ return wrapper
+
+# this version is Python 2.x-3.x cross-compatible
+'decorator to trim function calls to match the arity of the target'
+def _trim_arity(func, maxargs=2):
+    if func in singleArgBuiltins:
+        return lambda s,l,t: func(t)
+    limit = [0]
+    foundArity = [False]
+    
+    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
+    if system_version[:2] >= (3,5):
+        def extract_stack(limit=0):
+            # special handling for Python 3.5.0 - extra deep call stack by 1
+            offset = -3 if system_version == (3,5,0) else -2
+            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
+            return [(frame_summary.filename, frame_summary.lineno)]
+        def extract_tb(tb, limit=0):
+            frames = traceback.extract_tb(tb, limit=limit)
+            frame_summary = frames[-1]
+            return [(frame_summary.filename, frame_summary.lineno)]
+    else:
+        extract_stack = traceback.extract_stack
+        extract_tb = traceback.extract_tb
+    
+    # synthesize what would be returned by traceback.extract_stack at the call to 
+    # user's parse action 'func', so that we don't incur call penalty at parse time
+    
+    LINE_DIFF = 6
+    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND 
+    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
+    this_line = extract_stack(limit=2)[-1]
+    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
+
+    def wrapper(*args):
+        while 1:
+            try:
+                ret = func(*args[limit[0]:])
+                foundArity[0] = True
+                return ret
+            except TypeError:
+                # re-raise TypeErrors if they did not come from our arity testing
+                if foundArity[0]:
+                    raise
+                else:
+                    try:
+                        tb = sys.exc_info()[-1]
+                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
+                            raise
+                    finally:
+                        del tb
+
+                if limit[0] <= maxargs:
+                    limit[0] += 1
+                    continue
+                raise
+
+    # copy func name to wrapper for sensible debug output
+    func_name = "<parse action>"
+    try:
+        func_name = getattr(func, '__name__', 
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    wrapper.__name__ = func_name
+
+    return wrapper
+
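+# _trim_arity is what allows parse actions to be written with 0, 1, 2, or 3
+# arguments; for example, each of these is accepted (a sketch using Word and
+# nums, defined later in this module):
+#~ Word(nums).setParseAction(lambda: None)
+#~ Word(nums).setParseAction(lambda toks: int(toks[0]))
+#~ Word(nums).setParseAction(lambda loc, toks: toks)
+#~ Word(nums).setParseAction(lambda s, loc, toks: toks)
+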
+class ParserElement(object):
+    """Abstract base level parser element class."""
+    DEFAULT_WHITE_CHARS = " \n\t\r"
+    verbose_stacktrace = False
+
+    @staticmethod
+    def setDefaultWhitespaceChars( chars ):
+        r"""
+        Overrides the default whitespace chars
+
+        Example::
+            # default whitespace chars are space, <TAB> and newline
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
+            
+            # change to just treat newline as significant
+            ParserElement.setDefaultWhitespaceChars(" \t")
+            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
+        """
+        ParserElement.DEFAULT_WHITE_CHARS = chars
+
+    @staticmethod
+    def inlineLiteralsUsing(cls):
+        """
+        Set class to be used for inclusion of string literals into a parser.
+        
+        Example::
+            # default literal class used is Literal
+            integer = Word(nums)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+
+            # change to Suppress
+            ParserElement.inlineLiteralsUsing(Suppress)
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")           
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
+        """
+        ParserElement._literalStringClass = cls
+
+    def __init__( self, savelist=False ):
+        self.parseAction = list()
+        self.failAction = None
+        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
+        self.strRepr = None
+        self.resultsName = None
+        self.saveAsList = savelist
+        self.skipWhitespace = True
+        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        self.copyDefaultWhiteChars = True
+        self.mayReturnEmpty = False # used when checking for left-recursion
+        self.keepTabs = False
+        self.ignoreExprs = list()
+        self.debug = False
+        self.streamlined = False
+        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
+        self.errmsg = ""
+        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
+        self.debugActions = ( None, None, None ) #custom debug actions
+        self.re = None
+        self.callPreparse = True # used to avoid redundant calls to preParse
+        self.callDuringTry = False
+
+    def copy( self ):
+        """
+        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
+        for the same parsing pattern, using copies of the original parse element.
+        
+        Example::
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
+            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+            
+            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
+        prints::
+            [5120, 100, 655360, 268435456]
+        Equivalent form of C{expr.copy()} is just C{expr()}::
+            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
+        """
+        cpy = copy.copy( self )
+        cpy.parseAction = self.parseAction[:]
+        cpy.ignoreExprs = self.ignoreExprs[:]
+        if self.copyDefaultWhiteChars:
+            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+        return cpy
+
+    def setName( self, name ):
+        """
+        Define name for this expression, makes debugging and exception messages clearer.
+        
+        Example::
+            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
+            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
+        """
+        self.name = name
+        self.errmsg = "Expected " + self.name
+        if hasattr(self,"exception"):
+            self.exception.msg = self.errmsg
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        """
+        Define name for referencing matching tokens as a nested attribute
+        of the returned parse results.
+        NOTE: this returns a *copy* of the original C{ParserElement} object;
+        this is so that the client can define a basic element, such as an
+        integer, and reference it in multiple places with different names.
+
+        You can also set results names using the abbreviated syntax,
+        C{expr("name")} in place of C{expr.setResultsName("name")} - 
+        see L{I{__call__}<__call__>}.
+
+        Example::
+            date_str = (integer.setResultsName("year") + '/' 
+                        + integer.setResultsName("month") + '/' 
+                        + integer.setResultsName("day"))
+
+            # equivalent form:
+            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
+        """
+        newself = self.copy()
+        if name.endswith("*"):
+            name = name[:-1]
+            listAllMatches=True
+        newself.resultsName = name
+        newself.modalResults = not listAllMatches
+        return newself
+
+    def setBreak(self,breakFlag = True):
+        """Method to invoke the Python pdb debugger when this element is
+           about to be parsed. Set C{breakFlag} to True to enable, False to
+           disable.
+        """
+        if breakFlag:
+            _parseMethod = self._parse
+            def breaker(instring, loc, doActions=True, callPreParse=True):
+                import pdb
+                pdb.set_trace()
+                return _parseMethod( instring, loc, doActions, callPreParse )
+            breaker._originalParseMethod = _parseMethod
+            self._parse = breaker
+        else:
+            if hasattr(self._parse,"_originalParseMethod"):
+                self._parse = self._parse._originalParseMethod
+        return self
+
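+    # For example, to drop into pdb just before this expression is matched
+    # (a debugging sketch only):
+    #~ integer = Word(nums).setName("integer").setBreak()
+    #~ integer.parseString("123")    # pdb.set_trace() fires before the match attempt
+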
+    def setParseAction( self, *fns, **kwargs ):
+        """
+        Define action to perform when successfully matching parse element definition.
+        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
+        C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
+         - s   = the original string being parsed (see note below)
+         - loc = the location of the matching substring
+         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
+        If the functions in fns modify the tokens, they can return them as the return
+        value from fn, and the modified list of tokens will replace the original.
+        Otherwise, fn does not need to return any value.
+
+        Optional keyword arguments:
+         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
+
+        Note: the default parsing behavior is to expand tabs in the input string
+        before starting the parsing process.  See L{I{parseString}<parseString>} for more information
+        on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
+        consistent view of the parsed string, the parse location, and line and column
+        positions within the parsed string.
+        
+        Example::
+            integer = Word(nums)
+            date_str = integer + '/' + integer + '/' + integer
+
+            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
+
+            # use parse action to convert to ints at parse time
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            date_str = integer + '/' + integer + '/' + integer
+
+            # note that integer fields are now ints, not strings
+            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
+        """
+        self.parseAction = list(map(_trim_arity, list(fns)))
+        self.callDuringTry = kwargs.get("callDuringTry", False)
+        return self
+
+    def addParseAction( self, *fns, **kwargs ):
+        """
+        Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
+        
+        See examples in L{I{copy}<copy>}.
+        """
+        self.parseAction += list(map(_trim_arity, list(fns)))
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def addCondition(self, *fns, **kwargs):
+        """Add a boolean predicate function to expression's list of parse actions. See 
+        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction}, 
+        functions passed to C{addCondition} need to return boolean success/fail of the condition.
+
+        Optional keyword arguments:
+         - message = define a custom message to be used in the raised exception
+         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
+         
+        Example::
+            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
+            year_int = integer.copy()
+            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
+            date_str = year_int + '/' + integer + '/' + integer
+
+            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
+        """
+        msg = kwargs.get("message", "failed user-defined condition")
+        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
+        for fn in fns:
+            def pa(s,l,t, fn=fn):
+                # bind fn as a default argument so each registered condition keeps
+                # its own function (avoids late binding of the loop variable 'fn')
+                if not bool(_trim_arity(fn)(s,l,t)):
+                    raise exc_type(s,l,msg)
+            self.parseAction.append(pa)
+        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
+        return self
+
+    def setFailAction( self, fn ):
+        """Define action to perform if parsing fails at this expression.
+           Fail action fn is a callable function that takes the arguments
+           C{fn(s,loc,expr,err)} where:
+            - s = string being parsed
+            - loc = location where expression match was attempted and failed
+            - expr = the parse expression that failed
+            - err = the exception thrown
+           The function returns no value.  It may throw C{L{ParseFatalException}}
+           if it is desired to stop parsing immediately."""
+        self.failAction = fn
+        return self
+
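+    # A sketch of a fail action that just reports the failure location
+    # (the argument names follow the docstring above):
+    #~ def report_failure(s, loc, expr, err):
+    #~     print("failed to match %s at loc %d" % (expr, loc))
+    #~ integer = Word(nums).setFailAction(report_failure)
+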
+    def _skipIgnorables( self, instring, loc ):
+        exprsFound = True
+        while exprsFound:
+            exprsFound = False
+            for e in self.ignoreExprs:
+                try:
+                    while 1:
+                        loc,dummy = e._parse( instring, loc )
+                        exprsFound = True
+                except ParseException:
+                    pass
+        return loc
+
+    def preParse( self, instring, loc ):
+        if self.ignoreExprs:
+            loc = self._skipIgnorables( instring, loc )
+
+        if self.skipWhitespace:
+            wt = self.whiteChars
+            instrlen = len(instring)
+            while loc < instrlen and instring[loc] in wt:
+                loc += 1
+
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        return loc, []
+
+    def postParse( self, instring, loc, tokenlist ):
+        return tokenlist
+
+    #~ @profile
+    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
+        debugging = ( self.debug ) #and doActions )
+
+        if debugging or self.failAction:
+            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
+            if (self.debugActions[0] ):
+                self.debugActions[0]( instring, loc, self )
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = preloc
+            try:
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            except ParseBaseException as err:
+                #~ print ("Exception raised:", err)
+                if self.debugActions[2]:
+                    self.debugActions[2]( instring, tokensStart, self, err )
+                if self.failAction:
+                    self.failAction( instring, tokensStart, self, err )
+                raise
+        else:
+            if callPreParse and self.callPreparse:
+                preloc = self.preParse( instring, loc )
+            else:
+                preloc = loc
+            tokensStart = preloc
+            if self.mayIndexError or loc >= len(instring):
+                try:
+                    loc,tokens = self.parseImpl( instring, preloc, doActions )
+                except IndexError:
+                    raise ParseException( instring, len(instring), self.errmsg, self )
+            else:
+                loc,tokens = self.parseImpl( instring, preloc, doActions )
+
+        tokens = self.postParse( instring, loc, tokens )
+
+        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
+        if self.parseAction and (doActions or self.callDuringTry):
+            if debugging:
+                try:
+                    for fn in self.parseAction:
+                        tokens = fn( instring, tokensStart, retTokens )
+                        if tokens is not None:
+                            retTokens = ParseResults( tokens,
+                                                      self.resultsName,
+                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                      modal=self.modalResults )
+                except ParseBaseException as err:
+                    #~ print "Exception raised in user parse action:", err
+                    if (self.debugActions[2] ):
+                        self.debugActions[2]( instring, tokensStart, self, err )
+                    raise
+            else:
+                for fn in self.parseAction:
+                    tokens = fn( instring, tokensStart, retTokens )
+                    if tokens is not None:
+                        retTokens = ParseResults( tokens,
+                                                  self.resultsName,
+                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
+                                                  modal=self.modalResults )
+
+        if debugging:
+            #~ print ("Matched",self,"->",retTokens.asList())
+            if (self.debugActions[1] ):
+                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
+
+        return loc, retTokens
+
+    def tryParse( self, instring, loc ):
+        try:
+            return self._parse( instring, loc, doActions=False )[0]
+        except ParseFatalException:
+            raise ParseException( instring, loc, self.errmsg, self)
+    
+    def canParseNext(self, instring, loc):
+        try:
+            self.tryParse(instring, loc)
+        except (ParseException, IndexError):
+            return False
+        else:
+            return True
+
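+    # canParseNext is a non-consuming lookahead check, e.g. (a sketch using Word/nums):
+    #~ integer = Word(nums)
+    #~ integer.canParseNext("abc 123", 0)    # -> False
+    #~ integer.canParseNext("abc 123", 4)    # -> True
+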
+    class _UnboundedCache(object):
+        def __init__(self):
+            cache = {}
+            self.not_in_cache = not_in_cache = object()
+
+            def get(self, key):
+                return cache.get(key, not_in_cache)
+
+            def set(self, key, value):
+                cache[key] = value
+
+            def clear(self):
+                cache.clear()
+
+            self.get = types.MethodType(get, self)
+            self.set = types.MethodType(set, self)
+            self.clear = types.MethodType(clear, self)
+
+    if _OrderedDict is not None:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = _OrderedDict()
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.popitem(False)
+
+                def clear(self):
+                    cache.clear()
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
+    else:
+        class _FifoCache(object):
+            def __init__(self, size):
+                self.not_in_cache = not_in_cache = object()
+
+                cache = {}
+                key_fifo = collections.deque([], size)
+
+                def get(self, key):
+                    return cache.get(key, not_in_cache)
+
+                def set(self, key, value):
+                    cache[key] = value
+                    if len(cache) > size:
+                        cache.pop(key_fifo.popleft(), None)
+                    key_fifo.append(key)
+
+                def clear(self):
+                    cache.clear()
+                    key_fifo.clear()
+
+                self.get = types.MethodType(get, self)
+                self.set = types.MethodType(set, self)
+                self.clear = types.MethodType(clear, self)
+
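+    # The caches above are small get/set/clear wrappers; for instance, the FIFO
+    # cache evicts its oldest entry once it holds more than 'size' items
+    # (illustrative only):
+    #~ c = ParserElement._FifoCache(size=2)
+    #~ c.set("a", 1); c.set("b", 2); c.set("c", 3)
+    #~ c.get("a") is c.not_in_cache    # -> True ("a" was evicted)
+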
+    # argument cache for optimizing repeated calls when backtracking through recursive expressions
+    packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
+    packrat_cache_lock = RLock()
+    packrat_cache_stats = [0, 0]
+
+    # this method gets repeatedly called during backtracking with the same arguments -
+    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
+        HIT, MISS = 0, 1
+        lookup = (self, instring, loc, callPreParse, doActions)
+        with ParserElement.packrat_cache_lock:
+            cache = ParserElement.packrat_cache
+            value = cache.get(lookup)
+            if value is cache.not_in_cache:
+                ParserElement.packrat_cache_stats[MISS] += 1
+                try:
+                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
+                except ParseBaseException as pe:
+                    # cache a copy of the exception, without the traceback
+                    cache.set(lookup, pe.__class__(*pe.args))
+                    raise
+                else:
+                    cache.set(lookup, (value[0], value[1].copy()))
+                    return value
+            else:
+                ParserElement.packrat_cache_stats[HIT] += 1
+                if isinstance(value, Exception):
+                    raise value
+                return (value[0], value[1].copy())
+
+    _parse = _parseNoCache
+
+    @staticmethod
+    def resetCache():
+        ParserElement.packrat_cache.clear()
+        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
+
+    _packratEnabled = False
+    @staticmethod
+    def enablePackrat(cache_size_limit=128):
+        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
+           Repeated parse attempts at the same string location (which happens
+           often in many complex grammars) can immediately return a cached value,
+           instead of re-executing parsing/validating code.  Both valid results
+           and parsing exceptions are memoized.
+           
+           Parameters:
+            - cache_size_limit - (default=C{128}) - if an integer value is provided
+              will limit the size of the packrat cache; if None is passed, then
+              the cache size will be unbounded; if 0 is passed, the cache will
+              be effectively disabled.
+            
+           This speedup may break existing programs that use parse actions that
+           have side-effects.  For this reason, packrat parsing is disabled when
+           you first import pyparsing.  To activate the packrat feature, your
+           program must call the class method C{ParserElement.enablePackrat()}.  If
+           your program uses C{psyco} to "compile as you go", you must call
+           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
+           Python will crash.  For best results, call C{enablePackrat()} immediately
+           after importing pyparsing.
+           
+           Example::
+               import pyparsing
+               pyparsing.ParserElement.enablePackrat()
+        """
+        if not ParserElement._packratEnabled:
+            ParserElement._packratEnabled = True
+            if cache_size_limit is None:
+                ParserElement.packrat_cache = ParserElement._UnboundedCache()
+            else:
+                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
+            ParserElement._parse = ParserElement._parseCache
+
+    def parseString( self, instring, parseAll=False ):
+        """
+        Execute the parse expression with the given string.
+        This is the main interface to the client code, once the complete
+        expression has been built.
+
+        If you want the grammar to require that the entire input string be
+        successfully parsed, then set C{parseAll} to True (equivalent to ending
+        the grammar with C{L{StringEnd()}}).
+
+        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
+        in order to report proper column numbers in parse actions.
+        If the input string contains tabs and
+        the grammar uses parse actions that use the C{loc} argument to index into the
+        string being parsed, you can ensure you have a consistent view of the input
+        string by:
+         - calling C{parseWithTabs} on your grammar before calling C{parseString}
+           (see L{I{parseWithTabs}<parseWithTabs>})
+         - define your parse action using the full C{(s,loc,toks)} signature, and
+           reference the input string using the parse action's C{s} argument
+         - explicitly expand the tabs in your input string before calling
+           C{parseString}
+        
+        Example::
+            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
+            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
+        """
+        ParserElement.resetCache()
+        if not self.streamlined:
+            self.streamline()
+            #~ self.saveAsList = True
+        for e in self.ignoreExprs:
+            e.streamline()
+        if not self.keepTabs:
+            instring = instring.expandtabs()
+        try:
+            loc, tokens = self._parse( instring, 0 )
+            if parseAll:
+                loc = self.preParse( instring, loc )
+                se = Empty() + StringEnd()
+                se._parse( instring, loc )
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+        else:
+            return tokens
+
+    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
+        """
+        Scan the input string for expression matches.  Each match will return the
+        matching tokens, start location, and end location.  May be called with optional
+        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
+        C{overlap} is specified, then overlapping matches will be reported.
+
+        Note that the start and end locations are reported relative to the string
+        being parsed.  See L{I{parseString}<parseString>} for more information on parsing
+        strings with embedded tabs.
+
+        Example::
+            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
+            print(source)
+            for tokens,start,end in Word(alphas).scanString(source):
+                print(' '*start + '^'*(end-start))
+                print(' '*start + tokens[0])
+        
+        prints::
+        
+            sldjf123lsdjjkf345sldkjf879lkjsfd987
+            ^^^^^
+            sldjf
+                    ^^^^^^^
+                    lsdjjkf
+                              ^^^^^^
+                              sldkjf
+                                       ^^^^^^
+                                       lkjsfd
+        """
+        if not self.streamlined:
+            self.streamline()
+        for e in self.ignoreExprs:
+            e.streamline()
+
+        if not self.keepTabs:
+            instring = _ustr(instring).expandtabs()
+        instrlen = len(instring)
+        loc = 0
+        preparseFn = self.preParse
+        parseFn = self._parse
+        ParserElement.resetCache()
+        matches = 0
+        try:
+            while loc <= instrlen and matches < maxMatches:
+                try:
+                    preloc = preparseFn( instring, loc )
+                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
+                except ParseException:
+                    loc = preloc+1
+                else:
+                    if nextLoc > loc:
+                        matches += 1
+                        yield tokens, preloc, nextLoc
+                        if overlap:
+                            nextloc = preparseFn( instring, loc )
+                            if nextloc > loc:
+                                loc = nextLoc
+                            else:
+                                loc += 1
+                        else:
+                            loc = nextLoc
+                    else:
+                        loc = preloc+1
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def transformString( self, instring ):
+        """
+        Extension to C{L{scanString}}, to modify matching text with modified tokens that may
+        be returned from a parse action.  To use C{transformString}, define a grammar and
+        attach a parse action to it that modifies the returned token list.
+        Invoking C{transformString()} on a target string will then scan for matches,
+        and replace the matched text patterns according to the logic in the parse
+        action.  C{transformString()} returns the resulting transformed string.
+        
+        Example::
+            wd = Word(alphas)
+            wd.setParseAction(lambda toks: toks[0].title())
+            
+            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
+        Prints::
+            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
+        """
+        out = []
+        lastE = 0
+        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
+        # keep string locs straight between transformString and scanString
+        self.keepTabs = True
+        try:
+            for t,s,e in self.scanString( instring ):
+                out.append( instring[lastE:s] )
+                if t:
+                    if isinstance(t,ParseResults):
+                        out += t.asList()
+                    elif isinstance(t,list):
+                        out += t
+                    else:
+                        out.append(t)
+                lastE = e
+            out.append(instring[lastE:])
+            out = [o for o in out if o]
+            return "".join(map(_ustr,_flatten(out)))
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def searchString( self, instring, maxMatches=_MAX_INT ):
+        """
+        Another extension to C{L{scanString}}, simplifying access to the tokens found
+        to match the given parse expression.  May be called with optional
+        C{maxMatches} argument, to clip searching after 'n' matches are found.
+        
+        Example::
+            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
+            cap_word = Word(alphas.upper(), alphas.lower())
+            
+            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
+        prints::
+            ['More', 'Iron', 'Lead', 'Gold', 'I']
+        """
+        try:
+            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
+        """
+        Generator method to split a string using the given expression as a separator.
+        May be called with optional C{maxsplit} argument, to limit the number of splits;
+        and the optional C{includeSeparators} argument (default=C{False}), indicating whether
+        the matching separator text should be included in the split results.
+        
+        Example::        
+            punc = oneOf(list(".,;:/-!?"))
+            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
+        prints::
+            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
+        """
+        splits = 0
+        last = 0
+        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
+            yield instring[last:s]
+            if includeSeparators:
+                yield t[0]
+            last = e
+        yield instring[last:]
+
+    def __add__(self, other ):
+        """
+        Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
+        converts them to L{Literal}s by default.
+        
+        Example::
+            greet = Word(alphas) + "," + Word(alphas) + "!"
+            hello = "Hello, World!"
+            print (hello, "->", greet.parseString(hello))
+        Prints::
+            Hello, World! -> ['Hello', ',', 'World', '!']
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return And( [ self, other ] )
+
+    def __radd__(self, other ):
+        """
+        Implementation of + operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other + self
+
+    def __sub__(self, other):
+        """
+        Implementation of - operator, returns C{L{And}} with error stop
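+
+        For example, once the expression to the left of the C{'-'} has matched,
+        the remainder is required; a failure there is reported immediately rather
+        than being backtracked::
+
+            # after 'name' is seen, the value is mandatory
+            name_value = Keyword("name") - Word(alphanums)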
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return And( [ self, And._ErrorStop(), other ] )
+
+    def __rsub__(self, other ):
+        """
+        Implementation of - operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other - self
+
+    def __mul__(self,other):
+        """
+        Implementation of * operator, allows use of C{expr * 3} in place of
+        C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
+        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
+        may also include C{None} as in:
+         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
+              to C{expr*n + L{ZeroOrMore}(expr)}
+              (read as "at least n instances of C{expr}")
+         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
+              (read as "0 to n instances of C{expr}")
+         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
+         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
+
+        Note that C{expr*(None,n)} does not raise an exception if
+        more than n exprs exist in the input stream; that is,
+        C{expr*(None,n)} does not enforce a maximum number of expr
+        occurrences.  If this behavior is desired, then write
+        C{expr*(None,n) + ~expr}
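+
+        A few illustrative uses, following the equivalences above::
+
+            Word(nums) * 3          # same as Word(nums) + Word(nums) + Word(nums)
+            Word(nums) * (2,)       # two or more numeric words
+            Word(nums) * (None, 4)  # zero to four numeric words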
+        """
+        if isinstance(other,int):
+            minElements, optElements = other,0
+        elif isinstance(other,tuple):
+            other = (other + (None, None))[:2]
+            if other[0] is None:
+                other = (0, other[1])
+            if isinstance(other[0],int) and other[1] is None:
+                if other[0] == 0:
+                    return ZeroOrMore(self)
+                if other[0] == 1:
+                    return OneOrMore(self)
+                else:
+                    return self*other[0] + ZeroOrMore(self)
+            elif isinstance(other[0],int) and isinstance(other[1],int):
+                minElements, optElements = other
+                optElements -= minElements
+            else:
+                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
+        else:
+            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
+
+        if minElements < 0:
+            raise ValueError("cannot multiply ParserElement by negative value")
+        if optElements < 0:
+            raise ValueError("second tuple value must be greater or equal to first tuple value")
+        if minElements == optElements == 0:
+            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
+
+        if (optElements):
+            def makeOptionalList(n):
+                if n>1:
+                    return Optional(self + makeOptionalList(n-1))
+                else:
+                    return Optional(self)
+            if minElements:
+                if minElements == 1:
+                    ret = self + makeOptionalList(optElements)
+                else:
+                    ret = And([self]*minElements) + makeOptionalList(optElements)
+            else:
+                ret = makeOptionalList(optElements)
+        else:
+            if minElements == 1:
+                ret = self
+            else:
+                ret = And([self]*minElements)
+        return ret
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __or__(self, other ):
+        """
+        Implementation of | operator - returns C{L{MatchFirst}}
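+
+        For example, C{MatchFirst} tries the alternatives in order and takes the
+        first one that matches::
+
+            number_or_word = Word(nums) | Word(alphas)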
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return MatchFirst( [ self, other ] )
+
+    def __ror__(self, other ):
+        """
+        Implementation of | operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other | self
+
+    def __xor__(self, other ):
+        """
+        Implementation of ^ operator - returns C{L{Or}}
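+
+        For example, C{Or} evaluates all alternatives and chooses the longest match,
+        unlike C{'|'}, which takes the first match::
+
+            # prefers '<=' over '<' when both would match
+            compare_op = Literal("<") ^ Literal("<=")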
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Or( [ self, other ] )
+
+    def __rxor__(self, other ):
+        """
+        Implementation of ^ operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other ^ self
+
+    def __and__(self, other ):
+        """
+        Implementation of & operator - returns C{L{Each}}
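+
+        For example, C{Each} requires all of the given expressions to be present,
+        but in any order::
+
+            # accepts "name Bob age 42" as well as "age 42 name Bob"
+            form = (Keyword("name") + Word(alphas)) & (Keyword("age") + Word(nums))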
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return Each( [ self, other ] )
+
+    def __rand__(self, other ):
+        """
+        Implementation of & operator when left operand is not a C{L{ParserElement}}
+        """
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        if not isinstance( other, ParserElement ):
+            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
+                    SyntaxWarning, stacklevel=2)
+            return None
+        return other & self
+
+    def __invert__( self ):
+        """
+        Implementation of ~ operator - returns C{L{NotAny}}
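+
+        For example, C{NotAny} is a negative lookahead - it succeeds (without
+        consuming input) only if the given expression would I{not} match at the
+        current position::
+
+            # an identifier that is not the keyword 'end'
+            identifier = ~Keyword("end") + Word(alphas)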
+        """
+        return NotAny( self )
+
+    def __call__(self, name=None):
+        """
+        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
+        
+        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
+        passed as C{True}.
+           
+        If C{name} is omitted, same as calling C{L{copy}}.
+
+        Example::
+            # these are equivalent
+            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
+            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")             
+        """
+        if name is not None:
+            return self.setResultsName(name)
+        else:
+            return self.copy()
+
+    def suppress( self ):
+        """
+        Suppresses the output of this C{ParserElement}; useful to keep punctuation from
+        cluttering up returned output.
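+
+        For example, grouping punctuation can be required in the input but omitted
+        from the parsed results::
+
+            LPAR = Literal("(").suppress()
+            RPAR = Literal(")").suppress()
+            group = LPAR + Word(alphas) + RPAR   # results contain only the word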
+        """
+        return Suppress( self )
+
+    def leaveWhitespace( self ):
+        """
+        Disables the skipping of whitespace before matching the characters in the
+        C{ParserElement}'s defined pattern.  This is normally only used internally by
+        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
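+
+        For example, to require that a unit suffix follow its number immediately,
+        with no intervening whitespace::
+
+            quantity = Word(nums) + Word(alphas).leaveWhitespace()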
+        """
+        self.skipWhitespace = False
+        return self
+
+    def setWhitespaceChars( self, chars ):
+        """
+        Overrides the default whitespace chars
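+
+        For example, restricting the skipped whitespace to spaces and tabs keeps
+        an expression from reading past the end of a line::
+
+            word = Word(alphas).setWhitespaceChars(" \\t")
+            words_on_a_line = OneOrMore(word) + LineEnd()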
+        """
+        self.skipWhitespace = True
+        self.whiteChars = chars
+        self.copyDefaultWhiteChars = False
+        return self
+
+    def parseWithTabs( self ):
+        """
+        Overrides the default behavior of expanding C{<TAB>}s to spaces before parsing the input string.
+        Must be called before C{parseString} when the input grammar contains elements that
+        match C{<TAB>} characters.
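+
+        For example, to match literal tab characters with L{White}, without having
+        them expanded to spaces first::
+
+            tab_sep = White("\\t").parseWithTabs()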
+        """
+        self.keepTabs = True
+        return self
+
+    def ignore( self, other ):
+        """
+        Define expression to be ignored (e.g., comments) while doing pattern
+        matching; may be called repeatedly, to define multiple comment or other
+        ignorable patterns.
+        
+        Example::
+            patt = OneOrMore(Word(alphas))
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
+            
+            patt.ignore(cStyleComment)
+            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
+        """
+        if isinstance(other, basestring):
+            other = Suppress(other)
+
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                self.ignoreExprs.append(other)
+        else:
+            self.ignoreExprs.append( Suppress( other.copy() ) )
+        return self
+
+    def setDebugActions( self, startAction, successAction, exceptionAction ):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        """
+        self.debugActions = (startAction or _defaultStartDebugAction,
+                             successAction or _defaultSuccessDebugAction,
+                             exceptionAction or _defaultExceptionDebugAction)
+        self.debug = True
+        return self
+
+    def setDebug( self, flag=True ):
+        """
+        Enable display of debugging messages while doing pattern matching.
+        Set C{flag} to True to enable, False to disable.
+
+        Example::
+            wd = Word(alphas).setName("alphaword")
+            integer = Word(nums).setName("numword")
+            term = wd | integer
+            
+            # turn on debugging for wd
+            wd.setDebug()
+
+            OneOrMore(term).parseString("abc 123 xyz 890")
+        
+        prints::
+            Match alphaword at loc 0(1,1)
+            Matched alphaword -> ['abc']
+            Match alphaword at loc 3(1,4)
+            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
+            Match alphaword at loc 7(1,8)
+            Matched alphaword -> ['xyz']
+            Match alphaword at loc 11(1,12)
+            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
+            Match alphaword at loc 15(1,16)
+            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
+
+        The output shown is that produced by the default debug actions - custom debug actions can be
+        specified using L{setDebugActions}. Prior to attempting
+        to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
+        is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
+        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
+        which makes debugging and exception messages easier to understand - for instance, the default
+        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
+        """
+        if flag:
+            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
+        else:
+            self.debug = False
+        return self
+
+    def __str__( self ):
+        return self.name
+
+    def __repr__( self ):
+        return _ustr(self)
+
+    def streamline( self ):
+        self.streamlined = True
+        self.strRepr = None
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        pass
+
+    def validate( self, validateTrace=[] ):
+        """
+        Check defined expressions for valid structure, check for infinite recursive definitions.
+        """
+        self.checkRecursion( [] )
+
+    def parseFile( self, file_or_filename, parseAll=False ):
+        """
+        Execute the parse expression on the given file or filename.
+        If a filename is specified (instead of a file object),
+        the entire file is opened, read, and closed before parsing.
+        """
+        try:
+            file_contents = file_or_filename.read()
+        except AttributeError:
+            with open(file_or_filename, "r") as f:
+                file_contents = f.read()
+        try:
+            return self.parseString(file_contents, parseAll)
+        except ParseBaseException as exc:
+            if ParserElement.verbose_stacktrace:
+                raise
+            else:
+                # catch and re-raise exception from here, clears out pyparsing internal stack trace
+                raise exc
+
+    def __eq__(self,other):
+        if isinstance(other, ParserElement):
+            return self is other or vars(self) == vars(other)
+        elif isinstance(other, basestring):
+            return self.matches(other)
+        else:
+            return super(ParserElement,self)==other
+
+    def __ne__(self,other):
+        return not (self == other)
+
+    def __hash__(self):
+        return hash(id(self))
+
+    def __req__(self,other):
+        return self == other
+
+    def __rne__(self,other):
+        return not (self == other)
+
+    def matches(self, testString, parseAll=True):
+        """
+        Method for quick testing of a parser against a test string. Good for simple
+        inline microtests of sub-expressions while building up a larger parser.
+           
+        Parameters:
+         - testString - to test against this expression for a match
+         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
+            
+        Example::
+            expr = Word(nums)
+            assert expr.matches("100")
+        """
+        try:
+            self.parseString(_ustr(testString), parseAll=parseAll)
+            return True
+        except ParseBaseException:
+            return False
+                
+    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
+        """
+        Execute the parse expression on a series of test strings, showing each
+        test, the parsed results or where the parse failed. Quick and easy way to
+        run a parse expression against a list of sample strings.
+           
+        Parameters:
+         - tests - a list of separate test strings, or a multiline string of test strings
+         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests           
+         - comment - (default=C{'#'}) - expression for indicating embedded comments in the test 
+              string; pass None to disable comment filtering
+         - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
+              if False, only dump nested list
+         - printResults - (default=C{True}) prints test output to stdout
+         - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
+
+        Returns: a (success, results) tuple, where success indicates that all tests succeeded
+        (or failed if C{failureTests} is True), and the results contain a list of lines of each 
+        test's output
+        
+        Example::
+            number_expr = pyparsing_common.number.copy()
+
+            result = number_expr.runTests('''
+                # unsigned integer
+                100
+                # negative integer
+                -100
+                # float with scientific notation
+                6.02e23
+                # integer with scientific notation
+                1e-12
+                ''')
+            print("Success" if result[0] else "Failed!")
+
+            result = number_expr.runTests('''
+                # stray character
+                100Z
+                # missing leading digit before '.'
+                -.100
+                # too many '.'
+                3.14.159
+                ''', failureTests=True)
+            print("Success" if result[0] else "Failed!")
+        prints::
+            # unsigned integer
+            100
+            [100]
+
+            # negative integer
+            -100
+            [-100]
+
+            # float with scientific notation
+            6.02e23
+            [6.02e+23]
+
+            # integer with scientific notation
+            1e-12
+            [1e-12]
+
+            Success
+            
+            # stray character
+            100Z
+               ^
+            FAIL: Expected end of text (at char 3), (line:1, col:4)
+
+            # missing leading digit before '.'
+            -.100
+            ^
+            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
+
+            # too many '.'
+            3.14.159
+                ^
+            FAIL: Expected end of text (at char 4), (line:1, col:5)
+
+            Success
+
+        Each test string must be on a single line. If you want to test a string that spans multiple
+        lines, create a test like this::
+
+            expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")
+
+        (Note that this is a raw string literal; you must include the leading 'r'.)
+        """
+        if isinstance(tests, basestring):
+            tests = list(map(str.strip, tests.rstrip().splitlines()))
+        if isinstance(comment, basestring):
+            comment = Literal(comment)
+        allResults = []
+        comments = []
+        success = True
+        for t in tests:
+            if comment is not None and comment.matches(t, False) or comments and not t:
+                comments.append(t)
+                continue
+            if not t:
+                continue
+            out = ['\n'.join(comments), t]
+            comments = []
+            try:
+                t = t.replace(r'\n','\n')
+                result = self.parseString(t, parseAll=parseAll)
+                out.append(result.dump(full=fullDump))
+                success = success and not failureTests
+            except ParseBaseException as pe:
+                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
+                if '\n' in t:
+                    out.append(line(pe.loc, t))
+                    out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
+                else:
+                    out.append(' '*pe.loc + '^' + fatal)
+                out.append("FAIL: " + str(pe))
+                success = success and failureTests
+                result = pe
+            except Exception as exc:
+                out.append("FAIL-EXCEPTION: " + str(exc))
+                success = success and failureTests
+                result = exc
+
+            if printResults:
+                if fullDump:
+                    out.append('')
+                print('\n'.join(out))
+
+            allResults.append((t, result))
+        
+        return success, allResults
+
+        
+class Token(ParserElement):
+    """
+    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
+    """
+    def __init__( self ):
+        super(Token,self).__init__( savelist=False )
+
+
+class Empty(Token):
+    """
+    An empty token, will always match.
+    """
+    def __init__( self ):
+        super(Empty,self).__init__()
+        self.name = "Empty"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+
+class NoMatch(Token):
+    """
+    A token that will never match.
+    """
+    def __init__( self ):
+        super(NoMatch,self).__init__()
+        self.name = "NoMatch"
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.errmsg = "Unmatchable token"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Literal(Token):
+    """
+    Token to exactly match a specified string.
+    
+    Example::
+        Literal('blah').parseString('blah')  # -> ['blah']
+        Literal('blah').parseString('blahfooblah')  # -> ['blah']
+        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"
+    
+    For case-insensitive matching, use L{CaselessLiteral}.
+    
+    For keyword matching (force word break before and after the matched string),
+    use L{Keyword} or L{CaselessKeyword}.
+    """
+    def __init__( self, matchString ):
+        super(Literal,self).__init__()
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Literal; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+            self.__class__ = Empty
+        self.name = '"%s"' % _ustr(self.match)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+
+    # Performance tuning: this routine gets called a *lot*
+    # if this is a single character match string  and the first character matches,
+    # short-circuit as quickly as possible, and avoid calling startswith
+    #~ @profile
+    def parseImpl( self, instring, loc, doActions=True ):
+        if (instring[loc] == self.firstMatchChar and
+            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
+            return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+_L = Literal
+ParserElement._literalStringClass = Literal
+
+class Keyword(Token):
+    """
+    Token to exactly match a specified string as a keyword, that is, it must be
+    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
+     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
+     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
+    Accepts two optional constructor arguments in addition to the keyword string:
+     - C{identChars} is a string of characters that would be valid identifier characters,
+          defaulting to all alphanumerics + "_" and "$"
+     - C{caseless} allows case-insensitive matching, default is C{False}.
+       
+    Example::
+        Keyword("start").parseString("start")  # -> ['start']
+        Keyword("start").parseString("starting")  # -> Exception
+
+    For case-insensitive matching, use L{CaselessKeyword}.
+    """
+    DEFAULT_KEYWORD_CHARS = alphanums+"_$"
+
+    def __init__( self, matchString, identChars=None, caseless=False ):
+        super(Keyword,self).__init__()
+        if identChars is None:
+            identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        self.match = matchString
+        self.matchLen = len(matchString)
+        try:
+            self.firstMatchChar = matchString[0]
+        except IndexError:
+            warnings.warn("null string passed to Keyword; use Empty() instead",
+                            SyntaxWarning, stacklevel=2)
+        self.name = '"%s"' % self.match
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = False
+        self.mayIndexError = False
+        self.caseless = caseless
+        if caseless:
+            self.caselessmatch = matchString.upper()
+            identChars = identChars.upper()
+        self.identChars = set(identChars)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.caseless:
+            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
+                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        else:
+            if (instring[loc] == self.firstMatchChar and
+                (self.matchLen==1 or instring.startswith(self.match,loc)) and
+                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
+                (loc == 0 or instring[loc-1] not in self.identChars) ):
+                return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+    def copy(self):
+        c = super(Keyword,self).copy()
+        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
+        return c
+
+    @staticmethod
+    def setDefaultKeywordChars( chars ):
+        """Overrides the default Keyword chars
+        """
+        Keyword.DEFAULT_KEYWORD_CHARS = chars
+
+class CaselessLiteral(Literal):
+    """
+    Token to match a specified string, ignoring case of letters.
+    Note: the matched results will always be in the case of the given
+    match string, NOT the case of the input text.
+
+    Example::
+        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
+        
+    (Contrast with example for L{CaselessKeyword}.)
+    """
+    def __init__( self, matchString ):
+        super(CaselessLiteral,self).__init__( matchString.upper() )
+        # Preserve the defining literal.
+        self.returnString = matchString
+        self.name = "'%s'" % self.returnString
+        self.errmsg = "Expected " + self.name
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if instring[ loc:loc+self.matchLen ].upper() == self.match:
+            return loc+self.matchLen, self.returnString
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class CaselessKeyword(Keyword):
+    """
+    Caseless version of L{Keyword}.
+
+    Example::
+        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
+        
+    (Contrast with example for L{CaselessLiteral}.)
+    """
+    def __init__( self, matchString, identChars=None ):
+        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
+             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
+            return loc+self.matchLen, self.match
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class CloseMatch(Token):
+    """
+    A variation on L{Literal} which matches "close" matches, that is, 
+    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
+     - C{match_string} - string to be matched
+     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
+    
+    The results from a successful parse will contain the matched text from the input string and the following named results:
+     - C{mismatches} - a list of the positions within the match_string where mismatches were found
+     - C{original} - the original match_string used to compare against the input string
+    
+    If C{mismatches} is an empty list, then the match was an exact match.
+    
+    Example::
+        patt = CloseMatch("ATCATCGAATGGA")
+        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
+        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
+
+        # exact match
+        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
+
+        # close match allowing up to 2 mismatches
+        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
+        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
+    """
+    def __init__(self, match_string, maxMismatches=1):
+        super(CloseMatch,self).__init__()
+        self.name = match_string
+        self.match_string = match_string
+        self.maxMismatches = maxMismatches
+        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
+        self.mayIndexError = False
+        self.mayReturnEmpty = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        start = loc
+        instrlen = len(instring)
+        maxloc = start + len(self.match_string)
+
+        if maxloc <= instrlen:
+            match_string = self.match_string
+            match_stringloc = 0
+            mismatches = []
+            maxMismatches = self.maxMismatches
+
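+            # compare the input region to match_string character by character,
+            # noting each mismatch position; the for/else falls through to a
+            # successful return only if the mismatch budget was never exceeded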
+            for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
+                src,mat = s_m
+                if src != mat:
+                    mismatches.append(match_stringloc)
+                    if len(mismatches) > maxMismatches:
+                        break
+            else:
+                loc = match_stringloc + 1
+                results = ParseResults([instring[start:loc]])
+                results['original'] = self.match_string
+                results['mismatches'] = mismatches
+                return loc, results
+
+        raise ParseException(instring, loc, self.errmsg, self)
+
+
+class Word(Token):
+    """
+    Token for matching words composed of allowed character sets.
+    Defined with string containing all allowed initial characters,
+    an optional string containing allowed body characters (if omitted,
+    defaults to the initial character set), and an optional minimum,
+    maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction. An optional
+    C{excludeChars} parameter can list characters to be removed from the
+    C{initChars} and C{bodyChars} sets; useful to define a word of all printables
+    except for one or two characters, for instance.
+    
+    L{srange} is useful for defining custom character set strings for defining 
+    C{Word} expressions, using range notation from regular expression character sets.
+    
+    A common mistake is to use C{Word} to match a specific literal string, as in 
+    C{Word("Address")}. Remember that C{Word} uses the string argument to define
+    I{sets} of matchable characters. This expression would match "Add", "AAA",
+    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
+    To match an exact literal string, use L{Literal} or L{Keyword}.
+
+    pyparsing includes helper strings for building Words:
+     - L{alphas}
+     - L{nums}
+     - L{alphanums}
+     - L{hexnums}
+     - L{alphas8bit} (alphabetic characters in the Latin-1 range 128-255 - accented, tilded, umlauted, etc.)
+     - L{punc8bit} (non-alphabetic characters in the Latin-1 range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
+     - L{printables} (any non-whitespace character)
+
+    Example::
+        # a word composed of digits
+        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
+        
+        # a word with a leading capital, and zero or more lowercase
+        capital_word = Word(alphas.upper(), alphas.lower())
+
+        # hostnames are alphanumeric, with leading alpha, and '-'
+        hostname = Word(alphas, alphanums+'-')
+        
+        # roman numeral (not a strict parser, accepts invalid mix of characters)
+        roman = Word("IVXLCDM")
+        
+        # any string of non-whitespace characters, except for ','
+        csv_value = Word(printables, excludeChars=",")
+    """
+    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
+        super(Word,self).__init__()
+        if excludeChars:
+            initChars = ''.join(c for c in initChars if c not in excludeChars)
+            if bodyChars:
+                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
+        self.initCharsOrig = initChars
+        self.initChars = set(initChars)
+        if bodyChars :
+            self.bodyCharsOrig = bodyChars
+            self.bodyChars = set(bodyChars)
+        else:
+            self.bodyCharsOrig = initChars
+            self.bodyChars = set(initChars)
+
+        self.maxSpecified = max > 0
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.asKeyword = asKeyword
+
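+        # when the word is unconstrained by min/max/exact and contains no literal
+        # spaces, precompile an equivalent regular expression so parseImpl can use
+        # the faster re-based match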
+        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
+            if self.bodyCharsOrig == self.initCharsOrig:
+                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
+            elif len(self.initCharsOrig) == 1:
+                self.reString = "%s[%s]*" % \
+                                      (re.escape(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            else:
+                self.reString = "[%s][%s]*" % \
+                                      (_escapeRegexRangeChars(self.initCharsOrig),
+                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
+            if self.asKeyword:
+                self.reString = r"\b"+self.reString+r"\b"
+            try:
+                self.re = re.compile( self.reString )
+            except Exception:
+                self.re = None
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.re:
+            result = self.re.match(instring,loc)
+            if not result:
+                raise ParseException(instring, loc, self.errmsg, self)
+
+            loc = result.end()
+            return loc, result.group()
+
+        if not(instring[ loc ] in self.initChars):
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        instrlen = len(instring)
+        bodychars = self.bodyChars
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, instrlen )
+        while loc < maxloc and instring[loc] in bodychars:
+            loc += 1
+
+        throwException = False
+        if loc - start < self.minLen:
+            throwException = True
+        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
+            throwException = True
+        if self.asKeyword:
+            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
+                throwException = True
+
+        if throwException:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        try:
+            return super(Word,self).__str__()
+        except Exception:
+            pass
+
+
+        if self.strRepr is None:
+
+            def charsAsStr(s):
+                if len(s)>4:
+                    return s[:4]+"..."
+                else:
+                    return s
+
+            if ( self.initCharsOrig != self.bodyCharsOrig ):
+                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
+            else:
+                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
+
+        return self.strRepr
+
+
+class Regex(Token):
+    """
+    Token for matching strings that match a given regular expression.
+    Defined with a string specifying the regular expression, in a form recognized by the built-in Python re module.
+    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as 
+    named parse results.
+
+    Example::
+        realnum = Regex(r"[+-]?\d+\.\d*")
+        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
+        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
+        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
+    """
+    compiledREtype = type(re.compile("[A-Z]"))
+    def __init__( self, pattern, flags=0):
+        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
+        super(Regex,self).__init__()
+
+        if isinstance(pattern, basestring):
+            if not pattern:
+                warnings.warn("null string passed to Regex; use Empty() instead",
+                        SyntaxWarning, stacklevel=2)
+
+            self.pattern = pattern
+            self.flags = flags
+
+            try:
+                self.re = re.compile(self.pattern, self.flags)
+                self.reString = self.pattern
+            except sre_constants.error:
+                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
+                    SyntaxWarning, stacklevel=2)
+                raise
+
+        elif isinstance(pattern, Regex.compiledREtype):
+            self.re = pattern
+            self.pattern = \
+            self.reString = str(pattern)
+            self.flags = flags
+            
+        else:
+            raise ValueError("Regex may only be constructed with a string or a compiled RE object")
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        result = self.re.match(instring,loc)
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        d = result.groupdict()
+        ret = ParseResults(result.group())
+        if d:
+            for k in d:
+                ret[k] = d[k]
+        return loc,ret
+
+    def __str__( self ):
+        try:
+            return super(Regex,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "Re:(%s)" % repr(self.pattern)
+
+        return self.strRepr
+
+
+class QuotedString(Token):
+    r"""
+    Token for matching strings that are delimited by quoting characters.
+    
+    Defined with the following parameters:
+        - quoteChar - string of one or more characters defining the quote delimiting string
+        - escChar - character to escape quotes, typically backslash (default=C{None})
+        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
+        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
+        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
+        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
+        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
+
+    Example::
+        qs = QuotedString('"')
+        print(qs.searchString('lsjdf "This is the quote" sldjf'))
+        complex_qs = QuotedString('{{', endQuoteChar='}}')
+        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
+        sql_qs = QuotedString('"', escQuote='""')
+        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
+    prints::
+        [['This is the quote']]
+        [['This is the "quote"']]
+        [['This is the quote with "embedded" quotes']]
+    """
+    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
+        super(QuotedString,self).__init__()
+
+        # remove white space from quote chars - won't work anyway
+        quoteChar = quoteChar.strip()
+        if not quoteChar:
+            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+            raise SyntaxError()
+
+        if endQuoteChar is None:
+            endQuoteChar = quoteChar
+        else:
+            endQuoteChar = endQuoteChar.strip()
+            if not endQuoteChar:
+                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
+                raise SyntaxError()
+
+        self.quoteChar = quoteChar
+        self.quoteCharLen = len(quoteChar)
+        self.firstQuoteChar = quoteChar[0]
+        self.endQuoteChar = endQuoteChar
+        self.endQuoteCharLen = len(endQuoteChar)
+        self.escChar = escChar
+        self.escQuote = escQuote
+        self.unquoteResults = unquoteResults
+        self.convertWhitespaceEscapes = convertWhitespaceEscapes
+
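+        # assemble a regular expression for the quoted string: the opening quote,
+        # then any run of characters that cannot begin the end-quote (plus any
+        # configured escaped quotes or escape sequences), then the closing quote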
+        if multiline:
+            self.flags = re.MULTILINE | re.DOTALL
+            self.pattern = r'%s(?:[^%s%s]' % \
+                ( re.escape(self.quoteChar),
+                  _escapeRegexRangeChars(self.endQuoteChar[0]),
+                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+        else:
+            self.flags = 0
+            self.pattern = r'%s(?:[^%s\n\r%s]' % \
+                ( re.escape(self.quoteChar),
+                  _escapeRegexRangeChars(self.endQuoteChar[0]),
+                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
+        if len(self.endQuoteChar) > 1:
+            self.pattern += (
+                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
+                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
+                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
+                )
+        if escQuote:
+            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
+        if escChar:
+            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
+            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
+        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
+
+        try:
+            self.re = re.compile(self.pattern, self.flags)
+            self.reString = self.pattern
+        except sre_constants.error:
+            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
+                SyntaxWarning, stacklevel=2)
+            raise
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayIndexError = False
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
+        if not result:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        loc = result.end()
+        ret = result.group()
+
+        if self.unquoteResults:
+
+            # strip off quotes
+            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
+
+            if isinstance(ret,basestring):
+                # replace escaped whitespace
+                if '\\' in ret and self.convertWhitespaceEscapes:
+                    ws_map = {
+                        r'\t' : '\t',
+                        r'\n' : '\n',
+                        r'\f' : '\f',
+                        r'\r' : '\r',
+                    }
+                    for wslit,wschar in ws_map.items():
+                        ret = ret.replace(wslit, wschar)
+
+                # replace escaped characters
+                if self.escChar:
+                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
+
+                # replace escaped quotes
+                if self.escQuote:
+                    ret = ret.replace(self.escQuote, self.endQuoteChar)
+
+        return loc, ret
+
+    def __str__( self ):
+        try:
+            return super(QuotedString,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
+
+        return self.strRepr
+
+
+class CharsNotIn(Token):
+    """
+    Token for matching words composed of characters I{not} in a given set (will
+    include whitespace in matched characters if not listed in the provided exclusion set - see example).
+    Defined with string containing all disallowed characters, and an optional
+    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
+    minimum value < 1 is not valid); the default values for C{max} and C{exact}
+    are 0, meaning no maximum or exact length restriction.
+
+    Example::
+        # define a comma-separated-value as anything that is not a ','
+        csv_value = CharsNotIn(',')
+        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
+    prints::
+        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
+    """
+    def __init__( self, notChars, min=1, max=0, exact=0 ):
+        super(CharsNotIn,self).__init__()
+        self.skipWhitespace = False
+        self.notChars = notChars
+
+        if min < 1:
+            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+        self.name = _ustr(self)
+        self.errmsg = "Expected " + self.name
+        self.mayReturnEmpty = ( self.minLen == 0 )
+        self.mayIndexError = False
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if instring[loc] in self.notChars:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        start = loc
+        loc += 1
+        notchars = self.notChars
+        maxlen = min( start+self.maxLen, len(instring) )
+        while loc < maxlen and \
+              (instring[loc] not in notchars):
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+    def __str__( self ):
+        try:
+            return super(CharsNotIn, self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            if len(self.notChars) > 4:
+                self.strRepr = "!W:(%s...)" % self.notChars[:4]
+            else:
+                self.strRepr = "!W:(%s)" % self.notChars
+
+        return self.strRepr
+
+class White(Token):
+    """
+    Special matching class for matching whitespace.  Normally, whitespace is ignored
+    by pyparsing grammars.  This class is provided for grammars in which some
+    whitespace is significant.  Define with a string containing the whitespace characters to be
+    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
+    as defined for the C{L{Word}} class.
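+
+    For example, to require at least one literal space between two words, rather
+    than leaving it to the normal whitespace skipping::
+
+        two_words = Word(alphas) + White(" ") + Word(alphas)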
+    """
+    whiteStrs = {
+        " " : "<SPC>",
+        "\t": "<TAB>",
+        "\n": "<LF>",
+        "\r": "<CR>",
+        "\f": "<FF>",
+        }
+    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
+        super(White,self).__init__()
+        self.matchWhite = ws
+        self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
+        #~ self.leaveWhitespace()
+        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
+        self.mayReturnEmpty = True
+        self.errmsg = "Expected " + self.name
+
+        self.minLen = min
+
+        if max > 0:
+            self.maxLen = max
+        else:
+            self.maxLen = _MAX_INT
+
+        if exact > 0:
+            self.maxLen = exact
+            self.minLen = exact
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if not(instring[ loc ] in self.matchWhite):
+            raise ParseException(instring, loc, self.errmsg, self)
+        start = loc
+        loc += 1
+        maxloc = start + self.maxLen
+        maxloc = min( maxloc, len(instring) )
+        while loc < maxloc and instring[loc] in self.matchWhite:
+            loc += 1
+
+        if loc - start < self.minLen:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        return loc, instring[start:loc]
+
+
+class _PositionToken(Token):
+    def __init__( self ):
+        super(_PositionToken,self).__init__()
+        self.name=self.__class__.__name__
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+
+class GoToColumn(_PositionToken):
+    """
+    Token to advance to a specific column of input text; useful for tabular report scraping.
+    """
+    def __init__( self, colno ):
+        super(GoToColumn,self).__init__()
+        self.col = colno
+
+    def preParse( self, instring, loc ):
+        if col(loc,instring) != self.col:
+            instrlen = len(instring)
+            if self.ignoreExprs:
+                loc = self._skipIgnorables( instring, loc )
+            while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
+                loc += 1
+        return loc
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        thiscol = col( loc, instring )
+        if thiscol > self.col:
+            raise ParseException( instring, loc, "Text not in expected column", self )
+        newloc = loc + self.col - thiscol
+        ret = instring[ loc: newloc ]
+        return newloc, ret
+
+
+class LineStart(_PositionToken):
+    """
+    Matches if current position is at the beginning of a line within the parse string
+    
+    Example::
+    
+        test = '''\
+        AAA this line
+        AAA and this line
+          AAA but not this one
+        B AAA and definitely not this one
+        '''
+
+        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
+            print(t)
+    
+    Prints::
+        ['AAA', ' this line']
+        ['AAA', ' and this line']    
+
+    """
+    def __init__( self ):
+        super(LineStart,self).__init__()
+        self.errmsg = "Expected start of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if col(loc, instring) == 1:
+            return loc, []
+        raise ParseException(instring, loc, self.errmsg, self)
+
+class LineEnd(_PositionToken):
+    """
+    Matches if current position is at the end of a line within the parse string
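+
+    Example (an illustrative sketch - the output shown is the expected result)::
+
+        # require a word to be the last token on its line
+        expr = Word(alphas) + LineEnd()
+        print(expr.parseString("hello\\nworld"))
+    prints::
+        ['hello', '\\n']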
+    """
+    def __init__( self ):
+        super(LineEnd,self).__init__()
+        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
+        self.errmsg = "Expected end of line"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if loc<len(instring):
+            if instring[loc] == "\n":
+                return loc+1, "\n"
+            else:
+                raise ParseException(instring, loc, self.errmsg, self)
+        elif loc == len(instring):
+            return loc+1, []
+        else:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+class StringStart(_PositionToken):
+    """
+    Matches if current position is at the beginning of the parse string
+    """
+    def __init__( self ):
+        super(StringStart,self).__init__()
+        self.errmsg = "Expected start of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if loc != 0:
+            # see if entire string up to here is just whitespace and ignoreables
+            if loc != self.preParse( instring, 0 ):
+                raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class StringEnd(_PositionToken):
+    """
+    Matches if current position is at the end of the parse string
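+
+    Example (an illustrative sketch - the output shown is the expected result)::
+
+        # accept a word only when nothing else follows it in the input
+        valid_word = Word(alphas) + StringEnd()
+        print(valid_word.parseString("spark"))
+        # valid_word.parseString("spark plug") would raise ParseException
+    prints::
+        ['spark']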
+    """
+    def __init__( self ):
+        super(StringEnd,self).__init__()
+        self.errmsg = "Expected end of text"
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if loc < len(instring):
+            raise ParseException(instring, loc, self.errmsg, self)
+        elif loc == len(instring):
+            return loc+1, []
+        elif loc > len(instring):
+            return loc, []
+        else:
+            raise ParseException(instring, loc, self.errmsg, self)
+
+class WordStart(_PositionToken):
+    """
+    Matches if the current position is at the beginning of a Word, and
+    is not preceded by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
+    the string being parsed, or at the beginning of a line.
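+
+    Example (an illustrative sketch - the output shown is the expected result)::
+
+        # only match numbers that are not immediately preceded by letters or digits
+        whole_number = WordStart(alphanums) + Word(nums)
+        print(whole_number.searchString("abc1 123 a123"))
+    prints::
+        [['123']]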
+    """
+    def __init__(self, wordChars = printables):
+        super(WordStart,self).__init__()
+        self.wordChars = set(wordChars)
+        self.errmsg = "Not at the start of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        if loc != 0:
+            if (instring[loc-1] in self.wordChars or
+                instring[loc] not in self.wordChars):
+                raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+class WordEnd(_PositionToken):
+    """
+    Matches if the current position is at the end of a Word, and
+    is not followed by any character in a given set of C{wordChars}
+    (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
+    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
+    the string being parsed, or at the end of a line.
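+
+    Example (an illustrative sketch - the output shown is the expected result)::
+
+        # only match numbers that are not immediately followed by letters or digits
+        whole_number = Word(nums) + WordEnd(alphanums)
+        print(whole_number.searchString("123 456a 789"))
+    prints::
+        [['123'], ['789']]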
+    """
+    def __init__(self, wordChars = printables):
+        super(WordEnd,self).__init__()
+        self.wordChars = set(wordChars)
+        self.skipWhitespace = False
+        self.errmsg = "Not at the end of a word"
+
+    def parseImpl(self, instring, loc, doActions=True ):
+        instrlen = len(instring)
+        if instrlen>0 and loc<instrlen:
+            if (instring[loc] in self.wordChars or
+                instring[loc-1] not in self.wordChars):
+                raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+
+class ParseExpression(ParserElement):
+    """
+    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(ParseExpression,self).__init__(savelist)
+        if isinstance( exprs, _generatorType ):
+            exprs = list(exprs)
+
+        if isinstance( exprs, basestring ):
+            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
+        elif isinstance( exprs, collections.Iterable ):
+            exprs = list(exprs)
+            # if sequence of strings provided, wrap with Literal
+            if all(isinstance(expr, basestring) for expr in exprs):
+                exprs = map(ParserElement._literalStringClass, exprs)
+            self.exprs = list(exprs)
+        else:
+            try:
+                self.exprs = list( exprs )
+            except TypeError:
+                self.exprs = [ exprs ]
+        self.callPreparse = False
+
+    def __getitem__( self, i ):
+        return self.exprs[i]
+
+    def append( self, other ):
+        self.exprs.append( other )
+        self.strRepr = None
+        return self
+
+    def leaveWhitespace( self ):
+        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
+           all contained expressions."""
+        self.skipWhitespace = False
+        self.exprs = [ e.copy() for e in self.exprs ]
+        for e in self.exprs:
+            e.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseExpression, self).ignore( other )
+                for e in self.exprs:
+                    e.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseExpression, self).ignore( other )
+            for e in self.exprs:
+                e.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def __str__( self ):
+        try:
+            return super(ParseExpression,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
+        return self.strRepr
+
+    def streamline( self ):
+        super(ParseExpression,self).streamline()
+
+        for e in self.exprs:
+            e.streamline()
+
+        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
+        # but only if there are no parse actions or resultsNames on the nested And's
+        # (likewise for Or's and MatchFirst's)
+        if ( len(self.exprs) == 2 ):
+            other = self.exprs[0]
+            if ( isinstance( other, self.__class__ ) and
+                  not(other.parseAction) and
+                  other.resultsName is None and
+                  not other.debug ):
+                self.exprs = other.exprs[:] + [ self.exprs[1] ]
+                self.strRepr = None
+                self.mayReturnEmpty |= other.mayReturnEmpty
+                self.mayIndexError  |= other.mayIndexError
+
+            other = self.exprs[-1]
+            if ( isinstance( other, self.__class__ ) and
+                  not(other.parseAction) and
+                  other.resultsName is None and
+                  not other.debug ):
+                self.exprs = self.exprs[:-1] + other.exprs[:]
+                self.strRepr = None
+                self.mayReturnEmpty |= other.mayReturnEmpty
+                self.mayIndexError  |= other.mayIndexError
+
+        self.errmsg = "Expected " + _ustr(self)
+        
+        return self
+
+    def setResultsName( self, name, listAllMatches=False ):
+        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
+        return ret
+
+    def validate( self, validateTrace=[] ):
+        tmp = validateTrace[:]+[self]
+        for e in self.exprs:
+            e.validate(tmp)
+        self.checkRecursion( [] )
+        
+    def copy(self):
+        ret = super(ParseExpression,self).copy()
+        ret.exprs = [e.copy() for e in self.exprs]
+        return ret
+
+class And(ParseExpression):
+    """
+    Requires all given C{ParseExpression}s to be found in the given order.
+    Expressions may be separated by whitespace.
+    May be constructed using the C{'+'} operator.
+    May also be constructed using the C{'-'} operator, which will suppress backtracking.
+
+    Example::
+        integer = Word(nums)
+        name_expr = OneOrMore(Word(alphas))
+
+        expr = And([integer("id"),name_expr("name"),integer("age")])
+        # more easily written as:
+        expr = integer("id") + name_expr("name") + integer("age")
+    """
+
+    class _ErrorStop(Empty):
+        def __init__(self, *args, **kwargs):
+            super(And._ErrorStop,self).__init__(*args, **kwargs)
+            self.name = '-'
+            self.leaveWhitespace()
+
+    def __init__( self, exprs, savelist = True ):
+        super(And,self).__init__(exprs, savelist)
+        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
+        self.setWhitespaceChars( self.exprs[0].whiteChars )
+        self.skipWhitespace = self.exprs[0].skipWhitespace
+        self.callPreparse = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        # pass False as last arg to _parse for first element, since we already
+        # pre-parsed the string as part of our And pre-parsing
+        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
+        errorStop = False
+        for e in self.exprs[1:]:
+            if isinstance(e, And._ErrorStop):
+                errorStop = True
+                continue
+            if errorStop:
+                try:
+                    loc, exprtokens = e._parse( instring, loc, doActions )
+                except ParseSyntaxException:
+                    raise
+                except ParseBaseException as pe:
+                    pe.__traceback__ = None
+                    raise ParseSyntaxException._from_exception(pe)
+                except IndexError:
+                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
+            else:
+                loc, exprtokens = e._parse( instring, loc, doActions )
+            if exprtokens or exprtokens.haskeys():
+                resultlist += exprtokens
+        return loc, resultlist
+
+    def __iadd__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #And( [ self, other ] )
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+            if not e.mayReturnEmpty:
+                break
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+
+class Or(ParseExpression):
+    """
+    Requires that at least one C{ParseExpression} is found.
+    If two expressions match, the expression that matches the longest string will be used.
+    May be constructed using the C{'^'} operator.
+
+    Example::
+        # construct Or using '^' operator
+        
+        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
+        print(number.searchString("123 3.1416 789"))
+    prints::
+        [['123'], ['3.1416'], ['789']]
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(Or,self).__init__(exprs, savelist)
+        if self.exprs:
+            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+        else:
+            self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        maxExcLoc = -1
+        maxException = None
+        matches = []
+        for e in self.exprs:
+            try:
+                loc2 = e.tryParse( instring, loc )
+            except ParseException as err:
+                err.__traceback__ = None
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+            else:
+                # save match among all matches, to retry longest to shortest
+                matches.append((loc2, e))
+
+        if matches:
+            matches.sort(key=lambda x: -x[0])
+            for _,e in matches:
+                try:
+                    return e._parse( instring, loc, doActions )
+                except ParseException as err:
+                    err.__traceback__ = None
+                    if err.loc > maxExcLoc:
+                        maxException = err
+                        maxExcLoc = err.loc
+
+        if maxException is not None:
+            maxException.msg = self.errmsg
+            raise maxException
+        else:
+            raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+
+    def __ixor__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #Or( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class MatchFirst(ParseExpression):
+    """
+    Requires that at least one C{ParseExpression} is found.
+    If two expressions match, the first one listed is the one that will match.
+    May be constructed using the C{'|'} operator.
+
+    Example::
+        # construct MatchFirst using '|' operator
+        
+        # watch the order of expressions to match
+        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
+        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]
+
+        # put more selective expression first
+        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
+        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
+    """
+    def __init__( self, exprs, savelist = False ):
+        super(MatchFirst,self).__init__(exprs, savelist)
+        if self.exprs:
+            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+        else:
+            self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        maxExcLoc = -1
+        maxException = None
+        for e in self.exprs:
+            try:
+                ret = e._parse( instring, loc, doActions )
+                return ret
+            except ParseException as err:
+                if err.loc > maxExcLoc:
+                    maxException = err
+                    maxExcLoc = err.loc
+            except IndexError:
+                if len(instring) > maxExcLoc:
+                    maxException = ParseException(instring,len(instring),e.errmsg,self)
+                    maxExcLoc = len(instring)
+
+        # only got here if no expression matched, raise exception for match that made it the furthest
+        else:
+            if maxException is not None:
+                maxException.msg = self.errmsg
+                raise maxException
+            else:
+                raise ParseException(instring, loc, "no defined alternatives to match", self)
+
+    def __ior__(self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass( other )
+        return self.append( other ) #MatchFirst( [ self, other ] )
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class Each(ParseExpression):
+    """
+    Requires all given C{ParseExpression}s to be found, but in any order.
+    Expressions may be separated by whitespace.
+    May be constructed using the C{'&'} operator.
+
+    Example::
+        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
+        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
+        integer = Word(nums)
+        shape_attr = "shape:" + shape_type("shape")
+        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
+        color_attr = "color:" + color("color")
+        size_attr = "size:" + integer("size")
+
+        # use Each (using operator '&') to accept attributes in any order 
+        # (shape and posn are required, color and size are optional)
+        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
+
+        shape_spec.runTests('''
+            shape: SQUARE color: BLACK posn: 100, 120
+            shape: CIRCLE size: 50 color: BLUE posn: 50,80
+            color:GREEN size:20 shape:TRIANGLE posn:20,40
+            '''
+            )
+    prints::
+        shape: SQUARE color: BLACK posn: 100, 120
+        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
+        - color: BLACK
+        - posn: ['100', ',', '120']
+          - x: 100
+          - y: 120
+        - shape: SQUARE
+
+
+        shape: CIRCLE size: 50 color: BLUE posn: 50,80
+        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
+        - color: BLUE
+        - posn: ['50', ',', '80']
+          - x: 50
+          - y: 80
+        - shape: CIRCLE
+        - size: 50
+
+
+        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
+        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
+        - color: GREEN
+        - posn: ['20', ',', '40']
+          - x: 20
+          - y: 40
+        - shape: TRIANGLE
+        - size: 20
+    """
+    def __init__( self, exprs, savelist = True ):
+        super(Each,self).__init__(exprs, savelist)
+        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
+        self.skipWhitespace = True
+        self.initExprGroups = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.initExprGroups:
+            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
+            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
+            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
+            self.optionals = opt1 + opt2
+            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
+            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
+            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
+            self.required += self.multirequired
+            self.initExprGroups = False
+        tmpLoc = loc
+        tmpReqd = self.required[:]
+        tmpOpt  = self.optionals[:]
+        matchOrder = []
+
+        keepMatching = True
+        while keepMatching:
+            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
+            failed = []
+            for e in tmpExprs:
+                try:
+                    tmpLoc = e.tryParse( instring, tmpLoc )
+                except ParseException:
+                    failed.append(e)
+                else:
+                    matchOrder.append(self.opt1map.get(id(e),e))
+                    if e in tmpReqd:
+                        tmpReqd.remove(e)
+                    elif e in tmpOpt:
+                        tmpOpt.remove(e)
+            if len(failed) == len(tmpExprs):
+                keepMatching = False
+
+        if tmpReqd:
+            missing = ", ".join(_ustr(e) for e in tmpReqd)
+            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
+
+        # add any unmatched Optionals, in case they have default values defined
+        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]
+
+        resultlist = []
+        for e in matchOrder:
+            loc,results = e._parse(instring,loc,doActions)
+            resultlist.append(results)
+
+        finalResults = sum(resultlist, ParseResults([]))
+        return loc, finalResults
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
+
+        return self.strRepr
+
+    def checkRecursion( self, parseElementList ):
+        subRecCheckList = parseElementList[:] + [ self ]
+        for e in self.exprs:
+            e.checkRecursion( subRecCheckList )
+
+
+class ParseElementEnhance(ParserElement):
+    """
+    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(ParseElementEnhance,self).__init__(savelist)
+        if isinstance( expr, basestring ):
+            if issubclass(ParserElement._literalStringClass, Token):
+                expr = ParserElement._literalStringClass(expr)
+            else:
+                expr = ParserElement._literalStringClass(Literal(expr))
+        self.expr = expr
+        self.strRepr = None
+        if expr is not None:
+            self.mayIndexError = expr.mayIndexError
+            self.mayReturnEmpty = expr.mayReturnEmpty
+            self.setWhitespaceChars( expr.whiteChars )
+            self.skipWhitespace = expr.skipWhitespace
+            self.saveAsList = expr.saveAsList
+            self.callPreparse = expr.callPreparse
+            self.ignoreExprs.extend(expr.ignoreExprs)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.expr is not None:
+            return self.expr._parse( instring, loc, doActions, callPreParse=False )
+        else:
+            raise ParseException("",loc,self.errmsg,self)
+
+    def leaveWhitespace( self ):
+        self.skipWhitespace = False
+        self.expr = self.expr.copy()
+        if self.expr is not None:
+            self.expr.leaveWhitespace()
+        return self
+
+    def ignore( self, other ):
+        if isinstance( other, Suppress ):
+            if other not in self.ignoreExprs:
+                super( ParseElementEnhance, self).ignore( other )
+                if self.expr is not None:
+                    self.expr.ignore( self.ignoreExprs[-1] )
+        else:
+            super( ParseElementEnhance, self).ignore( other )
+            if self.expr is not None:
+                self.expr.ignore( self.ignoreExprs[-1] )
+        return self
+
+    def streamline( self ):
+        super(ParseElementEnhance,self).streamline()
+        if self.expr is not None:
+            self.expr.streamline()
+        return self
+
+    def checkRecursion( self, parseElementList ):
+        if self in parseElementList:
+            raise RecursiveGrammarException( parseElementList+[self] )
+        subRecCheckList = parseElementList[:] + [ self ]
+        if self.expr is not None:
+            self.expr.checkRecursion( subRecCheckList )
+
+    def validate( self, validateTrace=[] ):
+        tmp = validateTrace[:]+[self]
+        if self.expr is not None:
+            self.expr.validate(tmp)
+        self.checkRecursion( [] )
+
+    def __str__( self ):
+        try:
+            return super(ParseElementEnhance,self).__str__()
+        except Exception:
+            pass
+
+        if self.strRepr is None and self.expr is not None:
+            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
+        return self.strRepr
+
+
+class FollowedBy(ParseElementEnhance):
+    """
+    Lookahead matching of the given parse expression.  C{FollowedBy}
+    does I{not} advance the parsing position within the input string, it only
+    verifies that the specified parse expression matches at the current
+    position.  C{FollowedBy} always returns a null token list.
+
+    Example::
+        # use FollowedBy to match a label only if it is followed by a ':'
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        
+        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
+    prints::
+        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
+    """
+    def __init__( self, expr ):
+        super(FollowedBy,self).__init__(expr)
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        self.expr.tryParse( instring, loc )
+        return loc, []
+
+
+class NotAny(ParseElementEnhance):
+    """
+    Lookahead to disallow matching with the given parse expression.  C{NotAny}
+    does I{not} advance the parsing position within the input string, it only
+    verifies that the specified parse expression does I{not} match at the current
+    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
+    always returns a null token list.  May be constructed using the '~' operator.
+
+    Example::
+        
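+        # an illustrative sketch: match an identifier, but not if it is the reserved word 'end'
+        END = Keyword("end")
+        ident = NotAny(END) + Word(alphas)
+
+        # NotAny can also be written using the '~' operator
+        ident = ~END + Word(alphas)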
+    """
+    def __init__( self, expr ):
+        super(NotAny,self).__init__(expr)
+        #~ self.leaveWhitespace()
+        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
+        self.mayReturnEmpty = True
+        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        if self.expr.canParseNext(instring, loc):
+            raise ParseException(instring, loc, self.errmsg, self)
+        return loc, []
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "~{" + _ustr(self.expr) + "}"
+
+        return self.strRepr
+
+class _MultipleMatch(ParseElementEnhance):
+    def __init__( self, expr, stopOn=None):
+        super(_MultipleMatch, self).__init__(expr)
+        self.saveAsList = True
+        ender = stopOn
+        if isinstance(ender, basestring):
+            ender = ParserElement._literalStringClass(ender)
+        self.not_ender = ~ender if ender is not None else None
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        self_expr_parse = self.expr._parse
+        self_skip_ignorables = self._skipIgnorables
+        check_ender = self.not_ender is not None
+        if check_ender:
+            try_not_ender = self.not_ender.tryParse
+        
+        # must be at least one (but first see if we are the stopOn sentinel;
+        # if so, fail)
+        if check_ender:
+            try_not_ender(instring, loc)
+        loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
+        try:
+            hasIgnoreExprs = (not not self.ignoreExprs)
+            while 1:
+                if check_ender:
+                    try_not_ender(instring, loc)
+                if hasIgnoreExprs:
+                    preloc = self_skip_ignorables( instring, loc )
+                else:
+                    preloc = loc
+                loc, tmptokens = self_expr_parse( instring, preloc, doActions )
+                if tmptokens or tmptokens.haskeys():
+                    tokens += tmptokens
+        except (ParseException,IndexError):
+            pass
+
+        return loc, tokens
+        
+class OneOrMore(_MultipleMatch):
+    """
+    Repetition of one or more of the given expression.
+    
+    Parameters:
+     - expr - expression that must match one or more times
+     - stopOn - (default=C{None}) - expression for a terminating sentinel
+          (only required if the sentinel would ordinarily match the repetition 
+          expression)          
+
+    Example::
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+        text = "shape: SQUARE posn: upper left color: BLACK"
+        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
+
+        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
+        
+        # could also be written as
+        (attr_expr * (1,)).parseString(text).pprint()
+    """
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "{" + _ustr(self.expr) + "}..."
+
+        return self.strRepr
+
+class ZeroOrMore(_MultipleMatch):
+    """
+    Optional repetition of zero or more of the given expression.
+    
+    Parameters:
+     - expr - expression that must match zero or more times
+     - stopOn - (default=C{None}) - expression for a terminating sentinel
+          (only required if the sentinel would ordinarily match the repetition 
+          expression)          
+
+    Example: similar to L{OneOrMore}
+    """
+    def __init__( self, expr, stopOn=None):
+        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
+        self.mayReturnEmpty = True
+        
+    def parseImpl( self, instring, loc, doActions=True ):
+        try:
+            return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
+        except (ParseException,IndexError):
+            return loc, []
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]..."
+
+        return self.strRepr
+
+class _NullToken(object):
+    def __bool__(self):
+        return False
+    __nonzero__ = __bool__
+    def __str__(self):
+        return ""
+
+_optionalNotMatched = _NullToken()
+class Optional(ParseElementEnhance):
+    """
+    Optional matching of the given expression.
+
+    Parameters:
+     - expr - expression to be matched, zero or one time
+     - default (optional) - value to be returned if the optional expression is not found.
+
+    Example::
+        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
+        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
+        zip.runTests('''
+            # traditional ZIP code
+            12345
+            
+            # ZIP+4 form
+            12101-0001
+            
+            # invalid ZIP
+            98765-
+            ''')
+    prints::
+        # traditional ZIP code
+        12345
+        ['12345']
+
+        # ZIP+4 form
+        12101-0001
+        ['12101-0001']
+
+        # invalid ZIP
+        98765-
+             ^
+        FAIL: Expected end of text (at char 5), (line:1, col:6)
+    """
+    def __init__( self, expr, default=_optionalNotMatched ):
+        super(Optional,self).__init__( expr, savelist=False )
+        self.saveAsList = self.expr.saveAsList
+        self.defaultValue = default
+        self.mayReturnEmpty = True
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        try:
+            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
+        except (ParseException,IndexError):
+            if self.defaultValue is not _optionalNotMatched:
+                if self.expr.resultsName:
+                    tokens = ParseResults([ self.defaultValue ])
+                    tokens[self.expr.resultsName] = self.defaultValue
+                else:
+                    tokens = [ self.defaultValue ]
+            else:
+                tokens = []
+        return loc, tokens
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+
+        if self.strRepr is None:
+            self.strRepr = "[" + _ustr(self.expr) + "]"
+
+        return self.strRepr
+
+class SkipTo(ParseElementEnhance):
+    """
+    Token for skipping over all undefined text until the matched expression is found.
+
+    Parameters:
+     - expr - target expression marking the end of the data to be skipped
+     - include - (default=C{False}) if True, the target expression is also parsed 
+          (the skipped text and target expression are returned as a 2-element list).
+     - ignore - (default=C{None}) used to define grammars (typically quoted strings and 
+          comments) that might contain false matches to the target expression
+     - failOn - (default=C{None}) define expressions that are not allowed to be 
+          included in the skipped text; if found before the target expression is found, 
+          the SkipTo is not a match
+
+    Example::
+        report = '''
+            Outstanding Issues Report - 1 Jan 2000
+
+               # | Severity | Description                               |  Days Open
+            -----+----------+-------------------------------------------+-----------
+             101 | Critical | Intermittent system crash                 |          6
+              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
+              79 | Minor    | System slow when running too many reports |         47
+            '''
+        integer = Word(nums)
+        SEP = Suppress('|')
+        # use SkipTo to simply match everything up until the next SEP
+        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
+        # - parse action will call token.strip() for each matched token, i.e., the description body
+        string_data = SkipTo(SEP, ignore=quotedString)
+        string_data.setParseAction(tokenMap(str.strip))
+        ticket_expr = (integer("issue_num") + SEP 
+                      + string_data("sev") + SEP 
+                      + string_data("desc") + SEP 
+                      + integer("days_open"))
+        
+        for tkt in ticket_expr.searchString(report):
+            print(tkt.dump())
+    prints::
+        ['101', 'Critical', 'Intermittent system crash', '6']
+        - days_open: 6
+        - desc: Intermittent system crash
+        - issue_num: 101
+        - sev: Critical
+        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
+        - days_open: 14
+        - desc: Spelling error on Login ('log|n')
+        - issue_num: 94
+        - sev: Cosmetic
+        ['79', 'Minor', 'System slow when running too many reports', '47']
+        - days_open: 47
+        - desc: System slow when running too many reports
+        - issue_num: 79
+        - sev: Minor
+    """
+    def __init__( self, other, include=False, ignore=None, failOn=None ):
+        super( SkipTo, self ).__init__( other )
+        self.ignoreExpr = ignore
+        self.mayReturnEmpty = True
+        self.mayIndexError = False
+        self.includeMatch = include
+        self.asList = False
+        if isinstance(failOn, basestring):
+            self.failOn = ParserElement._literalStringClass(failOn)
+        else:
+            self.failOn = failOn
+        self.errmsg = "No match found for "+_ustr(self.expr)
+
+    def parseImpl( self, instring, loc, doActions=True ):
+        startloc = loc
+        instrlen = len(instring)
+        expr = self.expr
+        expr_parse = self.expr._parse
+        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
+        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
+        
+        tmploc = loc
+        while tmploc <= instrlen:
+            if self_failOn_canParseNext is not None:
+                # break if failOn expression matches
+                if self_failOn_canParseNext(instring, tmploc):
+                    break
+                    
+            if self_ignoreExpr_tryParse is not None:
+                # advance past ignore expressions
+                while 1:
+                    try:
+                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
+                    except ParseBaseException:
+                        break
+            
+            try:
+                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
+            except (ParseException, IndexError):
+                # no match, advance loc in string
+                tmploc += 1
+            else:
+                # matched skipto expr, done
+                break
+
+        else:
+            # ran off the end of the input string without matching skipto expr, fail
+            raise ParseException(instring, loc, self.errmsg, self)
+
+        # build up return values
+        loc = tmploc
+        skiptext = instring[startloc:loc]
+        skipresult = ParseResults(skiptext)
+        
+        if self.includeMatch:
+            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
+            skipresult += mat
+
+        return loc, skipresult
+
+class Forward(ParseElementEnhance):
+    """
+    Forward declaration of an expression to be defined later -
+    used for recursive grammars, such as algebraic infix notation.
+    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
+
+    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
+    Specifically, '|' has a lower precedence than '<<', so that::
+        fwdExpr << a | b | c
+    will actually be evaluated as::
+        (fwdExpr << a) | b | c
+    thereby leaving b and c out as parseable alternatives.  It is recommended that you
+    explicitly group the values inserted into the C{Forward}::
+        fwdExpr << (a | b | c)
+    Converting to use the '<<=' operator instead will avoid this problem.
+
+    See L{ParseResults.pprint} for an example of a recursive parser created using
+    C{Forward}.
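+
+    Example (an illustrative sketch of a recursive grammar - the output shown is the expected result)::
+
+        # nested, parenthesized lists of words, such as "(a (b c) d)"
+        LPAR, RPAR = map(Suppress, "()")
+        nested = Forward()
+        nested <<= Group(LPAR + ZeroOrMore(Word(alphas) | nested) + RPAR)
+        print(nested.parseString("(a (b c) d)"))
+    prints::
+        [['a', ['b', 'c'], 'd']]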
+    """
+    def __init__( self, other=None ):
+        super(Forward,self).__init__( other, savelist=False )
+
+    def __lshift__( self, other ):
+        if isinstance( other, basestring ):
+            other = ParserElement._literalStringClass(other)
+        self.expr = other
+        self.strRepr = None
+        self.mayIndexError = self.expr.mayIndexError
+        self.mayReturnEmpty = self.expr.mayReturnEmpty
+        self.setWhitespaceChars( self.expr.whiteChars )
+        self.skipWhitespace = self.expr.skipWhitespace
+        self.saveAsList = self.expr.saveAsList
+        self.ignoreExprs.extend(self.expr.ignoreExprs)
+        return self
+        
+    def __ilshift__(self, other):
+        return self << other
+    
+    def leaveWhitespace( self ):
+        self.skipWhitespace = False
+        return self
+
+    def streamline( self ):
+        if not self.streamlined:
+            self.streamlined = True
+            if self.expr is not None:
+                self.expr.streamline()
+        return self
+
+    def validate( self, validateTrace=[] ):
+        if self not in validateTrace:
+            tmp = validateTrace[:]+[self]
+            if self.expr is not None:
+                self.expr.validate(tmp)
+        self.checkRecursion([])
+
+    def __str__( self ):
+        if hasattr(self,"name"):
+            return self.name
+        return self.__class__.__name__ + ": ..."
+
+        # stubbed out for now - creates awful memory and perf issues
+        self._revertClass = self.__class__
+        self.__class__ = _ForwardNoRecurse
+        try:
+            if self.expr is not None:
+                retString = _ustr(self.expr)
+            else:
+                retString = "None"
+        finally:
+            self.__class__ = self._revertClass
+        return self.__class__.__name__ + ": " + retString
+
+    def copy(self):
+        if self.expr is not None:
+            return super(Forward,self).copy()
+        else:
+            ret = Forward()
+            ret <<= self
+            return ret
+
+class _ForwardNoRecurse(Forward):
+    def __str__( self ):
+        return "..."
+
+class TokenConverter(ParseElementEnhance):
+    """
+    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
+    """
+    def __init__( self, expr, savelist=False ):
+        super(TokenConverter,self).__init__( expr )#, savelist )
+        self.saveAsList = False
+
+class Combine(TokenConverter):
+    """
+    Converter to concatenate all matching tokens to a single string.
+    By default, the matching patterns must also be contiguous in the input string;
+    this can be disabled by specifying C{'adjacent=False'} in the constructor.
+
+    Example::
+        real = Word(nums) + '.' + Word(nums)
+        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
+        # will also erroneously match the following
+        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
+
+        real = Combine(Word(nums) + '.' + Word(nums))
+        print(real.parseString('3.1416')) # -> ['3.1416']
+        # no match when there are internal spaces
+        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
+    """
+    def __init__( self, expr, joinString="", adjacent=True ):
+        super(Combine,self).__init__( expr )
+        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
+        if adjacent:
+            self.leaveWhitespace()
+        self.adjacent = adjacent
+        self.skipWhitespace = True
+        self.joinString = joinString
+        self.callPreparse = True
+
+    def ignore( self, other ):
+        if self.adjacent:
+            ParserElement.ignore(self, other)
+        else:
+            super( Combine, self).ignore( other )
+        return self
+
+    def postParse( self, instring, loc, tokenlist ):
+        retToks = tokenlist.copy()
+        del retToks[:]
+        retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
+
+        if self.resultsName and retToks.haskeys():
+            return [ retToks ]
+        else:
+            return retToks
+
+class Group(TokenConverter):
+    """
+    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
+
+    Example::
+        ident = Word(alphas)
+        num = Word(nums)
+        term = ident | num
+        func = ident + Optional(delimitedList(term))
+        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']
+
+        func = ident + Group(Optional(delimitedList(term)))
+        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
+    """
+    def __init__( self, expr ):
+        super(Group,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        return [ tokenlist ]
+
+class Dict(TokenConverter):
+    """
+    Converter to return a repetitive expression as a list, but also as a dictionary.
+    Each element can also be referenced using the first token in the expression as its key.
+    Useful for tabular report scraping when the first column can be used as an item key.
+
+    Example::
+        data_word = Word(alphas)
+        label = data_word + FollowedBy(':')
+        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
+
+        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        
+        # print attributes as plain groups
+        print(OneOrMore(attr_expr).parseString(text).dump())
+        
+        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
+        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
+        print(result.dump())
+        
+        # access named fields as dict entries, or output as dict
+        print(result['shape'])        
+        print(result.asDict())
+    prints::
+        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
+
+        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+        - color: light blue
+        - posn: upper left
+        - shape: SQUARE
+        - texture: burlap
+        SQUARE
+        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
+    See more examples at L{ParseResults} of accessing fields by results name.
+    """
+    def __init__( self, expr ):
+        super(Dict,self).__init__( expr )
+        self.saveAsList = True
+
+    def postParse( self, instring, loc, tokenlist ):
+        for i,tok in enumerate(tokenlist):
+            if len(tok) == 0:
+                continue
+            ikey = tok[0]
+            if isinstance(ikey,int):
+                ikey = _ustr(tok[0]).strip()
+            if len(tok)==1:
+                tokenlist[ikey] = _ParseResultsWithOffset("",i)
+            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
+                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
+            else:
+                dictvalue = tok.copy() #ParseResults(i)
+                del dictvalue[0]
+                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
+                else:
+                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
+
+        if self.resultsName:
+            return [ tokenlist ]
+        else:
+            return tokenlist
+
+
+class Suppress(TokenConverter):
+    """
+    Converter for ignoring the results of a parsed expression.
+
+    Example::
+        source = "a, b, c,d"
+        wd = Word(alphas)
+        wd_list1 = wd + ZeroOrMore(',' + wd)
+        print(wd_list1.parseString(source))
+
+        # often, delimiters that are useful during parsing are just in the
+        # way afterward - use Suppress to keep them out of the parsed output
+        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
+        print(wd_list2.parseString(source))
+    prints::
+        ['a', ',', 'b', ',', 'c', ',', 'd']
+        ['a', 'b', 'c', 'd']
+    (See also L{delimitedList}.)
+    """
+    def postParse( self, instring, loc, tokenlist ):
+        return []
+
+    def suppress( self ):
+        return self
+
+
+class OnlyOnce(object):
+    """
+    Wrapper for parse actions, to ensure they are only called once.
+    """
+    def __init__(self, methodCall):
+        self.callable = _trim_arity(methodCall)
+        self.called = False
+    def __call__(self,s,l,t):
+        if not self.called:
+            results = self.callable(s,l,t)
+            self.called = True
+            return results
+        raise ParseException(s,l,"")
+    def reset(self):
+        self.called = False
+
+def traceParseAction(f):
+    """
+    Decorator for debugging parse actions. 
+    
+    When the parse action is called, this decorator will print C{">>entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})"}.
+    When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
+
+    Example::
+        wd = Word(alphas)
+
+        @traceParseAction
+        def remove_duplicate_chars(tokens):
+            return ''.join(sorted(set(''.join(tokens))))
+
+        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
+        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
+    prints::
+        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
+        <<leaving remove_duplicate_chars (ret: 'dfjkls')
+        ['dfjkls']
+    """
+    f = _trim_arity(f)
+    def z(*paArgs):
+        thisFunc = f.__name__
+        s,l,t = paArgs[-3:]
+        if len(paArgs)>3:
+            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
+        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
+        try:
+            ret = f(*paArgs)
+        except Exception as exc:
+            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
+            raise
+        sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
+        return ret
+    try:
+        z.__name__ = f.__name__
+    except AttributeError:
+        pass
+    return z
+
+#
+# global helpers
+#
+def delimitedList( expr, delim=",", combine=False ):
+    """
+    Helper to define a delimited list of expressions - the delimiter defaults to ','.
+    By default, the list elements and delimiters can have intervening whitespace and
+    comments, but this can be overridden by passing C{combine=True} in the constructor.
+    If C{combine} is set to C{True}, the matching tokens are returned as a single token
+    string, with the delimiters included; otherwise, the matching tokens are returned
+    as a list of tokens, with the delimiters suppressed.
+
+    Example::
+        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
+        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
+    """
+    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
+    if combine:
+        return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
+    else:
+        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
+
+def countedArray( expr, intExpr=None ):
+    """
+    Helper to define a counted list of expressions.
+    This helper defines a pattern of the form::
+        integer expr expr expr...
+    where the leading integer tells how many expr expressions follow.
+    The matched tokens are returned as a list of expr tokens; the leading count token is suppressed.
+    
+    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
+
+    Example::
+        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']
+
+        # in this parser, the leading integer value is given in binary,
+        # '10' indicating that 2 values are in the array
+        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
+        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
+    """
+    arrayExpr = Forward()
+    def countFieldParseAction(s,l,t):
+        n = t[0]
+        arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
+        return []
+    if intExpr is None:
+        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
+    else:
+        intExpr = intExpr.copy()
+    intExpr.setName("arrayLen")
+    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
+    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
+
+def _flatten(L):
+    ret = []
+    for i in L:
+        if isinstance(i,list):
+            ret.extend(_flatten(i))
+        else:
+            ret.append(i)
+    return ret
+
+def matchPreviousLiteral(expr):
+    """
+    Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousLiteral(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
+    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
+    If this is not desired, use C{matchPreviousExpr}.
+    Do I{not} use with packrat parsing enabled.
+    """
+    rep = Forward()
+    def copyTokenToRepeater(s,l,t):
+        if t:
+            if len(t) == 1:
+                rep << t[0]
+            else:
+                # flatten t tokens
+                tflat = _flatten(t.asList())
+                rep << And(Literal(tt) for tt in tflat)
+        else:
+            rep << Empty()
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    rep.setName('(prev) ' + _ustr(expr))
+    return rep
+
+def matchPreviousExpr(expr):
+    """
+    Helper to define an expression that is indirectly defined from
+    the tokens matched in a previous expression, that is, it looks
+    for a 'repeat' of a previous expression.  For example::
+        first = Word(nums)
+        second = matchPreviousExpr(first)
+        matchExpr = first + ":" + second
+    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
+    expressions, it will I{not} match the leading C{"1:1"} in C{"1:10"};
+    the expressions are evaluated first, and then compared, so
+    C{"1"} is compared with C{"10"}.
+    Do I{not} use with packrat parsing enabled.
+    """
+    rep = Forward()
+    e2 = expr.copy()
+    rep <<= e2
+    def copyTokenToRepeater(s,l,t):
+        matchTokens = _flatten(t.asList())
+        def mustMatchTheseTokens(s,l,t):
+            theseTokens = _flatten(t.asList())
+            if  theseTokens != matchTokens:
+                raise ParseException("",0,"")
+        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
+    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
+    rep.setName('(prev) ' + _ustr(expr))
+    return rep
+
+def _escapeRegexRangeChars(s):
+    #~  escape these chars: ^-]
+    for c in r"\^-]":
+        s = s.replace(c,_bslash+c)
+    s = s.replace("\n",r"\n")
+    s = s.replace("\t",r"\t")
+    return _ustr(s)
+
+def oneOf( strs, caseless=False, useRegex=True ):
+    """
+    Helper to quickly define a set of alternative Literals, and makes sure to do
+    longest-first testing when there is a conflict, regardless of the input order,
+    but returns a C{L{MatchFirst}} for best performance.
+
+    Parameters:
+     - strs - a string of space-delimited literals, or a collection of string literals
+     - caseless - (default=C{False}) - treat all literals as caseless
+     - useRegex - (default=C{True}) - as an optimization, will generate a C{Regex}
+          object; otherwise, will generate a C{MatchFirst} object (a C{MatchFirst} is also
+          used if C{caseless=True}, or if creating a C{Regex} raises an exception)
+
+    Example::
+        comp_oper = oneOf("< = > <= >= !=")
+        var = Word(alphas)
+        number = Word(nums)
+        term = var | number
+        comparison_expr = term + comp_oper + term
+        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
+    prints::
+        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
+    """
+    if caseless:
+        isequal = ( lambda a,b: a.upper() == b.upper() )
+        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
+        parseElementClass = CaselessLiteral
+    else:
+        isequal = ( lambda a,b: a == b )
+        masks = ( lambda a,b: b.startswith(a) )
+        parseElementClass = Literal
+
+    symbols = []
+    if isinstance(strs,basestring):
+        symbols = strs.split()
+    elif isinstance(strs, collections.Iterable):
+        symbols = list(strs)
+    else:
+        warnings.warn("Invalid argument to oneOf, expected string or iterable",
+                SyntaxWarning, stacklevel=2)
+    if not symbols:
+        return NoMatch()
+
+    i = 0
+    while i < len(symbols)-1:
+        cur = symbols[i]
+        for j,other in enumerate(symbols[i+1:]):
+            if ( isequal(other, cur) ):
+                del symbols[i+j+1]
+                break
+            elif ( masks(cur, other) ):
+                del symbols[i+j+1]
+                symbols.insert(i,other)
+                cur = other
+                break
+        else:
+            i += 1
+
+    if not caseless and useRegex:
+        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
+        try:
+            if len(symbols)==len("".join(symbols)):
+                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
+            else:
+                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
+        except Exception:
+            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
+                    SyntaxWarning, stacklevel=2)
+
+
+    # last resort, just use MatchFirst
+    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
+
+def dictOf( key, value ):
+    """
+    Helper to easily and clearly define a dictionary by specifying the respective patterns
+    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
+    in the proper order.  The key pattern can include delimiting markers or punctuation,
+    as long as they are suppressed, thereby leaving the significant key text.  The value
+    pattern can include named results, so that the C{Dict} results can include named token
+    fields.
+
+    Example::
+        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
+        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
+        print(OneOrMore(attr_expr).parseString(text).dump())
+        
+        attr_label = label
+        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
+
+        # similar to Dict, but simpler call format
+        result = dictOf(attr_label, attr_value).parseString(text)
+        print(result.dump())
+        print(result['shape'])
+        print(result.shape)  # object attribute access works too
+        print(result.asDict())
+    prints::
+        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
+        - color: light blue
+        - posn: upper left
+        - shape: SQUARE
+        - texture: burlap
+        SQUARE
+        SQUARE
+        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
+    """
+    return Dict( ZeroOrMore( Group ( key + value ) ) )
+
+def originalTextFor(expr, asString=True):
+    """
+    Helper to return the original, untokenized text for a given expression.  Useful to
+    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
+    revert separate tokens with intervening whitespace back to the original matching
+    input text. By default, returns a string containing the original parsed text.
+       
+    If the optional C{asString} argument is passed as C{False}, then the return value is a 
+    C{L{ParseResults}} containing any results names that were originally matched, and a 
+    single token containing the original matched text from the input string.  So if 
+    the expression passed to C{L{originalTextFor}} contains expressions with defined
+    results names, you must set C{asString} to C{False} if you want to preserve those
+    results name values.
+
+    Example::
+        src = "this is test <b> bold <i>text</i> </b> normal text "
+        for tag in ("b","i"):
+            opener,closer = makeHTMLTags(tag)
+            patt = originalTextFor(opener + SkipTo(closer) + closer)
+            print(patt.searchString(src)[0])
+    prints::
+        ['<b> bold <i>text</i> </b>']
+        ['<i>text</i>']
+    """
+    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
+    endlocMarker = locMarker.copy()
+    endlocMarker.callPreparse = False
+    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
+    if asString:
+        extractText = lambda s,l,t: s[t._original_start:t._original_end]
+    else:
+        def extractText(s,l,t):
+            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
+    matchExpr.setParseAction(extractText)
+    matchExpr.ignoreExprs = expr.ignoreExprs
+    return matchExpr
+
+def ungroup(expr): 
+    """
+    Helper to undo pyparsing's default grouping of And expressions, even
+    if all but one are non-empty.
+    """
+    return TokenConverter(expr).setParseAction(lambda t:t[0])
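+
+# Illustrative sketch (hypothetical helper, not part of the pyparsing API): shows
+# how ungroup strips one level of Group nesting from a result.  This demo function
+# is never called; the names used in it are invented for the example.
+def _ungroup_demo():
+    grouped_pair = Group(Word(alphas) + Word(nums))
+    print(grouped_pair.parseString("abc 123"))           # -> [['abc', '123']]
+    print(ungroup(grouped_pair).parseString("abc 123"))  # -> ['abc', '123']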
+
+def locatedExpr(expr):
+    """
+    Helper to decorate a returned token with its starting and ending locations in the input string.
+    This helper adds the following results names:
+     - locn_start = location where matched expression begins
+     - locn_end = location where matched expression ends
+     - value = the actual parsed results
+
+    Be careful if the input text contains C{<TAB>} characters; you may want to call
+    C{L{ParserElement.parseWithTabs}}.
+
+    Example::
+        wd = Word(alphas)
+        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
+            print(match)
+    prints::
+        [[0, 'ljsdf', 5]]
+        [[8, 'lksdjjf', 15]]
+        [[18, 'lkkjj', 23]]
+    """
+    locator = Empty().setParseAction(lambda s,l,t: l)
+    return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
+
+
+# convenience constants for positional expressions
+empty       = Empty().setName("empty")
+lineStart   = LineStart().setName("lineStart")
+lineEnd     = LineEnd().setName("lineEnd")
+stringStart = StringStart().setName("stringStart")
+stringEnd   = StringEnd().setName("stringEnd")
+
+_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
+_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
+_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
+_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
+_charRange = Group(_singleChar + Suppress("-") + _singleChar)
+_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
+
+def srange(s):
+    r"""
+    Helper to easily define string ranges for use in Word construction.  Borrows
+    syntax from regexp '[]' string range definitions::
+        srange("[0-9]")   -> "0123456789"
+        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
+        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
+    The input string must be enclosed in []'s, and the returned string is the expanded
+    character set joined into a single string.
+    The values enclosed in the []'s may be:
+     - a single character
+     - an escaped character with a leading backslash (such as C{\-} or C{\]})
+     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character) 
+         (C{\0x##} is also supported for backwards compatibility) 
+     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
+     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
+     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
+    """
+    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
+    try:
+        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
+    except Exception:
+        return ""
+
+def matchOnlyAtCol(n):
+    """
+    Helper method for defining parse actions that require matching at a specific
+    column in the input text.
+    """
+    def verifyCol(strg,locn,toks):
+        if col(locn,strg) != n:
+            raise ParseException(strg,locn,"matched token not at column %d" % n)
+    return verifyCol
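+
+# Illustrative sketch (hypothetical helper, not part of the pyparsing API): a parse
+# action built with matchOnlyAtCol raises ParseException for matches starting in
+# the wrong column, so searchString/scanString simply skip them.  Never called.
+def _matchOnlyAtCol_demo():
+    # accept a run of digits only if it begins in column 5 of the input
+    num_in_col5 = Word(nums).setParseAction(matchOnlyAtCol(5))
+    print(num_in_col5.searchString("abc 12 345"))  # -> [['12']]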
+
+def replaceWith(replStr):
+    """
+    Helper method for common parse actions that simply return a literal value.  Especially
+    useful when used with C{L{transformString<ParserElement.transformString>}()}.
+
+    Example::
+        import math
+        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
+        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
+        term = na | num
+        
+        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
+    """
+    return lambda s,l,t: [replStr]
+
+def removeQuotes(s,l,t):
+    """
+    Helper parse action for removing quotation marks from parsed quoted strings.
+
+    Example::
+        # by default, quotation marks are included in parsed results
+        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
+
+        # use removeQuotes to strip quotation marks from parsed results
+        quotedString.setParseAction(removeQuotes)
+        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
+    """
+    return t[0][1:-1]
+
+def tokenMap(func, *args):
+    """
+    Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
+    args are passed, they are forwarded to the given function as additional arguments after
+    the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
+    parsed data to an integer using base 16.
+
+    Example (compare the last example to the one in L{ParserElement.transformString})::
+        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
+        hex_ints.runTests('''
+            00 11 22 aa FF 0a 0d 1a
+            ''')
+        
+        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
+        OneOrMore(upperword).runTests('''
+            my kingdom for a horse
+            ''')
+
+        wd = Word(alphas).setParseAction(tokenMap(str.title))
+        OneOrMore(wd).setParseAction(' '.join).runTests('''
+            now is the winter of our discontent made glorious summer by this sun of york
+            ''')
+    prints::
+        00 11 22 aa FF 0a 0d 1a
+        [0, 17, 34, 170, 255, 10, 13, 26]
+
+        my kingdom for a horse
+        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
+
+        now is the winter of our discontent made glorious summer by this sun of york
+        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
+    """
+    def pa(s,l,t):
+        return [func(tokn, *args) for tokn in t]
+
+    try:
+        func_name = getattr(func, '__name__', 
+                            getattr(func, '__class__').__name__)
+    except Exception:
+        func_name = str(func)
+    pa.__name__ = func_name
+
+    return pa
+
+upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
+"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""
+
+downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
+"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
+    
+def _makeTags(tagStr, xml):
+    """Internal helper to construct opening and closing tag expressions, given a tag name"""
+    if isinstance(tagStr,basestring):
+        resname = tagStr
+        tagStr = Keyword(tagStr, caseless=not xml)
+    else:
+        resname = tagStr.name
+
+    tagAttrName = Word(alphas,alphanums+"_-:")
+    if (xml):
+        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    else:
+        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
+        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
+        openTag = Suppress("<") + tagStr("tag") + \
+                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
+                Optional( Suppress("=") + tagAttrValue ) ))) + \
+                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
+    closeTag = Combine(_L("</") + tagStr + ">")
+
+    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
+    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
+    openTag.tag = resname
+    closeTag.tag = resname
+    return openTag, closeTag
+
+def makeHTMLTags(tagStr):
+    """
+    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
+    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
+
+    Example::
+        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
+        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
+        a,a_end = makeHTMLTags("A")
+        link_expr = a + SkipTo(a_end)("link_text") + a_end
+        
+        for link in link_expr.searchString(text):
+            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
+            print(link.link_text, '->', link.href)
+    prints::
+        pyparsing -> http://pyparsing.wikispaces.com
+    """
+    return _makeTags( tagStr, False )
+
+def makeXMLTags(tagStr):
+    """
+    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
+    tags only in the given upper/lower case.
+
+    Example: similar to L{makeHTMLTags}
+    """
+    return _makeTags( tagStr, True )
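+
+# Illustrative sketch (hypothetical helper, not part of the pyparsing API): usage of
+# makeXMLTags parallels the makeHTMLTags example above, but the tag name is matched
+# case-sensitively and attribute values must be quoted.  Never called.
+def _makeXMLTags_demo():
+    xml = '<title lang="en">A Midsummer Night\'s Dream</title>'
+    title, title_end = makeXMLTags("title")
+    title_text = title + SkipTo(title_end)("text") + title_end
+    match = title_text.searchString(xml)[0]
+    print(match.text)  # -> A Midsummer Night's Dream
+    print(match.lang)  # -> en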
+
+def withAttribute(*args,**attrDict):
+    """
+    Helper to create a validating parse action to be used with start tags created
+    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
+    with a required attribute value, to avoid false matches on common tags such as
+    C{<TD>} or C{<DIV>}.
+
+    Call C{withAttribute} with a series of attribute names and values. Specify the list
+    of filter attribute names and values as:
+     - keyword arguments, as in C{(align="right")}, or
+     - as an explicit dict with C{**} operator, when an attribute name is also a Python
+          reserved word, as in C{**{"class":"Customer", "align":"right"}}
+     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
+    For attribute names with a namespace prefix, you must use the second form.  Attribute
+    names are matched insensitive to upper/lower case.
+       
+    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
+
+    To verify that the attribute exists, but without specifying a value, pass
+    C{withAttribute.ANY_VALUE} as the value.
+
+    Example::
+        html = '''
+            <div>
+            Some text
+            <div type="grid">1 4 0 1 0</div>
+            <div type="graph">1,3 2,3 1,1</div>
+            <div>this has no type</div>
+            </div>
+                
+        '''
+        div,div_end = makeHTMLTags("div")
+
+        # only match div tag having a type attribute with value "grid"
+        div_grid = div().setParseAction(withAttribute(type="grid"))
+        grid_expr = div_grid + SkipTo(div | div_end)("body")
+        for grid_header in grid_expr.searchString(html):
+            print(grid_header.body)
+        
+        # construct a match with any div tag having a type attribute, regardless of the value
+        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
+        div_expr = div_any_type + SkipTo(div | div_end)("body")
+        for div_header in div_expr.searchString(html):
+            print(div_header.body)
+    prints::
+        1 4 0 1 0
+
+        1 4 0 1 0
+        1,3 2,3 1,1
+    """
+    if args:
+        attrs = args[:]
+    else:
+        attrs = attrDict.items()
+    attrs = [(k,v) for k,v in attrs]
+    def pa(s,l,tokens):
+        for attrName,attrValue in attrs:
+            if attrName not in tokens:
+                raise ParseException(s,l,"no matching attribute " + attrName)
+            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
+                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
+                                            (attrName, tokens[attrName], attrValue))
+    return pa
+withAttribute.ANY_VALUE = object()
+
+def withClass(classname, namespace=''):
+    """
+    Simplified version of C{L{withAttribute}} when matching on a div class - made
+    difficult because C{class} is a reserved word in Python.
+
+    Example::
+        html = '''
+            <div>
+            Some text
+            <div class="grid">1 4 0 1 0</div>
+            <div class="graph">1,3 2,3 1,1</div>
+            <div>this &lt;div&gt; has no class</div>
+            </div>
+                
+        '''
+        div,div_end = makeHTMLTags("div")
+        div_grid = div().setParseAction(withClass("grid"))
+        
+        grid_expr = div_grid + SkipTo(div | div_end)("body")
+        for grid_header in grid_expr.searchString(html):
+            print(grid_header.body)
+        
+        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
+        div_expr = div_any_type + SkipTo(div | div_end)("body")
+        for div_header in div_expr.searchString(html):
+            print(div_header.body)
+    prints::
+        1 4 0 1 0
+
+        1 4 0 1 0
+        1,3 2,3 1,1
+    """
+    classattr = "%s:class" % namespace if namespace else "class"
+    return withAttribute(**{classattr : classname})        
+
+opAssoc = _Constants()
+opAssoc.LEFT = object()
+opAssoc.RIGHT = object()
+
+def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
+    """
+    Helper method for constructing grammars of expressions made up of
+    operators working in a precedence hierarchy.  Operators may be unary or
+    binary, left- or right-associative.  Parse actions can also be attached
+    to operator expressions. The generated parser will also recognize the use 
+    of parentheses to override operator precedences (see example below).
+    
+    Note: if you define a deep operator list, you may see performance issues
+    when using infixNotation. See L{ParserElement.enablePackrat} for a
+    mechanism to potentially improve your parser performance.
+
+    Parameters:
+     - baseExpr - expression representing the most basic operand element of the expression grammar
+     - opList - list of tuples, one for each operator precedence level in the
+      expression grammar; each tuple is of the form
+      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
+       - opExpr is the pyparsing expression for the operator;
+          may also be a string, which will be converted to a Literal;
+          if numTerms is 3, opExpr is a tuple of two expressions, for the
+          two operators separating the 3 terms
+       - numTerms is the number of terms for this operator (must
+          be 1, 2, or 3)
+       - rightLeftAssoc is the indicator of whether the operator is
+          right or left associative, using the pyparsing-defined
+          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
+       - parseAction is the parse action to be associated with
+          expressions matching this operator expression (the
+          parse action tuple member may be omitted)
+     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
+     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
+
+    Example::
+        # simple example of four-function arithmetic with ints and variable names
+        integer = pyparsing_common.signed_integer
+        varname = pyparsing_common.identifier 
+        
+        arith_expr = infixNotation(integer | varname,
+            [
+            ('-', 1, opAssoc.RIGHT),
+            (oneOf('* /'), 2, opAssoc.LEFT),
+            (oneOf('+ -'), 2, opAssoc.LEFT),
+            ])
+        
+        arith_expr.runTests('''
+            5+3*6
+            (5+3)*6
+            -2--11
+            ''', fullDump=False)
+    prints::
+        5+3*6
+        [[5, '+', [3, '*', 6]]]
+
+        (5+3)*6
+        [[[5, '+', 3], '*', 6]]
+
+        -2--11
+        [[['-', 2], '-', ['-', 11]]]
+    """
+    ret = Forward()
+    lastExpr = baseExpr | ( lpar + ret + rpar )
+    for i,operDef in enumerate(opList):
+        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
+        termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
+        if arity == 3:
+            if opExpr is None or len(opExpr) != 2:
+                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
+            opExpr1, opExpr2 = opExpr
+        thisExpr = Forward().setName(termName)
+        if rightLeftAssoc == opAssoc.LEFT:
+            if arity == 1:
+                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
+            elif arity == 2:
+                if opExpr is not None:
+                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
+                else:
+                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
+            elif arity == 3:
+                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
+                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
+            else:
+                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+        elif rightLeftAssoc == opAssoc.RIGHT:
+            if arity == 1:
+                # try to avoid LR with this extra test
+                if not isinstance(opExpr, Optional):
+                    opExpr = Optional(opExpr)
+                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
+            elif arity == 2:
+                if opExpr is not None:
+                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
+                else:
+                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
+            elif arity == 3:
+                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
+                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
+            else:
+                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
+        else:
+            raise ValueError("operator must indicate right or left associativity")
+        if pa:
+            matchExpr.setParseAction( pa )
+        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
+        lastExpr = thisExpr
+    ret <<= lastExpr
+    return ret
+
+operatorPrecedence = infixNotation
+"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
+
+dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
+sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
+quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
+                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
+unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
+
+def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
+    """
+    Helper method for defining nested lists enclosed in opening and closing
+    delimiters ("(" and ")" are the default).
+
+    Parameters:
+     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
+     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
+     - content - expression for items within the nested lists (default=C{None})
+     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
+
+    If an expression is not provided for the content argument, the nested
+    expression will capture all whitespace-delimited content between delimiters
+    as a list of separate values.
+
+    Use the C{ignoreExpr} argument to define expressions that may contain
+    opening or closing characters that should not be treated as opening
+    or closing characters for nesting, such as quotedString or a comment
+    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
+    The default is L{quotedString}, but if no expressions are to be ignored,
+    then pass C{None} for this argument.
+
+    Example::
+        data_type = oneOf("void int short long char float double")
+        decl_data_type = Combine(data_type + Optional(Word('*')))
+        ident = Word(alphas+'_', alphanums+'_')
+        number = pyparsing_common.number
+        arg = Group(decl_data_type + ident)
+        LPAR,RPAR = map(Suppress, "()")
+
+        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
+
+        c_function = (decl_data_type("type") 
+                      + ident("name")
+                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR 
+                      + code_body("body"))
+        c_function.ignore(cStyleComment)
+        
+        source_code = '''
+            int is_odd(int x) { 
+                return (x%2); 
+            }
+                
+            int dec_to_hex(char hchar) { 
+                if (hchar >= '0' && hchar <= '9') { 
+                    return (ord(hchar)-ord('0')); 
+                } else { 
+                    return (10+ord(hchar)-ord('A'));
+                } 
+            }
+        '''
+        for func in c_function.searchString(source_code):
+            print("%(name)s (%(type)s) args: %(args)s" % func)
+
+    prints::
+        is_odd (int) args: [['int', 'x']]
+        dec_to_hex (int) args: [['char', 'hchar']]
+    """
+    if opener == closer:
+        raise ValueError("opening and closing strings cannot be the same")
+    if content is None:
+        if isinstance(opener,basestring) and isinstance(closer,basestring):
+            if len(opener) == 1 and len(closer)==1:
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr +
+                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
+                                ).setParseAction(lambda t:t[0].strip()))
+            else:
+                if ignoreExpr is not None:
+                    content = (Combine(OneOrMore(~ignoreExpr + 
+                                    ~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+                else:
+                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
+                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
+                                ).setParseAction(lambda t:t[0].strip()))
+        else:
+            raise ValueError("opening and closing arguments must be strings if no content expression is given")
+    ret = Forward()
+    if ignoreExpr is not None:
+        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
+    else:
+        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) )
+    ret.setName('nested %s%s expression' % (opener,closer))
+    return ret
+
+def indentedBlock(blockStatementExpr, indentStack, indent=True):
+    """
+    Helper method for defining space-delimited indentation blocks, such as
+    those used to define block statements in Python source code.
+
+    Parameters:
+     - blockStatementExpr - expression defining the syntax of a statement that
+            is repeated within the indented block
+     - indentStack - list created by the caller to manage the indentation stack
+            (multiple C{indentedBlock} expressions within a single grammar
+            should share a common indentStack)
+     - indent - boolean indicating whether the block must be indented beyond
+            the current level; set to False for a block of left-most statements
+            (default=C{True})
+
+    A valid block must contain at least one C{blockStatement}.
+
+    Example::
+        data = '''
+        def A(z):
+          A1
+          B = 100
+          G = A2
+          A2
+          A3
+        B
+        def BB(a,b,c):
+          BB1
+          def BBA():
+            bba1
+            bba2
+            bba3
+        C
+        D
+        def spam(x,y):
+             def eggs(z):
+                 pass
+        '''
+
+
+        indentStack = [1]
+        stmt = Forward()
+
+        identifier = Word(alphas, alphanums)
+        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
+        func_body = indentedBlock(stmt, indentStack)
+        funcDef = Group( funcDecl + func_body )
+
+        rvalue = Forward()
+        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
+        rvalue << (funcCall | identifier | Word(nums))
+        assignment = Group(identifier + "=" + rvalue)
+        stmt << ( funcDef | assignment | identifier )
+
+        module_body = OneOrMore(stmt)
+
+        parseTree = module_body.parseString(data)
+        parseTree.pprint()
+    prints::
+        [['def',
+          'A',
+          ['(', 'z', ')'],
+          ':',
+          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
+         'B',
+         ['def',
+          'BB',
+          ['(', 'a', 'b', 'c', ')'],
+          ':',
+          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
+         'C',
+         'D',
+         ['def',
+          'spam',
+          ['(', 'x', 'y', ')'],
+          ':',
+          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] 
+    """
+    def checkPeerIndent(s,l,t):
+        if l >= len(s): return
+        curCol = col(l,s)
+        if curCol != indentStack[-1]:
+            if curCol > indentStack[-1]:
+                raise ParseFatalException(s,l,"illegal nesting")
+            raise ParseException(s,l,"not a peer entry")
+
+    def checkSubIndent(s,l,t):
+        curCol = col(l,s)
+        if curCol > indentStack[-1]:
+            indentStack.append( curCol )
+        else:
+            raise ParseException(s,l,"not a subentry")
+
+    def checkUnindent(s,l,t):
+        if l >= len(s): return
+        curCol = col(l,s)
+        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
+            raise ParseException(s,l,"not an unindent")
+        indentStack.pop()
+
+    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
+    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
+    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
+    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
+    if indent:
+        smExpr = Group( Optional(NL) +
+            #~ FollowedBy(blockStatementExpr) +
+            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
+    else:
+        smExpr = Group( Optional(NL) +
+            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
+    blockStatementExpr.ignore(_bslash + LineEnd())
+    return smExpr.setName('indented block')
+
+alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
+punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
+
+anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
+_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
+commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
+def replaceHTMLEntity(t):
+    """Helper parser action to replace common HTML entities with their special characters"""
+    return _htmlEntityMap.get(t.entity)
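+
+# Illustrative sketch (hypothetical helper, not part of the pyparsing API): pairing
+# commonHTMLEntity with replaceHTMLEntity and transformString rewrites entities in
+# place; a copy() is used so the module-level expression is untouched.  Never called.
+def _replaceHTMLEntity_demo():
+    entity = commonHTMLEntity.copy().setParseAction(replaceHTMLEntity)
+    print(entity.transformString("Dialog &quot;OK&quot; &amp; &lt;Cancel&gt;"))
+    # -> Dialog "OK" & <Cancel>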
+
+# it's easy to get these comment structures wrong - they're very common, so may as well make them available
+cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
+"Comment of the form C{/* ... */}"
+
+htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
+"Comment of the form C{<!-- ... -->}"
+
+restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
+dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
+"Comment of the form C{// ... (to end of line)}"
+
+cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
+"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"
+
+javaStyleComment = cppStyleComment
+"Same as C{L{cppStyleComment}}"
+
+pythonStyleComment = Regex(r"#.*").setName("Python style comment")
+"Comment of the form C{# ... (to end of line)}"
+
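+# Illustrative sketch (hypothetical helper, not part of the pyparsing API): these
+# comment expressions are normally attached with ParserElement.ignore(), so they
+# are skipped wherever whitespace is allowed.  Never called.
+def _comment_demo():
+    ident_list = delimitedList(Word(alphas))
+    ident_list.ignore(cppStyleComment)
+    print(ident_list.parseString("alpha, /* skip me */ beta, gamma  // trailing"))
+    # -> ['alpha', 'beta', 'gamma']
+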
+_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
+                                  Optional( Word(" \t") +
+                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
+commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
+"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
+   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
+
+# some other useful expressions - using lower-case class name since we are really using this as a namespace
+class pyparsing_common:
+    """
+    Here are some common low-level expressions that may be useful in jump-starting parser development:
+     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
+     - common L{programming identifiers<identifier>}
+     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
+     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
+     - L{UUID<uuid>}
+     - L{comma-separated list<comma_separated_list>}
+    Parse actions:
+     - C{L{convertToInteger}}
+     - C{L{convertToFloat}}
+     - C{L{convertToDate}}
+     - C{L{convertToDatetime}}
+     - C{L{stripHTMLTags}}
+     - C{L{upcaseTokens}}
+     - C{L{downcaseTokens}}
+
+    Example::
+        pyparsing_common.number.runTests('''
+            # any int or real number, returned as the appropriate type
+            100
+            -100
+            +100
+            3.14159
+            6.02e23
+            1e-12
+            ''')
+
+        pyparsing_common.fnumber.runTests('''
+            # any int or real number, returned as float
+            100
+            -100
+            +100
+            3.14159
+            6.02e23
+            1e-12
+            ''')
+
+        pyparsing_common.hex_integer.runTests('''
+            # hex numbers
+            100
+            FF
+            ''')
+
+        pyparsing_common.fraction.runTests('''
+            # fractions
+            1/2
+            -3/4
+            ''')
+
+        pyparsing_common.mixed_integer.runTests('''
+            # mixed fractions
+            1
+            1/2
+            -3/4
+            1-3/4
+            ''')
+
+        import uuid
+        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
+        pyparsing_common.uuid.runTests('''
+            # uuid
+            12345678-1234-5678-1234-567812345678
+            ''')
+    prints::
+        # any int or real number, returned as the appropriate type
+        100
+        [100]
+
+        -100
+        [-100]
+
+        +100
+        [100]
+
+        3.14159
+        [3.14159]
+
+        6.02e23
+        [6.02e+23]
+
+        1e-12
+        [1e-12]
+
+        # any int or real number, returned as float
+        100
+        [100.0]
+
+        -100
+        [-100.0]
+
+        +100
+        [100.0]
+
+        3.14159
+        [3.14159]
+
+        6.02e23
+        [6.02e+23]
+
+        1e-12
+        [1e-12]
+
+        # hex numbers
+        100
+        [256]
+
+        FF
+        [255]
+
+        # fractions
+        1/2
+        [0.5]
+
+        -3/4
+        [-0.75]
+
+        # mixed fractions
+        1
+        [1]
+
+        1/2
+        [0.5]
+
+        -3/4
+        [-0.75]
+
+        1-3/4
+        [1.75]
+
+        # uuid
+        12345678-1234-5678-1234-567812345678
+        [UUID('12345678-1234-5678-1234-567812345678')]
+    """
+
+    convertToInteger = tokenMap(int)
+    """
+    Parse action for converting parsed integers to Python int
+    """
+
+    convertToFloat = tokenMap(float)
+    """
+    Parse action for converting parsed numbers to Python float
+    """
+
+    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
+    """expression that parses an unsigned integer, returns an int"""
+
+    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
+    """expression that parses a hexadecimal integer, returns an int"""
+
+    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
+    """expression that parses an integer with optional leading sign, returns an int"""
+
+    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
+    """fractional expression of an integer divided by an integer, returns a float"""
+    fraction.addParseAction(lambda t: t[0]/t[-1])
+
+    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
+    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
+    mixed_integer.addParseAction(sum)
+
+    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
+    """expression that parses a floating point number and returns a float"""
+
+    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
+    """expression that parses a floating point number with optional scientific notation and returns a float"""
+
+    # streamlining this expression makes the docs nicer-looking
+    number = (sci_real | real | signed_integer).streamline()
+    """any numeric expression, returns the corresponding Python type"""
+
+    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
+    """any int or real number, returned as float"""
+    
+    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
+    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
+    
+    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
+    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"
+
+    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
+    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
+    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
+    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
+    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
+    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
+    "IPv6 address (long, short, or mixed form)"
+    
+    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
+    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
+
+    @staticmethod
+    def convertToDate(fmt="%Y-%m-%d"):
+        """
+        Helper to create a parse action for converting a parsed date string to Python datetime.date
+
+        Params -
+         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})
+
+        Example::
+            date_expr = pyparsing_common.iso8601_date.copy()
+            date_expr.setParseAction(pyparsing_common.convertToDate())
+            print(date_expr.parseString("1999-12-31"))
+        prints::
+            [datetime.date(1999, 12, 31)]
+        """
+        def cvt_fn(s,l,t):
+            try:
+                return datetime.strptime(t[0], fmt).date()
+            except ValueError as ve:
+                raise ParseException(s, l, str(ve))
+        return cvt_fn
+
+    @staticmethod
+    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
+        """
+        Helper to create a parse action for converting a parsed datetime string to Python datetime.datetime
+
+        Params -
+         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})
+
+        Example::
+            dt_expr = pyparsing_common.iso8601_datetime.copy()
+            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
+            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
+        prints::
+            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
+        """
+        def cvt_fn(s,l,t):
+            try:
+                return datetime.strptime(t[0], fmt)
+            except ValueError as ve:
+                raise ParseException(s, l, str(ve))
+        return cvt_fn
+
+    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
+    "ISO8601 date (C{yyyy-mm-dd})"
+
+    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
+    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"
+
+    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
+    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"
+
+    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
+    @staticmethod
+    def stripHTMLTags(s, l, tokens):
+        """
+        Parse action to remove HTML tags from web page HTML source
+
+        Example::
+            # strip HTML links from normal text 
+            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
+            td,td_end = makeHTMLTags("TD")
+            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
+            
+            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
+        """
+        return pyparsing_common._html_stripper.transformString(tokens[0])
+
+    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',') 
+                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
+    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
+    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
+
+    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
+    """Parse action to convert tokens to upper case."""
+
+    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
+    """Parse action to convert tokens to lower case."""
+
+
+if __name__ == "__main__":
+
+    selectToken    = CaselessLiteral("select")
+    fromToken      = CaselessLiteral("from")
+
+    ident          = Word(alphas, alphanums + "_$")
+
+    columnName     = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
+    columnNameList = Group(delimitedList(columnName)).setName("columns")
+    columnSpec     = ('*' | columnNameList)
+
+    tableName      = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
+    tableNameList  = Group(delimitedList(tableName)).setName("tables")
+    
+    simpleSQL      = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
+
+    # demo runTests method, including embedded comments in test string
+    simpleSQL.runTests("""
+        # '*' as column list and dotted table name
+        select * from SYS.XYZZY
+
+        # caseless match on "SELECT", and casts back to "select"
+        SELECT * from XYZZY, ABC
+
+        # list of column names, and mixed case SELECT keyword
+        Select AA,BB,CC from Sys.dual
+
+        # multiple tables
+        Select A, B, C from Sys.dual, Table2
+
+        # invalid SELECT keyword - should fail
+        Xelect A, B, C from Sys.dual
+
+        # incomplete command - should fail
+        Select
+
+        # invalid column name - should fail
+        Select ^^^ frox Sys.dual
+
+        """)
+
+    pyparsing_common.number.runTests("""
+        100
+        -100
+        +100
+        3.14159
+        6.02e23
+        1e-12
+        """)
+
+    # any int or real number, returned as float
+    pyparsing_common.fnumber.runTests("""
+        100
+        -100
+        +100
+        3.14159
+        6.02e23
+        1e-12
+        """)
+
+    pyparsing_common.hex_integer.runTests("""
+        100
+        FF
+        """)
+
+    import uuid
+    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
+    pyparsing_common.uuid.runTests("""
+        12345678-1234-5678-1234-567812345678
+        """)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/six.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/six.py
new file mode 100644
index 0000000000000000000000000000000000000000..190c0239cd7d7af82a6e0cbc8d68053fa2e3dfaf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/_vendor/six.py
@@ -0,0 +1,868 @@
+"""Utilities for writing code that runs on Python 2 and 3"""
+
+# Copyright (c) 2010-2015 Benjamin Peterson
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+
+import functools
+import itertools
+import operator
+import sys
+import types
+
+__author__ = "Benjamin Peterson <benjamin@python.org>"
+__version__ = "1.10.0"
+
+
+# Useful for very coarse version differentiation.
+PY2 = sys.version_info[0] == 2
+PY3 = sys.version_info[0] == 3
+PY34 = sys.version_info[0:2] >= (3, 4)
+
+if PY3:
+    string_types = str,
+    integer_types = int,
+    class_types = type,
+    text_type = str
+    binary_type = bytes
+
+    MAXSIZE = sys.maxsize
+else:
+    string_types = basestring,
+    integer_types = (int, long)
+    class_types = (type, types.ClassType)
+    text_type = unicode
+    binary_type = str
+
+    if sys.platform.startswith("java"):
+        # Jython always uses 32 bits.
+        MAXSIZE = int((1 << 31) - 1)
+    else:
+        # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
+        class X(object):
+
+            def __len__(self):
+                return 1 << 31
+        try:
+            len(X())
+        except OverflowError:
+            # 32-bit
+            MAXSIZE = int((1 << 31) - 1)
+        else:
+            # 64-bit
+            MAXSIZE = int((1 << 63) - 1)
+        del X
+
+
+def _add_doc(func, doc):
+    """Add documentation to a function."""
+    func.__doc__ = doc
+
+
+def _import_module(name):
+    """Import module, returning the module after the last dot."""
+    __import__(name)
+    return sys.modules[name]
+
+
+class _LazyDescr(object):
+
+    def __init__(self, name):
+        self.name = name
+
+    def __get__(self, obj, tp):
+        result = self._resolve()
+        setattr(obj, self.name, result)  # Invokes __set__.
+        try:
+            # This is a bit ugly, but it avoids running this again by
+            # removing this descriptor.
+            delattr(obj.__class__, self.name)
+        except AttributeError:
+            pass
+        return result
+
+
+class MovedModule(_LazyDescr):
+
+    def __init__(self, name, old, new=None):
+        super(MovedModule, self).__init__(name)
+        if PY3:
+            if new is None:
+                new = name
+            self.mod = new
+        else:
+            self.mod = old
+
+    def _resolve(self):
+        return _import_module(self.mod)
+
+    def __getattr__(self, attr):
+        _module = self._resolve()
+        value = getattr(_module, attr)
+        setattr(self, attr, value)
+        return value
+
+
+class _LazyModule(types.ModuleType):
+
+    def __init__(self, name):
+        super(_LazyModule, self).__init__(name)
+        self.__doc__ = self.__class__.__doc__
+
+    def __dir__(self):
+        attrs = ["__doc__", "__name__"]
+        attrs += [attr.name for attr in self._moved_attributes]
+        return attrs
+
+    # Subclasses should override this
+    _moved_attributes = []
+
+
+class MovedAttribute(_LazyDescr):
+
+    def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
+        super(MovedAttribute, self).__init__(name)
+        if PY3:
+            if new_mod is None:
+                new_mod = name
+            self.mod = new_mod
+            if new_attr is None:
+                if old_attr is None:
+                    new_attr = name
+                else:
+                    new_attr = old_attr
+            self.attr = new_attr
+        else:
+            self.mod = old_mod
+            if old_attr is None:
+                old_attr = name
+            self.attr = old_attr
+
+    def _resolve(self):
+        module = _import_module(self.mod)
+        return getattr(module, self.attr)
+
+
+class _SixMetaPathImporter(object):
+
+    """
+    A meta path importer to import six.moves and its submodules.
+
+    This class implements a PEP302 finder and loader. It should be compatible
+    with Python 2.5 and all existing versions of Python 3.
+    """
+
+    def __init__(self, six_module_name):
+        self.name = six_module_name
+        self.known_modules = {}
+
+    def _add_module(self, mod, *fullnames):
+        for fullname in fullnames:
+            self.known_modules[self.name + "." + fullname] = mod
+
+    def _get_module(self, fullname):
+        return self.known_modules[self.name + "." + fullname]
+
+    def find_module(self, fullname, path=None):
+        if fullname in self.known_modules:
+            return self
+        return None
+
+    def __get_module(self, fullname):
+        try:
+            return self.known_modules[fullname]
+        except KeyError:
+            raise ImportError("This loader does not know module " + fullname)
+
+    def load_module(self, fullname):
+        try:
+            # in case of a reload
+            return sys.modules[fullname]
+        except KeyError:
+            pass
+        mod = self.__get_module(fullname)
+        if isinstance(mod, MovedModule):
+            mod = mod._resolve()
+        else:
+            mod.__loader__ = self
+        sys.modules[fullname] = mod
+        return mod
+
+    def is_package(self, fullname):
+        """
+        Return true if the named module is a package.
+
+        We need this method to get correct spec objects with
+        Python 3.4 (see PEP451)
+        """
+        return hasattr(self.__get_module(fullname), "__path__")
+
+    def get_code(self, fullname):
+        """Return None
+
+        Required if is_package is implemented"""
+        self.__get_module(fullname)  # eventually raises ImportError
+        return None
+    get_source = get_code  # same as get_code
+
+_importer = _SixMetaPathImporter(__name__)
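+# This single importer instance collects every "moves" entry registered below; it is
+# appended to sys.meta_path near the bottom of this module, which is what lets
+# "import six.moves.<name>" resolve through the lazy machinery above.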
+
+
+class _MovedItems(_LazyModule):
+
+    """Lazy loading of moved objects"""
+    __path__ = []  # mark as package
+
+
+_moved_attributes = [
+    MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
+    MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
+    MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
+    MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
+    MovedAttribute("intern", "__builtin__", "sys"),
+    MovedAttribute("map", "itertools", "builtins", "imap", "map"),
+    MovedAttribute("getcwd", "os", "os", "getcwdu", "getcwd"),
+    MovedAttribute("getcwdb", "os", "os", "getcwd", "getcwdb"),
+    MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
+    MovedAttribute("reload_module", "__builtin__", "importlib" if PY34 else "imp", "reload"),
+    MovedAttribute("reduce", "__builtin__", "functools"),
+    MovedAttribute("shlex_quote", "pipes", "shlex", "quote"),
+    MovedAttribute("StringIO", "StringIO", "io"),
+    MovedAttribute("UserDict", "UserDict", "collections"),
+    MovedAttribute("UserList", "UserList", "collections"),
+    MovedAttribute("UserString", "UserString", "collections"),
+    MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
+    MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
+    MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
+    MovedModule("builtins", "__builtin__"),
+    MovedModule("configparser", "ConfigParser"),
+    MovedModule("copyreg", "copy_reg"),
+    MovedModule("dbm_gnu", "gdbm", "dbm.gnu"),
+    MovedModule("_dummy_thread", "dummy_thread", "_dummy_thread"),
+    MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
+    MovedModule("http_cookies", "Cookie", "http.cookies"),
+    MovedModule("html_entities", "htmlentitydefs", "html.entities"),
+    MovedModule("html_parser", "HTMLParser", "html.parser"),
+    MovedModule("http_client", "httplib", "http.client"),
+    MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
+    MovedModule("email_mime_nonmultipart", "email.MIMENonMultipart", "email.mime.nonmultipart"),
+    MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
+    MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
+    MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
+    MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
+    MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
+    MovedModule("cPickle", "cPickle", "pickle"),
+    MovedModule("queue", "Queue"),
+    MovedModule("reprlib", "repr"),
+    MovedModule("socketserver", "SocketServer"),
+    MovedModule("_thread", "thread", "_thread"),
+    MovedModule("tkinter", "Tkinter"),
+    MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
+    MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
+    MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
+    MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
+    MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
+    MovedModule("tkinter_ttk", "ttk", "tkinter.ttk"),
+    MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
+    MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
+    MovedModule("tkinter_colorchooser", "tkColorChooser",
+                "tkinter.colorchooser"),
+    MovedModule("tkinter_commondialog", "tkCommonDialog",
+                "tkinter.commondialog"),
+    MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
+    MovedModule("tkinter_font", "tkFont", "tkinter.font"),
+    MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
+    MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
+                "tkinter.simpledialog"),
+    MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"),
+    MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"),
+    MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
+    MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
+    MovedModule("xmlrpc_client", "xmlrpclib", "xmlrpc.client"),
+    MovedModule("xmlrpc_server", "SimpleXMLRPCServer", "xmlrpc.server"),
+]
+# Add Windows-specific modules.
+if sys.platform == "win32":
+    _moved_attributes += [
+        MovedModule("winreg", "_winreg"),
+    ]
+
+for attr in _moved_attributes:
+    setattr(_MovedItems, attr.name, attr)
+    if isinstance(attr, MovedModule):
+        _importer._add_module(attr, "moves." + attr.name)
+del attr
+
+_MovedItems._moved_attributes = _moved_attributes
+
+moves = _MovedItems(__name__ + ".moves")
+_importer._add_module(moves, "moves")
+
+
+class Module_six_moves_urllib_parse(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_parse"""
+
+
+_urllib_parse_moved_attributes = [
+    MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
+    MovedAttribute("SplitResult", "urlparse", "urllib.parse"),
+    MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
+    MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
+    MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
+    MovedAttribute("urljoin", "urlparse", "urllib.parse"),
+    MovedAttribute("urlparse", "urlparse", "urllib.parse"),
+    MovedAttribute("urlsplit", "urlparse", "urllib.parse"),
+    MovedAttribute("urlunparse", "urlparse", "urllib.parse"),
+    MovedAttribute("urlunsplit", "urlparse", "urllib.parse"),
+    MovedAttribute("quote", "urllib", "urllib.parse"),
+    MovedAttribute("quote_plus", "urllib", "urllib.parse"),
+    MovedAttribute("unquote", "urllib", "urllib.parse"),
+    MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
+    MovedAttribute("urlencode", "urllib", "urllib.parse"),
+    MovedAttribute("splitquery", "urllib", "urllib.parse"),
+    MovedAttribute("splittag", "urllib", "urllib.parse"),
+    MovedAttribute("splituser", "urllib", "urllib.parse"),
+    MovedAttribute("uses_fragment", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_netloc", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_params", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_query", "urlparse", "urllib.parse"),
+    MovedAttribute("uses_relative", "urlparse", "urllib.parse"),
+]
+for attr in _urllib_parse_moved_attributes:
+    setattr(Module_six_moves_urllib_parse, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_parse._moved_attributes = _urllib_parse_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse"),
+                      "moves.urllib_parse", "moves.urllib.parse")
+
+
+class Module_six_moves_urllib_error(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_error"""
+
+
+_urllib_error_moved_attributes = [
+    MovedAttribute("URLError", "urllib2", "urllib.error"),
+    MovedAttribute("HTTPError", "urllib2", "urllib.error"),
+    MovedAttribute("ContentTooShortError", "urllib", "urllib.error"),
+]
+for attr in _urllib_error_moved_attributes:
+    setattr(Module_six_moves_urllib_error, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_error._moved_attributes = _urllib_error_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_error(__name__ + ".moves.urllib.error"),
+                      "moves.urllib_error", "moves.urllib.error")
+
+
+class Module_six_moves_urllib_request(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_request"""
+
+
+_urllib_request_moved_attributes = [
+    MovedAttribute("urlopen", "urllib2", "urllib.request"),
+    MovedAttribute("install_opener", "urllib2", "urllib.request"),
+    MovedAttribute("build_opener", "urllib2", "urllib.request"),
+    MovedAttribute("pathname2url", "urllib", "urllib.request"),
+    MovedAttribute("url2pathname", "urllib", "urllib.request"),
+    MovedAttribute("getproxies", "urllib", "urllib.request"),
+    MovedAttribute("Request", "urllib2", "urllib.request"),
+    MovedAttribute("OpenerDirector", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"),
+    MovedAttribute("ProxyHandler", "urllib2", "urllib.request"),
+    MovedAttribute("BaseHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"),
+    MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"),
+    MovedAttribute("FileHandler", "urllib2", "urllib.request"),
+    MovedAttribute("FTPHandler", "urllib2", "urllib.request"),
+    MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"),
+    MovedAttribute("UnknownHandler", "urllib2", "urllib.request"),
+    MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"),
+    MovedAttribute("urlretrieve", "urllib", "urllib.request"),
+    MovedAttribute("urlcleanup", "urllib", "urllib.request"),
+    MovedAttribute("URLopener", "urllib", "urllib.request"),
+    MovedAttribute("FancyURLopener", "urllib", "urllib.request"),
+    MovedAttribute("proxy_bypass", "urllib", "urllib.request"),
+]
+for attr in _urllib_request_moved_attributes:
+    setattr(Module_six_moves_urllib_request, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_request._moved_attributes = _urllib_request_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_request(__name__ + ".moves.urllib.request"),
+                      "moves.urllib_request", "moves.urllib.request")
+
+
+class Module_six_moves_urllib_response(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_response"""
+
+
+_urllib_response_moved_attributes = [
+    MovedAttribute("addbase", "urllib", "urllib.response"),
+    MovedAttribute("addclosehook", "urllib", "urllib.response"),
+    MovedAttribute("addinfo", "urllib", "urllib.response"),
+    MovedAttribute("addinfourl", "urllib", "urllib.response"),
+]
+for attr in _urllib_response_moved_attributes:
+    setattr(Module_six_moves_urllib_response, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_response._moved_attributes = _urllib_response_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_response(__name__ + ".moves.urllib.response"),
+                      "moves.urllib_response", "moves.urllib.response")
+
+
+class Module_six_moves_urllib_robotparser(_LazyModule):
+
+    """Lazy loading of moved objects in six.moves.urllib_robotparser"""
+
+
+_urllib_robotparser_moved_attributes = [
+    MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"),
+]
+for attr in _urllib_robotparser_moved_attributes:
+    setattr(Module_six_moves_urllib_robotparser, attr.name, attr)
+del attr
+
+Module_six_moves_urllib_robotparser._moved_attributes = _urllib_robotparser_moved_attributes
+
+_importer._add_module(Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser"),
+                      "moves.urllib_robotparser", "moves.urllib.robotparser")
+
+
+class Module_six_moves_urllib(types.ModuleType):
+
+    """Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
+    __path__ = []  # mark as package
+    parse = _importer._get_module("moves.urllib_parse")
+    error = _importer._get_module("moves.urllib_error")
+    request = _importer._get_module("moves.urllib_request")
+    response = _importer._get_module("moves.urllib_response")
+    robotparser = _importer._get_module("moves.urllib_robotparser")
+
+    def __dir__(self):
+        return ['parse', 'error', 'request', 'response', 'robotparser']
+
+_importer._add_module(Module_six_moves_urllib(__name__ + ".moves.urllib"),
+                      "moves.urllib")
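+
+
+# Illustrative sketch (not part of upstream six): once the meta path importer
+# registered at the bottom of this file is installed, the namespace built
+# above can be used like the Python 3 layout on both major versions, e.g.
+# (the plain "six" name is shown for clarity; this vendored copy actually
+# lives under a different package name):
+#
+#     from six.moves.urllib import parse, request
+#     parse.urlencode({'q': 'spark'})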
+
+
+def add_move(move):
+    """Add an item to six.moves."""
+    setattr(_MovedItems, move.name, move)
+
+
+def remove_move(name):
+    """Remove item from six.moves."""
+    try:
+        delattr(_MovedItems, name)
+    except AttributeError:
+        try:
+            del moves.__dict__[name]
+        except KeyError:
+            raise AttributeError("no such move, %r" % (name,))
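+
+
+def _example_add_move():
+    # Illustrative sketch (not part of upstream six): register an extra move
+    # at runtime. The "html_escape" attribute below (cgi.escape on Python 2,
+    # html.escape on Python 3) is purely hypothetical.
+    add_move(MovedAttribute("html_escape", "cgi", "html", "escape", "escape"))
+    # After this call, ``moves.html_escape`` resolves on both major versions;
+    # remove_move("html_escape") undoes the registration.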
+
+
+if PY3:
+    _meth_func = "__func__"
+    _meth_self = "__self__"
+
+    _func_closure = "__closure__"
+    _func_code = "__code__"
+    _func_defaults = "__defaults__"
+    _func_globals = "__globals__"
+else:
+    _meth_func = "im_func"
+    _meth_self = "im_self"
+
+    _func_closure = "func_closure"
+    _func_code = "func_code"
+    _func_defaults = "func_defaults"
+    _func_globals = "func_globals"
+
+
+try:
+    advance_iterator = next
+except NameError:
+    def advance_iterator(it):
+        return it.next()
+next = advance_iterator
+
+
+try:
+    callable = callable
+except NameError:
+    def callable(obj):
+        return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
+
+
+if PY3:
+    def get_unbound_function(unbound):
+        return unbound
+
+    create_bound_method = types.MethodType
+
+    def create_unbound_method(func, cls):
+        return func
+
+    Iterator = object
+else:
+    def get_unbound_function(unbound):
+        return unbound.im_func
+
+    def create_bound_method(func, obj):
+        return types.MethodType(func, obj, obj.__class__)
+
+    def create_unbound_method(func, cls):
+        return types.MethodType(func, None, cls)
+
+    class Iterator(object):
+
+        def next(self):
+            return type(self).__next__(self)
+
+    callable = callable
+_add_doc(get_unbound_function,
+         """Get the function out of a possibly unbound function""")
+
+
+get_method_function = operator.attrgetter(_meth_func)
+get_method_self = operator.attrgetter(_meth_self)
+get_function_closure = operator.attrgetter(_func_closure)
+get_function_code = operator.attrgetter(_func_code)
+get_function_defaults = operator.attrgetter(_func_defaults)
+get_function_globals = operator.attrgetter(_func_globals)
+
+
+if PY3:
+    def iterkeys(d, **kw):
+        return iter(d.keys(**kw))
+
+    def itervalues(d, **kw):
+        return iter(d.values(**kw))
+
+    def iteritems(d, **kw):
+        return iter(d.items(**kw))
+
+    def iterlists(d, **kw):
+        return iter(d.lists(**kw))
+
+    viewkeys = operator.methodcaller("keys")
+
+    viewvalues = operator.methodcaller("values")
+
+    viewitems = operator.methodcaller("items")
+else:
+    def iterkeys(d, **kw):
+        return d.iterkeys(**kw)
+
+    def itervalues(d, **kw):
+        return d.itervalues(**kw)
+
+    def iteritems(d, **kw):
+        return d.iteritems(**kw)
+
+    def iterlists(d, **kw):
+        return d.iterlists(**kw)
+
+    viewkeys = operator.methodcaller("viewkeys")
+
+    viewvalues = operator.methodcaller("viewvalues")
+
+    viewitems = operator.methodcaller("viewitems")
+
+_add_doc(iterkeys, "Return an iterator over the keys of a dictionary.")
+_add_doc(itervalues, "Return an iterator over the values of a dictionary.")
+_add_doc(iteritems,
+         "Return an iterator over the (key, value) pairs of a dictionary.")
+_add_doc(iterlists,
+         "Return an iterator over the (key, [values]) pairs of a dictionary.")
+
+
+if PY3:
+    def b(s):
+        return s.encode("latin-1")
+
+    def u(s):
+        return s
+    unichr = chr
+    import struct
+    int2byte = struct.Struct(">B").pack
+    del struct
+    byte2int = operator.itemgetter(0)
+    indexbytes = operator.getitem
+    iterbytes = iter
+    import io
+    StringIO = io.StringIO
+    BytesIO = io.BytesIO
+    _assertCountEqual = "assertCountEqual"
+    if sys.version_info[1] <= 1:
+        _assertRaisesRegex = "assertRaisesRegexp"
+        _assertRegex = "assertRegexpMatches"
+    else:
+        _assertRaisesRegex = "assertRaisesRegex"
+        _assertRegex = "assertRegex"
+else:
+    def b(s):
+        return s
+    # Workaround for standalone backslash
+
+    def u(s):
+        return unicode(s.replace(r'\\', r'\\\\'), "unicode_escape")
+    unichr = unichr
+    int2byte = chr
+
+    def byte2int(bs):
+        return ord(bs[0])
+
+    def indexbytes(buf, i):
+        return ord(buf[i])
+    iterbytes = functools.partial(itertools.imap, ord)
+    import StringIO
+    StringIO = BytesIO = StringIO.StringIO
+    _assertCountEqual = "assertItemsEqual"
+    _assertRaisesRegex = "assertRaisesRegexp"
+    _assertRegex = "assertRegexpMatches"
+_add_doc(b, """Byte literal""")
+_add_doc(u, """Text literal""")
+
+
+def assertCountEqual(self, *args, **kwargs):
+    return getattr(self, _assertCountEqual)(*args, **kwargs)
+
+
+def assertRaisesRegex(self, *args, **kwargs):
+    return getattr(self, _assertRaisesRegex)(*args, **kwargs)
+
+
+def assertRegex(self, *args, **kwargs):
+    return getattr(self, _assertRegex)(*args, **kwargs)
+
+
+if PY3:
+    exec_ = getattr(moves.builtins, "exec")
+
+    def reraise(tp, value, tb=None):
+        if value is None:
+            value = tp()
+        if value.__traceback__ is not tb:
+            raise value.with_traceback(tb)
+        raise value
+
+else:
+    def exec_(_code_, _globs_=None, _locs_=None):
+        """Execute code in a namespace."""
+        if _globs_ is None:
+            frame = sys._getframe(1)
+            _globs_ = frame.f_globals
+            if _locs_ is None:
+                _locs_ = frame.f_locals
+            del frame
+        elif _locs_ is None:
+            _locs_ = _globs_
+        exec("""exec _code_ in _globs_, _locs_""")
+
+    exec_("""def reraise(tp, value, tb=None):
+    raise tp, value, tb
+""")
+
+
+if sys.version_info[:2] == (3, 2):
+    exec_("""def raise_from(value, from_value):
+    if from_value is None:
+        raise value
+    raise value from from_value
+""")
+elif sys.version_info[:2] > (3, 2):
+    exec_("""def raise_from(value, from_value):
+    raise value from from_value
+""")
+else:
+    def raise_from(value, from_value):
+        raise value
+
+
+print_ = getattr(moves.builtins, "print", None)
+if print_ is None:
+    def print_(*args, **kwargs):
+        """The new-style print function for Python 2.4 and 2.5."""
+        fp = kwargs.pop("file", sys.stdout)
+        if fp is None:
+            return
+
+        def write(data):
+            if not isinstance(data, basestring):
+                data = str(data)
+            # If the file has an encoding, encode unicode with it.
+            if (isinstance(fp, file) and
+                    isinstance(data, unicode) and
+                    fp.encoding is not None):
+                errors = getattr(fp, "errors", None)
+                if errors is None:
+                    errors = "strict"
+                data = data.encode(fp.encoding, errors)
+            fp.write(data)
+        want_unicode = False
+        sep = kwargs.pop("sep", None)
+        if sep is not None:
+            if isinstance(sep, unicode):
+                want_unicode = True
+            elif not isinstance(sep, str):
+                raise TypeError("sep must be None or a string")
+        end = kwargs.pop("end", None)
+        if end is not None:
+            if isinstance(end, unicode):
+                want_unicode = True
+            elif not isinstance(end, str):
+                raise TypeError("end must be None or a string")
+        if kwargs:
+            raise TypeError("invalid keyword arguments to print()")
+        if not want_unicode:
+            for arg in args:
+                if isinstance(arg, unicode):
+                    want_unicode = True
+                    break
+        if want_unicode:
+            newline = unicode("\n")
+            space = unicode(" ")
+        else:
+            newline = "\n"
+            space = " "
+        if sep is None:
+            sep = space
+        if end is None:
+            end = newline
+        for i, arg in enumerate(args):
+            if i:
+                write(sep)
+            write(arg)
+        write(end)
+if sys.version_info[:2] < (3, 3):
+    _print = print_
+
+    def print_(*args, **kwargs):
+        fp = kwargs.get("file", sys.stdout)
+        flush = kwargs.pop("flush", False)
+        _print(*args, **kwargs)
+        if flush and fp is not None:
+            fp.flush()
+
+_add_doc(reraise, """Reraise an exception.""")
+
+if sys.version_info[0:2] < (3, 4):
+    def wraps(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS,
+              updated=functools.WRAPPER_UPDATES):
+        def wrapper(f):
+            f = functools.wraps(wrapped, assigned, updated)(f)
+            f.__wrapped__ = wrapped
+            return f
+        return wrapper
+else:
+    wraps = functools.wraps
+
+
+def with_metaclass(meta, *bases):
+    """Create a base class with a metaclass."""
+    # This requires a bit of explanation: the basic idea is to make a dummy
+    # metaclass for one level of class instantiation that replaces itself with
+    # the actual metaclass.
+    class metaclass(meta):
+
+        def __new__(cls, name, this_bases, d):
+            return meta(name, bases, d)
+    return type.__new__(metaclass, 'temporary_class', (), {})
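+
+
+# Illustrative sketch (not part of upstream six): with_metaclass() gives a
+# class a metaclass without using Python-2-only or Python-3-only syntax.
+# The names below are hypothetical and exist purely as a demonstration.
+class _ExampleMeta(type):
+    """Hypothetical metaclass used only to demonstrate with_metaclass()."""
+
+
+class _ExampleWithMeta(with_metaclass(_ExampleMeta, object)):
+    """On both Python 2 and 3, type(_ExampleWithMeta) is _ExampleMeta."""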
+
+
+def add_metaclass(metaclass):
+    """Class decorator for creating a class with a metaclass."""
+    def wrapper(cls):
+        orig_vars = cls.__dict__.copy()
+        slots = orig_vars.get('__slots__')
+        if slots is not None:
+            if isinstance(slots, str):
+                slots = [slots]
+            for slots_var in slots:
+                orig_vars.pop(slots_var)
+        orig_vars.pop('__dict__', None)
+        orig_vars.pop('__weakref__', None)
+        return metaclass(cls.__name__, cls.__bases__, orig_vars)
+    return wrapper
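+
+
+# Illustrative sketch (not part of upstream six): @add_metaclass achieves the
+# same effect as with_metaclass() above, but as a class decorator that
+# rebuilds the decorated class; it reuses the hypothetical _ExampleMeta from
+# the previous sketch.
+@add_metaclass(_ExampleMeta)
+class _ExampleDecorated(object):
+    """type(_ExampleDecorated) is _ExampleMeta on both Python 2 and 3."""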
+
+
+def python_2_unicode_compatible(klass):
+    """
+    A decorator that defines __unicode__ and __str__ methods under Python 2.
+    Under Python 3 it does nothing.
+
+    To support Python 2 and 3 with a single code base, define a __str__ method
+    returning text and apply this decorator to the class.
+    """
+    if PY2:
+        if '__str__' not in klass.__dict__:
+            raise ValueError("@python_2_unicode_compatible cannot be applied "
+                             "to %s because it doesn't define __str__()." %
+                             klass.__name__)
+        klass.__unicode__ = klass.__str__
+        klass.__str__ = lambda self: self.__unicode__().encode('utf-8')
+    return klass
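+
+
+# Illustrative sketch (not part of upstream six): a hypothetical class that
+# defines only __str__ returning text; on Python 2 the decorator adds a
+# __unicode__ method and a UTF-8-encoding __str__, on Python 3 it is a no-op.
+@python_2_unicode_compatible
+class _ExampleGreeting(object):
+    def __str__(self):
+        return u"caf\u00e9"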
+
+
+# Complete the moves implementation.
+# This code is at the end of this module to speed up module loading.
+# Turn this module into a package.
+__path__ = []  # required for PEP 302 and PEP 451
+__package__ = __name__  # see PEP 366 @ReservedAssignment
+if globals().get("__spec__") is not None:
+    __spec__.submodule_search_locations = []  # PEP 451 @UndefinedVariable
+# Remove other six meta path importers, since they cause problems. This can
+# happen if six is removed from sys.modules and then reloaded. (Setuptools does
+# this for some reason.)
+if sys.meta_path:
+    for i, importer in enumerate(sys.meta_path):
+        # Here's some real nastiness: Another "instance" of the six module might
+        # be floating around. Therefore, we can't use isinstance() to check for
+        # the six meta path importer, since the other six instance will have
+        # inserted an importer with different class.
+        if (type(importer).__name__ == "_SixMetaPathImporter" and
+                importer.name == __name__):
+            del sys.meta_path[i]
+            break
+    del i, importer
+# Finally, add the importer to the meta path import hook.
+sys.meta_path.append(_importer)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/archive_util.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/archive_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..81436044d995ff430334a7ef324b08e616f4b7a7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/archive_util.py
@@ -0,0 +1,173 @@
+"""Utilities for extracting common archive formats"""
+
+import zipfile
+import tarfile
+import os
+import shutil
+import posixpath
+import contextlib
+from distutils.errors import DistutilsError
+
+from pkg_resources import ensure_directory
+
+__all__ = [
+    "unpack_archive", "unpack_zipfile", "unpack_tarfile", "default_filter",
+    "UnrecognizedFormat", "extraction_drivers", "unpack_directory",
+]
+
+
+class UnrecognizedFormat(DistutilsError):
+    """Couldn't recognize the archive type"""
+
+
+def default_filter(src, dst):
+    """The default progress/filter callback; returns True for all files"""
+    return dst
+
+
+def unpack_archive(filename, extract_dir, progress_filter=default_filter,
+        drivers=None):
+    """Unpack `filename` to `extract_dir`, or raise ``UnrecognizedFormat``
+
+    `progress_filter` is a function taking two arguments: a source path
+    internal to the archive ('/'-separated), and a filesystem path where it
+    will be extracted.  The callback must return the desired extract path
+    (which may be the same as the one passed in), or else ``None`` to skip
+    that file or directory.  The callback can thus be used to report on the
+    progress of the extraction, as well as to filter the items extracted or
+    alter their extraction paths.
+
+    `drivers`, if supplied, must be a non-empty sequence of functions with the
+    same signature as this function (minus the `drivers` argument), that raise
+    ``UnrecognizedFormat`` if they do not support extracting the designated
+    archive type.  The `drivers` are tried in sequence until one is found that
+    does not raise an error, or until all are exhausted (in which case
+    ``UnrecognizedFormat`` is raised).  If you do not supply a sequence of
+    drivers, the module's ``extraction_drivers`` constant will be used, which
+    means that ``unpack_zipfile`` and ``unpack_tarfile`` will be tried, in that
+    order.
+    """
+    for driver in drivers or extraction_drivers:
+        try:
+            driver(filename, extract_dir, progress_filter)
+        except UnrecognizedFormat:
+            continue
+        else:
+            return
+    else:
+        raise UnrecognizedFormat(
+            "Not a recognized archive type: %s" % filename
+        )
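+
+
+def _example_unpack(archive='example-1.0.zip', dest='build/example'):
+    # Illustrative sketch (not part of upstream setuptools): a hypothetical
+    # progress_filter that skips *.egg-info members and reports everything
+    # else. The archive and destination paths are assumptions, and this
+    # helper is only defined here, never called.
+    def skip_egg_info(src, dst):
+        if src.split('/')[0].endswith('.egg-info'):
+            return None  # returning None skips this member
+        print("extracting %s -> %s" % (src, dst))
+        return dst  # any other (truthy) return value is the extract path
+    return unpack_archive(archive, dest, progress_filter=skip_egg_info)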
+
+
+def unpack_directory(filename, extract_dir, progress_filter=default_filter):
+    """"Unpack" a directory, using the same interface as for archives
+
+    Raises ``UnrecognizedFormat`` if `filename` is not a directory
+    """
+    if not os.path.isdir(filename):
+        raise UnrecognizedFormat("%s is not a directory" % filename)
+
+    paths = {
+        filename: ('', extract_dir),
+    }
+    for base, dirs, files in os.walk(filename):
+        src, dst = paths[base]
+        for d in dirs:
+            paths[os.path.join(base, d)] = src + d + '/', os.path.join(dst, d)
+        for f in files:
+            target = os.path.join(dst, f)
+            target = progress_filter(src + f, target)
+            if not target:
+                # the filter returned a falsy value, so skip this entry
+                continue
+            ensure_directory(target)
+            f = os.path.join(base, f)
+            shutil.copyfile(f, target)
+            shutil.copystat(f, target)
+
+
+def unpack_zipfile(filename, extract_dir, progress_filter=default_filter):
+    """Unpack zip `filename` to `extract_dir`
+
+    Raises ``UnrecognizedFormat`` if `filename` is not a zipfile (as determined
+    by ``zipfile.is_zipfile()``).  See ``unpack_archive()`` for an explanation
+    of the `progress_filter` argument.
+    """
+
+    if not zipfile.is_zipfile(filename):
+        raise UnrecognizedFormat("%s is not a zip file" % (filename,))
+
+    with zipfile.ZipFile(filename) as z:
+        for info in z.infolist():
+            name = info.filename
+
+            # don't extract absolute paths or ones with .. in them
+            if name.startswith('/') or '..' in name.split('/'):
+                continue
+
+            target = os.path.join(extract_dir, *name.split('/'))
+            target = progress_filter(name, target)
+            if not target:
+                continue
+            if name.endswith('/'):
+                # directory
+                ensure_directory(target)
+            else:
+                # file
+                ensure_directory(target)
+                data = z.read(info.filename)
+                with open(target, 'wb') as f:
+                    f.write(data)
+            unix_attributes = info.external_attr >> 16
+            if unix_attributes:
+                os.chmod(target, unix_attributes)
+
+
+def unpack_tarfile(filename, extract_dir, progress_filter=default_filter):
+    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
+
+    Raises ``UnrecognizedFormat`` if `filename` is not a tarfile (as determined
+    by ``tarfile.open()``).  See ``unpack_archive()`` for an explanation
+    of the `progress_filter` argument.
+    """
+    try:
+        tarobj = tarfile.open(filename)
+    except tarfile.TarError:
+        raise UnrecognizedFormat(
+            "%s is not a compressed or uncompressed tar file" % (filename,)
+        )
+    with contextlib.closing(tarobj):
+        # don't do any chowning!
+        tarobj.chown = lambda *args: None
+        for member in tarobj:
+            name = member.name
+            # don't extract absolute paths or ones with .. in them
+            if not name.startswith('/') and '..' not in name.split('/'):
+                prelim_dst = os.path.join(extract_dir, *name.split('/'))
+
+                # resolve any links so that the link targets are extracted
+                # as normal files
+                while member is not None and (member.islnk() or member.issym()):
+                    linkpath = member.linkname
+                    if member.issym():
+                        base = posixpath.dirname(member.name)
+                        linkpath = posixpath.join(base, linkpath)
+                        linkpath = posixpath.normpath(linkpath)
+                    member = tarobj._getmember(linkpath)
+
+                if member is not None and (member.isfile() or member.isdir()):
+                    final_dst = progress_filter(name, prelim_dst)
+                    if final_dst:
+                        if final_dst.endswith(os.sep):
+                            final_dst = final_dst[:-1]
+                        try:
+                            # XXX Ugh
+                            tarobj._extract_member(member, final_dst)
+                        except tarfile.ExtractError:
+                            # chown/chmod/mkfifo/mknode/makedev failed
+                            pass
+        return True
+
+
+extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/build_meta.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/build_meta.py
new file mode 100644
index 0000000000000000000000000000000000000000..609ea1e510c83a480f85ccd9a5d4df1f638b4a7d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/build_meta.py
@@ -0,0 +1,172 @@
+"""A PEP 517 interface to setuptools
+
+Previously, when a user or a command line tool (let's call it a "frontend")
+needed to make a request of setuptools to take a certain action, for
+example, generating a list of installation requirements, the frontend
+would call "setup.py egg_info" or "setup.py bdist_wheel" on the command line.
+
+PEP 517 defines a different method of interfacing with setuptools. Rather
+than calling "setup.py" directly, the frontend should:
+
+  1. Set the current directory to the directory with a setup.py file
+  2. Import this module into a disposable python interpreter (one in which
+     it does not matter if setuptools sets global variables or crashes hard).
+  3. Call one of the functions defined in PEP 517.
+
+What each function does is defined in PEP 517. However, here is a "casual"
+definition of the functions (this definition should not be relied on for
+bug reports or API stability):
+
+  - `build_wheel`: build a wheel in the folder and return the basename
+  - `get_requires_for_build_wheel`: get the `setup_requires` to build
+  - `prepare_metadata_for_build_wheel`: create a `.dist-info` directory
+  - `build_sdist`: build an sdist in the folder and return the basename
+  - `get_requires_for_build_sdist`: get the `setup_requires` to build
+
+Again, this is not a formal definition! Just a "taste" of the module.
+"""
+
+import os
+import sys
+import tokenize
+import shutil
+import contextlib
+
+import setuptools
+import distutils
+
+
+class SetupRequirementsError(BaseException):
+    def __init__(self, specifiers):
+        self.specifiers = specifiers
+
+
+class Distribution(setuptools.dist.Distribution):
+    def fetch_build_eggs(self, specifiers):
+        raise SetupRequirementsError(specifiers)
+
+    @classmethod
+    @contextlib.contextmanager
+    def patch(cls):
+        """
+        Replace distutils.core.Distribution with this class
+        for the duration of this context.
+        """
+        orig = distutils.core.Distribution
+        distutils.core.Distribution = cls
+        try:
+            yield
+        finally:
+            distutils.core.Distribution = orig
+
+
+def _run_setup(setup_script='setup.py'):
+    # Note that we can reuse our build directory between calls
+    # Correctness comes first, then optimization later
+    __file__ = setup_script
+    __name__ = '__main__'
+    f = getattr(tokenize, 'open', open)(__file__)
+    code = f.read().replace('\\r\\n', '\\n')
+    f.close()
+    exec(compile(code, __file__, 'exec'), locals())
+
+
+def _fix_config(config_settings):
+    config_settings = config_settings or {}
+    config_settings.setdefault('--global-option', [])
+    return config_settings
+
+
+def _get_build_requires(config_settings):
+    config_settings = _fix_config(config_settings)
+    requirements = ['setuptools', 'wheel']
+
+    sys.argv = sys.argv[:1] + ['egg_info'] + \
+        config_settings["--global-option"]
+    try:
+        with Distribution.patch():
+            _run_setup()
+    except SetupRequirementsError as e:
+        requirements += e.specifiers
+
+    return requirements
+
+
+def _get_immediate_subdirectories(a_dir):
+    return [name for name in os.listdir(a_dir)
+            if os.path.isdir(os.path.join(a_dir, name))]
+
+
+def get_requires_for_build_wheel(config_settings=None):
+    config_settings = _fix_config(config_settings)
+    return _get_build_requires(config_settings)
+
+
+def get_requires_for_build_sdist(config_settings=None):
+    config_settings = _fix_config(config_settings)
+    return _get_build_requires(config_settings)
+
+
+def prepare_metadata_for_build_wheel(metadata_directory, config_settings=None):
+    sys.argv = sys.argv[:1] + ['dist_info', '--egg-base', metadata_directory]
+    _run_setup()
+    
+    dist_info_directory = metadata_directory
+    while True:    
+        dist_infos = [f for f in os.listdir(dist_info_directory)
+                      if f.endswith('.dist-info')]
+
+        if len(dist_infos) == 0 and \
+                len(_get_immediate_subdirectories(dist_info_directory)) == 1:
+            dist_info_directory = os.path.join(
+                dist_info_directory, os.listdir(dist_info_directory)[0])
+            continue
+
+        assert len(dist_infos) == 1
+        break
+
+    # PEP 517 requires that the .dist-info directory be placed in the
+    # metadata_directory. To comply, we MUST copy the directory to the root
+    if dist_info_directory != metadata_directory:
+        shutil.move(
+            os.path.join(dist_info_directory, dist_infos[0]),
+            metadata_directory)
+        shutil.rmtree(dist_info_directory, ignore_errors=True)
+
+    return dist_infos[0]
+
+
+def build_wheel(wheel_directory, config_settings=None,
+                metadata_directory=None):
+    config_settings = _fix_config(config_settings)
+    wheel_directory = os.path.abspath(wheel_directory)
+    sys.argv = sys.argv[:1] + ['bdist_wheel'] + \
+        config_settings["--global-option"]
+    _run_setup()
+    if wheel_directory != 'dist':
+        shutil.rmtree(wheel_directory)
+        shutil.copytree('dist', wheel_directory)
+
+    wheels = [f for f in os.listdir(wheel_directory)
+              if f.endswith('.whl')]
+
+    assert len(wheels) == 1
+    return wheels[0]
+
+
+def build_sdist(sdist_directory, config_settings=None):
+    config_settings = _fix_config(config_settings)
+    sdist_directory = os.path.abspath(sdist_directory)
+    sys.argv = sys.argv[:1] + ['sdist'] + \
+        config_settings["--global-option"]
+    _run_setup()
+    if sdist_directory != 'dist':
+        shutil.rmtree(sdist_directory)
+        shutil.copytree('dist', sdist_directory)
+
+    sdists = [f for f in os.listdir(sdist_directory)
+              if f.endswith('.tar.gz')]
+
+    assert len(sdists) == 1
+    return sdists[0]
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli-32.exe b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli-32.exe
new file mode 100644
index 0000000000000000000000000000000000000000..b1487b7819e7286577a043c7726fbe0ca1543083
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli-32.exe differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli-64.exe b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli-64.exe
new file mode 100644
index 0000000000000000000000000000000000000000..675e6bf3743f3d3011c238657e7128ee9960ef7f
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli-64.exe differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli.exe b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli.exe
new file mode 100644
index 0000000000000000000000000000000000000000..b1487b7819e7286577a043c7726fbe0ca1543083
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/cli.exe differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe619e2e676f6e0cf95b2af63cdab33ed386f6b0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/__init__.py
@@ -0,0 +1,18 @@
+__all__ = [
+    'alias', 'bdist_egg', 'bdist_rpm', 'build_ext', 'build_py', 'develop',
+    'easy_install', 'egg_info', 'install', 'install_lib', 'rotate', 'saveopts',
+    'sdist', 'setopt', 'test', 'install_egg_info', 'install_scripts',
+    'register', 'bdist_wininst', 'upload_docs', 'upload', 'build_clib',
+    'dist_info',
+]
+
+from distutils.command.bdist import bdist
+import sys
+
+from setuptools.command import install_scripts
+
+if 'egg' not in bdist.format_commands:
+    bdist.format_command['egg'] = ('bdist_egg', "Python .egg file")
+    bdist.format_commands.append('egg')
+
+del bdist, sys
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/alias.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/alias.py
new file mode 100644
index 0000000000000000000000000000000000000000..4532b1cc0dca76227927e873f9c64f01008e565a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/alias.py
@@ -0,0 +1,80 @@
+from distutils.errors import DistutilsOptionError
+
+from setuptools.extern.six.moves import map
+
+from setuptools.command.setopt import edit_config, option_base, config_file
+
+
+def shquote(arg):
+    """Quote an argument for later parsing by shlex.split()"""
+    for c in '"', "'", "\\", "#":
+        if c in arg:
+            return repr(arg)
+    if arg.split() != [arg]:
+        return repr(arg)
+    return arg
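+
+
+def _example_shquote():
+    # Illustrative sketch (not part of upstream setuptools): plain words pass
+    # through shquote() untouched, while anything containing whitespace or
+    # shell-special characters is repr()-quoted so shlex.split() round-trips it.
+    assert shquote('clean') == 'clean'
+    assert shquote('two words') == "'two words'"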
+
+
+class alias(option_base):
+    """Define a shortcut that invokes one or more commands"""
+
+    description = "define a shortcut to invoke one or more commands"
+    command_consumes_arguments = True
+
+    user_options = [
+        ('remove', 'r', 'remove (unset) the alias'),
+    ] + option_base.user_options
+
+    boolean_options = option_base.boolean_options + ['remove']
+
+    def initialize_options(self):
+        option_base.initialize_options(self)
+        self.args = None
+        self.remove = None
+
+    def finalize_options(self):
+        option_base.finalize_options(self)
+        if self.remove and len(self.args) != 1:
+            raise DistutilsOptionError(
+                "Must specify exactly one argument (the alias name) when "
+                "using --remove"
+            )
+
+    def run(self):
+        aliases = self.distribution.get_option_dict('aliases')
+
+        if not self.args:
+            print("Command Aliases")
+            print("---------------")
+            for alias in aliases:
+                print("setup.py alias", format_alias(alias, aliases))
+            return
+
+        elif len(self.args) == 1:
+            alias, = self.args
+            if self.remove:
+                command = None
+            elif alias in aliases:
+                print("setup.py alias", format_alias(alias, aliases))
+                return
+            else:
+                print("No alias definition found for %r" % alias)
+                return
+        else:
+            alias = self.args[0]
+            command = ' '.join(map(shquote, self.args[1:]))
+
+        edit_config(self.filename, {'aliases': {alias: command}}, self.dry_run)
+
+
+def format_alias(name, aliases):
+    source, command = aliases[name]
+    if source == config_file('global'):
+        source = '--global-config '
+    elif source == config_file('user'):
+        source = '--user-config '
+    elif source == config_file('local'):
+        source = ''
+    else:
+        source = '--filename=%r' % source
+    return source + name + ' ' + command
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_egg.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_egg.py
new file mode 100644
index 0000000000000000000000000000000000000000..423b8187656c2274c52010d8fd3a25236b65120f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_egg.py
@@ -0,0 +1,502 @@
+"""setuptools.command.bdist_egg
+
+Build .egg distributions"""
+
+from distutils.errors import DistutilsSetupError
+from distutils.dir_util import remove_tree, mkpath
+from distutils import log
+from types import CodeType
+import sys
+import os
+import re
+import textwrap
+import marshal
+
+from setuptools.extern import six
+
+from pkg_resources import get_build_platform, Distribution, ensure_directory
+from pkg_resources import EntryPoint
+from setuptools.extension import Library
+from setuptools import Command
+
+try:
+    # Python 2.7 or >=3.2
+    from sysconfig import get_path, get_python_version
+
+    def _get_purelib():
+        return get_path("purelib")
+except ImportError:
+    from distutils.sysconfig import get_python_lib, get_python_version
+
+    def _get_purelib():
+        return get_python_lib(False)
+
+
+def strip_module(filename):
+    if '.' in filename:
+        filename = os.path.splitext(filename)[0]
+    if filename.endswith('module'):
+        filename = filename[:-6]
+    return filename
+
+
+def sorted_walk(dir):
+    """Do os.walk in a reproducible way,
+    independent of nondeterministic filesystem readdir order
+    """
+    for base, dirs, files in os.walk(dir):
+        dirs.sort()
+        files.sort()
+        yield base, dirs, files
+
+
+def write_stub(resource, pyfile):
+    _stub_template = textwrap.dedent("""
+        def __bootstrap__():
+            global __bootstrap__, __loader__, __file__
+            import sys, pkg_resources, imp
+            __file__ = pkg_resources.resource_filename(__name__, %r)
+            __loader__ = None; del __bootstrap__, __loader__
+            imp.load_dynamic(__name__,__file__)
+        __bootstrap__()
+        """).lstrip()
+    with open(pyfile, 'w') as f:
+        f.write(_stub_template % resource)
+
+
+class bdist_egg(Command):
+    description = "create an \"egg\" distribution"
+
+    user_options = [
+        ('bdist-dir=', 'b',
+         "temporary directory for creating the distribution"),
+        ('plat-name=', 'p', "platform name to embed in generated filenames "
+                            "(default: %s)" % get_build_platform()),
+        ('exclude-source-files', None,
+         "remove all .py files from the generated egg"),
+        ('keep-temp', 'k',
+         "keep the pseudo-installation tree around after " +
+         "creating the distribution archive"),
+        ('dist-dir=', 'd',
+         "directory to put final built distributions in"),
+        ('skip-build', None,
+         "skip rebuilding everything (for testing/debugging)"),
+    ]
+
+    boolean_options = [
+        'keep-temp', 'skip-build', 'exclude-source-files'
+    ]
+
+    def initialize_options(self):
+        self.bdist_dir = None
+        self.plat_name = None
+        self.keep_temp = 0
+        self.dist_dir = None
+        self.skip_build = 0
+        self.egg_output = None
+        self.exclude_source_files = None
+
+    def finalize_options(self):
+        ei_cmd = self.ei_cmd = self.get_finalized_command("egg_info")
+        self.egg_info = ei_cmd.egg_info
+
+        if self.bdist_dir is None:
+            bdist_base = self.get_finalized_command('bdist').bdist_base
+            self.bdist_dir = os.path.join(bdist_base, 'egg')
+
+        if self.plat_name is None:
+            self.plat_name = get_build_platform()
+
+        self.set_undefined_options('bdist', ('dist_dir', 'dist_dir'))
+
+        if self.egg_output is None:
+
+            # Compute filename of the output egg
+            basename = Distribution(
+                None, None, ei_cmd.egg_name, ei_cmd.egg_version,
+                get_python_version(),
+                self.distribution.has_ext_modules() and self.plat_name
+            ).egg_name()
+
+            self.egg_output = os.path.join(self.dist_dir, basename + '.egg')
+
+    def do_install_data(self):
+        # Hack for packages that install data to install's --install-lib
+        self.get_finalized_command('install').install_lib = self.bdist_dir
+
+        site_packages = os.path.normcase(os.path.realpath(_get_purelib()))
+        old, self.distribution.data_files = self.distribution.data_files, []
+
+        for item in old:
+            if isinstance(item, tuple) and len(item) == 2:
+                if os.path.isabs(item[0]):
+                    realpath = os.path.realpath(item[0])
+                    normalized = os.path.normcase(realpath)
+                    if normalized == site_packages or normalized.startswith(
+                        site_packages + os.sep
+                    ):
+                        item = realpath[len(site_packages) + 1:], item[1]
+                        # XXX else: raise ???
+            self.distribution.data_files.append(item)
+
+        try:
+            log.info("installing package data to %s", self.bdist_dir)
+            self.call_command('install_data', force=0, root=None)
+        finally:
+            self.distribution.data_files = old
+
+    def get_outputs(self):
+        return [self.egg_output]
+
+    def call_command(self, cmdname, **kw):
+        """Invoke reinitialized command `cmdname` with keyword args"""
+        for dirname in INSTALL_DIRECTORY_ATTRS:
+            kw.setdefault(dirname, self.bdist_dir)
+        kw.setdefault('skip_build', self.skip_build)
+        kw.setdefault('dry_run', self.dry_run)
+        cmd = self.reinitialize_command(cmdname, **kw)
+        self.run_command(cmdname)
+        return cmd
+
+    def run(self):
+        # Generate metadata first
+        self.run_command("egg_info")
+        # We run install_lib before install_data, because some data hacks
+        # pull their data path from the install_lib command.
+        log.info("installing library code to %s", self.bdist_dir)
+        instcmd = self.get_finalized_command('install')
+        old_root = instcmd.root
+        instcmd.root = None
+        if self.distribution.has_c_libraries() and not self.skip_build:
+            self.run_command('build_clib')
+        cmd = self.call_command('install_lib', warn_dir=0)
+        instcmd.root = old_root
+
+        all_outputs, ext_outputs = self.get_ext_outputs()
+        self.stubs = []
+        to_compile = []
+        for (p, ext_name) in enumerate(ext_outputs):
+            filename, ext = os.path.splitext(ext_name)
+            pyfile = os.path.join(self.bdist_dir, strip_module(filename) +
+                                  '.py')
+            self.stubs.append(pyfile)
+            log.info("creating stub loader for %s", ext_name)
+            if not self.dry_run:
+                write_stub(os.path.basename(ext_name), pyfile)
+            to_compile.append(pyfile)
+            ext_outputs[p] = ext_name.replace(os.sep, '/')
+
+        if to_compile:
+            cmd.byte_compile(to_compile)
+        if self.distribution.data_files:
+            self.do_install_data()
+
+        # Make the EGG-INFO directory
+        archive_root = self.bdist_dir
+        egg_info = os.path.join(archive_root, 'EGG-INFO')
+        self.mkpath(egg_info)
+        if self.distribution.scripts:
+            script_dir = os.path.join(egg_info, 'scripts')
+            log.info("installing scripts to %s", script_dir)
+            self.call_command('install_scripts', install_dir=script_dir,
+                              no_ep=1)
+
+        self.copy_metadata_to(egg_info)
+        native_libs = os.path.join(egg_info, "native_libs.txt")
+        if all_outputs:
+            log.info("writing %s", native_libs)
+            if not self.dry_run:
+                ensure_directory(native_libs)
+                libs_file = open(native_libs, 'wt')
+                libs_file.write('\n'.join(all_outputs))
+                libs_file.write('\n')
+                libs_file.close()
+        elif os.path.isfile(native_libs):
+            log.info("removing %s", native_libs)
+            if not self.dry_run:
+                os.unlink(native_libs)
+
+        write_safety_flag(
+            os.path.join(archive_root, 'EGG-INFO'), self.zip_safe()
+        )
+
+        if os.path.exists(os.path.join(self.egg_info, 'depends.txt')):
+            log.warn(
+                "WARNING: 'depends.txt' will not be used by setuptools 0.6!\n"
+                "Use the install_requires/extras_require setup() args instead."
+            )
+
+        if self.exclude_source_files:
+            self.zap_pyfiles()
+
+        # Make the archive
+        make_zipfile(self.egg_output, archive_root, verbose=self.verbose,
+                     dry_run=self.dry_run, mode=self.gen_header())
+        if not self.keep_temp:
+            remove_tree(self.bdist_dir, dry_run=self.dry_run)
+
+        # Add to 'Distribution.dist_files' so that the "upload" command works
+        getattr(self.distribution, 'dist_files', []).append(
+            ('bdist_egg', get_python_version(), self.egg_output))
+
+    def zap_pyfiles(self):
+        log.info("Removing .py files from temporary directory")
+        for base, dirs, files in walk_egg(self.bdist_dir):
+            for name in files:
+                path = os.path.join(base, name)
+
+                if name.endswith('.py'):
+                    log.debug("Deleting %s", path)
+                    os.unlink(path)
+
+                if base.endswith('__pycache__'):
+                    path_old = path
+
+                    pattern = r'(?P<name>.+)\.(?P<magic>[^.]+)\.pyc'
+                    m = re.match(pattern, name)
+                    path_new = os.path.join(
+                        base, os.pardir, m.group('name') + '.pyc')
+                    log.info(
+                        "Renaming file from [%s] to [%s]"
+                        % (path_old, path_new))
+                    try:
+                        os.remove(path_new)
+                    except OSError:
+                        pass
+                    os.rename(path_old, path_new)
+
+    def zip_safe(self):
+        safe = getattr(self.distribution, 'zip_safe', None)
+        if safe is not None:
+            return safe
+        log.warn("zip_safe flag not set; analyzing archive contents...")
+        return analyze_egg(self.bdist_dir, self.stubs)
+
+    def gen_header(self):
+        epm = EntryPoint.parse_map(self.distribution.entry_points or '')
+        ep = epm.get('setuptools.installation', {}).get('eggsecutable')
+        if ep is None:
+            return 'w'  # not an eggsecutable, do it the usual way.
+
+        if not ep.attrs or ep.extras:
+            raise DistutilsSetupError(
+                "eggsecutable entry point (%r) cannot have 'extras' "
+                "or refer to a module" % (ep,)
+            )
+
+        pyver = sys.version[:3]
+        pkg = ep.module_name
+        full = '.'.join(ep.attrs)
+        base = ep.attrs[0]
+        basename = os.path.basename(self.egg_output)
+
+        header = (
+            "#!/bin/sh\n"
+            'if [ `basename $0` = "%(basename)s" ]\n'
+            'then exec python%(pyver)s -c "'
+            "import sys, os; sys.path.insert(0, os.path.abspath('$0')); "
+            "from %(pkg)s import %(base)s; sys.exit(%(full)s())"
+            '" "$@"\n'
+            'else\n'
+            '  echo $0 is not the correct name for this egg file.\n'
+            '  echo Please rename it back to %(basename)s and try again.\n'
+            '  exec false\n'
+            'fi\n'
+        ) % locals()
+
+        if not self.dry_run:
+            mkpath(os.path.dirname(self.egg_output), dry_run=self.dry_run)
+            f = open(self.egg_output, 'w')
+            f.write(header)
+            f.close()
+        return 'a'
+
+    def copy_metadata_to(self, target_dir):
+        "Copy metadata (egg info) to the target_dir"
+        # normalize the path (so that a forward-slash in egg_info will
+        # match using startswith below)
+        norm_egg_info = os.path.normpath(self.egg_info)
+        prefix = os.path.join(norm_egg_info, '')
+        for path in self.ei_cmd.filelist.files:
+            if path.startswith(prefix):
+                target = os.path.join(target_dir, path[len(prefix):])
+                ensure_directory(target)
+                self.copy_file(path, target)
+
+    def get_ext_outputs(self):
+        """Get a list of relative paths to C extensions in the output distro"""
+
+        all_outputs = []
+        ext_outputs = []
+
+        paths = {self.bdist_dir: ''}
+        for base, dirs, files in sorted_walk(self.bdist_dir):
+            for filename in files:
+                if os.path.splitext(filename)[1].lower() in NATIVE_EXTENSIONS:
+                    all_outputs.append(paths[base] + filename)
+            for filename in dirs:
+                paths[os.path.join(base, filename)] = (paths[base] +
+                                                       filename + '/')
+
+        if self.distribution.has_ext_modules():
+            build_cmd = self.get_finalized_command('build_ext')
+            for ext in build_cmd.extensions:
+                if isinstance(ext, Library):
+                    continue
+                fullname = build_cmd.get_ext_fullname(ext.name)
+                filename = build_cmd.get_ext_filename(fullname)
+                if not os.path.basename(filename).startswith('dl-'):
+                    if os.path.exists(os.path.join(self.bdist_dir, filename)):
+                        ext_outputs.append(filename)
+
+        return all_outputs, ext_outputs
+
+
+NATIVE_EXTENSIONS = dict.fromkeys('.dll .so .dylib .pyd'.split())
+
+
+def walk_egg(egg_dir):
+    """Walk an unpacked egg's contents, skipping the metadata directory"""
+    walker = sorted_walk(egg_dir)
+    base, dirs, files = next(walker)
+    if 'EGG-INFO' in dirs:
+        dirs.remove('EGG-INFO')
+    yield base, dirs, files
+    for bdf in walker:
+        yield bdf
+
+
+def analyze_egg(egg_dir, stubs):
+    # check for existing flag in EGG-INFO
+    for flag, fn in safety_flags.items():
+        if os.path.exists(os.path.join(egg_dir, 'EGG-INFO', fn)):
+            return flag
+    if not can_scan():
+        return False
+    safe = True
+    for base, dirs, files in walk_egg(egg_dir):
+        for name in files:
+            if name.endswith('.py') or name.endswith('.pyw'):
+                continue
+            elif name.endswith('.pyc') or name.endswith('.pyo'):
+                # always scan, even if we already know we're not safe
+                safe = scan_module(egg_dir, base, name, stubs) and safe
+    return safe
+
+
+def write_safety_flag(egg_dir, safe):
+    # Write or remove zip safety flag file(s)
+    for flag, fn in safety_flags.items():
+        fn = os.path.join(egg_dir, fn)
+        if os.path.exists(fn):
+            if safe is None or bool(safe) != flag:
+                os.unlink(fn)
+        elif safe is not None and bool(safe) == flag:
+            f = open(fn, 'wt')
+            f.write('\n')
+            f.close()
+
+
+safety_flags = {
+    True: 'zip-safe',
+    False: 'not-zip-safe',
+}
+
+
+def scan_module(egg_dir, base, name, stubs):
+    """Check whether module possibly uses unsafe-for-zipfile stuff"""
+
+    filename = os.path.join(base, name)
+    if filename[:-1] in stubs:
+        return True  # Extension module
+    pkg = base[len(egg_dir) + 1:].replace(os.sep, '.')
+    module = pkg + (pkg and '.' or '') + os.path.splitext(name)[0]
+    if sys.version_info < (3, 3):
+        skip = 8  # skip magic & date
+    elif sys.version_info < (3, 7):
+        skip = 12  # skip magic & date & file size
+    else:
+        skip = 16  # skip magic & reserved? & date & file size
+    f = open(filename, 'rb')
+    f.read(skip)
+    code = marshal.load(f)
+    f.close()
+    safe = True
+    symbols = dict.fromkeys(iter_symbols(code))
+    for bad in ['__file__', '__path__']:
+        if bad in symbols:
+            log.warn("%s: module references %s", module, bad)
+            safe = False
+    if 'inspect' in symbols:
+        for bad in [
+            'getsource', 'getabsfile', 'getsourcefile', 'getfile',
+            'getsourcelines', 'findsource', 'getcomments', 'getframeinfo',
+            'getinnerframes', 'getouterframes', 'stack', 'trace'
+        ]:
+            if bad in symbols:
+                log.warn("%s: module MAY be using inspect.%s", module, bad)
+                safe = False
+    return safe
+
+
+def iter_symbols(code):
+    """Yield names and strings used by `code` and its nested code objects"""
+    for name in code.co_names:
+        yield name
+    for const in code.co_consts:
+        if isinstance(const, six.string_types):
+            yield const
+        elif isinstance(const, CodeType):
+            for name in iter_symbols(const):
+                yield name
+
+
+def can_scan():
+    if not sys.platform.startswith('java') and sys.platform != 'cli':
+        # CPython, PyPy, etc.
+        return True
+    log.warn("Unable to analyze compiled code on this platform.")
+    log.warn("Please ask the author to include a 'zip_safe'"
+             " setting (either True or False) in the package's setup.py")
+
+
+# Attribute names of options for commands that might need to be convinced to
+# install to the egg build directory
+
+INSTALL_DIRECTORY_ATTRS = [
+    'install_lib', 'install_dir', 'install_data', 'install_base'
+]
+
+
+def make_zipfile(zip_filename, base_dir, verbose=0, dry_run=0, compress=True,
+                 mode='w'):
+    """Create a zip file from all the files under 'base_dir'.  The output
+    zip file will be named 'base_dir' + ".zip".  Uses either the "zipfile"
+    Python module (if available) or the InfoZIP "zip" utility (if installed
+    and found on the default search path).  If neither tool is available,
+    raises DistutilsExecError.  Returns the name of the output zip file.
+    """
+    import zipfile
+
+    mkpath(os.path.dirname(zip_filename), dry_run=dry_run)
+    log.info("creating '%s' and adding '%s' to it", zip_filename, base_dir)
+
+    def visit(z, dirname, names):
+        for name in names:
+            path = os.path.normpath(os.path.join(dirname, name))
+            if os.path.isfile(path):
+                p = path[len(base_dir) + 1:]
+                if not dry_run:
+                    z.write(path, p)
+                log.debug("adding '%s'", p)
+
+    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
+    if not dry_run:
+        z = zipfile.ZipFile(zip_filename, mode, compression=compression)
+        for dirname, dirs, files in sorted_walk(base_dir):
+            visit(z, dirname, files)
+        z.close()
+    else:
+        for dirname, dirs, files in sorted_walk(base_dir):
+            visit(None, dirname, files)
+    return zip_filename
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_rpm.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_rpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..70730927ecaed778ebbdee98eb37c24ec3f1a8e6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_rpm.py
@@ -0,0 +1,43 @@
+import distutils.command.bdist_rpm as orig
+
+
+class bdist_rpm(orig.bdist_rpm):
+    """
+    Override the default bdist_rpm behavior to do the following:
+
+    1. Run egg_info to ensure the name and version are properly calculated.
+    2. Always run 'install' using --single-version-externally-managed to
+       disable eggs in RPM distributions.
+    3. Replace dash with underscore in the version numbers for better RPM
+       compatibility.
+    """
+
+    def run(self):
+        # ensure distro name is up-to-date
+        self.run_command('egg_info')
+
+        orig.bdist_rpm.run(self)
+
+    def _make_spec_file(self):
+        version = self.distribution.get_version()
+        rpmversion = version.replace('-', '_')
+        spec = orig.bdist_rpm._make_spec_file(self)
+        line23 = '%define version ' + version
+        line24 = '%define version ' + rpmversion
+        spec = [
+            line.replace(
+                "Source0: %{name}-%{version}.tar",
+                "Source0: %{name}-%{unmangled_version}.tar"
+            ).replace(
+                "setup.py install ",
+                "setup.py install --single-version-externally-managed "
+            ).replace(
+                "%setup",
+                "%setup -n %{name}-%{unmangled_version}"
+            ).replace(line23, line24)
+            for line in spec
+        ]
+        insert_loc = spec.index(line24) + 1
+        unmangled_version = "%define unmangled_version " + version
+        spec.insert(insert_loc, unmangled_version)
+        return spec
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_wininst.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_wininst.py
new file mode 100644
index 0000000000000000000000000000000000000000..073de97b46c92e2e221cade8c1350ab2c5cff891
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/bdist_wininst.py
@@ -0,0 +1,21 @@
+import distutils.command.bdist_wininst as orig
+
+
+class bdist_wininst(orig.bdist_wininst):
+    def reinitialize_command(self, command, reinit_subcommands=0):
+        """
+        Supplement reinitialize_command to work around
+        http://bugs.python.org/issue20819
+        """
+        cmd = self.distribution.reinitialize_command(
+            command, reinit_subcommands)
+        if command in ('install', 'install_lib'):
+            cmd.install_lib = None
+        return cmd
+
+    def run(self):
+        self._is_running = True
+        try:
+            orig.bdist_wininst.run(self)
+        finally:
+            self._is_running = False
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_clib.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_clib.py
new file mode 100644
index 0000000000000000000000000000000000000000..09caff6ffde8fc3f368cb635dc3cbbbc8851530d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_clib.py
@@ -0,0 +1,98 @@
+import distutils.command.build_clib as orig
+from distutils.errors import DistutilsSetupError
+from distutils import log
+from setuptools.dep_util import newer_pairwise_group
+
+
+class build_clib(orig.build_clib):
+    """
+    Override the default build_clib behaviour to do the following:
+
+    1. Implement a rudimentary timestamp-based dependency system
+       so 'compile()' doesn't run every time.
+    2. Add more keys to the 'build_info' dictionary:
+        * obj_deps - specify dependencies for each object compiled.
+                     this should be a dictionary mapping a key
+                     with the source filename to a list of
+                     dependencies. Use an empty string for global
+                     dependencies.
+        * cflags   - specify a list of additional flags to pass to
+                     the compiler.
+    """
+
+    def build_libraries(self, libraries):
+        for (lib_name, build_info) in libraries:
+            sources = build_info.get('sources')
+            if sources is None or not isinstance(sources, (list, tuple)):
+                raise DistutilsSetupError(
+                       "in 'libraries' option (library '%s'), "
+                       "'sources' must be present and must be "
+                       "a list of source filenames" % lib_name)
+            sources = list(sources)
+
+            log.info("building '%s' library", lib_name)
+
+            # Make sure everything is the correct type.
+            # obj_deps should be a dictionary of keys as sources
+            # and a list/tuple of files that are its dependencies.
+            obj_deps = build_info.get('obj_deps', dict())
+            if not isinstance(obj_deps, dict):
+                raise DistutilsSetupError(
+                       "in 'libraries' option (library '%s'), "
+                       "'obj_deps' must be a dictionary of "
+                       "type 'source: list'" % lib_name)
+            dependencies = []
+
+            # Get the global dependencies that are specified by the '' key.
+            # These will go into every source's dependency list.
+            global_deps = obj_deps.get('', list())
+            if not isinstance(global_deps, (list, tuple)):
+                raise DistutilsSetupError(
+                       "in 'libraries' option (library '%s'), "
+                       "'obj_deps' must be a dictionary of "
+                       "type 'source: list'" % lib_name)
+
+            # Build the list to be used by newer_pairwise_group
+            # each source will be auto-added to its dependencies.
+            for source in sources:
+                src_deps = [source]
+                src_deps.extend(global_deps)
+                extra_deps = obj_deps.get(source, list())
+                if not isinstance(extra_deps, (list, tuple)):
+                    raise DistutilsSetupError(
+                           "in 'libraries' option (library '%s'), "
+                           "'obj_deps' must be a dictionary of "
+                           "type 'source: list'" % lib_name)
+                src_deps.extend(extra_deps)
+                dependencies.append(src_deps)
+
+            expected_objects = self.compiler.object_filenames(
+                    sources,
+                    output_dir=self.build_temp
+                    )
+
+            if newer_pairwise_group(dependencies, expected_objects) != ([], []):
+                # First, compile the source code to object files in the library
+                # directory.  (This should probably change to putting object
+                # files in a temporary build directory.)
+                macros = build_info.get('macros')
+                include_dirs = build_info.get('include_dirs')
+                cflags = build_info.get('cflags')
+                objects = self.compiler.compile(
+                        sources,
+                        output_dir=self.build_temp,
+                        macros=macros,
+                        include_dirs=include_dirs,
+                        extra_postargs=cflags,
+                        debug=self.debug
+                        )
+
+            # Now "link" the object files together into a static library.
+            # (On Unix at least, this isn't really linking -- it just
+            # builds an archive.  Whatever.)
+            self.compiler.create_static_lib(
+                    expected_objects,
+                    lib_name,
+                    output_dir=self.build_clib,
+                    debug=self.debug
+                    )
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_ext.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_ext.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea97b37b2c4dd97a1abc49ef9e33d322617c1895
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_ext.py
@@ -0,0 +1,331 @@
+import os
+import sys
+import itertools
+import imp
+from distutils.command.build_ext import build_ext as _du_build_ext
+from distutils.file_util import copy_file
+from distutils.ccompiler import new_compiler
+from distutils.sysconfig import customize_compiler, get_config_var
+from distutils.errors import DistutilsError
+from distutils import log
+
+from setuptools.extension import Library
+from setuptools.extern import six
+
+try:
+    # Attempt to use Cython for building extensions, if available
+    from Cython.Distutils.build_ext import build_ext as _build_ext
+    # Additionally, assert that the compiler module will load
+    # also. Ref #1229.
+    __import__('Cython.Compiler.Main')
+except ImportError:
+    _build_ext = _du_build_ext
+
+# make sure _config_vars is initialized
+get_config_var("LDSHARED")
+from distutils.sysconfig import _config_vars as _CONFIG_VARS
+
+
+def _customize_compiler_for_shlib(compiler):
+    if sys.platform == "darwin":
+        # building .dylib requires additional compiler flags on OSX; here we
+        # temporarily substitute the pyconfig.h variables so that distutils'
+        # 'customize_compiler' uses them before we build the shared libraries.
+        tmp = _CONFIG_VARS.copy()
+        try:
+            # XXX Help!  I don't have any idea whether these are right...
+            _CONFIG_VARS['LDSHARED'] = (
+                "gcc -Wl,-x -dynamiclib -undefined dynamic_lookup")
+            _CONFIG_VARS['CCSHARED'] = " -dynamiclib"
+            _CONFIG_VARS['SO'] = ".dylib"
+            customize_compiler(compiler)
+        finally:
+            _CONFIG_VARS.clear()
+            _CONFIG_VARS.update(tmp)
+    else:
+        customize_compiler(compiler)
+
+
+have_rtld = False
+use_stubs = False
+libtype = 'shared'
+
+if sys.platform == "darwin":
+    use_stubs = True
+elif os.name != 'nt':
+    try:
+        import dl
+        use_stubs = have_rtld = hasattr(dl, 'RTLD_NOW')
+    except ImportError:
+        pass
+
+if_dl = lambda s: s if have_rtld else ''
+
+
+def get_abi3_suffix():
+    """Return the file extension for an abi3-compliant Extension()"""
+    for suffix, _, _ in (s for s in imp.get_suffixes() if s[2] == imp.C_EXTENSION):
+        if '.abi3' in suffix:  # Unix
+            return suffix
+        elif suffix == '.pyd':  # Windows
+            return suffix
+
+
+class build_ext(_build_ext):
+    def run(self):
+        """Build extensions in build directory, then copy if --inplace"""
+        old_inplace, self.inplace = self.inplace, 0
+        _build_ext.run(self)
+        self.inplace = old_inplace
+        if old_inplace:
+            self.copy_extensions_to_source()
+
+    def copy_extensions_to_source(self):
+        build_py = self.get_finalized_command('build_py')
+        for ext in self.extensions:
+            fullname = self.get_ext_fullname(ext.name)
+            filename = self.get_ext_filename(fullname)
+            modpath = fullname.split('.')
+            package = '.'.join(modpath[:-1])
+            package_dir = build_py.get_package_dir(package)
+            dest_filename = os.path.join(package_dir,
+                                         os.path.basename(filename))
+            src_filename = os.path.join(self.build_lib, filename)
+
+            # Always copy, even if source is older than destination, to ensure
+            # that the right extensions for the current Python/platform are
+            # used.
+            copy_file(
+                src_filename, dest_filename, verbose=self.verbose,
+                dry_run=self.dry_run
+            )
+            if ext._needs_stub:
+                self.write_stub(package_dir or os.curdir, ext, True)
+
+    def get_ext_filename(self, fullname):
+        filename = _build_ext.get_ext_filename(self, fullname)
+        if fullname in self.ext_map:
+            ext = self.ext_map[fullname]
+            use_abi3 = (
+                six.PY3
+                and getattr(ext, 'py_limited_api')
+                and get_abi3_suffix()
+            )
+            if use_abi3:
+                so_ext = _get_config_var_837('EXT_SUFFIX')
+                filename = filename[:-len(so_ext)]
+                filename = filename + get_abi3_suffix()
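+                # Illustrative effect (CPython 3.6 on Linux): a filename like
+                # 'spam.cpython-36m-x86_64-linux-gnu.so' becomes 'spam.abi3.so'
+                # when py_limited_api is set and an abi3 suffix is available.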
+            if isinstance(ext, Library):
+                fn, ext = os.path.splitext(filename)
+                return self.shlib_compiler.library_filename(fn, libtype)
+            elif use_stubs and ext._links_to_dynamic:
+                d, fn = os.path.split(filename)
+                return os.path.join(d, 'dl-' + fn)
+        return filename
+
+    def initialize_options(self):
+        _build_ext.initialize_options(self)
+        self.shlib_compiler = None
+        self.shlibs = []
+        self.ext_map = {}
+
+    def finalize_options(self):
+        _build_ext.finalize_options(self)
+        self.extensions = self.extensions or []
+        self.check_extensions_list(self.extensions)
+        self.shlibs = [ext for ext in self.extensions
+                       if isinstance(ext, Library)]
+        if self.shlibs:
+            self.setup_shlib_compiler()
+        for ext in self.extensions:
+            ext._full_name = self.get_ext_fullname(ext.name)
+        for ext in self.extensions:
+            fullname = ext._full_name
+            self.ext_map[fullname] = ext
+
+            # distutils 3.1 will also ask for module names
+            # XXX what to do with conflicts?
+            self.ext_map[fullname.split('.')[-1]] = ext
+
+            ltd = self.shlibs and self.links_to_dynamic(ext) or False
+            ns = ltd and use_stubs and not isinstance(ext, Library)
+            ext._links_to_dynamic = ltd
+            ext._needs_stub = ns
+            filename = ext._file_name = self.get_ext_filename(fullname)
+            libdir = os.path.dirname(os.path.join(self.build_lib, filename))
+            if ltd and libdir not in ext.library_dirs:
+                ext.library_dirs.append(libdir)
+            if ltd and use_stubs and os.curdir not in ext.runtime_library_dirs:
+                ext.runtime_library_dirs.append(os.curdir)
+
+    def setup_shlib_compiler(self):
+        compiler = self.shlib_compiler = new_compiler(
+            compiler=self.compiler, dry_run=self.dry_run, force=self.force
+        )
+        _customize_compiler_for_shlib(compiler)
+
+        if self.include_dirs is not None:
+            compiler.set_include_dirs(self.include_dirs)
+        if self.define is not None:
+            # 'define' option is a list of (name,value) tuples
+            for (name, value) in self.define:
+                compiler.define_macro(name, value)
+        if self.undef is not None:
+            for macro in self.undef:
+                compiler.undefine_macro(macro)
+        if self.libraries is not None:
+            compiler.set_libraries(self.libraries)
+        if self.library_dirs is not None:
+            compiler.set_library_dirs(self.library_dirs)
+        if self.rpath is not None:
+            compiler.set_runtime_library_dirs(self.rpath)
+        if self.link_objects is not None:
+            compiler.set_link_objects(self.link_objects)
+
+        # hack so distutils' build_extension() builds a library instead
+        compiler.link_shared_object = link_shared_object.__get__(compiler)
+
+    def get_export_symbols(self, ext):
+        if isinstance(ext, Library):
+            return ext.export_symbols
+        return _build_ext.get_export_symbols(self, ext)
+
+    def build_extension(self, ext):
+        ext._convert_pyx_sources_to_lang()
+        _compiler = self.compiler
+        try:
+            if isinstance(ext, Library):
+                self.compiler = self.shlib_compiler
+            _build_ext.build_extension(self, ext)
+            if ext._needs_stub:
+                cmd = self.get_finalized_command('build_py').build_lib
+                self.write_stub(cmd, ext)
+        finally:
+            self.compiler = _compiler
+
+    def links_to_dynamic(self, ext):
+        """Return true if 'ext' links to a dynamic lib in the same package"""
+        # XXX this should check to ensure the lib is actually being built
+        # XXX as dynamic, and not just using a locally-found version or a
+        # XXX static-compiled version
+        libnames = dict.fromkeys([lib._full_name for lib in self.shlibs])
+        pkg = '.'.join(ext._full_name.split('.')[:-1] + [''])
+        return any(pkg + libname in libnames for libname in ext.libraries)
+
+    def get_outputs(self):
+        return _build_ext.get_outputs(self) + self.__get_stubs_outputs()
+
+    def __get_stubs_outputs(self):
+        # assemble the base name for each extension that needs a stub
+        ns_ext_bases = (
+            os.path.join(self.build_lib, *ext._full_name.split('.'))
+            for ext in self.extensions
+            if ext._needs_stub
+        )
+        # pair each base with the extension
+        pairs = itertools.product(ns_ext_bases, self.__get_output_extensions())
+        return list(base + fnext for base, fnext in pairs)
+
+    def __get_output_extensions(self):
+        yield '.py'
+        yield '.pyc'
+        if self.get_finalized_command('build_py').optimize:
+            yield '.pyo'
+
+    def write_stub(self, output_dir, ext, compile=False):
+        log.info("writing stub loader for %s to %s", ext._full_name,
+                 output_dir)
+        stub_file = (os.path.join(output_dir, *ext._full_name.split('.')) +
+                     '.py')
+        if compile and os.path.exists(stub_file):
+            raise DistutilsError(stub_file + " already exists! Please delete.")
+        if not self.dry_run:
+            f = open(stub_file, 'w')
+            f.write(
+                '\n'.join([
+                    "def __bootstrap__():",
+                    "   global __bootstrap__, __file__, __loader__",
+                    "   import sys, os, pkg_resources, imp" + if_dl(", dl"),
+                    "   __file__ = pkg_resources.resource_filename"
+                    "(__name__,%r)"
+                    % os.path.basename(ext._file_name),
+                    "   del __bootstrap__",
+                    "   if '__loader__' in globals():",
+                    "       del __loader__",
+                    if_dl("   old_flags = sys.getdlopenflags()"),
+                    "   old_dir = os.getcwd()",
+                    "   try:",
+                    "     os.chdir(os.path.dirname(__file__))",
+                    if_dl("     sys.setdlopenflags(dl.RTLD_NOW)"),
+                    "     imp.load_dynamic(__name__,__file__)",
+                    "   finally:",
+                    if_dl("     sys.setdlopenflags(old_flags)"),
+                    "     os.chdir(old_dir)",
+                    "__bootstrap__()",
+                    ""  # terminal \n
+                ])
+            )
+            f.close()
+        if compile:
+            from distutils.util import byte_compile
+
+            byte_compile([stub_file], optimize=0,
+                         force=True, dry_run=self.dry_run)
+            optimize = self.get_finalized_command('install_lib').optimize
+            if optimize > 0:
+                byte_compile([stub_file], optimize=optimize,
+                             force=True, dry_run=self.dry_run)
+            if os.path.exists(stub_file) and not self.dry_run:
+                os.unlink(stub_file)
+
+
+if use_stubs or os.name == 'nt':
+    # Build shared libraries
+    #
+    def link_shared_object(
+            self, objects, output_libname, output_dir=None, libraries=None,
+            library_dirs=None, runtime_library_dirs=None, export_symbols=None,
+            debug=0, extra_preargs=None, extra_postargs=None, build_temp=None,
+            target_lang=None):
+        self.link(
+            self.SHARED_LIBRARY, objects, output_libname,
+            output_dir, libraries, library_dirs, runtime_library_dirs,
+            export_symbols, debug, extra_preargs, extra_postargs,
+            build_temp, target_lang
+        )
+else:
+    # Build static libraries everywhere else
+    libtype = 'static'
+
+    def link_shared_object(
+            self, objects, output_libname, output_dir=None, libraries=None,
+            library_dirs=None, runtime_library_dirs=None, export_symbols=None,
+            debug=0, extra_preargs=None, extra_postargs=None, build_temp=None,
+            target_lang=None):
+        # XXX we need to either disallow these attrs on Library instances,
+        # or warn/abort here if set, or something...
+        # libraries=None, library_dirs=None, runtime_library_dirs=None,
+        # export_symbols=None, extra_preargs=None, extra_postargs=None,
+        # build_temp=None
+
+        assert output_dir is None  # distutils build_ext doesn't pass this
+        output_dir, filename = os.path.split(output_libname)
+        basename, ext = os.path.splitext(filename)
+        if self.library_filename("x").startswith('lib'):
+            # strip 'lib' prefix; this is kludgy if some platform uses
+            # a different prefix
+            basename = basename[3:]
+
+        self.create_static_lib(
+            objects, basename, output_dir, debug, target_lang
+        )
+
+
+def _get_config_var_837(name):
+    """
+    In https://github.com/pypa/setuptools/pull/837, we discovered
+    Python 3.3.0 exposes the extension suffix under the name 'SO'.
+    """
+    if sys.version_info < (3, 3, 1):
+        name = 'SO'
+    return get_config_var(name)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_py.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_py.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0314fd413ae7f8c1027ccde0b092fd493fb104b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/build_py.py
@@ -0,0 +1,270 @@
+from glob import glob
+from distutils.util import convert_path
+import distutils.command.build_py as orig
+import os
+import fnmatch
+import textwrap
+import io
+import distutils.errors
+import itertools
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import map, filter, filterfalse
+
+try:
+    from setuptools.lib2to3_ex import Mixin2to3
+except ImportError:
+
+    class Mixin2to3:
+        def run_2to3(self, files, doctests=True):
+            "do nothing"
+
+
+class build_py(orig.build_py, Mixin2to3):
+    """Enhanced 'build_py' command that includes data files with packages
+
+    The data files are specified via a 'package_data' argument to 'setup()'.
+    See 'setuptools.dist.Distribution' for more details.
+
+    Also, this version of the 'build_py' command allows you to specify both
+    'py_modules' and 'packages' in the same setup operation.
+    """
+
+    def finalize_options(self):
+        orig.build_py.finalize_options(self)
+        self.package_data = self.distribution.package_data
+        self.exclude_package_data = (self.distribution.exclude_package_data or
+                                     {})
+        if 'data_files' in self.__dict__:
+            del self.__dict__['data_files']
+        self.__updated_files = []
+        self.__doctests_2to3 = []
+
+    def run(self):
+        """Build modules, packages, and copy data files to build directory"""
+        if not self.py_modules and not self.packages:
+            return
+
+        if self.py_modules:
+            self.build_modules()
+
+        if self.packages:
+            self.build_packages()
+            self.build_package_data()
+
+        self.run_2to3(self.__updated_files, False)
+        self.run_2to3(self.__updated_files, True)
+        self.run_2to3(self.__doctests_2to3, True)
+
+        # Only compile actual .py files, using our base class' idea of what our
+        # output files are.
+        self.byte_compile(orig.build_py.get_outputs(self, include_bytecode=0))
+
+    def __getattr__(self, attr):
+        "lazily compute data files"
+        if attr == 'data_files':
+            self.data_files = self._get_data_files()
+            return self.data_files
+        return orig.build_py.__getattr__(self, attr)
+
+    def build_module(self, module, module_file, package):
+        if six.PY2 and isinstance(package, six.string_types):
+            # avoid errors on Python 2 when unicode is passed (#190)
+            package = package.split('.')
+        outfile, copied = orig.build_py.build_module(self, module, module_file,
+                                                     package)
+        if copied:
+            self.__updated_files.append(outfile)
+        return outfile, copied
+
+    def _get_data_files(self):
+        """Generate list of '(package,src_dir,build_dir,filenames)' tuples"""
+        self.analyze_manifest()
+        return list(map(self._get_pkg_data_files, self.packages or ()))
+
+    def _get_pkg_data_files(self, package):
+        # Locate package source directory
+        src_dir = self.get_package_dir(package)
+
+        # Compute package build directory
+        build_dir = os.path.join(*([self.build_lib] + package.split('.')))
+
+        # Strip directory from globbed filenames
+        filenames = [
+            os.path.relpath(file, src_dir)
+            for file in self.find_data_files(package, src_dir)
+        ]
+        return package, src_dir, build_dir, filenames
+
+    def find_data_files(self, package, src_dir):
+        """Return filenames for package's data files in 'src_dir'"""
+        patterns = self._get_platform_patterns(
+            self.package_data,
+            package,
+            src_dir,
+        )
+        globs_expanded = map(glob, patterns)
+        # flatten the expanded globs into an iterable of matches
+        globs_matches = itertools.chain.from_iterable(globs_expanded)
+        glob_files = filter(os.path.isfile, globs_matches)
+        files = itertools.chain(
+            self.manifest_files.get(package, []),
+            glob_files,
+        )
+        return self.exclude_data_files(package, src_dir, files)
+
+    def build_package_data(self):
+        """Copy data files into build directory"""
+        for package, src_dir, build_dir, filenames in self.data_files:
+            for filename in filenames:
+                target = os.path.join(build_dir, filename)
+                self.mkpath(os.path.dirname(target))
+                srcfile = os.path.join(src_dir, filename)
+                outf, copied = self.copy_file(srcfile, target)
+                srcfile = os.path.abspath(srcfile)
+                if (copied and
+                        srcfile in self.distribution.convert_2to3_doctests):
+                    self.__doctests_2to3.append(outf)
+
+    def analyze_manifest(self):
+        self.manifest_files = mf = {}
+        if not self.distribution.include_package_data:
+            return
+        src_dirs = {}
+        for package in self.packages or ():
+            # Locate package source directory
+            src_dirs[assert_relative(self.get_package_dir(package))] = package
+
+        self.run_command('egg_info')
+        ei_cmd = self.get_finalized_command('egg_info')
+        for path in ei_cmd.filelist.files:
+            d, f = os.path.split(assert_relative(path))
+            prev = None
+            oldf = f
+            while d and d != prev and d not in src_dirs:
+                prev = d
+                d, df = os.path.split(d)
+                f = os.path.join(df, f)
+            if d in src_dirs:
+                if path.endswith('.py') and f == oldf:
+                    continue  # it's a module, not data
+                mf.setdefault(src_dirs[d], []).append(path)
+
+    def get_data_files(self):
+        pass  # Lazily compute data files in _get_data_files() function.
+
+    def check_package(self, package, package_dir):
+        """Check namespace packages' __init__ for declare_namespace"""
+        try:
+            return self.packages_checked[package]
+        except KeyError:
+            pass
+
+        init_py = orig.build_py.check_package(self, package, package_dir)
+        self.packages_checked[package] = init_py
+
+        if not init_py or not self.distribution.namespace_packages:
+            return init_py
+
+        for pkg in self.distribution.namespace_packages:
+            if pkg == package or pkg.startswith(package + '.'):
+                break
+        else:
+            return init_py
+
+        with io.open(init_py, 'rb') as f:
+            contents = f.read()
+        if b'declare_namespace' not in contents:
+            raise distutils.errors.DistutilsError(
+                "Namespace package problem: %s is a namespace package, but "
+                "its\n__init__.py does not call declare_namespace()! Please "
+                'fix it.\n(See the setuptools manual under '
+                '"Namespace Packages" for details.)\n"' % (package,)
+            )
+        return init_py
+
+    def initialize_options(self):
+        self.packages_checked = {}
+        orig.build_py.initialize_options(self)
+
+    def get_package_dir(self, package):
+        res = orig.build_py.get_package_dir(self, package)
+        if self.distribution.src_root is not None:
+            return os.path.join(self.distribution.src_root, res)
+        return res
+
+    def exclude_data_files(self, package, src_dir, files):
+        """Filter filenames for package's data files in 'src_dir'"""
+        files = list(files)
+        patterns = self._get_platform_patterns(
+            self.exclude_package_data,
+            package,
+            src_dir,
+        )
+        match_groups = (
+            fnmatch.filter(files, pattern)
+            for pattern in patterns
+        )
+        # flatten the groups of matches into an iterable of matches
+        matches = itertools.chain.from_iterable(match_groups)
+        bad = set(matches)
+        keepers = (
+            fn
+            for fn in files
+            if fn not in bad
+        )
+        # ditch dupes
+        return list(_unique_everseen(keepers))
+
+    @staticmethod
+    def _get_platform_patterns(spec, package, src_dir):
+        """
+        yield platform-specific path patterns (suitable for glob
+        or fn_match) from a glob-based spec (such as
+        self.package_data or self.exclude_package_data)
+        matching package in src_dir.
+        """
+        raw_patterns = itertools.chain(
+            spec.get('', []),
+            spec.get(package, []),
+        )
+        return (
+            # Each pattern has to be converted to a platform-specific path
+            os.path.join(src_dir, convert_path(pattern))
+            for pattern in raw_patterns
+        )
+
+
+# from Python docs
+def _unique_everseen(iterable, key=None):
+    "List unique elements, preserving order. Remember all elements ever seen."
+    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+    # unique_everseen('ABBCcAD', str.lower) --> A B C D
+    seen = set()
+    seen_add = seen.add
+    if key is None:
+        for element in filterfalse(seen.__contains__, iterable):
+            seen_add(element)
+            yield element
+    else:
+        for element in iterable:
+            k = key(element)
+            if k not in seen:
+                seen_add(k)
+                yield element
+
+
+def assert_relative(path):
+    if not os.path.isabs(path):
+        return path
+    from distutils.errors import DistutilsSetupError
+
+    msg = textwrap.dedent("""
+        Error: setup script specifies an absolute path:
+
+            %s
+
+        setup() arguments must *always* be /-separated paths relative to the
+        setup.py directory, *never* absolute paths.
+        """).lstrip() % path
+    raise DistutilsSetupError(msg)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/develop.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/develop.py
new file mode 100644
index 0000000000000000000000000000000000000000..959c932a5c0c3a94463e3547c05ba26958ae5b96
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/develop.py
@@ -0,0 +1,216 @@
+from distutils.util import convert_path
+from distutils import log
+from distutils.errors import DistutilsError, DistutilsOptionError
+import os
+import glob
+import io
+
+from setuptools.extern import six
+
+from pkg_resources import Distribution, PathMetadata, normalize_path
+from setuptools.command.easy_install import easy_install
+from setuptools import namespaces
+import setuptools
+
+
+class develop(namespaces.DevelopInstaller, easy_install):
+    """Set up package for development"""
+
+    description = "install package in 'development mode'"
+
+    user_options = easy_install.user_options + [
+        ("uninstall", "u", "Uninstall this source package"),
+        ("egg-path=", None, "Set the path to be used in the .egg-link file"),
+    ]
+
+    boolean_options = easy_install.boolean_options + ['uninstall']
+
+    command_consumes_arguments = False  # override base
+
+    def run(self):
+        if self.uninstall:
+            self.multi_version = True
+            self.uninstall_link()
+            self.uninstall_namespaces()
+        else:
+            self.install_for_development()
+        self.warn_deprecated_options()
+
+    def initialize_options(self):
+        self.uninstall = None
+        self.egg_path = None
+        easy_install.initialize_options(self)
+        self.setup_path = None
+        self.always_copy_from = '.'  # always copy eggs installed in curdir
+
+    def finalize_options(self):
+        ei = self.get_finalized_command("egg_info")
+        if ei.broken_egg_info:
+            template = "Please rename %r to %r before using 'develop'"
+            args = ei.egg_info, ei.broken_egg_info
+            raise DistutilsError(template % args)
+        self.args = [ei.egg_name]
+
+        easy_install.finalize_options(self)
+        self.expand_basedirs()
+        self.expand_dirs()
+        # pick up setup-dir .egg files only: no .egg-info
+        self.package_index.scan(glob.glob('*.egg'))
+
+        egg_link_fn = ei.egg_name + '.egg-link'
+        self.egg_link = os.path.join(self.install_dir, egg_link_fn)
+        self.egg_base = ei.egg_base
+        if self.egg_path is None:
+            self.egg_path = os.path.abspath(ei.egg_base)
+
+        target = normalize_path(self.egg_base)
+        egg_path = normalize_path(os.path.join(self.install_dir,
+                                               self.egg_path))
+        if egg_path != target:
+            raise DistutilsOptionError(
+                "--egg-path must be a relative path from the install"
+                " directory to " + target
+            )
+
+        # Make a distribution for the package's source
+        self.dist = Distribution(
+            target,
+            PathMetadata(target, os.path.abspath(ei.egg_info)),
+            project_name=ei.egg_name
+        )
+
+        self.setup_path = self._resolve_setup_path(
+            self.egg_base,
+            self.install_dir,
+            self.egg_path,
+        )
+
+    @staticmethod
+    def _resolve_setup_path(egg_base, install_dir, egg_path):
+        """
+        Generate a relative path from egg_base back to '.', where the
+        setup script resides, and ensure that this path, appended to
+        $install_dir/$egg_path, resolves back to the setup directory.
+        """
+        path_to_setup = egg_base.replace(os.sep, '/').rstrip('/')
+        if path_to_setup != os.curdir:
+            path_to_setup = '../' * (path_to_setup.count('/') + 1)
+        resolved = normalize_path(
+            os.path.join(install_dir, egg_path, path_to_setup)
+        )
+        if resolved != normalize_path(os.curdir):
+            raise DistutilsOptionError(
+                "Can't get a consistent path to setup script from"
+                " installation directory", resolved, normalize_path(os.curdir))
+        return path_to_setup
+
+    def install_for_development(self):
+        if six.PY3 and getattr(self.distribution, 'use_2to3', False):
+            # If we run 2to3 we can not do this inplace:
+
+            # Ensure metadata is up-to-date
+            self.reinitialize_command('build_py', inplace=0)
+            self.run_command('build_py')
+            bpy_cmd = self.get_finalized_command("build_py")
+            build_path = normalize_path(bpy_cmd.build_lib)
+
+            # Build extensions
+            self.reinitialize_command('egg_info', egg_base=build_path)
+            self.run_command('egg_info')
+
+            self.reinitialize_command('build_ext', inplace=0)
+            self.run_command('build_ext')
+
+            # Fixup egg-link and easy-install.pth
+            ei_cmd = self.get_finalized_command("egg_info")
+            self.egg_path = build_path
+            self.dist.location = build_path
+            # XXX
+            self.dist._provider = PathMetadata(build_path, ei_cmd.egg_info)
+        else:
+            # Without 2to3 inplace works fine:
+            self.run_command('egg_info')
+
+            # Build extensions in-place
+            self.reinitialize_command('build_ext', inplace=1)
+            self.run_command('build_ext')
+
+        self.install_site_py()  # ensure that target dir is site-safe
+        if setuptools.bootstrap_install_from:
+            self.easy_install(setuptools.bootstrap_install_from)
+            setuptools.bootstrap_install_from = None
+
+        self.install_namespaces()
+
+        # create an .egg-link in the installation dir, pointing to our egg
+        log.info("Creating %s (link to %s)", self.egg_link, self.egg_base)
+        if not self.dry_run:
+            with open(self.egg_link, "w") as f:
+                f.write(self.egg_path + "\n" + self.setup_path)
+        # postprocess the installed distro, fixing up .pth, installing scripts,
+        # and handling requirements
+        self.process_distribution(None, self.dist, not self.no_deps)
+
+    def uninstall_link(self):
+        if os.path.exists(self.egg_link):
+            log.info("Removing %s (link to %s)", self.egg_link, self.egg_base)
+            egg_link_file = open(self.egg_link)
+            contents = [line.rstrip() for line in egg_link_file]
+            egg_link_file.close()
+            if contents not in ([self.egg_path],
+                                [self.egg_path, self.setup_path]):
+                log.warn("Link points to %s: uninstall aborted", contents)
+                return
+            if not self.dry_run:
+                os.unlink(self.egg_link)
+        if not self.dry_run:
+            self.update_pth(self.dist)  # remove any .pth link to us
+        if self.distribution.scripts:
+            # XXX should also check for entry point scripts!
+            log.warn("Note: you must uninstall or replace scripts manually!")
+
+    def install_egg_scripts(self, dist):
+        if dist is not self.dist:
+            # Installing a dependency, so fall back to normal behavior
+            return easy_install.install_egg_scripts(self, dist)
+
+        # create wrapper scripts in the script dir, pointing to dist.scripts
+
+        # new-style...
+        self.install_wrapper_scripts(dist)
+
+        # ...and old-style
+        for script_name in self.distribution.scripts or []:
+            script_path = os.path.abspath(convert_path(script_name))
+            script_name = os.path.basename(script_path)
+            with io.open(script_path) as strm:
+                script_text = strm.read()
+            self.install_script(dist, script_name, script_text, script_path)
+
+    def install_wrapper_scripts(self, dist):
+        dist = VersionlessRequirement(dist)
+        return easy_install.install_wrapper_scripts(self, dist)
+
+
+class VersionlessRequirement(object):
+    """
+    Adapt a pkg_resources.Distribution to simply return the project
+    name as the 'requirement' so that scripts will work across
+    multiple versions.
+
+    >>> dist = Distribution(project_name='foo', version='1.0')
+    >>> str(dist.as_requirement())
+    'foo==1.0'
+    >>> adapted_dist = VersionlessRequirement(dist)
+    >>> str(adapted_dist.as_requirement())
+    'foo'
+    """
+
+    def __init__(self, dist):
+        self.__dist = dist
+
+    def __getattr__(self, name):
+        return getattr(self.__dist, name)
+
+    def as_requirement(self):
+        return self.project_name
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/dist_info.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/dist_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..c45258fa03a3ddd6a73db4514365f8741d16ca86
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/dist_info.py
@@ -0,0 +1,36 @@
+"""
+Create a dist_info directory
+As defined in the wheel specification
+"""
+
+import os
+
+from distutils.core import Command
+from distutils import log
+
+
+class dist_info(Command):
+
+    description = 'create a .dist-info directory'
+
+    user_options = [
+        ('egg-base=', 'e', "directory containing .egg-info directories"
+                           " (default: top of the source tree)"),
+    ]
+
+    def initialize_options(self):
+        self.egg_base = None
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        egg_info = self.get_finalized_command('egg_info')
+        egg_info.egg_base = self.egg_base
+        egg_info.finalize_options()
+        egg_info.run()
+        dist_info_dir = egg_info.egg_info[:-len('.egg-info')] + '.dist-info'
+        log.info("creating '{}'".format(os.path.abspath(dist_info_dir)))
+
+        bdist_wheel = self.get_finalized_command('bdist_wheel')
+        bdist_wheel.egg2dist(egg_info.egg_info, dist_info_dir)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/easy_install.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/easy_install.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9fa55d90216a51455572c5f2eddb15ce13a7d6e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/easy_install.py
@@ -0,0 +1,2389 @@
+#!/usr/bin/env python
+"""
+Easy Install
+------------
+
+A tool for doing automatic download/extract/build of distutils-based Python
+packages.  For detailed documentation, see the accompanying EasyInstall.txt
+file, or visit the `EasyInstall home page`__.
+
+__ https://setuptools.readthedocs.io/en/latest/easy_install.html
+
+"""
+
+from glob import glob
+from distutils.util import get_platform
+from distutils.util import convert_path, subst_vars
+from distutils.errors import (
+    DistutilsArgError, DistutilsOptionError,
+    DistutilsError, DistutilsPlatformError,
+)
+from distutils.command.install import INSTALL_SCHEMES, SCHEME_KEYS
+from distutils import log, dir_util
+from distutils.command.build_scripts import first_line_re
+from distutils.spawn import find_executable
+import sys
+import os
+import zipimport
+import shutil
+import tempfile
+import zipfile
+import re
+import stat
+import random
+import textwrap
+import warnings
+import site
+import struct
+import contextlib
+import subprocess
+import shlex
+import io
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import configparser, map
+
+from setuptools import Command
+from setuptools.sandbox import run_setup
+from setuptools.py31compat import get_path, get_config_vars
+from setuptools.py27compat import rmtree_safe
+from setuptools.command import setopt
+from setuptools.archive_util import unpack_archive
+from setuptools.package_index import (
+    PackageIndex, parse_requirement_arg, URL_SCHEME,
+)
+from setuptools.command import bdist_egg, egg_info
+from setuptools.wheel import Wheel
+from pkg_resources import (
+    yield_lines, normalize_path, resource_string, ensure_directory,
+    get_distribution, find_distributions, Environment, Requirement,
+    Distribution, PathMetadata, EggMetadata, WorkingSet, DistributionNotFound,
+    VersionConflict, DEVELOP_DIST,
+)
+import pkg_resources.py31compat
+
+# Turn on PEP440Warnings
+warnings.filterwarnings("default", category=pkg_resources.PEP440Warning)
+
+__all__ = [
+    'samefile', 'easy_install', 'PthDistributions', 'extract_wininst_cfg',
+    'main', 'get_exe_prefixes',
+]
+
+
+def is_64bit():
+    return struct.calcsize("P") == 8
+
+
+def samefile(p1, p2):
+    """
+    Determine if two paths reference the same file.
+
+    Augments os.path.samefile to work on Windows and
+    suppresses errors if the path doesn't exist.
+    """
+    both_exist = os.path.exists(p1) and os.path.exists(p2)
+    use_samefile = hasattr(os.path, 'samefile') and both_exist
+    if use_samefile:
+        return os.path.samefile(p1, p2)
+    norm_p1 = os.path.normpath(os.path.normcase(p1))
+    norm_p2 = os.path.normpath(os.path.normcase(p2))
+    return norm_p1 == norm_p2
+
+
+if six.PY2:
+
+    def _to_ascii(s):
+        return s
+
+    def isascii(s):
+        try:
+            six.text_type(s, 'ascii')
+            return True
+        except UnicodeError:
+            return False
+else:
+
+    def _to_ascii(s):
+        return s.encode('ascii')
+
+    def isascii(s):
+        try:
+            s.encode('ascii')
+            return True
+        except UnicodeError:
+            return False
+
+
+_one_liner = lambda text: textwrap.dedent(text).strip().replace('\n', '; ')
+
+
+class easy_install(Command):
+    """Manage a download/build/install process"""
+    description = "Find/get/install Python packages"
+    command_consumes_arguments = True
+
+    user_options = [
+        ('prefix=', None, "installation prefix"),
+        ("zip-ok", "z", "install package as a zipfile"),
+        ("multi-version", "m", "make apps have to require() a version"),
+        ("upgrade", "U", "force upgrade (searches PyPI for latest versions)"),
+        ("install-dir=", "d", "install package to DIR"),
+        ("script-dir=", "s", "install scripts to DIR"),
+        ("exclude-scripts", "x", "Don't install scripts"),
+        ("always-copy", "a", "Copy all needed packages to install dir"),
+        ("index-url=", "i", "base URL of Python Package Index"),
+        ("find-links=", "f", "additional URL(s) to search for packages"),
+        ("build-directory=", "b",
+         "download/extract/build in DIR; keep the results"),
+        ('optimize=', 'O',
+         "also compile with optimization: -O1 for \"python -O\", "
+         "-O2 for \"python -OO\", and -O0 to disable [default: -O0]"),
+        ('record=', None,
+         "filename in which to record list of installed files"),
+        ('always-unzip', 'Z', "don't install as a zipfile, no matter what"),
+        ('site-dirs=', 'S', "list of directories where .pth files work"),
+        ('editable', 'e', "Install specified packages in editable form"),
+        ('no-deps', 'N', "don't install dependencies"),
+        ('allow-hosts=', 'H', "pattern(s) that hostnames must match"),
+        ('local-snapshots-ok', 'l',
+         "allow building eggs from local checkouts"),
+        ('version', None, "print version information and exit"),
+        ('install-layout=', None, "installation layout to choose (known values: deb)"),
+        ('force-installation-into-system-dir', '0', "force installation into /usr"),
+        ('no-find-links', None,
+         "Don't load find-links defined in packages being installed")
+    ]
+    boolean_options = [
+        'zip-ok', 'multi-version', 'exclude-scripts', 'upgrade', 'always-copy',
+        'editable',
+        'no-deps', 'local-snapshots-ok', 'version', 'force-installation-into-system-dir'
+    ]
+
+    if site.ENABLE_USER_SITE:
+        help_msg = "install in user site-package '%s'" % site.USER_SITE
+        user_options.append(('user', None, help_msg))
+        boolean_options.append('user')
+
+    negative_opt = {'always-unzip': 'zip-ok'}
+    create_index = PackageIndex
+
+    def initialize_options(self):
+        # the --user option seems to be an opt-in one,
+        # so the default should be False.
+        self.user = 0
+        self.zip_ok = self.local_snapshots_ok = None
+        self.install_dir = self.script_dir = self.exclude_scripts = None
+        self.index_url = None
+        self.find_links = None
+        self.build_directory = None
+        self.args = None
+        self.optimize = self.record = None
+        self.upgrade = self.always_copy = self.multi_version = None
+        self.editable = self.no_deps = self.allow_hosts = None
+        self.root = self.prefix = self.no_report = None
+        self.version = None
+        self.install_purelib = None  # for pure module distributions
+        self.install_platlib = None  # non-pure (dists w/ extensions)
+        self.install_headers = None  # for C/C++ headers
+        self.install_lib = None  # set to either purelib or platlib
+        self.install_scripts = None
+        self.install_data = None
+        self.install_base = None
+        self.install_platbase = None
+        if site.ENABLE_USER_SITE:
+            self.install_userbase = site.USER_BASE
+            self.install_usersite = site.USER_SITE
+        else:
+            self.install_userbase = None
+            self.install_usersite = None
+        self.no_find_links = None
+
+        # Options not specifiable via command line
+        self.package_index = None
+        self.pth_file = self.always_copy_from = None
+        self.site_dirs = None
+        self.installed_projects = {}
+        self.sitepy_installed = False
+        # enable custom installation, known values: deb
+        self.install_layout = None
+        self.force_installation_into_system_dir = None
+        self.multiarch = None
+
+        # Always read easy_install options, even if we are subclassed, or have
+        # an independent instance created.  This ensures that defaults will
+        # always come from the standard configuration file(s)' "easy_install"
+        # section, even if this is a "develop" or "install" command, or some
+        # other embedding.
+        self._dry_run = None
+        self.verbose = self.distribution.verbose
+        self.distribution._set_command_options(
+            self, self.distribution.get_option_dict('easy_install')
+        )
+
+    def delete_blockers(self, blockers):
+        extant_blockers = (
+            filename for filename in blockers
+            if os.path.exists(filename) or os.path.islink(filename)
+        )
+        list(map(self._delete_path, extant_blockers))
+
+    def _delete_path(self, path):
+        log.info("Deleting %s", path)
+        if self.dry_run:
+            return
+
+        is_tree = os.path.isdir(path) and not os.path.islink(path)
+        remover = rmtree if is_tree else os.unlink
+        remover(path)
+
+    @staticmethod
+    def _render_version():
+        """
+        Render the Setuptools version and installation details, then exit.
+        """
+        ver = sys.version[:3]
+        dist = get_distribution('setuptools')
+        tmpl = 'setuptools {dist.version} from {dist.location} (Python {ver})'
+        print(tmpl.format(**locals()))
+        raise SystemExit()
+
+    def finalize_options(self):
+        self.version and self._render_version()
+
+        py_version = sys.version.split()[0]
+        prefix, exec_prefix = get_config_vars('prefix', 'exec_prefix')
+
+        self.config_vars = {
+            'dist_name': self.distribution.get_name(),
+            'dist_version': self.distribution.get_version(),
+            'dist_fullname': self.distribution.get_fullname(),
+            'py_version': py_version,
+            'py_version_short': py_version[0:3],
+            'py_version_nodot': py_version[0] + py_version[2],
+            'sys_prefix': prefix,
+            'prefix': prefix,
+            'sys_exec_prefix': exec_prefix,
+            'exec_prefix': exec_prefix,
+            # Only python 3.2+ has abiflags
+            'abiflags': getattr(sys, 'abiflags', ''),
+        }
+
+        if site.ENABLE_USER_SITE:
+            self.config_vars['userbase'] = self.install_userbase
+            self.config_vars['usersite'] = self.install_usersite
+
+        self._fix_install_dir_for_user_site()
+
+        self.expand_basedirs()
+        self.expand_dirs()
+
+        if self.install_layout:
+            if not self.install_layout.lower() in ['deb']:
+                raise DistutilsOptionError("unknown value for --install-layout")
+            self.install_layout = self.install_layout.lower()
+
+            import sysconfig
+            if sys.version_info[:2] >= (3, 3):
+                self.multiarch = sysconfig.get_config_var('MULTIARCH')
+
+        self._expand(
+            'install_dir', 'script_dir', 'build_directory',
+            'site_dirs',
+        )
+        # If a non-default installation directory was specified, default the
+        # script directory to match it.
+        if self.script_dir is None:
+            self.script_dir = self.install_dir
+
+        if self.no_find_links is None:
+            self.no_find_links = False
+
+        # Let install_dir get set by install_lib command, which in turn
+        # gets its info from the install command, and takes into account
+        # --prefix and --home and all that other crud.
+        self.set_undefined_options(
+            'install_lib', ('install_dir', 'install_dir')
+        )
+        # Likewise, set default script_dir from 'install_scripts.install_dir'
+        self.set_undefined_options(
+            'install_scripts', ('install_dir', 'script_dir')
+        )
+
+        if self.user and self.install_purelib:
+            self.install_dir = self.install_purelib
+            self.script_dir = self.install_scripts
+
+        if self.prefix == '/usr' and not self.force_installation_into_system_dir:
+            raise DistutilsOptionError("""installation into /usr
+
+Trying to install into the system-managed parts of the file system. Please
+consider installing to another location, or use the
+--force-installation-into-system-dir option to override this check.
+""")
+
+        # default --record from the install command
+        self.set_undefined_options('install', ('record', 'record'))
+        # Should this be moved to the if statement below? It's not used
+        # elsewhere
+        # use a set so the membership tests below can run more than once on Python 3
+        normpath = set(map(normalize_path, sys.path))
+        self.all_site_dirs = get_site_dirs()
+        if self.site_dirs is not None:
+            site_dirs = [
+                os.path.expanduser(s.strip()) for s in
+                self.site_dirs.split(',')
+            ]
+            for d in site_dirs:
+                if not os.path.isdir(d):
+                    log.warn("%s (in --site-dirs) does not exist", d)
+                elif normalize_path(d) not in normpath:
+                    raise DistutilsOptionError(
+                        d + " (in --site-dirs) is not on sys.path"
+                    )
+                else:
+                    self.all_site_dirs.append(normalize_path(d))
+        if not self.editable:
+            self.check_site_dir()
+        self.index_url = self.index_url or "https://pypi.python.org/simple"
+        self.shadow_path = self.all_site_dirs[:]
+        for path_item in self.install_dir, normalize_path(self.script_dir):
+            if path_item not in self.shadow_path:
+                self.shadow_path.insert(0, path_item)
+
+        if self.allow_hosts is not None:
+            hosts = [s.strip() for s in self.allow_hosts.split(',')]
+        else:
+            hosts = ['*']
+        if self.package_index is None:
+            self.package_index = self.create_index(
+                self.index_url, search_path=self.shadow_path, hosts=hosts,
+            )
+        self.local_index = Environment(self.shadow_path + sys.path)
+
+        if self.find_links is not None:
+            if isinstance(self.find_links, six.string_types):
+                self.find_links = self.find_links.split()
+        else:
+            self.find_links = []
+        if self.local_snapshots_ok:
+            self.package_index.scan_egg_links(self.shadow_path + sys.path)
+        if not self.no_find_links:
+            self.package_index.add_find_links(self.find_links)
+        self.set_undefined_options('install_lib', ('optimize', 'optimize'))
+        if not isinstance(self.optimize, int):
+            try:
+                self.optimize = int(self.optimize)
+                if not (0 <= self.optimize <= 2):
+                    raise ValueError
+            except ValueError:
+                raise DistutilsOptionError("--optimize must be 0, 1, or 2")
+
+        if self.editable and not self.build_directory:
+            raise DistutilsArgError(
+                "Must specify a build directory (-b) when using --editable"
+            )
+        if not self.args:
+            raise DistutilsArgError(
+                "No urls, filenames, or requirements specified (see --help)")
+
+        self.outputs = []
+
+    def _fix_install_dir_for_user_site(self):
+        """
+        Fix the install_dir if "--user" was used.
+        """
+        if not self.user or not site.ENABLE_USER_SITE:
+            return
+
+        self.create_home_path()
+        if self.install_userbase is None:
+            msg = "User base directory is not specified"
+            raise DistutilsPlatformError(msg)
+        self.install_base = self.install_platbase = self.install_userbase
+        scheme_name = os.name.replace('posix', 'unix') + '_user'
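+        # e.g. 'unix_user' on POSIX systems or 'nt_user' on Windows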
+        self.select_scheme(scheme_name)
+
+    def _expand_attrs(self, attrs):
+        for attr in attrs:
+            val = getattr(self, attr)
+            if val is not None:
+                if os.name == 'posix' or os.name == 'nt':
+                    val = os.path.expanduser(val)
+                val = subst_vars(val, self.config_vars)
+                setattr(self, attr, val)
+
+    def expand_basedirs(self):
+        """Calls `os.path.expanduser` on install_base, install_platbase and
+        root."""
+        self._expand_attrs(['install_base', 'install_platbase', 'root'])
+
+    def expand_dirs(self):
+        """Calls `os.path.expanduser` on install dirs."""
+        dirs = [
+            'install_purelib',
+            'install_platlib',
+            'install_lib',
+            'install_headers',
+            'install_scripts',
+            'install_data',
+        ]
+        self._expand_attrs(dirs)
+
+    def run(self):
+        if self.verbose != self.distribution.verbose:
+            log.set_verbosity(self.verbose)
+        try:
+            for spec in self.args:
+                self.easy_install(spec, not self.no_deps)
+            if self.record:
+                outputs = list(sorted(self.outputs))
+                if self.root:  # strip any package prefix
+                    root_len = len(self.root)
+                    for counter in range(len(outputs)):
+                        outputs[counter] = outputs[counter][root_len:]
+                from distutils import file_util
+
+                self.execute(
+                    file_util.write_file, (self.record, outputs),
+                    "writing list of installed files to '%s'" %
+                    self.record
+                )
+            self.warn_deprecated_options()
+        finally:
+            log.set_verbosity(self.distribution.verbose)
+
+    def pseudo_tempname(self):
+        """Return a pseudo-tempname base in the install directory.
+        This code is intentionally naive; if a malicious party can write to
+        the target directory you're already in deep doodoo.
+        """
+        try:
+            pid = os.getpid()
+        except Exception:
+            pid = random.randint(0, sys.maxsize)
+        return os.path.join(self.install_dir, "test-easy-install-%s" % pid)
+
+    def warn_deprecated_options(self):
+        pass
+
+    def check_site_dir(self):
+        """Verify that self.install_dir is .pth-capable dir, if needed"""
+
+        instdir = normalize_path(self.install_dir)
+        pth_file = os.path.join(instdir, 'easy-install.pth')
+
+        # Is it a configured, PYTHONPATH, implicit, or explicit site dir?
+        is_site_dir = instdir in self.all_site_dirs
+
+        if not is_site_dir and not self.multi_version:
+            # No?  Then directly test whether it does .pth file processing
+            is_site_dir = self.check_pth_processing()
+        else:
+            # make sure we can write to target dir
+            testfile = self.pseudo_tempname() + '.write-test'
+            test_exists = os.path.exists(testfile)
+            try:
+                if test_exists:
+                    os.unlink(testfile)
+                open(testfile, 'w').close()
+                os.unlink(testfile)
+            except (OSError, IOError):
+                self.cant_write_to_target()
+
+        if not is_site_dir and not self.multi_version:
+            # Can't install non-multi to non-site dir
+            raise DistutilsError(self.no_default_version_msg())
+
+        if is_site_dir:
+            if self.pth_file is None:
+                self.pth_file = PthDistributions(pth_file, self.all_site_dirs)
+        else:
+            self.pth_file = None
+
+        if instdir not in map(normalize_path, _pythonpath()):
+            # only PYTHONPATH dirs need a site.py, so pretend it's there
+            self.sitepy_installed = True
+        elif self.multi_version and not os.path.exists(pth_file):
+            self.sitepy_installed = True  # don't need site.py in this case
+            self.pth_file = None  # and don't create a .pth file
+        self.install_dir = instdir
+
+    __cant_write_msg = textwrap.dedent("""
+        can't create or remove files in install directory
+
+        The following error occurred while trying to add or remove files in the
+        installation directory:
+
+            %s
+
+        The installation directory you specified (via --install-dir, --prefix, or
+        the distutils default setting) was:
+
+            %s
+        """).lstrip()
+
+    __not_exists_id = textwrap.dedent("""
+        This directory does not currently exist.  Please create it and try again, or
+        choose a different installation directory (using the -d or --install-dir
+        option).
+        """).lstrip()
+
+    __access_msg = textwrap.dedent("""
+        Perhaps your account does not have write access to this directory?  If the
+        installation directory is a system-owned directory, you may need to sign in
+        as the administrator or "root" account.  If you do not have administrative
+        access to this machine, you may wish to choose a different installation
+        directory, preferably one that is listed in your PYTHONPATH environment
+        variable.
+
+        For information on other options, you may wish to consult the
+        documentation at:
+
+          https://setuptools.readthedocs.io/en/latest/easy_install.html
+
+        Please make the appropriate changes for your system and try again.
+        """).lstrip()
+
+    def cant_write_to_target(self):
+        msg = self.__cant_write_msg % (sys.exc_info()[1], self.install_dir,)
+
+        if not os.path.exists(self.install_dir):
+            msg += '\n' + self.__not_exists_id
+        else:
+            msg += '\n' + self.__access_msg
+        raise DistutilsError(msg)
+
+    def check_pth_processing(self):
+        """Empirically verify whether .pth files are supported in inst. dir"""
+        instdir = self.install_dir
+        log.info("Checking .pth file support in %s", instdir)
+        pth_file = self.pseudo_tempname() + ".pth"
+        ok_file = pth_file + '.ok'
+        ok_exists = os.path.exists(ok_file)
+        tmpl = _one_liner("""
+            import os
+            f = open({ok_file!r}, 'w')
+            f.write('OK')
+            f.close()
+            """) + '\n'
+        try:
+            if ok_exists:
+                os.unlink(ok_file)
+            dirname = os.path.dirname(ok_file)
+            pkg_resources.py31compat.makedirs(dirname, exist_ok=True)
+            f = open(pth_file, 'w')
+        except (OSError, IOError):
+            self.cant_write_to_target()
+        else:
+            try:
+                f.write(tmpl.format(**locals()))
+                f.close()
+                f = None
+                executable = sys.executable
+                if os.name == 'nt':
+                    dirname, basename = os.path.split(executable)
+                    alt = os.path.join(dirname, 'pythonw.exe')
+                    use_alt = (
+                        basename.lower() == 'python.exe' and
+                        os.path.exists(alt)
+                    )
+                    if use_alt:
+                        # use pythonw.exe to avoid opening a console window
+                        executable = alt
+
+                from distutils.spawn import spawn
+
+                spawn([executable, '-E', '-c', 'pass'], 0)
+
+                if os.path.exists(ok_file):
+                    log.info(
+                        "TEST PASSED: %s appears to support .pth files",
+                        instdir
+                    )
+                    return True
+            finally:
+                if f:
+                    f.close()
+                if os.path.exists(ok_file):
+                    os.unlink(ok_file)
+                if os.path.exists(pth_file):
+                    os.unlink(pth_file)
+        if not self.multi_version:
+            log.warn("TEST FAILED: %s does NOT support .pth files", instdir)
+        return False
+
+    def install_egg_scripts(self, dist):
+        """Write all the scripts for `dist`, unless scripts are excluded"""
+        if not self.exclude_scripts and dist.metadata_isdir('scripts'):
+            for script_name in dist.metadata_listdir('scripts'):
+                if dist.metadata_isdir('scripts/' + script_name):
+                    # The "script" is a directory, likely a Python 3
+                    # __pycache__ directory, so skip it.
+                    continue
+                self.install_script(
+                    dist, script_name,
+                    dist.get_metadata('scripts/' + script_name)
+                )
+        self.install_wrapper_scripts(dist)
+
+    def add_output(self, path):
+        if os.path.isdir(path):
+            for base, dirs, files in os.walk(path):
+                for filename in files:
+                    self.outputs.append(os.path.join(base, filename))
+        else:
+            self.outputs.append(path)
+
+    def not_editable(self, spec):
+        if self.editable:
+            raise DistutilsArgError(
+                "Invalid argument %r: you can't use filenames or URLs "
+                "with --editable (except via the --find-links option)."
+                % (spec,)
+            )
+
+    def check_editable(self, spec):
+        if not self.editable:
+            return
+
+        if os.path.exists(os.path.join(self.build_directory, spec.key)):
+            raise DistutilsArgError(
+                "%r already exists in %s; can't do a checkout there" %
+                (spec.key, self.build_directory)
+            )
+
+    @contextlib.contextmanager
+    def _tmpdir(self):
+        tmpdir = tempfile.mkdtemp(prefix=six.u("easy_install-"))
+        try:
+            # cast to str as workaround for #709 and #710 and #712
+            yield str(tmpdir)
+        finally:
+            os.path.exists(tmpdir) and rmtree(rmtree_safe(tmpdir))
+
+    def easy_install(self, spec, deps=False):
+        if not self.editable:
+            self.install_site_py()
+
+        with self._tmpdir() as tmpdir:
+            if not isinstance(spec, Requirement):
+                if URL_SCHEME(spec):
+                    # It's a url, download it to tmpdir and process
+                    self.not_editable(spec)
+                    dl = self.package_index.download(spec, tmpdir)
+                    return self.install_item(None, dl, tmpdir, deps, True)
+
+                elif os.path.exists(spec):
+                    # Existing file or directory, just process it directly
+                    self.not_editable(spec)
+                    return self.install_item(None, spec, tmpdir, deps, True)
+                else:
+                    spec = parse_requirement_arg(spec)
+
+            self.check_editable(spec)
+            dist = self.package_index.fetch_distribution(
+                spec, tmpdir, self.upgrade, self.editable,
+                not self.always_copy, self.local_index
+            )
+            if dist is None:
+                msg = "Could not find suitable distribution for %r" % spec
+                if self.always_copy:
+                    msg += " (--always-copy skips system and development eggs)"
+                raise DistutilsError(msg)
+            elif dist.precedence == DEVELOP_DIST:
+                # .egg-info dists don't need installing, just process deps
+                self.process_distribution(spec, dist, deps, "Using")
+                return dist
+            else:
+                return self.install_item(spec, dist.location, tmpdir, deps)
+
+    def install_item(self, spec, download, tmpdir, deps, install_needed=False):
+
+        # Installation is also needed if file in tmpdir or is not an egg
+        install_needed = install_needed or self.always_copy
+        install_needed = install_needed or os.path.dirname(download) == tmpdir
+        install_needed = install_needed or not download.endswith('.egg')
+        install_needed = install_needed or (
+            self.always_copy_from is not None and
+            os.path.dirname(normalize_path(download)) ==
+            normalize_path(self.always_copy_from)
+        )
+
+        if spec and not install_needed:
+            # at this point, we know it's a local .egg, we just don't know if
+            # it's already installed.
+            for dist in self.local_index[spec.project_name]:
+                if dist.location == download:
+                    break
+            else:
+                install_needed = True  # it's not in the local index
+
+        log.info("Processing %s", os.path.basename(download))
+
+        if install_needed:
+            dists = self.install_eggs(spec, download, tmpdir)
+            for dist in dists:
+                self.process_distribution(spec, dist, deps)
+        else:
+            dists = [self.egg_distribution(download)]
+            self.process_distribution(spec, dists[0], deps, "Using")
+
+        if spec is not None:
+            for dist in dists:
+                if dist in spec:
+                    return dist
+
+    def select_scheme(self, name):
+        """Sets the install directories by applying the install schemes."""
+        # it's the caller's problem if they supply a bad name!
+        scheme = INSTALL_SCHEMES[name]
+        for key in SCHEME_KEYS:
+            attrname = 'install_' + key
+            if getattr(self, attrname) is None:
+                setattr(self, attrname, scheme[key])
+
+    def process_distribution(self, requirement, dist, deps=True, *info):
+        self.update_pth(dist)
+        self.package_index.add(dist)
+        if dist in self.local_index[dist.key]:
+            self.local_index.remove(dist)
+        self.local_index.add(dist)
+        self.install_egg_scripts(dist)
+        self.installed_projects[dist.key] = dist
+        log.info(self.installation_report(requirement, dist, *info))
+        if (dist.has_metadata('dependency_links.txt') and
+                not self.no_find_links):
+            self.package_index.add_find_links(
+                dist.get_metadata_lines('dependency_links.txt')
+            )
+        if not deps and not self.always_copy:
+            return
+        elif requirement is not None and dist.key != requirement.key:
+            log.warn("Skipping dependencies for %s", dist)
+            return  # XXX this is not the distribution we were looking for
+        elif requirement is None or dist not in requirement:
+            # if we wound up with a different version, resolve what we've got
+            distreq = dist.as_requirement()
+            requirement = Requirement(str(distreq))
+        log.info("Processing dependencies for %s", requirement)
+        try:
+            distros = WorkingSet([]).resolve(
+                [requirement], self.local_index, self.easy_install
+            )
+        except DistributionNotFound as e:
+            raise DistutilsError(str(e))
+        except VersionConflict as e:
+            raise DistutilsError(e.report())
+        if self.always_copy or self.always_copy_from:
+            # Force all the relevant distros to be copied or activated
+            for dist in distros:
+                if dist.key not in self.installed_projects:
+                    self.easy_install(dist.as_requirement())
+        log.info("Finished processing dependencies for %s", requirement)
+
+    def should_unzip(self, dist):
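+        # An explicit zip_ok setting (--zip-ok / --always-unzip) wins; otherwise
+        # unzip unless the egg explicitly declares itself zip-safe.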
+        if self.zip_ok is not None:
+            return not self.zip_ok
+        if dist.has_metadata('not-zip-safe'):
+            return True
+        if not dist.has_metadata('zip-safe'):
+            return True
+        return False
+
+    def maybe_move(self, spec, dist_filename, setup_base):
+        dst = os.path.join(self.build_directory, spec.key)
+        if os.path.exists(dst):
+            msg = (
+                "%r already exists in %s; build directory %s will not be kept"
+            )
+            log.warn(msg, spec.key, self.build_directory, setup_base)
+            return setup_base
+        if os.path.isdir(dist_filename):
+            setup_base = dist_filename
+        else:
+            if os.path.dirname(dist_filename) == setup_base:
+                os.unlink(dist_filename)  # get it out of the tmp dir
+            contents = os.listdir(setup_base)
+            if len(contents) == 1:
+                dist_filename = os.path.join(setup_base, contents[0])
+                if os.path.isdir(dist_filename):
+                    # if the only thing there is a directory, move it instead
+                    setup_base = dist_filename
+        ensure_directory(dst)
+        shutil.move(setup_base, dst)
+        return dst
+
+    def install_wrapper_scripts(self, dist):
+        if self.exclude_scripts:
+            return
+        for args in ScriptWriter.best().get_args(dist):
+            self.write_script(*args)
+
+    def install_script(self, dist, script_name, script_text, dev_path=None):
+        """Generate a legacy script wrapper and install it"""
+        spec = str(dist.as_requirement())
+        is_script = is_python_script(script_text, script_name)
+
+        if is_script:
+            body = self._load_template(dev_path) % locals()
+            script_text = ScriptWriter.get_header(script_text) + body
+        self.write_script(script_name, _to_ascii(script_text), 'b')
+
+    @staticmethod
+    def _load_template(dev_path):
+        """
+        There are a couple of template scripts in the package. This
+        function loads one of them and prepares it for use.
+        """
+        # See https://github.com/pypa/setuptools/issues/134 for info
+        # on script file naming and downstream issues with SVR4
+        name = 'script.tmpl'
+        if dev_path:
+            name = name.replace('.tmpl', ' (dev).tmpl')
+
+        raw_bytes = resource_string('setuptools', name)
+        return raw_bytes.decode('utf-8')
+
+    def write_script(self, script_name, contents, mode="t", blockers=()):
+        """Write an executable file to the scripts directory"""
+        self.delete_blockers(  # clean up old .py/.pyw w/o a script
+            [os.path.join(self.script_dir, x) for x in blockers]
+        )
+        log.info("Installing %s script to %s", script_name, self.script_dir)
+        target = os.path.join(self.script_dir, script_name)
+        self.add_output(target)
+
+        if self.dry_run:
+            return
+
+        mask = current_umask()
+        ensure_directory(target)
+        if os.path.exists(target):
+            os.unlink(target)
+        with open(target, "w" + mode) as f:
+            f.write(contents)
+        chmod(target, 0o777 - mask)
+
+    def install_eggs(self, spec, dist_filename, tmpdir):
+        # .egg dirs or files are already built, so just return them
+        if dist_filename.lower().endswith('.egg'):
+            return [self.install_egg(dist_filename, tmpdir)]
+        elif dist_filename.lower().endswith('.exe'):
+            return [self.install_exe(dist_filename, tmpdir)]
+        elif dist_filename.lower().endswith('.whl'):
+            return [self.install_wheel(dist_filename, tmpdir)]
+
+        # Anything else, try to extract and build
+        setup_base = tmpdir
+        if os.path.isfile(dist_filename) and not dist_filename.endswith('.py'):
+            unpack_archive(dist_filename, tmpdir, self.unpack_progress)
+        elif os.path.isdir(dist_filename):
+            setup_base = os.path.abspath(dist_filename)
+
+        if (setup_base.startswith(tmpdir)  # something we downloaded
+                and self.build_directory and spec is not None):
+            setup_base = self.maybe_move(spec, dist_filename, setup_base)
+
+        # Find the setup.py file
+        setup_script = os.path.join(setup_base, 'setup.py')
+
+        if not os.path.exists(setup_script):
+            setups = glob(os.path.join(setup_base, '*', 'setup.py'))
+            if not setups:
+                raise DistutilsError(
+                    "Couldn't find a setup script in %s" %
+                    os.path.abspath(dist_filename)
+                )
+            if len(setups) > 1:
+                raise DistutilsError(
+                    "Multiple setup scripts in %s" %
+                    os.path.abspath(dist_filename)
+                )
+            setup_script = setups[0]
+
+        # Now run it, and return the result
+        if self.editable:
+            log.info(self.report_editable(spec, setup_script))
+            return []
+        else:
+            return self.build_and_install(setup_script, setup_base)
+
+    def egg_distribution(self, egg_path):
+        if os.path.isdir(egg_path):
+            metadata = PathMetadata(egg_path, os.path.join(egg_path,
+                                                           'EGG-INFO'))
+        else:
+            metadata = EggMetadata(zipimport.zipimporter(egg_path))
+        return Distribution.from_filename(egg_path, metadata=metadata)
+
+    def install_egg(self, egg_path, tmpdir):
+        destination = os.path.join(
+            self.install_dir,
+            os.path.basename(egg_path),
+        )
+        destination = os.path.abspath(destination)
+        if not self.dry_run:
+            ensure_directory(destination)
+
+        dist = self.egg_distribution(egg_path)
+        if not samefile(egg_path, destination):
+            if os.path.isdir(destination) and not os.path.islink(destination):
+                dir_util.remove_tree(destination, dry_run=self.dry_run)
+            elif os.path.exists(destination):
+                self.execute(
+                    os.unlink,
+                    (destination,),
+                    "Removing " + destination,
+                )
+            try:
+                new_dist_is_zipped = False
+                if os.path.isdir(egg_path):
+                    if egg_path.startswith(tmpdir):
+                        f, m = shutil.move, "Moving"
+                    else:
+                        f, m = shutil.copytree, "Copying"
+                elif self.should_unzip(dist):
+                    self.mkpath(destination)
+                    f, m = self.unpack_and_compile, "Extracting"
+                else:
+                    new_dist_is_zipped = True
+                    if egg_path.startswith(tmpdir):
+                        f, m = shutil.move, "Moving"
+                    else:
+                        f, m = shutil.copy2, "Copying"
+                self.execute(
+                    f,
+                    (egg_path, destination),
+                    (m + " %s to %s") % (
+                        os.path.basename(egg_path),
+                        os.path.dirname(destination)
+                    ),
+                )
+                update_dist_caches(
+                    destination,
+                    fix_zipimporter_caches=new_dist_is_zipped,
+                )
+            except Exception:
+                update_dist_caches(destination, fix_zipimporter_caches=False)
+                raise
+
+        self.add_output(destination)
+        return self.egg_distribution(destination)
+
+    def install_exe(self, dist_filename, tmpdir):
+        # See if it's valid, get data
+        cfg = extract_wininst_cfg(dist_filename)
+        if cfg is None:
+            raise DistutilsError(
+                "%s is not a valid distutils Windows .exe" % dist_filename
+            )
+        # Create a dummy distribution object until we build the real distro
+        dist = Distribution(
+            None,
+            project_name=cfg.get('metadata', 'name'),
+            version=cfg.get('metadata', 'version'), platform=get_platform(),
+        )
+
+        # Convert the .exe to an unpacked egg
+        egg_path = os.path.join(tmpdir, dist.egg_name() + '.egg')
+        dist.location = egg_path
+        egg_tmp = egg_path + '.tmp'
+        _egg_info = os.path.join(egg_tmp, 'EGG-INFO')
+        pkg_inf = os.path.join(_egg_info, 'PKG-INFO')
+        ensure_directory(pkg_inf)  # make sure EGG-INFO dir exists
+        dist._provider = PathMetadata(egg_tmp, _egg_info)  # XXX
+        self.exe_to_egg(dist_filename, egg_tmp)
+
+        # Write EGG-INFO/PKG-INFO
+        if not os.path.exists(pkg_inf):
+            f = open(pkg_inf, 'w')
+            f.write('Metadata-Version: 1.0\n')
+            for k, v in cfg.items('metadata'):
+                if k != 'target_version':
+                    f.write('%s: %s\n' % (k.replace('_', '-').title(), v))
+            f.close()
+        script_dir = os.path.join(_egg_info, 'scripts')
+        # delete entry-point scripts to avoid duping
+        self.delete_blockers([
+            os.path.join(script_dir, args[0])
+            for args in ScriptWriter.get_args(dist)
+        ])
+        # Build .egg file from tmpdir
+        bdist_egg.make_zipfile(
+            egg_path, egg_tmp, verbose=self.verbose, dry_run=self.dry_run,
+        )
+        # install the .egg
+        return self.install_egg(egg_path, tmpdir)
+
+    def exe_to_egg(self, dist_filename, egg_tmp):
+        """Extract a bdist_wininst to the directories an egg would use"""
+        # Check for .pth file and set up prefix translations
+        prefixes = get_exe_prefixes(dist_filename)
+        to_compile = []
+        native_libs = []
+        top_level = {}
+
+        def process(src, dst):
+            s = src.lower()
+            for old, new in prefixes:
+                if s.startswith(old):
+                    src = new + src[len(old):]
+                    parts = src.split('/')
+                    dst = os.path.join(egg_tmp, *parts)
+                    dl = dst.lower()
+                    if dl.endswith('.pyd') or dl.endswith('.dll'):
+                        parts[-1] = bdist_egg.strip_module(parts[-1])
+                        top_level[os.path.splitext(parts[0])[0]] = 1
+                        native_libs.append(src)
+                    elif dl.endswith('.py') and old != 'SCRIPTS/':
+                        top_level[os.path.splitext(parts[0])[0]] = 1
+                        to_compile.append(dst)
+                    return dst
+            if not src.endswith('.pth'):
+                log.warn("WARNING: can't process %s", src)
+            return None
+
+        # extract, tracking .pyd/.dll->native_libs and .py -> to_compile
+        unpack_archive(dist_filename, egg_tmp, process)
+        stubs = []
+        for res in native_libs:
+            if res.lower().endswith('.pyd'):  # create stubs for .pyd's
+                parts = res.split('/')
+                resource = parts[-1]
+                parts[-1] = bdist_egg.strip_module(parts[-1]) + '.py'
+                pyfile = os.path.join(egg_tmp, *parts)
+                to_compile.append(pyfile)
+                stubs.append(pyfile)
+                bdist_egg.write_stub(resource, pyfile)
+        self.byte_compile(to_compile)  # compile .py's
+        bdist_egg.write_safety_flag(
+            os.path.join(egg_tmp, 'EGG-INFO'),
+            bdist_egg.analyze_egg(egg_tmp, stubs))  # write zip-safety flag
+
+        for name in 'top_level', 'native_libs':
+            if locals()[name]:
+                txt = os.path.join(egg_tmp, 'EGG-INFO', name + '.txt')
+                if not os.path.exists(txt):
+                    f = open(txt, 'w')
+                    f.write('\n'.join(locals()[name]) + '\n')
+                    f.close()
+
+    def install_wheel(self, wheel_path, tmpdir):
+        wheel = Wheel(wheel_path)
+        assert wheel.is_compatible()
+        destination = os.path.join(self.install_dir, wheel.egg_name())
+        destination = os.path.abspath(destination)
+        if not self.dry_run:
+            ensure_directory(destination)
+        if os.path.isdir(destination) and not os.path.islink(destination):
+            dir_util.remove_tree(destination, dry_run=self.dry_run)
+        elif os.path.exists(destination):
+            self.execute(
+                os.unlink,
+                (destination,),
+                "Removing " + destination,
+            )
+        try:
+            self.execute(
+                wheel.install_as_egg,
+                (destination,),
+                ("Installing %s to %s") % (
+                    os.path.basename(wheel_path),
+                    os.path.dirname(destination)
+                ),
+            )
+        finally:
+            update_dist_caches(destination, fix_zipimporter_caches=False)
+        self.add_output(destination)
+        return self.egg_distribution(destination)
+
+    __mv_warning = textwrap.dedent("""
+        Because this distribution was installed --multi-version, before you can
+        import modules from this package in an application, you will need to
+        'import pkg_resources' and then use a 'require()' call similar to one of
+        these examples, in order to select the desired version:
+
+            pkg_resources.require("%(name)s")  # latest installed version
+            pkg_resources.require("%(name)s==%(version)s")  # this exact version
+            pkg_resources.require("%(name)s>=%(version)s")  # this version or higher
+        """).lstrip()
+
+    __id_warning = textwrap.dedent("""
+        Note also that the installation directory must be on sys.path at runtime for
+        this to work.  (e.g. by being the application's script directory, by being on
+        PYTHONPATH, or by being added to sys.path by your code.)
+        """)
+
+    def installation_report(self, req, dist, what="Installed"):
+        """Helpful installation message for display to package users"""
+        msg = "\n%(what)s %(eggloc)s%(extras)s"
+        if self.multi_version and not self.no_report:
+            msg += '\n' + self.__mv_warning
+            if self.install_dir not in map(normalize_path, sys.path):
+                msg += '\n' + self.__id_warning
+
+        eggloc = dist.location
+        name = dist.project_name
+        version = dist.version
+        extras = ''  # TODO: self.report_extras(req, dist)
+        return msg % locals()
+
+    __editable_msg = textwrap.dedent("""
+        Extracted editable version of %(spec)s to %(dirname)s
+
+        If it uses setuptools in its setup script, you can activate it in
+        "development" mode by going to that directory and running::
+
+            %(python)s setup.py develop
+
+        See the setuptools documentation for the "develop" command for more info.
+        """).lstrip()
+
+    def report_editable(self, spec, setup_script):
+        dirname = os.path.dirname(setup_script)
+        python = sys.executable
+        return '\n' + self.__editable_msg % locals()
+
+    def run_setup(self, setup_script, setup_base, args):
+        sys.modules.setdefault('distutils.command.bdist_egg', bdist_egg)
+        sys.modules.setdefault('distutils.command.egg_info', egg_info)
+
+        args = list(args)
+        if self.verbose > 2:
+            v = 'v' * (self.verbose - 1)
+            args.insert(0, '-' + v)
+        elif self.verbose < 2:
+            args.insert(0, '-q')
+        if self.dry_run:
+            args.insert(0, '-n')
+        log.info(
+            "Running %s %s", setup_script[len(setup_base) + 1:], ' '.join(args)
+        )
+        try:
+            run_setup(setup_script, args)
+        except SystemExit as v:
+            raise DistutilsError("Setup script exited with %s" % (v.args[0],))
+
+    def build_and_install(self, setup_script, setup_base):
+        args = ['bdist_egg', '--dist-dir']
+
+        dist_dir = tempfile.mkdtemp(
+            prefix='egg-dist-tmp-', dir=os.path.dirname(setup_script)
+        )
+        try:
+            self._set_fetcher_options(os.path.dirname(setup_script))
+            args.append(dist_dir)
+
+            self.run_setup(setup_script, setup_base, args)
+            all_eggs = Environment([dist_dir])
+            eggs = []
+            for key in all_eggs:
+                for dist in all_eggs[key]:
+                    eggs.append(self.install_egg(dist.location, setup_base))
+            if not eggs and not self.dry_run:
+                log.warn("No eggs found in %s (setup script problem?)",
+                         dist_dir)
+            return eggs
+        finally:
+            rmtree(dist_dir)
+            log.set_verbosity(self.verbose)  # restore our log verbosity
+
+    def _set_fetcher_options(self, base):
+        """
+        When easy_install is about to run bdist_egg on a source dist, that
+        source dist might have 'setup_requires' directives, requiring
+        additional fetching. Ensure the fetcher options given to easy_install
+        are available to that command as well.
+        """
+        # find the fetch options from easy_install and write them out
+        # to the setup.cfg file.
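+        # For illustration only (values are hypothetical): options such as
+        #   index-url = https://example.org/simple
+        #   find-links = ./wheels
+        # end up under the [easy_install] section of that setup.cfg.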
+        ei_opts = self.distribution.get_option_dict('easy_install').copy()
+        fetch_directives = (
+            'find_links', 'site_dirs', 'index_url', 'optimize',
+            'site_dirs', 'allow_hosts',
+        )
+        fetch_options = {}
+        for key, val in ei_opts.items():
+            if key not in fetch_directives:
+                continue
+            fetch_options[key.replace('_', '-')] = val[1]
+        # create a settings dictionary suitable for `edit_config`
+        settings = dict(easy_install=fetch_options)
+        cfg_filename = os.path.join(base, 'setup.cfg')
+        setopt.edit_config(cfg_filename, settings)
+
+    def update_pth(self, dist):
+        if self.pth_file is None:
+            return
+
+        for d in self.pth_file[dist.key]:  # drop old entries
+            if self.multi_version or d.location != dist.location:
+                log.info("Removing %s from easy-install.pth file", d)
+                self.pth_file.remove(d)
+                if d.location in self.shadow_path:
+                    self.shadow_path.remove(d.location)
+
+        if not self.multi_version:
+            if dist.location in self.pth_file.paths:
+                log.info(
+                    "%s is already the active version in easy-install.pth",
+                    dist,
+                )
+            else:
+                log.info("Adding %s to easy-install.pth file", dist)
+                self.pth_file.add(dist)  # add new entry
+                if dist.location not in self.shadow_path:
+                    self.shadow_path.append(dist.location)
+
+        if not self.dry_run:
+
+            self.pth_file.save()
+
+            if dist.key == 'setuptools':
+                # Ensure that setuptools itself never becomes unavailable!
+                # XXX should this check for latest version?
+                filename = os.path.join(self.install_dir, 'setuptools.pth')
+                if os.path.islink(filename):
+                    os.unlink(filename)
+                f = open(filename, 'wt')
+                f.write(self.pth_file.make_relative(dist.location) + '\n')
+                f.close()
+
+    def unpack_progress(self, src, dst):
+        # Progress filter for unpacking
+        log.debug("Unpacking %s to %s", src, dst)
+        return dst  # only unpack-and-compile skips files for dry run
+
+    def unpack_and_compile(self, egg_path, destination):
+        to_compile = []
+        to_chmod = []
+
+        def pf(src, dst):
+            if dst.endswith('.py') and not src.startswith('EGG-INFO/'):
+                to_compile.append(dst)
+            elif dst.endswith('.dll') or dst.endswith('.so'):
+                to_chmod.append(dst)
+            self.unpack_progress(src, dst)
+            return not self.dry_run and dst or None
+
+        unpack_archive(egg_path, destination, pf)
+        self.byte_compile(to_compile)
+        if not self.dry_run:
+            for f in to_chmod:
+                mode = ((os.stat(f)[stat.ST_MODE]) | 0o555) & 0o7755
+                chmod(f, mode)
+
+    def byte_compile(self, to_compile):
+        if sys.dont_write_bytecode:
+            return
+
+        from distutils.util import byte_compile
+
+        try:
+            # try to make the byte compile messages quieter
+            log.set_verbosity(self.verbose - 1)
+
+            byte_compile(to_compile, optimize=0, force=1, dry_run=self.dry_run)
+            if self.optimize:
+                byte_compile(
+                    to_compile, optimize=self.optimize, force=1,
+                    dry_run=self.dry_run,
+                )
+        finally:
+            log.set_verbosity(self.verbose)  # restore original verbosity
+
+    __no_default_msg = textwrap.dedent("""
+        bad install directory or PYTHONPATH
+
+        You are attempting to install a package to a directory that is not
+        on PYTHONPATH and which Python does not read ".pth" files from.  The
+        installation directory you specified (via --install-dir, --prefix, or
+        the distutils default setting) was:
+
+            %s
+
+        and your PYTHONPATH environment variable currently contains:
+
+            %r
+
+        Here are some of your options for correcting the problem:
+
+        * You can choose a different installation directory, i.e., one that is
+          on PYTHONPATH or supports .pth files
+
+        * You can add the installation directory to the PYTHONPATH environment
+          variable.  (It must then also be on PYTHONPATH whenever you run
+          Python and want to use the package(s) you are installing.)
+
+        * You can set up the installation directory to support ".pth" files by
+          using one of the approaches described here:
+
+          https://setuptools.readthedocs.io/en/latest/easy_install.html#custom-installation-locations
+
+
+        Please make the appropriate changes for your system and try again.""").lstrip()
+
+    def no_default_version_msg(self):
+        template = self.__no_default_msg
+        return template % (self.install_dir, os.environ.get('PYTHONPATH', ''))
+
+    def install_site_py(self):
+        """Make sure there's a site.py in the target dir, if needed"""
+
+        if self.sitepy_installed:
+            return  # already did it, or don't need to
+
+        sitepy = os.path.join(self.install_dir, "site.py")
+        source = resource_string("setuptools", "site-patch.py")
+        source = source.decode('utf-8')
+        current = ""
+
+        if os.path.exists(sitepy):
+            log.debug("Checking existing site.py in %s", self.install_dir)
+            with io.open(sitepy) as strm:
+                current = strm.read()
+
+            if not current.startswith('def __boot():'):
+                raise DistutilsError(
+                    "%s is not a setuptools-generated site.py; please"
+                    " remove it." % sitepy
+                )
+
+        if current != source:
+            log.info("Creating %s", sitepy)
+            if not self.dry_run:
+                ensure_directory(sitepy)
+                with io.open(sitepy, 'w', encoding='utf-8') as strm:
+                    strm.write(source)
+            self.byte_compile([sitepy])
+
+        self.sitepy_installed = True
+
+    def create_home_path(self):
+        """Create directories under ~."""
+        if not self.user:
+            return
+        home = convert_path(os.path.expanduser("~"))
+        for name, path in six.iteritems(self.config_vars):
+            if path.startswith(home) and not os.path.isdir(path):
+                self.debug_print("os.makedirs('%s', 0o700)" % path)
+                os.makedirs(path, 0o700)
+
+    if sys.version[:3] in ('2.3', '2.4', '2.5') or 'real_prefix' in sys.__dict__:
+        sitedir_name = 'site-packages'
+    else:
+        sitedir_name = 'dist-packages'
+
+    INSTALL_SCHEMES = dict(
+        posix=dict(
+            install_dir='$base/lib/python$py_version_short/site-packages',
+            script_dir='$base/bin',
+        ),
+        unix_local=dict(
+            install_dir='$base/local/lib/python$py_version_short/%s' % sitedir_name,
+            script_dir='$base/local/bin',
+        ),
+        posix_local=dict(
+            install_dir='$base/local/lib/python$py_version_short/%s' % sitedir_name,
+            script_dir='$base/local/bin',
+        ),
+        deb_system=dict(
+            install_dir='$base/lib/python3/%s' % sitedir_name,
+            script_dir='$base/bin',
+        ),
+    )
+
+    DEFAULT_SCHEME = dict(
+        install_dir='$base/Lib/site-packages',
+        script_dir='$base/Scripts',
+    )
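+    # _expand() below looks the selected scheme name up in INSTALL_SCHEMES and
+    # falls back to DEFAULT_SCHEME (the Windows-style layout) for names such as
+    # 'nt' that have no entry of their own.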
+
+    def _expand(self, *attrs):
+        config_vars = self.get_finalized_command('install').config_vars
+
+        if self.prefix or self.install_layout:
+            if self.install_layout and self.install_layout in ['deb']:
+                    scheme_name = "deb_system"
+                    self.prefix = '/usr'
+            elif self.prefix or 'real_prefix' in sys.__dict__:
+                scheme_name = os.name
+            else:
+                scheme_name = "posix_local"
+            # Set default install_dir/scripts from --prefix
+            config_vars = config_vars.copy()
+            config_vars['base'] = self.prefix
+            scheme = self.INSTALL_SCHEMES.get(scheme_name, self.DEFAULT_SCHEME)
+            for attr, val in scheme.items():
+                if getattr(self, attr, None) is None:
+                    setattr(self, attr, val)
+
+        from distutils.util import subst_vars
+
+        for attr in attrs:
+            val = getattr(self, attr)
+            if val is not None:
+                val = subst_vars(val, config_vars)
+                if os.name == 'posix':
+                    val = os.path.expanduser(val)
+                setattr(self, attr, val)
+
+
+def _pythonpath():
+    items = os.environ.get('PYTHONPATH', '').split(os.pathsep)
+    return filter(None, items)
+
+
+def get_site_dirs():
+    """
+    Return a list of 'site' dirs
+    """
+
+    sitedirs = []
+
+    # start with PYTHONPATH
+    sitedirs.extend(_pythonpath())
+
+    prefixes = [sys.prefix]
+    if sys.exec_prefix != sys.prefix:
+        prefixes.append(sys.exec_prefix)
+    for prefix in prefixes:
+        if prefix:
+            if sys.platform in ('os2emx', 'riscos'):
+                sitedirs.append(os.path.join(prefix, "Lib", "site-packages"))
+            elif os.sep == '/':
+                sitedirs.extend([
+                    os.path.join(
+                        prefix,
+                        "local/lib",
+                        "python" + sys.version[:3],
+                        "dist-packages",
+                    ),
+                    os.path.join(
+                        prefix,
+                        "lib",
+                        "python" + sys.version[:3],
+                        "dist-packages",
+                    ),
+                    os.path.join(prefix, "lib", "site-python"),
+                ])
+            else:
+                sitedirs.extend([
+                    prefix,
+                    os.path.join(prefix, "lib", "site-packages"),
+                ])
+            if sys.platform == 'darwin':
+                # for framework builds *only* we add the standard Apple
+                # locations. Currently only per-user, but /Library and
+                # /Network/Library could be added too
+                if 'Python.framework' in prefix:
+                    home = os.environ.get('HOME')
+                    if home:
+                        home_sp = os.path.join(
+                            home,
+                            'Library',
+                            'Python',
+                            sys.version[:3],
+                            'site-packages',
+                        )
+                        sitedirs.append(home_sp)
+    lib_paths = get_path('purelib'), get_path('platlib')
+    for site_lib in lib_paths:
+        if site_lib not in sitedirs:
+            sitedirs.append(site_lib)
+
+    if site.ENABLE_USER_SITE:
+        sitedirs.append(site.USER_SITE)
+
+    try:
+        sitedirs.extend(site.getsitepackages())
+    except AttributeError:
+        pass
+
+    sitedirs = list(map(normalize_path, sitedirs))
+
+    return sitedirs
+
+
+def expand_paths(inputs):
+    """Yield sys.path directories that might contain "old-style" packages"""
+
+    seen = {}
+
+    for dirname in inputs:
+        dirname = normalize_path(dirname)
+        if dirname in seen:
+            continue
+
+        seen[dirname] = 1
+        if not os.path.isdir(dirname):
+            continue
+
+        files = os.listdir(dirname)
+        yield dirname, files
+
+        for name in files:
+            if not name.endswith('.pth'):
+                # We only care about the .pth files
+                continue
+            if name in ('easy-install.pth', 'setuptools.pth'):
+                # Ignore .pth files that we control
+                continue
+
+            # Read the .pth file
+            f = open(os.path.join(dirname, name))
+            lines = list(yield_lines(f))
+            f.close()
+
+            # Yield existing non-dupe, non-import directory lines from it
+            for line in lines:
+                if not line.startswith("import"):
+                    line = normalize_path(line.rstrip())
+                    if line not in seen:
+                        seen[line] = 1
+                        if not os.path.isdir(line):
+                            continue
+                        yield line, os.listdir(line)
+
+
+def extract_wininst_cfg(dist_filename):
+    """Extract configuration data from a bdist_wininst .exe
+
+    Returns a configparser.RawConfigParser, or None
+    """
+    f = open(dist_filename, 'rb')
+    try:
+        endrec = zipfile._EndRecData(f)
+        if endrec is None:
+            return None
+
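+        # A bdist_wininst .exe is an installer stub with a zip archive appended;
+        # the data prepended to the zip ends with a 12-byte footer (tag, config
+        # length, bitmap length), and the config text sits just before that footer.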
+        prepended = (endrec[9] - endrec[5]) - endrec[6]
+        if prepended < 12:  # no wininst data here
+            return None
+        f.seek(prepended - 12)
+
+        tag, cfglen, bmlen = struct.unpack("<iii", f.read(12))
+        if tag not in (0x1234567A, 0x1234567B):
+            return None  # not a valid tag
+
+        f.seek(prepended - (12 + cfglen))
+        init = {'version': '', 'target_version': ''}
+        cfg = configparser.RawConfigParser(init)
+        try:
+            part = f.read(cfglen)
+            # Read up to the first null byte.
+            config = part.split(b'\0', 1)[0]
+            # Now the config is in bytes, but for RawConfigParser, it should
+            #  be text, so decode it.
+            config = config.decode(sys.getfilesystemencoding())
+            cfg.readfp(six.StringIO(config))
+        except configparser.Error:
+            return None
+        if not cfg.has_section('metadata') or not cfg.has_section('Setup'):
+            return None
+        return cfg
+
+    finally:
+        f.close()
+
+
+def get_exe_prefixes(exe_filename):
+    """Get exe->egg path translations for a given .exe file"""
+
+    prefixes = [
+        ('PURELIB/', ''),
+        ('PLATLIB/pywin32_system32', ''),
+        ('PLATLIB/', ''),
+        ('SCRIPTS/', 'EGG-INFO/scripts/'),
+        ('DATA/lib/site-packages', ''),
+    ]
+    z = zipfile.ZipFile(exe_filename)
+    try:
+        for info in z.infolist():
+            name = info.filename
+            parts = name.split('/')
+            if len(parts) == 3 and parts[2] == 'PKG-INFO':
+                if parts[1].endswith('.egg-info'):
+                    prefixes.insert(0, ('/'.join(parts[:2]), 'EGG-INFO/'))
+                    break
+            if len(parts) != 2 or not name.endswith('.pth'):
+                continue
+            if name.endswith('-nspkg.pth'):
+                continue
+            if parts[0].upper() in ('PURELIB', 'PLATLIB'):
+                contents = z.read(name)
+                if six.PY3:
+                    contents = contents.decode()
+                for pth in yield_lines(contents):
+                    pth = pth.strip().replace('\\', '/')
+                    if not pth.startswith('import'):
+                        prefixes.append((('%s/%s/' % (parts[0], pth)), ''))
+    finally:
+        z.close()
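+    # Sort in reverse lexicographic order so longer, more specific prefixes are
+    # tried before their shorter counterparts in exe_to_egg()'s startswith() match.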
+    prefixes = [(x.lower(), y) for x, y in prefixes]
+    prefixes.sort()
+    prefixes.reverse()
+    return prefixes
+
+
+class PthDistributions(Environment):
+    """A .pth file with Distribution paths in it"""
+
+    dirty = False
+
+    def __init__(self, filename, sitedirs=()):
+        self.filename = filename
+        self.sitedirs = list(map(normalize_path, sitedirs))
+        self.basedir = normalize_path(os.path.dirname(self.filename))
+        self._load()
+        Environment.__init__(self, [], None, None)
+        for path in yield_lines(self.paths):
+            list(map(self.add, find_distributions(path, True)))
+
+    def _load(self):
+        self.paths = []
+        saw_import = False
+        seen = dict.fromkeys(self.sitedirs)
+        if os.path.isfile(self.filename):
+            f = open(self.filename, 'rt')
+            for line in f:
+                if line.startswith('import'):
+                    saw_import = True
+                    continue
+                path = line.rstrip()
+                self.paths.append(path)
+                if not path.strip() or path.strip().startswith('#'):
+                    continue
+                # skip non-existent paths, in case somebody deleted a package
+                # manually, and duplicate paths as well
+                path = self.paths[-1] = normalize_path(
+                    os.path.join(self.basedir, path)
+                )
+                if not os.path.exists(path) or path in seen:
+                    self.paths.pop()  # skip it
+                    self.dirty = True  # we cleaned up, so we're dirty now :)
+                    continue
+                seen[path] = 1
+            f.close()
+
+        if self.paths and not saw_import:
+            self.dirty = True  # ensure anything we touch has import wrappers
+        while self.paths and not self.paths[-1].strip():
+            self.paths.pop()
+
+    def save(self):
+        """Write changed .pth file back to disk"""
+        if not self.dirty:
+            return
+
+        rel_paths = list(map(self.make_relative, self.paths))
+        if rel_paths:
+            log.debug("Saving %s", self.filename)
+            lines = self._wrap_lines(rel_paths)
+            data = '\n'.join(lines) + '\n'
+
+            if os.path.islink(self.filename):
+                os.unlink(self.filename)
+            with open(self.filename, 'wt') as f:
+                f.write(data)
+
+        elif os.path.exists(self.filename):
+            log.debug("Deleting empty %s", self.filename)
+            os.unlink(self.filename)
+
+        self.dirty = False
+
+    @staticmethod
+    def _wrap_lines(lines):
+        return lines
+
+    def add(self, dist):
+        """Add `dist` to the distribution map"""
+        new_path = (
+            dist.location not in self.paths and (
+                dist.location not in self.sitedirs or
+                # account for '.' being in PYTHONPATH
+                dist.location == os.getcwd()
+            )
+        )
+        if new_path:
+            self.paths.append(dist.location)
+            self.dirty = True
+        Environment.add(self, dist)
+
+    def remove(self, dist):
+        """Remove `dist` from the distribution map"""
+        while dist.location in self.paths:
+            self.paths.remove(dist.location)
+            self.dirty = True
+        Environment.remove(self, dist)
+
+    def make_relative(self, path):
+        npath, last = os.path.split(normalize_path(path))
+        baselen = len(self.basedir)
+        parts = [last]
+        sep = os.altsep == '/' and '/' or os.sep
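+        # Walk upwards from `path`; if basedir is reached, return a path relative
+        # to it, otherwise the while-else below returns `path` unchanged.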
+        while len(npath) >= baselen:
+            if npath == self.basedir:
+                parts.append(os.curdir)
+                parts.reverse()
+                return sep.join(parts)
+            npath, last = os.path.split(npath)
+            parts.append(last)
+        else:
+            return path
+
+
+class RewritePthDistributions(PthDistributions):
+    @classmethod
+    def _wrap_lines(cls, lines):
+        yield cls.prelude
+        for line in lines:
+            yield line
+        yield cls.postlude
+
+    prelude = _one_liner("""
+        import sys
+        sys.__plen = len(sys.path)
+        """)
+    postlude = _one_liner("""
+        import sys
+        new = sys.path[sys.__plen:]
+        del sys.path[sys.__plen:]
+        p = getattr(sys, '__egginsert', 0)
+        sys.path[p:p] = new
+        sys.__egginsert = p + len(new)
+        """)
+
+
+if os.environ.get('SETUPTOOLS_SYS_PATH_TECHNIQUE', 'raw') == 'rewrite':
+    PthDistributions = RewritePthDistributions
+
+
+def _first_line_re():
+    """
+    Return a regular expression based on first_line_re suitable for matching
+    strings.
+    """
+    if isinstance(first_line_re.pattern, str):
+        return first_line_re
+
+    # first_line_re in Python >=3.1.4 and >=3.2.1 is a bytes pattern.
+    return re.compile(first_line_re.pattern.decode())
+
+
+def auto_chmod(func, arg, exc):
+    if func in [os.unlink, os.remove] and os.name == 'nt':
+        chmod(arg, stat.S_IWRITE)
+        return func(arg)
+    et, ev, _ = sys.exc_info()
+    six.reraise(et, (ev[0], ev[1] + (" %s %s" % (func, arg))))
+
+
+def update_dist_caches(dist_path, fix_zipimporter_caches):
+    """
+    Fix any globally cached `dist_path` related data
+
+    `dist_path` should be a path of a newly installed egg distribution (zipped
+    or unzipped).
+
+    sys.path_importer_cache contains finder objects that have been cached when
+    importing data from the original distribution. Any such finders need to be
+    cleared since the replacement distribution might be packaged differently,
+    e.g. a zipped egg distribution might get replaced with an unzipped egg
+    folder or vice versa. Having the old finders cached may then cause Python
+    to attempt loading modules from the replacement distribution using an
+    incorrect loader.
+
+    zipimport.zipimporter objects are Python loaders charged with importing
+    data packaged inside zip archives. If stale loaders referencing the
+    original distribution are left behind, they can fail to load modules from
+    the replacement distribution. E.g. if an old zipimport.zipimporter instance
+    is used to load data from a new zipped egg archive, it may cause the
+    operation to attempt to locate the requested data in the wrong location -
+    one indicated by the original distribution's zip archive directory
+    information. Such an operation may then fail outright, e.g. report having
+    read a 'bad local file header', or even worse, it may fail silently &
+    return invalid data.
+
+    zipimport._zip_directory_cache contains cached zip archive directory
+    information for all existing zipimport.zipimporter instances and all such
+    instances connected to the same archive share the same cached directory
+    information.
+
+    If asked, and the underlying Python implementation allows it, we can fix
+    all existing zipimport.zipimporter instances instead of having to track
+    them down and remove them one by one, by updating their shared cached zip
+    archive directory information. This, of course, assumes that the
+    replacement distribution is packaged as a zipped egg.
+
+    If not asked to fix existing zipimport.zipimporter instances, we still do
+    our best to clear any remaining zipimport.zipimporter related cached data
+    that might somehow later get used when attempting to load data from the new
+    distribution and thus cause such load operations to fail. Note that when
+    tracking down such remaining stale data, we cannot catch every conceivable
+    usage from here, and we clear only those that we know of and have found to
+    cause problems if left alive. Any remaining caches should be updated by
+    whoever is in charge of maintaining them, i.e. they should be ready to
+    handle us replacing their zip archives with new distributions at runtime.
+
+    """
+    # There are several other known sources of stale zipimport.zipimporter
+    # instances that we do not clear here, but might if ever given a reason to
+    # do so:
+    # * Global setuptools pkg_resources.working_set (a.k.a. 'master working
+    #   set') may contain distributions which may in turn contain their
+    #   zipimport.zipimporter loaders.
+    # * Several zipimport.zipimporter loaders held by local variables further
+    #   up the function call stack when running the setuptools installation.
+    # * Already loaded modules may have their __loader__ attribute set to the
+    #   exact loader instance used when importing them. Python 3.4 docs state
+    #   that this information is intended mostly for introspection and so is
+    #   not expected to cause us problems.
+    normalized_path = normalize_path(dist_path)
+    _uncache(normalized_path, sys.path_importer_cache)
+    if fix_zipimporter_caches:
+        _replace_zip_directory_cache_data(normalized_path)
+    else:
+        # Here, even though we do not want to fix existing and now stale
+        # zipimporter cache information, we still want to remove it. Related to
+        # Python's zip archive directory information cache, we clear each of
+        # its stale entries in two phases:
+        #   1. Clear the entry so attempting to access zip archive information
+        #      via any existing stale zipimport.zipimporter instances fails.
+        #   2. Remove the entry from the cache so any newly constructed
+        #      zipimport.zipimporter instances do not end up using old stale
+        #      zip archive directory information.
+        # This whole stale data removal step does not seem strictly necessary,
+        # but has been left in because it was done before we started replacing
+        # the zip archive directory information cache content if possible, and
+        # there are no relevant unit tests that we can depend on to tell us if
+        # this is really needed.
+        _remove_and_clear_zip_directory_cache_data(normalized_path)
+
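+
+# Editorial sketch (not part of setuptools): how an installer would refresh
+# the global import caches after replacing an egg on disk. The helper name
+# and the egg path are hypothetical and exist only for illustration.
+def _illustrate_cache_refresh(new_egg='/tmp/example/foo-1.0-py3.6.egg'):
+    # Drop stale finders for the replaced path and, where the implementation
+    # allows it, refresh the shared zip directory data in place.
+    update_dist_caches(new_egg, fix_zipimporter_caches=True)
+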
+
+def _collect_zipimporter_cache_entries(normalized_path, cache):
+    """
+    Return zipimporter cache entry keys related to a given normalized path.
+
+    Alternative path spellings (e.g. those using different character case or
+    those using alternative path separators) related to the same path are
+    included. Any sub-path entries are included as well, i.e. those
+    corresponding to zip archives embedded in other zip archives.
+
+    """
+    result = []
+    prefix_len = len(normalized_path)
+    for p in cache:
+        np = normalize_path(p)
+        if (np.startswith(normalized_path) and
+                np[prefix_len:prefix_len + 1] in (os.sep, '')):
+            result.append(p)
+    return result
+
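+
+# Editorial sketch (not part of setuptools): entry collection matches the
+# normalized path itself plus any sub-paths (zips nested inside zips). The
+# dict below is a stand-in for zipimport._zip_directory_cache; the POSIX-style
+# paths are made up.
+def _illustrate_entry_collection():
+    fake_cache = {
+        '/srv/eggs/foo.egg': object(),
+        '/srv/eggs/foo.egg/inner.zip': object(),
+        '/srv/eggs/foo.egg-info': object(),
+    }
+    # Returns the first two keys only; the third shares the string prefix but
+    # the character after it is not a path separator, so it is left alone.
+    return _collect_zipimporter_cache_entries(
+        normalize_path('/srv/eggs/foo.egg'), fake_cache)
+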
+
+def _update_zipimporter_cache(normalized_path, cache, updater=None):
+    """
+    Update zipimporter cache data for a given normalized path.
+
+    Any sub-path entries are processed as well, i.e. those corresponding to zip
+    archives embedded in other zip archives.
+
+    Given updater is a callable taking a cache entry key and the original entry
+    (after already removing the entry from the cache), and expected to update
+    the entry and possibly return a new one to be inserted in its place.
+    Returning None indicates that the entry should not be replaced with a new
+    one. If no updater is given, the cache entries are simply removed without
+    any additional processing, the same as if the updater simply returned None.
+
+    """
+    for p in _collect_zipimporter_cache_entries(normalized_path, cache):
+        # N.B. pypy's custom zipimport._zip_directory_cache implementation does
+        # not support the complete dict interface:
+        #  * Does not support item assignment, thus not allowing this function
+        #    to be used only for removing existing cache entries.
+        #  * Does not support the dict.pop() method, forcing us to use the
+        #    get/del patterns instead. For more detailed information see the
+        #    following links:
+        #      https://github.com/pypa/setuptools/issues/202#issuecomment-202913420
+        #      http://bit.ly/2h9itJX
+        old_entry = cache[p]
+        del cache[p]
+        new_entry = updater and updater(p, old_entry)
+        if new_entry is not None:
+            cache[p] = new_entry
+
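+
+# Editorial sketch (not part of setuptools) of the updater protocol described
+# above: the entry is always removed first, then the updater may return a
+# replacement to re-insert. The cache and paths are made up for illustration.
+def _illustrate_updater_protocol():
+    fake_cache = {'/srv/eggs/foo.egg': {'stale': 'directory data'}}
+
+    def refresh(path, old_entry):
+        # Returning a value re-inserts it under the same key; returning None
+        # (or passing no updater at all) leaves the entry removed.
+        return {'fresh': 'directory data'}
+
+    _update_zipimporter_cache(
+        normalize_path('/srv/eggs/foo.egg'), fake_cache, updater=refresh)
+    return fake_cache  # now holds the 'fresh' entry under the same key
+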
+
+def _uncache(normalized_path, cache):
+    _update_zipimporter_cache(normalized_path, cache)
+
+
+def _remove_and_clear_zip_directory_cache_data(normalized_path):
+    def clear_and_remove_cached_zip_archive_directory_data(path, old_entry):
+        old_entry.clear()
+
+    _update_zipimporter_cache(
+        normalized_path, zipimport._zip_directory_cache,
+        updater=clear_and_remove_cached_zip_archive_directory_data)
+
+
+# The PyPy Python implementation does not allow directly writing to the
+# zipimport._zip_directory_cache and so prevents us from attempting to correct
+# its content. The best we can do there is clear the problematic cache content
+# and have PyPy repopulate it as needed. The downside is that if there are any
+# stale zipimport.zipimporter instances lying around, attempting to use them
+# will fail because their zip archive directory information is no longer
+# available, instead of being automatically corrected to use the new, correct
+# zip archive directory information.
+if '__pypy__' in sys.builtin_module_names:
+    _replace_zip_directory_cache_data = \
+        _remove_and_clear_zip_directory_cache_data
+else:
+
+    def _replace_zip_directory_cache_data(normalized_path):
+        def replace_cached_zip_archive_directory_data(path, old_entry):
+            # N.B. In theory, we could load the zip directory information just
+            # once for all updated path spellings, and then copy it locally and
+            # update its contained path strings to contain the correct
+            # spelling, but that seems like a way too invasive move (this cache
+            # structure is not officially documented anywhere and could in
+            # theory change with new Python releases) for no significant
+            # benefit.
+            old_entry.clear()
+            zipimport.zipimporter(path)
+            old_entry.update(zipimport._zip_directory_cache[path])
+            return old_entry
+
+        _update_zipimporter_cache(
+            normalized_path, zipimport._zip_directory_cache,
+            updater=replace_cached_zip_archive_directory_data)
+
+
+def is_python(text, filename='<string>'):
+    "Is this string a valid Python script?"
+    try:
+        compile(text, filename, 'exec')
+    except (SyntaxError, TypeError):
+        return False
+    else:
+        return True
+
+
+def is_sh(executable):
+    """Determine if the specified executable is a .sh (contains a #! line)"""
+    try:
+        with io.open(executable, encoding='latin-1') as fp:
+            magic = fp.read(2)
+    except (OSError, IOError):
+        return executable
+    return magic == '#!'
+
+
+def nt_quote_arg(arg):
+    """Quote a command line argument according to Windows parsing rules"""
+    return subprocess.list2cmdline([arg])
+
+
+def is_python_script(script_text, filename):
+    """Is this text, as a whole, a Python script? (as opposed to shell/bat/etc.
+    """
+    if filename.endswith('.py') or filename.endswith('.pyw'):
+        return True  # extension says it's Python
+    if is_python(script_text, filename):
+        return True  # it's syntactically valid Python
+    if script_text.startswith('#!'):
+        # It begins with a '#!' line, so check if 'python' is in it somewhere
+        return 'python' in script_text.splitlines()[0].lower()
+
+    return False  # Not any Python I can recognize
+
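+
+# Editorial examples (not part of setuptools) of the detection order above:
+# file extension first, then Python syntax, then the '#!' line. The helper
+# name and the sample texts are made up.
+def _illustrate_script_detection():
+    assert is_python_script('print("hi")\n', 'hello.py')   # by extension
+    assert is_python_script('x = 1\n', 'hello')            # valid syntax
+    assert is_python_script(
+        '#!/usr/bin/python -x\nif [ -f x ]; then echo; fi\n', 'hello'
+    )                                                      # python shebang
+    assert not is_python_script('#!/bin/sh\nls "$1"\n', 'hello')
+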
+
+try:
+    from os import chmod as _chmod
+except ImportError:
+    # Jython compatibility
+    def _chmod(*args):
+        pass
+
+
+def chmod(path, mode):
+    log.debug("changing mode of %s to %o", path, mode)
+    try:
+        _chmod(path, mode)
+    except os.error as e:
+        log.debug("chmod failed: %s", e)
+
+
+class CommandSpec(list):
+    """
+    A command spec for a #! header, specified as a list of arguments akin to
+    those passed to Popen.
+    """
+
+    options = []
+    split_args = dict()
+
+    @classmethod
+    def best(cls):
+        """
+        Choose the best CommandSpec class based on environmental conditions.
+        """
+        return cls
+
+    @classmethod
+    def _sys_executable(cls):
+        _default = os.path.normpath(sys.executable)
+        return os.environ.get('__PYVENV_LAUNCHER__', _default)
+
+    @classmethod
+    def from_param(cls, param):
+        """
+        Construct a CommandSpec from a parameter to build_scripts, which may
+        be None.
+        """
+        if isinstance(param, cls):
+            return param
+        if isinstance(param, list):
+            return cls(param)
+        if param is None:
+            return cls.from_environment()
+        # otherwise, assume it's a string.
+        return cls.from_string(param)
+
+    @classmethod
+    def from_environment(cls):
+        return cls([cls._sys_executable()])
+
+    @classmethod
+    def from_string(cls, string):
+        """
+        Construct a command spec from a simple string representing a command
+        line parseable by shlex.split.
+        """
+        items = shlex.split(string, **cls.split_args)
+        return cls(items)
+
+    def install_options(self, script_text):
+        self.options = shlex.split(self._extract_options(script_text))
+        cmdline = subprocess.list2cmdline(self)
+        if not isascii(cmdline):
+            self.options[:0] = ['-x']
+
+    @staticmethod
+    def _extract_options(orig_script):
+        """
+        Extract any options from the first line of the script.
+        """
+        first = (orig_script + '\n').splitlines()[0]
+        match = _first_line_re().match(first)
+        options = match.group(1) or '' if match else ''
+        return options.strip()
+
+    def as_header(self):
+        return self._render(self + list(self.options))
+
+    @staticmethod
+    def _strip_quotes(item):
+        _QUOTES = '"\''
+        for q in _QUOTES:
+            if item.startswith(q) and item.endswith(q):
+                return item[1:-1]
+        return item
+
+    @staticmethod
+    def _render(items):
+        cmdline = subprocess.list2cmdline(
+            CommandSpec._strip_quotes(item.strip()) for item in items)
+        return '#!' + cmdline + '\n'
+
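+
+# Editorial sketch (not part of setuptools): building a '#!' header from a
+# command spec. The interpreter path and script text below are made up.
+def _illustrate_command_spec():
+    cmd = CommandSpec.from_string('"/opt/example/python3" -E')
+    cmd.install_options('#!/usr/bin/python -O\nprint("hi")\n')
+    # Returns roughly '#!/opt/example/python3 -E -O\n'; the exact quoting is
+    # whatever subprocess.list2cmdline produces for the items.
+    return cmd.as_header()
+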
+
+# For pbr compat; will be removed in a future version.
+sys_executable = CommandSpec._sys_executable()
+
+
+class WindowsCommandSpec(CommandSpec):
+    split_args = dict(posix=False)
+
+
+class ScriptWriter(object):
+    """
+    Encapsulates behavior around writing entry point scripts for console and
+    gui apps.
+    """
+
+    template = textwrap.dedent(r"""
+        # EASY-INSTALL-ENTRY-SCRIPT: %(spec)r,%(group)r,%(name)r
+        __requires__ = %(spec)r
+        import re
+        import sys
+        from pkg_resources import load_entry_point
+
+        if __name__ == '__main__':
+            sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+            sys.exit(
+                load_entry_point(%(spec)r, %(group)r, %(name)r)()
+            )
+    """).lstrip()
+
+    command_spec_class = CommandSpec
+
+    @classmethod
+    def get_script_args(cls, dist, executable=None, wininst=False):
+        # for backward compatibility
+        warnings.warn("Use get_args", DeprecationWarning)
+        writer = (WindowsScriptWriter if wininst else ScriptWriter).best()
+        header = cls.get_script_header("", executable, wininst)
+        return writer.get_args(dist, header)
+
+    @classmethod
+    def get_script_header(cls, script_text, executable=None, wininst=False):
+        # for backward compatibility
+        warnings.warn("Use get_header", DeprecationWarning)
+        if wininst:
+            executable = "python.exe"
+        cmd = cls.command_spec_class.best().from_param(executable)
+        cmd.install_options(script_text)
+        return cmd.as_header()
+
+    @classmethod
+    def get_args(cls, dist, header=None):
+        """
+        Yield write_script() argument tuples for a distribution's
+        console_scripts and gui_scripts entry points.
+        """
+        if header is None:
+            header = cls.get_header()
+        spec = str(dist.as_requirement())
+        for type_ in 'console', 'gui':
+            group = type_ + '_scripts'
+            for name, ep in dist.get_entry_map(group).items():
+                cls._ensure_safe_name(name)
+                script_text = cls.template % locals()
+                args = cls._get_script_args(type_, name, header, script_text)
+                for res in args:
+                    yield res
+
+    @staticmethod
+    def _ensure_safe_name(name):
+        """
+        Prevent paths in *_scripts entry point names.
+        """
+        has_path_sep = re.search(r'[\\/]', name)
+        if has_path_sep:
+            raise ValueError("Path separators not allowed in script names")
+
+    @classmethod
+    def get_writer(cls, force_windows):
+        # for backward compatibility
+        warnings.warn("Use best", DeprecationWarning)
+        return WindowsScriptWriter.best() if force_windows else cls.best()
+
+    @classmethod
+    def best(cls):
+        """
+        Select the best ScriptWriter for this environment.
+        """
+        if sys.platform == 'win32' or (os.name == 'java' and os._name == 'nt'):
+            return WindowsScriptWriter.best()
+        else:
+            return cls
+
+    @classmethod
+    def _get_script_args(cls, type_, name, header, script_text):
+        # Simply write the stub with no extension.
+        yield (name, header + script_text)
+
+    @classmethod
+    def get_header(cls, script_text="", executable=None):
+        """Create a #! line, getting options (if any) from script_text"""
+        cmd = cls.command_spec_class.best().from_param(executable)
+        cmd.install_options(script_text)
+        return cmd.as_header()
+
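+
+# Editorial sketch (not part of setuptools): the script body that get_args()
+# renders for a hypothetical console_scripts entry point named 'foo' in a
+# made-up 'foo==1.0' distribution.
+def _illustrate_entry_point_script():
+    body = ScriptWriter.template % dict(
+        spec='foo==1.0', group='console_scripts', name='foo')
+    header = '#!/usr/bin/python\n'  # normally produced by get_header()
+    return header + body
+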
+
+class WindowsScriptWriter(ScriptWriter):
+    command_spec_class = WindowsCommandSpec
+
+    @classmethod
+    def get_writer(cls):
+        # for backward compatibility
+        warnings.warn("Use best", DeprecationWarning)
+        return cls.best()
+
+    @classmethod
+    def best(cls):
+        """
+        Select the best ScriptWriter suitable for Windows
+        """
+        writer_lookup = dict(
+            executable=WindowsExecutableLauncherWriter,
+            natural=cls,
+        )
+        # for compatibility, use the executable launcher by default
+        launcher = os.environ.get('SETUPTOOLS_LAUNCHER', 'executable')
+        return writer_lookup[launcher]
+
+    @classmethod
+    def _get_script_args(cls, type_, name, header, script_text):
+        "For Windows, add a .py extension"
+        ext = dict(console='.pya', gui='.pyw')[type_]
+        if ext not in os.environ['PATHEXT'].lower().split(';'):
+            msg = (
+                "{ext} not listed in PATHEXT; scripts will not be "
+                "recognized as executables."
+            ).format(**locals())
+            warnings.warn(msg, UserWarning)
+        old = ['.pya', '.py', '-script.py', '.pyc', '.pyo', '.pyw', '.exe']
+        old.remove(ext)
+        header = cls._adjust_header(type_, header)
+        blockers = [name + x for x in old]
+        yield name + ext, header + script_text, 't', blockers
+
+    @classmethod
+    def _adjust_header(cls, type_, orig_header):
+        """
+        Make sure 'pythonw' is used for gui and 'python' is used for
+        console (regardless of what sys.executable is).
+        """
+        pattern = 'pythonw.exe'
+        repl = 'python.exe'
+        if type_ == 'gui':
+            pattern, repl = repl, pattern
+        pattern_ob = re.compile(re.escape(pattern), re.IGNORECASE)
+        new_header = pattern_ob.sub(string=orig_header, repl=repl)
+        return new_header if cls._use_header(new_header) else orig_header
+
+    @staticmethod
+    def _use_header(new_header):
+        """
+        Should _adjust_header use the replaced header?
+
+        On non-Windows systems, always use it. On Windows systems, only use
+        the replaced header if it resolves to an executable on the system.
+        """
+        clean_header = new_header[2:-1].strip('"')
+        return sys.platform != 'win32' or find_executable(clean_header)
+
+
+class WindowsExecutableLauncherWriter(WindowsScriptWriter):
+    @classmethod
+    def _get_script_args(cls, type_, name, header, script_text):
+        """
+        For Windows, add a .py extension and an .exe launcher
+        """
+        if type_ == 'gui':
+            launcher_type = 'gui'
+            ext = '-script.pyw'
+            old = ['.pyw']
+        else:
+            launcher_type = 'cli'
+            ext = '-script.py'
+            old = ['.py', '.pyc', '.pyo']
+        hdr = cls._adjust_header(type_, header)
+        blockers = [name + x for x in old]
+        yield (name + ext, hdr + script_text, 't', blockers)
+        yield (
+            name + '.exe', get_win_launcher(launcher_type),
+            'b'  # write in binary mode
+        )
+        if not is_64bit():
+            # install a manifest for the launcher to prevent Windows
+            # from detecting it as an installer (which it will for
+            # launchers like easy_install.exe). Consider only
+            # adding a manifest for launchers detected as installers.
+            # See Distribute #143 for details.
+            m_name = name + '.exe.manifest'
+            yield (m_name, load_launcher_manifest(name), 't')
+
+
+# for backward-compatibility
+get_script_args = ScriptWriter.get_script_args
+get_script_header = ScriptWriter.get_script_header
+
+
+def get_win_launcher(type):
+    """
+    Load the Windows launcher (executable) suitable for launching a script.
+
+    `type` should be either 'cli' or 'gui'
+
+    Returns the executable as a byte string.
+    """
+    launcher_fn = '%s.exe' % type
+    if is_64bit():
+        launcher_fn = launcher_fn.replace(".", "-64.")
+    else:
+        launcher_fn = launcher_fn.replace(".", "-32.")
+    return resource_string('setuptools', launcher_fn)
+
+
+def load_launcher_manifest(name):
+    manifest = pkg_resources.resource_string(__name__, 'launcher manifest.xml')
+    if six.PY2:
+        return manifest % vars()
+    else:
+        return manifest.decode('utf-8') % vars()
+
+
+def rmtree(path, ignore_errors=False, onerror=auto_chmod):
+    return shutil.rmtree(path, ignore_errors, onerror)
+
+
+def current_umask():
+    tmp = os.umask(0o022)
+    os.umask(tmp)
+    return tmp
+
+
+def bootstrap():
+    # This function is called when setuptools*.egg is run using /bin/sh
+    import setuptools
+
+    argv0 = os.path.dirname(setuptools.__path__[0])
+    sys.argv[0] = argv0
+    sys.argv.append(argv0)
+    main()
+
+
+def main(argv=None, **kw):
+    from setuptools import setup
+    from setuptools.dist import Distribution
+
+    class DistributionWithoutHelpCommands(Distribution):
+        common_usage = ""
+
+        def _show_help(self, *args, **kw):
+            with _patch_usage():
+                Distribution._show_help(self, *args, **kw)
+
+    if argv is None:
+        argv = sys.argv[1:]
+
+    with _patch_usage():
+        setup(
+            script_args=['-q', 'easy_install', '-v'] + argv,
+            script_name=sys.argv[0] or 'easy_install',
+            distclass=DistributionWithoutHelpCommands,
+            **kw
+        )
+
+
+@contextlib.contextmanager
+def _patch_usage():
+    import distutils.core
+    USAGE = textwrap.dedent("""
+        usage: %(script)s [options] requirement_or_url ...
+           or: %(script)s --help
+        """).lstrip()
+
+    def gen_usage(script_name):
+        return USAGE % dict(
+            script=os.path.basename(script_name),
+        )
+
+    saved = distutils.core.gen_usage
+    distutils.core.gen_usage = gen_usage
+    try:
+        yield
+    finally:
+        distutils.core.gen_usage = saved
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/egg_info.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/egg_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b0f4488751b04e13d6bc5148aebfb00577f508a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/egg_info.py
@@ -0,0 +1,696 @@
+"""setuptools.command.egg_info
+
+Create a distribution's .egg-info directory and contents"""
+
+from distutils.filelist import FileList as _FileList
+from distutils.errors import DistutilsInternalError
+from distutils.util import convert_path
+from distutils import log
+import distutils.errors
+import distutils.filelist
+import os
+import re
+import sys
+import io
+import warnings
+import time
+import collections
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import map
+
+from setuptools import Command
+from setuptools.command.sdist import sdist
+from setuptools.command.sdist import walk_revctrl
+from setuptools.command.setopt import edit_config
+from setuptools.command import bdist_egg
+from pkg_resources import (
+    parse_requirements, safe_name, parse_version,
+    safe_version, yield_lines, EntryPoint, iter_entry_points, to_filename)
+import setuptools.unicode_utils as unicode_utils
+from setuptools.glob import glob
+
+from setuptools.extern import packaging
+
+
+def translate_pattern(glob):
+    """
+    Translate a file path glob like '*.txt' in to a regular expression.
+    This differs from fnmatch.translate which allows wildcards to match
+    directory separators. It also knows about '**/' which matches any number of
+    directories.
+    """
+    pat = ''
+
+    # This will split on '/' within [character classes]. This is deliberate.
+    chunks = glob.split(os.path.sep)
+
+    sep = re.escape(os.sep)
+    valid_char = '[^%s]' % (sep,)
+
+    for c, chunk in enumerate(chunks):
+        last_chunk = c == len(chunks) - 1
+
+        # Chunks that are a literal ** are globstars. They match anything.
+        if chunk == '**':
+            if last_chunk:
+                # Match anything if this is the last component
+                pat += '.*'
+            else:
+                # Match '(name/)*'
+                pat += '(?:%s+%s)*' % (valid_char, sep)
+            continue  # the whole path component has been handled
+
+        # Find any special characters in the remainder
+        i = 0
+        chunk_len = len(chunk)
+        while i < chunk_len:
+            char = chunk[i]
+            if char == '*':
+                # Match any number of name characters
+                pat += valid_char + '*'
+            elif char == '?':
+                # Match a name character
+                pat += valid_char
+            elif char == '[':
+                # Character class
+                inner_i = i + 1
+                # Skip initial !/] chars
+                if inner_i < chunk_len and chunk[inner_i] == '!':
+                    inner_i = inner_i + 1
+                if inner_i < chunk_len and chunk[inner_i] == ']':
+                    inner_i = inner_i + 1
+
+                # Loop till the closing ] is found
+                while inner_i < chunk_len and chunk[inner_i] != ']':
+                    inner_i = inner_i + 1
+
+                if inner_i >= chunk_len:
+                    # Got to the end of the string without finding a closing ]
+                    # Do not treat this as a matching group, but as a literal [
+                    pat += re.escape(char)
+                else:
+                    # Grab the insides of the [brackets]
+                    inner = chunk[i + 1:inner_i]
+                    char_class = ''
+
+                    # Class negation
+                    if inner[0] == '!':
+                        char_class = '^'
+                        inner = inner[1:]
+
+                    char_class += re.escape(inner)
+                    pat += '[%s]' % (char_class,)
+
+                    # Skip to the end ]
+                    i = inner_i
+            else:
+                pat += re.escape(char)
+            i += 1
+
+        # Join each chunk with the dir separator
+        if not last_chunk:
+            pat += sep
+
+    pat += r'\Z'
+    return re.compile(pat, flags=re.MULTILINE | re.DOTALL)
+
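+
+# Editorial examples (not part of setuptools) of the translation above, using
+# POSIX-style separators; on Windows the patterns would use os.sep instead.
+def _illustrate_translate_pattern():
+    assert translate_pattern('*.txt').match('notes.txt')
+    assert not translate_pattern('*.txt').match('docs/notes.txt')
+    assert translate_pattern('docs/**/*.txt').match('docs/a/b/notes.txt')
+    assert translate_pattern('data/**').match('data/raw/blob.bin')
+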
+
+class egg_info(Command):
+    description = "create a distribution's .egg-info directory"
+
+    user_options = [
+        ('egg-base=', 'e', "directory containing .egg-info directories"
+                           " (default: top of the source tree)"),
+        ('tag-date', 'd', "Add date stamp (e.g. 20050528) to version number"),
+        ('tag-build=', 'b', "Specify explicit tag to add to version number"),
+        ('no-date', 'D', "Don't include date stamp [default]"),
+    ]
+
+    boolean_options = ['tag-date']
+    negative_opt = {
+        'no-date': 'tag-date',
+    }
+
+    def initialize_options(self):
+        self.egg_name = None
+        self.egg_version = None
+        self.egg_base = None
+        self.egg_info = None
+        self.tag_build = None
+        self.tag_date = 0
+        self.broken_egg_info = False
+        self.vtags = None
+
+    ####################################
+    # allow the 'tag_svn_revision' to be detected and
+    # set, supporting sdists built on older Setuptools.
+    @property
+    def tag_svn_revision(self):
+        pass
+
+    @tag_svn_revision.setter
+    def tag_svn_revision(self, value):
+        pass
+    ####################################
+
+    def save_version_info(self, filename):
+        """
+        Materialize the value of date into the
+        build tag. Install build keys in a deterministic order
+        to avoid arbitrary reordering on subsequent builds.
+        """
+        egg_info = collections.OrderedDict()
+        # follow the order these keys would have been added
+        # when PYTHONHASHSEED=0
+        egg_info['tag_build'] = self.tags()
+        egg_info['tag_date'] = 0
+        edit_config(filename, dict(egg_info=egg_info))
+
+    def finalize_options(self):
+        self.egg_name = safe_name(self.distribution.get_name())
+        self.vtags = self.tags()
+        self.egg_version = self.tagged_version()
+
+        parsed_version = parse_version(self.egg_version)
+
+        try:
+            is_version = isinstance(parsed_version, packaging.version.Version)
+            spec = (
+                "%s==%s" if is_version else "%s===%s"
+            )
+            list(
+                parse_requirements(spec % (self.egg_name, self.egg_version))
+            )
+        except ValueError:
+            raise distutils.errors.DistutilsOptionError(
+                "Invalid distribution name or version syntax: %s-%s" %
+                (self.egg_name, self.egg_version)
+            )
+
+        if self.egg_base is None:
+            dirs = self.distribution.package_dir
+            self.egg_base = (dirs or {}).get('', os.curdir)
+
+        self.ensure_dirname('egg_base')
+        self.egg_info = to_filename(self.egg_name) + '.egg-info'
+        if self.egg_base != os.curdir:
+            self.egg_info = os.path.join(self.egg_base, self.egg_info)
+        if '-' in self.egg_name:
+            self.check_broken_egg_info()
+
+        # Set package version for the benefit of dumber commands
+        # (e.g. sdist, bdist_wininst, etc.)
+        #
+        self.distribution.metadata.version = self.egg_version
+
+        # If we bootstrapped around the lack of a PKG-INFO, as might be the
+        # case in a fresh checkout, make sure that any special tags get added
+        # to the version info
+        #
+        pd = self.distribution._patched_dist
+        if pd is not None and pd.key == self.egg_name.lower():
+            pd._version = self.egg_version
+            pd._parsed_version = parse_version(self.egg_version)
+            self.distribution._patched_dist = None
+
+    def write_or_delete_file(self, what, filename, data, force=False):
+        """Write `data` to `filename` or delete if empty
+
+        If `data` is non-empty, this routine is the same as ``write_file()``.
+        If `data` is empty but not ``None``, this is the same as calling
+    ``delete_file(filename)``.  If `data` is ``None``, then this is a no-op
+        unless `filename` exists, in which case a warning is issued about the
+        orphaned file (if `force` is false), or deleted (if `force` is true).
+        """
+        if data:
+            self.write_file(what, filename, data)
+        elif os.path.exists(filename):
+            if data is None and not force:
+                log.warn(
+                    "%s not set in setup(), but %s exists", what, filename
+                )
+                return
+            else:
+                self.delete_file(filename)
+
+    def write_file(self, what, filename, data):
+        """Write `data` to `filename` (if not a dry run) after announcing it
+
+        `what` is used in a log message to identify what is being written
+        to the file.
+        """
+        log.info("writing %s to %s", what, filename)
+        if six.PY3:
+            data = data.encode("utf-8")
+        if not self.dry_run:
+            f = open(filename, 'wb')
+            f.write(data)
+            f.close()
+
+    def delete_file(self, filename):
+        """Delete `filename` (if not a dry run) after announcing it"""
+        log.info("deleting %s", filename)
+        if not self.dry_run:
+            os.unlink(filename)
+
+    def tagged_version(self):
+        version = self.distribution.get_version()
+        # egg_info may be called more than once for a distribution,
+        # in which case the version string already contains all tags.
+        if self.vtags and version.endswith(self.vtags):
+            return safe_version(version)
+        return safe_version(version + self.vtags)
+
+    def run(self):
+        self.mkpath(self.egg_info)
+        installer = self.distribution.fetch_build_egg
+        for ep in iter_entry_points('egg_info.writers'):
+            ep.require(installer=installer)
+            writer = ep.resolve()
+            writer(self, ep.name, os.path.join(self.egg_info, ep.name))
+
+        # Get rid of native_libs.txt if it was put there by older bdist_egg
+        nl = os.path.join(self.egg_info, "native_libs.txt")
+        if os.path.exists(nl):
+            self.delete_file(nl)
+
+        self.find_sources()
+
+    def tags(self):
+        version = ''
+        if self.tag_build:
+            version += self.tag_build
+        if self.tag_date:
+            version += time.strftime("-%Y%m%d")
+        return version
+
+    def find_sources(self):
+        """Generate SOURCES.txt manifest file"""
+        manifest_filename = os.path.join(self.egg_info, "SOURCES.txt")
+        mm = manifest_maker(self.distribution)
+        mm.manifest = manifest_filename
+        mm.run()
+        self.filelist = mm.filelist
+
+    def check_broken_egg_info(self):
+        bei = self.egg_name + '.egg-info'
+        if self.egg_base != os.curdir:
+            bei = os.path.join(self.egg_base, bei)
+        if os.path.exists(bei):
+            log.warn(
+                "-" * 78 + '\n'
+                "Note: Your current .egg-info directory has a '-' in its name;"
+                '\nthis will not work correctly with "setup.py develop".\n\n'
+                'Please rename %s to %s to correct this problem.\n' + '-' * 78,
+                bei, self.egg_info
+            )
+            self.broken_egg_info = self.egg_info
+            self.egg_info = bei  # make it work for now
+
+
+class FileList(_FileList):
+    # Implementations of the various MANIFEST.in commands
+
+    def process_template_line(self, line):
+        # Parse the line: split it up, make sure the right number of words
+        # is there, and return the relevant words.  'action' is always
+        # defined: it's the first word of the line.  Which of the other
+        # three are defined depends on the action; it'll be either
+        # patterns, (dir and patterns), or (dir_pattern).
+        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)
+
+        # OK, now we know that the action is valid and we have the
+        # right number of words on the line for that action -- so we
+        # can proceed with minimal error-checking.
+        if action == 'include':
+            self.debug_print("include " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.include(pattern):
+                    log.warn("warning: no files found matching '%s'", pattern)
+
+        elif action == 'exclude':
+            self.debug_print("exclude " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.exclude(pattern):
+                    log.warn(("warning: no previously-included files "
+                              "found matching '%s'"), pattern)
+
+        elif action == 'global-include':
+            self.debug_print("global-include " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.global_include(pattern):
+                    log.warn(("warning: no files found matching '%s' "
+                              "anywhere in distribution"), pattern)
+
+        elif action == 'global-exclude':
+            self.debug_print("global-exclude " + ' '.join(patterns))
+            for pattern in patterns:
+                if not self.global_exclude(pattern):
+                    log.warn(("warning: no previously-included files matching "
+                              "'%s' found anywhere in distribution"),
+                             pattern)
+
+        elif action == 'recursive-include':
+            self.debug_print("recursive-include %s %s" %
+                             (dir, ' '.join(patterns)))
+            for pattern in patterns:
+                if not self.recursive_include(dir, pattern):
+                    log.warn(("warning: no files found matching '%s' "
+                              "under directory '%s'"),
+                             pattern, dir)
+
+        elif action == 'recursive-exclude':
+            self.debug_print("recursive-exclude %s %s" %
+                             (dir, ' '.join(patterns)))
+            for pattern in patterns:
+                if not self.recursive_exclude(dir, pattern):
+                    log.warn(("warning: no previously-included files matching "
+                              "'%s' found under directory '%s'"),
+                             pattern, dir)
+
+        elif action == 'graft':
+            self.debug_print("graft " + dir_pattern)
+            if not self.graft(dir_pattern):
+                log.warn("warning: no directories found matching '%s'",
+                         dir_pattern)
+
+        elif action == 'prune':
+            self.debug_print("prune " + dir_pattern)
+            if not self.prune(dir_pattern):
+                log.warn(("no previously-included directories found "
+                          "matching '%s'"), dir_pattern)
+
+        else:
+            raise DistutilsInternalError(
+                "this cannot happen: invalid action '%s'" % action)
+
+    def _remove_files(self, predicate):
+        """
+        Remove all files from the file list that match the predicate.
+        Return True if any matching files were removed
+        """
+        found = False
+        for i in range(len(self.files) - 1, -1, -1):
+            if predicate(self.files[i]):
+                self.debug_print(" removing " + self.files[i])
+                del self.files[i]
+                found = True
+        return found
+
+    def include(self, pattern):
+        """Include files that match 'pattern'."""
+        found = [f for f in glob(pattern) if not os.path.isdir(f)]
+        self.extend(found)
+        return bool(found)
+
+    def exclude(self, pattern):
+        """Exclude files that match 'pattern'."""
+        match = translate_pattern(pattern)
+        return self._remove_files(match.match)
+
+    def recursive_include(self, dir, pattern):
+        """
+        Include all files anywhere in 'dir/' that match the pattern.
+        """
+        full_pattern = os.path.join(dir, '**', pattern)
+        found = [f for f in glob(full_pattern, recursive=True)
+                 if not os.path.isdir(f)]
+        self.extend(found)
+        return bool(found)
+
+    def recursive_exclude(self, dir, pattern):
+        """
+        Exclude any file anywhere in 'dir/' that matches the pattern.
+        """
+        match = translate_pattern(os.path.join(dir, '**', pattern))
+        return self._remove_files(match.match)
+
+    def graft(self, dir):
+        """Include all files from 'dir/'."""
+        found = [
+            item
+            for match_dir in glob(dir)
+            for item in distutils.filelist.findall(match_dir)
+        ]
+        self.extend(found)
+        return bool(found)
+
+    def prune(self, dir):
+        """Filter out files from 'dir/'."""
+        match = translate_pattern(os.path.join(dir, '**'))
+        return self._remove_files(match.match)
+
+    def global_include(self, pattern):
+        """
+        Include all files anywhere in the current directory that match the
+        pattern. This is very inefficient on large file trees.
+        """
+        if self.allfiles is None:
+            self.findall()
+        match = translate_pattern(os.path.join('**', pattern))
+        found = [f for f in self.allfiles if match.match(f)]
+        self.extend(found)
+        return bool(found)
+
+    def global_exclude(self, pattern):
+        """
+        Exclude all files anywhere that match the pattern.
+        """
+        match = translate_pattern(os.path.join('**', pattern))
+        return self._remove_files(match.match)
+
+    def append(self, item):
+        if item.endswith('\r'):  # Fix older sdists built on Windows
+            item = item[:-1]
+        path = convert_path(item)
+
+        if self._safe_path(path):
+            self.files.append(path)
+
+    def extend(self, paths):
+        self.files.extend(filter(self._safe_path, paths))
+
+    def _repair(self):
+        """
+        Replace self.files with only safe paths
+
+        Because some owners of FileList manipulate the underlying
+        ``files`` attribute directly, this method must be called to
+        repair those paths.
+        """
+        self.files = list(filter(self._safe_path, self.files))
+
+    def _safe_path(self, path):
+        enc_warn = "'%s' not %s encodable -- skipping"
+
+        # To avoid accidental transcoding errors, first decode to unicode
+        u_path = unicode_utils.filesys_decode(path)
+        if u_path is None:
+            log.warn("'%s' in unexpected encoding -- skipping" % path)
+            return False
+
+        # Must ensure utf-8 encodability
+        utf8_path = unicode_utils.try_encode(u_path, "utf-8")
+        if utf8_path is None:
+            log.warn(enc_warn, path, 'utf-8')
+            return False
+
+        try:
+            # accept if either way checks out
+            if os.path.exists(u_path) or os.path.exists(utf8_path):
+                return True
+        # this will catch any encode errors decoding u_path
+        except UnicodeEncodeError:
+            log.warn(enc_warn, path, sys.getfilesystemencoding())
+
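+
+# Editorial sketch (not part of setuptools): the helpers above implement the
+# MANIFEST.in commands. glob-based inclusion touches the filesystem, so this
+# sketch only exercises the regex-based exclusion paths on a hand-populated
+# file list (the POSIX-style file names are made up).
+def _illustrate_filelist_exclusion():
+    fl = FileList()
+    fl.files = ['README.rst', 'docs/conf.py', 'docs/_build/html/index.html']
+    fl.prune('docs/_build')       # as 'prune docs/_build' would
+    fl.global_exclude('*.rst')    # as 'global-exclude *.rst' would
+    return fl.files               # ['docs/conf.py']
+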
+
+class manifest_maker(sdist):
+    template = "MANIFEST.in"
+
+    def initialize_options(self):
+        self.use_defaults = 1
+        self.prune = 1
+        self.manifest_only = 1
+        self.force_manifest = 1
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        self.filelist = FileList()
+        if not os.path.exists(self.manifest):
+            self.write_manifest()  # it must exist so it'll get in the list
+        self.add_defaults()
+        if os.path.exists(self.template):
+            self.read_template()
+        self.prune_file_list()
+        self.filelist.sort()
+        self.filelist.remove_duplicates()
+        self.write_manifest()
+
+    def _manifest_normalize(self, path):
+        path = unicode_utils.filesys_decode(path)
+        return path.replace(os.sep, '/')
+
+    def write_manifest(self):
+        """
+        Write the file list in 'self.filelist' to the manifest file
+        named by 'self.manifest'.
+        """
+        self.filelist._repair()
+
+        # _repair() ensures the paths are encodable, but does not decode them
+        # or normalize separators; do that here before writing the manifest.
+        files = [self._manifest_normalize(f) for f in self.filelist.files]
+        msg = "writing manifest file '%s'" % self.manifest
+        self.execute(write_file, (self.manifest, files), msg)
+
+    def warn(self, msg):
+        if not self._should_suppress_warning(msg):
+            sdist.warn(self, msg)
+
+    @staticmethod
+    def _should_suppress_warning(msg):
+        """
+        suppress missing-file warnings from sdist
+        """
+        return re.match(r"standard file .*not found", msg)
+
+    def add_defaults(self):
+        sdist.add_defaults(self)
+        self.filelist.append(self.template)
+        self.filelist.append(self.manifest)
+        rcfiles = list(walk_revctrl())
+        if rcfiles:
+            self.filelist.extend(rcfiles)
+        elif os.path.exists(self.manifest):
+            self.read_manifest()
+        ei_cmd = self.get_finalized_command('egg_info')
+        self.filelist.graft(ei_cmd.egg_info)
+
+    def prune_file_list(self):
+        build = self.get_finalized_command('build')
+        base_dir = self.distribution.get_fullname()
+        self.filelist.prune(build.build_base)
+        self.filelist.prune(base_dir)
+        sep = re.escape(os.sep)
+        self.filelist.exclude_pattern(r'(^|' + sep + r')(RCS|CVS|\.svn)' + sep,
+                                      is_regex=1)
+
+
+def write_file(filename, contents):
+    """Create a file with the specified name and write 'contents' (a
+    sequence of strings without line terminators) to it.
+    """
+    contents = "\n".join(contents)
+
+    # assuming the contents has been vetted for utf-8 encoding
+    contents = contents.encode("utf-8")
+
+    with open(filename, "wb") as f:  # always write POSIX-style manifest
+        f.write(contents)
+
+
+def write_pkg_info(cmd, basename, filename):
+    log.info("writing %s", filename)
+    if not cmd.dry_run:
+        metadata = cmd.distribution.metadata
+        metadata.version, oldver = cmd.egg_version, metadata.version
+        metadata.name, oldname = cmd.egg_name, metadata.name
+
+        try:
+            # write unescaped data to PKG-INFO, so older pkg_resources
+            # can still parse it
+            metadata.write_pkg_info(cmd.egg_info)
+        finally:
+            metadata.name, metadata.version = oldname, oldver
+
+        safe = getattr(cmd.distribution, 'zip_safe', None)
+
+        bdist_egg.write_safety_flag(cmd.egg_info, safe)
+
+
+def warn_depends_obsolete(cmd, basename, filename):
+    if os.path.exists(filename):
+        log.warn(
+            "WARNING: 'depends.txt' is not used by setuptools 0.6!\n"
+            "Use the install_requires/extras_require setup() args instead."
+        )
+
+
+def _write_requirements(stream, reqs):
+    lines = yield_lines(reqs or ())
+    append_cr = lambda line: line + '\n'
+    lines = map(append_cr, sorted(lines))
+    stream.writelines(lines)
+
+
+def write_requirements(cmd, basename, filename):
+    dist = cmd.distribution
+    data = six.StringIO()
+    _write_requirements(data, dist.install_requires)
+    extras_require = dist.extras_require or {}
+    for extra in sorted(extras_require):
+        data.write('\n[{extra}]\n'.format(**vars()))
+        _write_requirements(data, extras_require[extra])
+    cmd.write_or_delete_file("requirements", filename, data.getvalue())
+
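+
+# Editorial sketch (not part of setuptools): the shape of the requires.txt
+# data produced above, using made-up requirements. Base requirements come
+# first, sorted, followed by one '[extra]' section per extra.
+def _illustrate_requires_txt():
+    data = six.StringIO()
+    _write_requirements(data, ['requests>=2.0', 'packaging'])
+    data.write('\n[test]\n')
+    _write_requirements(data, ['pytest'])
+    # 'packaging\nrequests>=2.0\n\n[test]\npytest\n'
+    return data.getvalue()
+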
+
+def write_setup_requirements(cmd, basename, filename):
+    data = io.StringIO()
+    _write_requirements(data, cmd.distribution.setup_requires)
+    cmd.write_or_delete_file("setup-requirements", filename, data.getvalue())
+
+
+def write_toplevel_names(cmd, basename, filename):
+    pkgs = dict.fromkeys(
+        [
+            k.split('.', 1)[0]
+            for k in cmd.distribution.iter_distribution_names()
+        ]
+    )
+    cmd.write_file("top-level names", filename, '\n'.join(sorted(pkgs)) + '\n')
+
+
+def overwrite_arg(cmd, basename, filename):
+    write_arg(cmd, basename, filename, True)
+
+
+def write_arg(cmd, basename, filename, force=False):
+    argname = os.path.splitext(basename)[0]
+    value = getattr(cmd.distribution, argname, None)
+    if value is not None:
+        value = '\n'.join(value) + '\n'
+    cmd.write_or_delete_file(argname, filename, value, force)
+
+
+def write_entries(cmd, basename, filename):
+    ep = cmd.distribution.entry_points
+
+    if isinstance(ep, six.string_types) or ep is None:
+        data = ep
+    else:
+        data = []
+        for section, contents in sorted(ep.items()):
+            if not isinstance(contents, six.string_types):
+                contents = EntryPoint.parse_group(section, contents)
+                contents = '\n'.join(sorted(map(str, contents.values())))
+            data.append('[%s]\n%s\n\n' % (section, contents))
+        data = ''.join(data)
+
+    cmd.write_or_delete_file('entry points', filename, data, True)
+
+
+def get_pkg_info_revision():
+    """
+    Get a -r### off of PKG-INFO Version in case this is an sdist of
+    a subversion revision.
+    """
+    warnings.warn("get_pkg_info_revision is deprecated.", DeprecationWarning)
+    if os.path.exists('PKG-INFO'):
+        with io.open('PKG-INFO') as f:
+            for line in f:
+                match = re.match(r"Version:.*-r(\d+)\s*$", line)
+                if match:
+                    return int(match.group(1))
+    return 0
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install.py
new file mode 100644
index 0000000000000000000000000000000000000000..31a5ddb57739f8dc132cef1b9f5d252befb67ca7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install.py
@@ -0,0 +1,125 @@
+from distutils.errors import DistutilsArgError
+import inspect
+import glob
+import warnings
+import platform
+import distutils.command.install as orig
+
+import setuptools
+
+# Prior to numpy 1.9, NumPy relied on the '_install' name, so provide it for
+# now. See https://github.com/pypa/setuptools/issues/199/
+_install = orig.install
+
+
+class install(orig.install):
+    """Use easy_install to install the package, w/dependencies"""
+
+    user_options = orig.install.user_options + [
+        ('old-and-unmanageable', None, "Try not to use this!"),
+        ('single-version-externally-managed', None,
+         "used by system package builders to create 'flat' eggs"),
+    ]
+    boolean_options = orig.install.boolean_options + [
+        'old-and-unmanageable', 'single-version-externally-managed',
+    ]
+    new_commands = [
+        ('install_egg_info', lambda self: True),
+        ('install_scripts', lambda self: True),
+    ]
+    _nc = dict(new_commands)
+
+    def initialize_options(self):
+        orig.install.initialize_options(self)
+        self.old_and_unmanageable = None
+        self.single_version_externally_managed = None
+
+    def finalize_options(self):
+        orig.install.finalize_options(self)
+        if self.root:
+            self.single_version_externally_managed = True
+        elif self.single_version_externally_managed:
+            if not self.root and not self.record:
+                raise DistutilsArgError(
+                    "You must specify --record or --root when building system"
+                    " packages"
+                )
+
+    def handle_extra_path(self):
+        if self.root or self.single_version_externally_managed:
+            # explicit backward-compatibility mode, allow extra_path to work
+            return orig.install.handle_extra_path(self)
+
+        # Ignore extra_path when installing an egg (or being run by another
+        # command without --root or --single-version-externally-managed
+        self.path_file = None
+        self.extra_dirs = ''
+
+    def run(self):
+        # Explicit request for old-style install?  Just do it
+        if self.old_and_unmanageable or self.single_version_externally_managed:
+            return orig.install.run(self)
+
+        if not self._called_from_setup(inspect.currentframe()):
+            # Run in backward-compatibility mode to support bdist_* commands.
+            orig.install.run(self)
+        else:
+            self.do_egg_install()
+
+    @staticmethod
+    def _called_from_setup(run_frame):
+        """
+        Attempt to detect whether run() was called from setup() or by another
+        command.  If called by setup(), the parent caller will be the
+        'run_command' method in 'distutils.dist', and *its* caller will be
+        the 'run_commands' method.  If called any other way, the
+        immediate caller *might* be 'run_command', but it won't have been
+        called by 'run_commands'. Return True in that case or if a call stack
+        is unavailable. Return False otherwise.
+        """
+        if run_frame is None:
+            msg = "Call stack not available. bdist_* commands may fail."
+            warnings.warn(msg)
+            if platform.python_implementation() == 'IronPython':
+                msg = "For best results, pass -X:Frames to enable call stack."
+                warnings.warn(msg)
+            return True
+        res = inspect.getouterframes(run_frame)[2]
+        caller, = res[:1]
+        info = inspect.getframeinfo(caller)
+        caller_module = caller.f_globals.get('__name__', '')
+        return (
+            caller_module == 'distutils.dist'
+            and info.function == 'run_commands'
+        )
+
+    def do_egg_install(self):
+
+        easy_install = self.distribution.get_command_class('easy_install')
+
+        cmd = easy_install(
+            self.distribution, args="x", root=self.root, record=self.record,
+        )
+        cmd.ensure_finalized()  # finalize before bdist_egg munges install cmd
+        cmd.always_copy_from = '.'  # make sure local-dir eggs get installed
+
+        # pick up setup-dir .egg files only: no .egg-info
+        cmd.package_index.scan(glob.glob('*.egg'))
+
+        self.run_command('bdist_egg')
+        args = [self.distribution.get_command_obj('bdist_egg').egg_output]
+
+        if setuptools.bootstrap_install_from:
+            # Bootstrap self-installation of setuptools
+            args.insert(0, setuptools.bootstrap_install_from)
+
+        cmd.args = args
+        cmd.run()
+        setuptools.bootstrap_install_from = None
+
+
+# XXX Python 3.1 doesn't see _nc if this is inside the class
+install.sub_commands = (
+    [cmd for cmd in orig.install.sub_commands if cmd[0] not in install._nc] +
+    install.new_commands
+)
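+
+
+# Editorial sketch (not part of setuptools): the frame inspection behind
+# install._called_from_setup(), reduced to its core check and applied to the
+# caller of this hypothetical helper instead of install.run().
+def _illustrate_caller_check():
+    frame = inspect.currentframe()
+    if frame is None:  # e.g. IronPython without -X:Frames
+        return True
+    outer = inspect.getouterframes(frame)
+    if len(outer) < 3:
+        return False
+    # Two frames up should be distutils.dist's run_commands() when the call
+    # chain went through setup() -> run_commands() -> run_command().
+    caller = outer[2][0]
+    info = inspect.getframeinfo(caller)
+    return (caller.f_globals.get('__name__', '') == 'distutils.dist'
+            and info.function == 'run_commands')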
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_egg_info.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_egg_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f405bcad743bac704e90c5489713a5cd4404497
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_egg_info.py
@@ -0,0 +1,82 @@
+from distutils import log, dir_util
+from distutils.errors import DistutilsOptionError
+import os
+import sys
+
+from setuptools import Command
+from setuptools import namespaces
+from setuptools.archive_util import unpack_archive
+import pkg_resources
+
+
+class install_egg_info(namespaces.Installer, Command):
+    """Install an .egg-info directory for the package"""
+
+    description = "Install an .egg-info directory for the package"
+
+    user_options = [
+        ('install-dir=', 'd', "directory to install to"),
+    ]
+
+    def initialize_options(self):
+        self.install_dir = None
+        self.install_layout = None
+        self.prefix_option = None
+
+    def finalize_options(self):
+        self.set_undefined_options('install_lib',
+                                   ('install_dir', 'install_dir'))
+        self.set_undefined_options(
+            'install', ('install_layout', 'install_layout'))
+        if sys.hexversion > 0x2060000:
+            self.set_undefined_options(
+                'install', ('prefix_option', 'prefix_option'))
+        ei_cmd = self.get_finalized_command("egg_info")
+        basename = pkg_resources.Distribution(
+            None, None, ei_cmd.egg_name, ei_cmd.egg_version
+        ).egg_name() + '.egg-info'
+
+        if self.install_layout:
+            if self.install_layout.lower() not in ['deb']:
+                raise DistutilsOptionError("unknown value for --install-layout")
+            self.install_layout = self.install_layout.lower()
+            basename = basename.replace('-py%s' % pkg_resources.PY_MAJOR, '')
+        elif self.prefix_option or 'real_prefix' in sys.__dict__:
+            # don't modify for virtualenv
+            pass
+        else:
+            basename = basename.replace('-py%s' % pkg_resources.PY_MAJOR, '')
+
+        self.source = ei_cmd.egg_info
+        self.target = os.path.join(self.install_dir, basename)
+        self.outputs = []
+
+    def run(self):
+        self.run_command('egg_info')
+        if os.path.isdir(self.target) and not os.path.islink(self.target):
+            dir_util.remove_tree(self.target, dry_run=self.dry_run)
+        elif os.path.exists(self.target):
+            self.execute(os.unlink, (self.target,), "Removing " + self.target)
+        if not self.dry_run:
+            pkg_resources.ensure_directory(self.target)
+        self.execute(
+            self.copytree, (), "Copying %s to %s" % (self.source, self.target)
+        )
+        self.install_namespaces()
+
+    def get_outputs(self):
+        return self.outputs
+
+    def copytree(self):
+        # Copy the .egg-info tree to site-packages
+        def skimmer(src, dst):
+            # filter out source-control directories; note that 'src' is always
+            # a '/'-separated path, regardless of platform.  'dst' is a
+            # platform-specific path.
+            for skip in '.svn/', 'CVS/':
+                if src.startswith(skip) or '/' + skip in src:
+                    return None
+            if self.install_layout and self.install_layout in ['deb'] and src.startswith('SOURCES.txt'):
+                log.info("Skipping SOURCES.txt")
+                return None
+            self.outputs.append(dst)
+            log.debug("Copying %s to %s", src, dst)
+            return dst
+
+        unpack_archive(self.source, self.target, skimmer)
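+
+# Illustrative note (editor's sketch): for a project ``foo`` at version 1.0 on
+# Python 3.6 the metadata is copied to ``<install_dir>/foo-1.0-py3.6.egg-info``
+# inside a virtualenv (or when a prefix option is given), while the Debian
+# 'deb' layout and plain system installs drop the ``-py3.6`` tag, per
+# finalize_options() above.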
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_lib.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_lib.py
new file mode 100644
index 0000000000000000000000000000000000000000..578e002d41d2bc2a4c4d97c00c3a0514fb0b9ee0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_lib.py
@@ -0,0 +1,148 @@
+import os
+import sys
+import imp
+from itertools import product, starmap
+import distutils.command.install_lib as orig
+
+
+class install_lib(orig.install_lib):
+    """Don't add compiled flags to filenames of non-Python files"""
+
+    def initialize_options(self):
+        orig.install_lib.initialize_options(self)
+        self.multiarch = None
+        self.install_layout = None
+
+    def finalize_options(self):
+        orig.install_lib.finalize_options(self)
+        self.set_undefined_options(
+            'install', ('install_layout', 'install_layout'))
+        if self.install_layout == 'deb' and sys.version_info[:2] >= (3, 3):
+            import sysconfig
+            self.multiarch = sysconfig.get_config_var('MULTIARCH')
+
+    def run(self):
+        self.build()
+        outfiles = self.install()
+        if outfiles is not None:
+            # always compile, in case we have any extension stubs to deal with
+            self.byte_compile(outfiles)
+
+    def get_exclusions(self):
+        """
+        Return a collections.Sized collections.Container of paths to be
+        excluded for single_version_externally_managed installations.
+        """
+        all_packages = (
+            pkg
+            for ns_pkg in self._get_SVEM_NSPs()
+            for pkg in self._all_packages(ns_pkg)
+        )
+
+        excl_specs = product(all_packages, self._gen_exclusion_paths())
+        return set(starmap(self._exclude_pkg_path, excl_specs))
+
+    def _exclude_pkg_path(self, pkg, exclusion_path):
+        """
+        Given a package name and exclusion path within that package,
+        compute the full exclusion path.
+        """
+        parts = pkg.split('.') + [exclusion_path]
+        return os.path.join(self.install_dir, *parts)
+
+    @staticmethod
+    def _all_packages(pkg_name):
+        """
+        >>> list(install_lib._all_packages('foo.bar.baz'))
+        ['foo.bar.baz', 'foo.bar', 'foo']
+        """
+        while pkg_name:
+            yield pkg_name
+            pkg_name, sep, child = pkg_name.rpartition('.')
+
+    def _get_SVEM_NSPs(self):
+        """
+        Get namespace packages (list) but only for
+        single_version_externally_managed installations and empty otherwise.
+        """
+        # TODO: is it necessary to short-circuit here? i.e. what's the cost
+        # if get_finalized_command is called even when namespace_packages is
+        # False?
+        if not self.distribution.namespace_packages:
+            return []
+
+        install_cmd = self.get_finalized_command('install')
+        svem = install_cmd.single_version_externally_managed
+
+        return self.distribution.namespace_packages if svem else []
+
+    @staticmethod
+    def _gen_exclusion_paths():
+        """
+        Generate file paths to be excluded for namespace packages (bytecode
+        cache files).
+        """
+        # always exclude the package module itself
+        yield '__init__.py'
+
+        yield '__init__.pyc'
+        yield '__init__.pyo'
+
+        if not hasattr(imp, 'get_tag'):
+            return
+
+        base = os.path.join('__pycache__', '__init__.' + imp.get_tag())
+        yield base + '.pyc'
+        yield base + '.pyo'
+        yield base + '.opt-1.pyc'
+        yield base + '.opt-2.pyc'
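+        # (Editor's illustrative note: on CPython 3.6, imp.get_tag() is
+        # 'cpython-36', so the paths yielded above look like
+        # '__pycache__/__init__.cpython-36.pyc' plus the '.opt-1'/'.opt-2'
+        # variants.)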
+
+    def copy_tree(
+            self, infile, outfile,
+            preserve_mode=1, preserve_times=1, preserve_symlinks=0, level=1
+    ):
+        assert preserve_mode and preserve_times and not preserve_symlinks
+        exclude = self.get_exclusions()
+
+        if not exclude:
+            import distutils.dir_util
+            distutils.dir_util._multiarch = self.multiarch
+            return orig.install_lib.copy_tree(self, infile, outfile)
+
+        # Exclude namespace package __init__.py* files from the output
+
+        from setuptools.archive_util import unpack_directory
+        from distutils import log
+
+        outfiles = []
+
+        if self.multiarch:
+            import sysconfig
+            ext_suffix = sysconfig.get_config_var('EXT_SUFFIX')
+            if ext_suffix.endswith(self.multiarch + ext_suffix[-3:]):
+                new_suffix = None
+            else:
+                new_suffix = "%s-%s%s" % (
+                    ext_suffix[:-3], self.multiarch, ext_suffix[-3:])
+
+        def pf(src, dst):
+            if dst in exclude:
+                log.warn("Skipping installation of %s (namespace package)",
+                         dst)
+                return False
+
+            if (self.multiarch and new_suffix
+                    and dst.endswith(ext_suffix)
+                    and not dst.endswith(new_suffix)):
+                dst = dst.replace(ext_suffix, new_suffix)
+                log.info("renaming extension to %s", os.path.basename(dst))
+
+            log.info("copying %s -> %s", src, os.path.dirname(dst))
+            outfiles.append(dst)
+            return dst
+
+        unpack_directory(infile, outfile, pf)
+        return outfiles
+
+    def get_outputs(self):
+        outputs = orig.install_lib.get_outputs(self)
+        exclude = self.get_exclusions()
+        if exclude:
+            return [f for f in outputs if f not in exclude]
+        return outputs
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_scripts.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_scripts.py
new file mode 100644
index 0000000000000000000000000000000000000000..16234273a2d36b0b3d821a7a97bf8f03cf3f2948
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/install_scripts.py
@@ -0,0 +1,65 @@
+from distutils import log
+import distutils.command.install_scripts as orig
+import os
+import sys
+
+from pkg_resources import Distribution, PathMetadata, ensure_directory
+
+
+class install_scripts(orig.install_scripts):
+    """Do normal script install, plus any egg_info wrapper scripts"""
+
+    def initialize_options(self):
+        orig.install_scripts.initialize_options(self)
+        self.no_ep = False
+
+    def run(self):
+        import setuptools.command.easy_install as ei
+
+        self.run_command("egg_info")
+        if self.distribution.scripts:
+            orig.install_scripts.run(self)  # run first to set up self.outfiles
+        else:
+            self.outfiles = []
+        if self.no_ep:
+            # don't install entry point scripts into .egg file!
+            return
+
+        ei_cmd = self.get_finalized_command("egg_info")
+        dist = Distribution(
+            ei_cmd.egg_base, PathMetadata(ei_cmd.egg_base, ei_cmd.egg_info),
+            ei_cmd.egg_name, ei_cmd.egg_version,
+        )
+        bs_cmd = self.get_finalized_command('build_scripts')
+        exec_param = getattr(bs_cmd, 'executable', None)
+        bw_cmd = self.get_finalized_command("bdist_wininst")
+        is_wininst = getattr(bw_cmd, '_is_running', False)
+        writer = ei.ScriptWriter
+        if is_wininst:
+            exec_param = "python.exe"
+            writer = ei.WindowsScriptWriter
+        if exec_param == sys.executable:
+            # In case the path to the Python executable contains a space, wrap
+            # it so it's not split up.
+            exec_param = [exec_param]
+        # resolve the writer to the environment
+        writer = writer.best()
+        cmd = writer.command_spec_class.best().from_param(exec_param)
+        for args in writer.get_args(dist, cmd.as_header()):
+            self.write_script(*args)
+
+    def write_script(self, script_name, contents, mode="t", *ignored):
+        """Write an executable file to the scripts directory"""
+        from setuptools.command.easy_install import chmod, current_umask
+
+        log.info("Installing %s script to %s", script_name, self.install_dir)
+        target = os.path.join(self.install_dir, script_name)
+        self.outfiles.append(target)
+
+        mask = current_umask()
+        if not self.dry_run:
+            ensure_directory(target)
+            with open(target, "w" + mode) as f:
+                f.write(contents)
+            chmod(target, 0o777 - mask)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/launcher manifest.xml b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/launcher manifest.xml
new file mode 100644
index 0000000000000000000000000000000000000000..5972a96d8ded85cc14147ffc1400ec67c3b5a578
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/launcher manifest.xml	
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+    <assemblyIdentity version="1.0.0.0"
+                      processorArchitecture="X86"
+                      name="%(name)s"
+                      type="win32"/>
+    <!-- Identify the application security requirements. -->
+    <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+        <security>
+            <requestedPrivileges>
+                <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
+            </requestedPrivileges>
+        </security>
+    </trustInfo>
+</assembly>
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/py36compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/py36compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..61063e7542586c05c3af21d31cd917ebd1118272
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/py36compat.py
@@ -0,0 +1,136 @@
+import os
+from glob import glob
+from distutils.util import convert_path
+from distutils.command import sdist
+
+from setuptools.extern.six.moves import filter
+
+
+class sdist_add_defaults:
+    """
+    Mix-in providing forward-compatibility for functionality as found in
+    distutils on Python 3.7.
+
+    Do not edit the code in this class except to update functionality
+    as implemented in distutils. Instead, override in the subclass.
+    """
+
+    def add_defaults(self):
+        """Add all the default files to self.filelist:
+          - README or README.txt
+          - setup.py
+          - test/test*.py
+          - all pure Python modules mentioned in setup script
+          - all files pointed by package_data (build_py)
+          - all files defined in data_files.
+          - all files defined as scripts.
+          - all C sources listed as part of extensions or C libraries
+            in the setup script (doesn't catch C headers!)
+        Warns if (README or README.txt) or setup.py are missing; everything
+        else is optional.
+        """
+        self._add_defaults_standards()
+        self._add_defaults_optional()
+        self._add_defaults_python()
+        self._add_defaults_data_files()
+        self._add_defaults_ext()
+        self._add_defaults_c_libs()
+        self._add_defaults_scripts()
+
+    @staticmethod
+    def _cs_path_exists(fspath):
+        """
+        Case-sensitive path existence check
+
+        >>> sdist_add_defaults._cs_path_exists(__file__)
+        True
+        >>> sdist_add_defaults._cs_path_exists(__file__.upper())
+        False
+        """
+        if not os.path.exists(fspath):
+            return False
+        # make absolute so we always have a directory
+        abspath = os.path.abspath(fspath)
+        directory, filename = os.path.split(abspath)
+        return filename in os.listdir(directory)
+
+    def _add_defaults_standards(self):
+        standards = [self.READMES, self.distribution.script_name]
+        for fn in standards:
+            if isinstance(fn, tuple):
+                alts = fn
+                got_it = False
+                for fn in alts:
+                    if self._cs_path_exists(fn):
+                        got_it = True
+                        self.filelist.append(fn)
+                        break
+
+                if not got_it:
+                    self.warn("standard file not found: should have one of " +
+                              ', '.join(alts))
+            else:
+                if self._cs_path_exists(fn):
+                    self.filelist.append(fn)
+                else:
+                    self.warn("standard file '%s' not found" % fn)
+
+    def _add_defaults_optional(self):
+        optional = ['test/test*.py', 'setup.cfg']
+        for pattern in optional:
+            files = filter(os.path.isfile, glob(pattern))
+            self.filelist.extend(files)
+
+    def _add_defaults_python(self):
+        # build_py is used to get:
+        #  - python modules
+        #  - files defined in package_data
+        build_py = self.get_finalized_command('build_py')
+
+        # getting python files
+        if self.distribution.has_pure_modules():
+            self.filelist.extend(build_py.get_source_files())
+
+        # getting package_data files
+        # (computed in build_py.data_files by build_py.finalize_options)
+        for pkg, src_dir, build_dir, filenames in build_py.data_files:
+            for filename in filenames:
+                self.filelist.append(os.path.join(src_dir, filename))
+
+    def _add_defaults_data_files(self):
+        # getting distribution.data_files
+        if self.distribution.has_data_files():
+            for item in self.distribution.data_files:
+                if isinstance(item, str):
+                    # plain file
+                    item = convert_path(item)
+                    if os.path.isfile(item):
+                        self.filelist.append(item)
+                else:
+                    # a (dirname, filenames) tuple
+                    dirname, filenames = item
+                    for f in filenames:
+                        f = convert_path(f)
+                        if os.path.isfile(f):
+                            self.filelist.append(f)
+
+    def _add_defaults_ext(self):
+        if self.distribution.has_ext_modules():
+            build_ext = self.get_finalized_command('build_ext')
+            self.filelist.extend(build_ext.get_source_files())
+
+    def _add_defaults_c_libs(self):
+        if self.distribution.has_c_libraries():
+            build_clib = self.get_finalized_command('build_clib')
+            self.filelist.extend(build_clib.get_source_files())
+
+    def _add_defaults_scripts(self):
+        if self.distribution.has_scripts():
+            build_scripts = self.get_finalized_command('build_scripts')
+            self.filelist.extend(build_scripts.get_source_files())
+
+
+if hasattr(sdist.sdist, '_add_defaults_standards'):
+    # disable the functionality already available upstream
+    class sdist_add_defaults:
+        pass
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/register.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/register.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d6336a14da3d450f80dcbd732cddf3c0f14edfc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/register.py
@@ -0,0 +1,10 @@
+import distutils.command.register as orig
+
+
+class register(orig.register):
+    __doc__ = orig.register.__doc__
+
+    def run(self):
+        # Make sure that we are using valid current name/version info
+        self.run_command('egg_info')
+        orig.register.run(self)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/rotate.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/rotate.py
new file mode 100644
index 0000000000000000000000000000000000000000..b89353f529b3d08e768dea69a9dc8b5e7403003d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/rotate.py
@@ -0,0 +1,66 @@
+from distutils.util import convert_path
+from distutils import log
+from distutils.errors import DistutilsOptionError
+import os
+import shutil
+
+from setuptools.extern import six
+
+from setuptools import Command
+
+
+class rotate(Command):
+    """Delete older distributions"""
+
+    description = "delete older distributions, keeping N newest files"
+    user_options = [
+        ('match=', 'm', "patterns to match (required)"),
+        ('dist-dir=', 'd', "directory where the distributions are"),
+        ('keep=', 'k', "number of matching distributions to keep"),
+    ]
+
+    boolean_options = []
+
+    def initialize_options(self):
+        self.match = None
+        self.dist_dir = None
+        self.keep = None
+
+    def finalize_options(self):
+        if self.match is None:
+            raise DistutilsOptionError(
+                "Must specify one or more (comma-separated) match patterns "
+                "(e.g. '.zip' or '.egg')"
+            )
+        if self.keep is None:
+            raise DistutilsOptionError("Must specify number of files to keep")
+        try:
+            self.keep = int(self.keep)
+        except ValueError:
+            raise DistutilsOptionError("--keep must be an integer")
+        if isinstance(self.match, six.string_types):
+            self.match = [
+                convert_path(p.strip()) for p in self.match.split(',')
+            ]
+        self.set_undefined_options('bdist', ('dist_dir', 'dist_dir'))
+
+    def run(self):
+        self.run_command("egg_info")
+        from glob import glob
+
+        for pattern in self.match:
+            pattern = self.distribution.get_name() + '*' + pattern
+            files = glob(os.path.join(self.dist_dir, pattern))
+            files = [(os.path.getmtime(f), f) for f in files]
+            files.sort()
+            files.reverse()
+
+            log.info("%d file(s) matching %s", len(files), pattern)
+            files = files[self.keep:]
+            for (t, f) in files:
+                log.info("Deleting %s", f)
+                if not self.dry_run:
+                    if os.path.isdir(f):
+                        shutil.rmtree(f)
+                    else:
+                        os.unlink(f)
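+
+# Illustrative usage (editor's sketch, not from the setuptools docs): keep the
+# three newest built distributions for each pattern and delete the rest, e.g.
+#
+#     python setup.py rotate --match=.egg,.zip --keep=3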
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/saveopts.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/saveopts.py
new file mode 100644
index 0000000000000000000000000000000000000000..611cec552867a6d50b7edd700c86c7396d906ea2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/saveopts.py
@@ -0,0 +1,22 @@
+from setuptools.command.setopt import edit_config, option_base
+
+
+class saveopts(option_base):
+    """Save command-line options to a file"""
+
+    description = "save supplied options to setup.cfg or other config file"
+
+    def run(self):
+        dist = self.distribution
+        settings = {}
+
+        for cmd in dist.command_options:
+
+            if cmd == 'saveopts':
+                continue  # don't save our own options!
+
+            for opt, (src, val) in dist.get_option_dict(cmd).items():
+                if src == "command line":
+                    settings.setdefault(cmd, {})[opt] = val
+
+        edit_config(self.filename, settings, self.dry_run)
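+
+# Illustrative usage (editor's sketch): running another command with options
+# and appending ``saveopts`` records those command-line options, e.g.
+#
+#     python setup.py build_ext --inplace saveopts
+#
+# would (besides running build_ext) record the ``inplace`` flag under a
+# [build_ext] section of setup.cfg.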
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/sdist.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/sdist.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcfae4d82f6c04e355ec82d8ec0f1b8de44912cb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/sdist.py
@@ -0,0 +1,200 @@
+from distutils import log
+import distutils.command.sdist as orig
+import os
+import sys
+import io
+import contextlib
+
+from setuptools.extern import six
+
+from .py36compat import sdist_add_defaults
+
+import pkg_resources
+
+_default_revctrl = list
+
+
+def walk_revctrl(dirname=''):
+    """Find all files under revision control"""
+    for ep in pkg_resources.iter_entry_points('setuptools.file_finders'):
+        for item in ep.load()(dirname):
+            yield item
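+
+# (Editor's illustrative note: file-finder plugins register under the
+# 'setuptools.file_finders' entry-point group -- setuptools_scm is a common
+# example -- and each loaded finder is called with a directory name and
+# yields the paths it knows to be under revision control.)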
+
+
+class sdist(sdist_add_defaults, orig.sdist):
+    """Smart sdist that finds anything supported by revision control"""
+
+    user_options = [
+        ('formats=', None,
+         "formats for source distribution (comma-separated list)"),
+        ('keep-temp', 'k',
+         "keep the distribution tree around after creating " +
+         "archive file(s)"),
+        ('dist-dir=', 'd',
+         "directory to put the source distribution archive(s) in "
+         "[default: dist]"),
+    ]
+
+    negative_opt = {}
+
+    README_EXTENSIONS = ['', '.rst', '.txt', '.md']
+    READMES = tuple('README{0}'.format(ext) for ext in README_EXTENSIONS)
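+    # (Editor's note: READMES expands to ('README', 'README.rst',
+    # 'README.txt', 'README.md'), the candidates used by check_readme()
+    # and _add_defaults_standards().)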
+
+    def run(self):
+        self.run_command('egg_info')
+        ei_cmd = self.get_finalized_command('egg_info')
+        self.filelist = ei_cmd.filelist
+        self.filelist.append(os.path.join(ei_cmd.egg_info, 'SOURCES.txt'))
+        self.check_readme()
+
+        # Run sub commands
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+        self.make_distribution()
+
+        dist_files = getattr(self.distribution, 'dist_files', [])
+        for file in self.archive_files:
+            data = ('sdist', '', file)
+            if data not in dist_files:
+                dist_files.append(data)
+
+    def initialize_options(self):
+        orig.sdist.initialize_options(self)
+
+        self._default_to_gztar()
+
+    def _default_to_gztar(self):
+        # only needed on Python prior to 3.6.
+        if sys.version_info >= (3, 6, 0, 'beta', 1):
+            return
+        self.formats = ['gztar']
+
+    def make_distribution(self):
+        """
+        Workaround for #516
+        """
+        with self._remove_os_link():
+            orig.sdist.make_distribution(self)
+
+    @staticmethod
+    @contextlib.contextmanager
+    def _remove_os_link():
+        """
+        In a context, remove and restore os.link if it exists
+        """
+
+        class NoValue:
+            pass
+
+        orig_val = getattr(os, 'link', NoValue)
+        try:
+            del os.link
+        except Exception:
+            pass
+        try:
+            yield
+        finally:
+            if orig_val is not NoValue:
+                setattr(os, 'link', orig_val)
+
+    def __read_template_hack(self):
+        # This grody hack closes the template file (MANIFEST.in) if an
+        #  exception occurs during read_template.
+        # Doing so prevents an error when easy_install attempts to delete the
+        #  file.
+        try:
+            orig.sdist.read_template(self)
+        except Exception:
+            _, _, tb = sys.exc_info()
+            tb.tb_next.tb_frame.f_locals['template'].close()
+            raise
+
+    # Beginning with Python 2.7.2, 3.1.4, and 3.2.1, this leaky file handle
+    #  has been fixed, so only override the method if we're using an earlier
+    #  Python.
+    has_leaky_handle = (
+        sys.version_info < (2, 7, 2)
+        or (3, 0) <= sys.version_info < (3, 1, 4)
+        or (3, 2) <= sys.version_info < (3, 2, 1)
+    )
+    if has_leaky_handle:
+        read_template = __read_template_hack
+
+    def _add_defaults_python(self):
+        """getting python files"""
+        if self.distribution.has_pure_modules():
+            build_py = self.get_finalized_command('build_py')
+            self.filelist.extend(build_py.get_source_files())
+            # This functionality is incompatible with include_package_data, and
+            # will in fact create an infinite recursion if include_package_data
+            # is True.  Use of include_package_data will imply that
+            # distutils-style automatic handling of package_data is disabled
+            if not self.distribution.include_package_data:
+                for _, src_dir, _, filenames in build_py.data_files:
+                    self.filelist.extend([os.path.join(src_dir, filename)
+                                          for filename in filenames])
+
+    def _add_defaults_data_files(self):
+        try:
+            if six.PY2:
+                sdist_add_defaults._add_defaults_data_files(self)
+            else:
+                super()._add_defaults_data_files()
+        except TypeError:
+            log.warn("data_files contains unexpected objects")
+
+    def check_readme(self):
+        for f in self.READMES:
+            if os.path.exists(f):
+                return
+        else:
+            self.warn(
+                "standard file not found: should have one of " +
+                ', '.join(self.READMES)
+            )
+
+    def make_release_tree(self, base_dir, files):
+        orig.sdist.make_release_tree(self, base_dir, files)
+
+        # Save any egg_info command line options used to create this sdist
+        dest = os.path.join(base_dir, 'setup.cfg')
+        if hasattr(os, 'link') and os.path.exists(dest):
+            # unlink and re-copy, since it might be hard-linked, and
+            # we don't want to change the source version
+            os.unlink(dest)
+            self.copy_file('setup.cfg', dest)
+
+        self.get_finalized_command('egg_info').save_version_info(dest)
+
+    def _manifest_is_not_generated(self):
+        # check for special comment used in 2.7.1 and higher
+        if not os.path.isfile(self.manifest):
+            return False
+
+        with io.open(self.manifest, 'rb') as fp:
+            first_line = fp.readline()
+        return (first_line !=
+                '# file GENERATED by distutils, do NOT edit\n'.encode())
+
+    def read_manifest(self):
+        """Read the manifest file (named by 'self.manifest') and use it to
+        fill in 'self.filelist', the list of files to include in the source
+        distribution.
+        """
+        log.info("reading manifest file '%s'", self.manifest)
+        with open(self.manifest, 'rb') as manifest:
+            for line in manifest:
+                # The manifest must contain UTF-8. See #303.
+                if six.PY3:
+                    try:
+                        line = line.decode('UTF-8')
+                    except UnicodeDecodeError:
+                        log.warn("%r not UTF-8 decodable -- skipping" % line)
+                        continue
+                # ignore comments and blank lines
+                line = line.strip()
+                if line.startswith('#') or not line:
+                    continue
+                self.filelist.append(line)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/setopt.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/setopt.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e57cc02627fc3c3bb49613731a51c72452f96ba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/setopt.py
@@ -0,0 +1,149 @@
+from distutils.util import convert_path
+from distutils import log
+from distutils.errors import DistutilsOptionError
+import distutils
+import os
+
+from setuptools.extern.six.moves import configparser
+
+from setuptools import Command
+
+__all__ = ['config_file', 'edit_config', 'option_base', 'setopt']
+
+
+def config_file(kind="local"):
+    """Get the filename of the distutils, local, global, or per-user config
+
+    `kind` must be one of "local", "global", or "user"
+    """
+    if kind == 'local':
+        return 'setup.cfg'
+    if kind == 'global':
+        return os.path.join(
+            os.path.dirname(distutils.__file__), 'distutils.cfg'
+        )
+    if kind == 'user':
+        dot = os.name == 'posix' and '.' or ''
+        return os.path.expanduser(convert_path("~/%spydistutils.cfg" % dot))
+    raise ValueError(
+        "config_file() type must be 'local', 'global', or 'user'", kind
+    )
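+
+# Illustrative note (editor's sketch): config_file() returns 'setup.cfg' for
+# the default "local" kind, '<distutils package dir>/distutils.cfg' for
+# "global", and '~/.pydistutils.cfg' (no leading dot outside POSIX) for "user".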
+
+
+def edit_config(filename, settings, dry_run=False):
+    """Edit a configuration file to include `settings`
+
+    `settings` is a dictionary of dictionaries or ``None`` values, keyed by
+    command/section name.  A ``None`` value means to delete the entire section,
+    while a dictionary lists settings to be changed or deleted in that section.
+    A setting of ``None`` means to delete that setting.
+    """
+    log.debug("Reading configuration from %s", filename)
+    opts = configparser.RawConfigParser()
+    opts.read([filename])
+    for section, options in settings.items():
+        if options is None:
+            log.info("Deleting section [%s] from %s", section, filename)
+            opts.remove_section(section)
+        else:
+            if not opts.has_section(section):
+                log.debug("Adding new section [%s] to %s", section, filename)
+                opts.add_section(section)
+            for option, value in options.items():
+                if value is None:
+                    log.debug(
+                        "Deleting %s.%s from %s",
+                        section, option, filename
+                    )
+                    opts.remove_option(section, option)
+                    if not opts.options(section):
+                        log.info("Deleting empty [%s] section from %s",
+                                 section, filename)
+                        opts.remove_section(section)
+                else:
+                    log.debug(
+                        "Setting %s.%s to %r in %s",
+                        section, option, value, filename
+                    )
+                    opts.set(section, option, value)
+
+    log.info("Writing %s", filename)
+    if not dry_run:
+        with open(filename, 'w') as f:
+            opts.write(f)
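+
+# Illustrative example (editor's sketch; 'demo.cfg' is a hypothetical file):
+#
+#     edit_config('demo.cfg', {'build': {'build_base': 'custom'}})
+#
+# creates or updates a [build] section setting build_base, while passing
+# {'build': None} instead would remove the whole section.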
+
+
+class option_base(Command):
+    """Abstract base class for commands that mess with config files"""
+
+    user_options = [
+        ('global-config', 'g',
+         "save options to the site-wide distutils.cfg file"),
+        ('user-config', 'u',
+         "save options to the current user's pydistutils.cfg file"),
+        ('filename=', 'f',
+         "configuration file to use (default=setup.cfg)"),
+    ]
+
+    boolean_options = [
+        'global-config', 'user-config',
+    ]
+
+    def initialize_options(self):
+        self.global_config = None
+        self.user_config = None
+        self.filename = None
+
+    def finalize_options(self):
+        filenames = []
+        if self.global_config:
+            filenames.append(config_file('global'))
+        if self.user_config:
+            filenames.append(config_file('user'))
+        if self.filename is not None:
+            filenames.append(self.filename)
+        if not filenames:
+            filenames.append(config_file('local'))
+        if len(filenames) > 1:
+            raise DistutilsOptionError(
+                "Must specify only one configuration file option",
+                filenames
+            )
+        self.filename, = filenames
+
+
+class setopt(option_base):
+    """Save command-line options to a file"""
+
+    description = "set an option in setup.cfg or another config file"
+
+    user_options = [
+        ('command=', 'c', 'command to set an option for'),
+        ('option=', 'o', 'option to set'),
+        ('set-value=', 's', 'value of the option'),
+        ('remove', 'r', 'remove (unset) the value'),
+    ] + option_base.user_options
+
+    boolean_options = option_base.boolean_options + ['remove']
+
+    def initialize_options(self):
+        option_base.initialize_options(self)
+        self.command = None
+        self.option = None
+        self.set_value = None
+        self.remove = None
+
+    def finalize_options(self):
+        option_base.finalize_options(self)
+        if self.command is None or self.option is None:
+            raise DistutilsOptionError("Must specify --command *and* --option")
+        if self.set_value is None and not self.remove:
+            raise DistutilsOptionError("Must specify --set-value or --remove")
+
+    def run(self):
+        edit_config(
+            self.filename, {
+                self.command: {self.option.replace('-', '_'): self.set_value}
+            },
+            self.dry_run
+        )
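+
+
+# Illustrative usage (editor's sketch): record an option in setup.cfg, e.g.
+#
+#     python setup.py setopt --command build --option build-base --set-value custom
+#
+# run() translates the '-' in the option name to '_' before writing it.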
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/test.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..51aee1f7b18d12c35a0a09533aaf54745aabbcd9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/test.py
@@ -0,0 +1,268 @@
+import os
+import operator
+import sys
+import contextlib
+import itertools
+import unittest
+from distutils.errors import DistutilsError, DistutilsOptionError
+from distutils import log
+from unittest import TestLoader
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import map, filter
+
+from pkg_resources import (resource_listdir, resource_exists, normalize_path,
+                           working_set, _namespace_packages, evaluate_marker,
+                           add_activation_listener, require, EntryPoint)
+from setuptools import Command
+
+
+class ScanningLoader(TestLoader):
+
+    def __init__(self):
+        TestLoader.__init__(self)
+        self._visited = set()
+
+    def loadTestsFromModule(self, module, pattern=None):
+        """Return a suite of all tests cases contained in the given module
+
+        If the module is a package, load tests from all the modules in it.
+        If the module has an ``additional_tests`` function, call it and add
+        the return value to the tests.
+        """
+        if module in self._visited:
+            return None
+        self._visited.add(module)
+
+        tests = []
+        tests.append(TestLoader.loadTestsFromModule(self, module))
+
+        if hasattr(module, "additional_tests"):
+            tests.append(module.additional_tests())
+
+        if hasattr(module, '__path__'):
+            for file in resource_listdir(module.__name__, ''):
+                if file.endswith('.py') and file != '__init__.py':
+                    submodule = module.__name__ + '.' + file[:-3]
+                else:
+                    if resource_exists(module.__name__, file + '/__init__.py'):
+                        submodule = module.__name__ + '.' + file
+                    else:
+                        continue
+                tests.append(self.loadTestsFromName(submodule))
+
+        if len(tests) != 1:
+            return self.suiteClass(tests)
+        else:
+            return tests[0]  # don't create a nested suite for only one return
+
+
+# adapted from jaraco.classes.properties:NonDataProperty
+class NonDataProperty(object):
+    def __init__(self, fget):
+        self.fget = fget
+
+    def __get__(self, obj, objtype=None):
+        if obj is None:
+            return self
+        return self.fget(obj)
+
+
+class test(Command):
+    """Command to run unit tests after in-place build"""
+
+    description = "run unit tests after in-place build"
+
+    user_options = [
+        ('test-module=', 'm', "Run 'test_suite' in specified module"),
+        ('test-suite=', 's',
+         "Run single test, case or suite (e.g. 'module.test_suite')"),
+        ('test-runner=', 'r', "Test runner to use"),
+    ]
+
+    def initialize_options(self):
+        self.test_suite = None
+        self.test_module = None
+        self.test_loader = None
+        self.test_runner = None
+
+    def finalize_options(self):
+
+        if self.test_suite and self.test_module:
+            msg = "You may specify a module or a suite, but not both"
+            raise DistutilsOptionError(msg)
+
+        if self.test_suite is None:
+            if self.test_module is None:
+                self.test_suite = self.distribution.test_suite
+            else:
+                self.test_suite = self.test_module + ".test_suite"
+
+        if self.test_loader is None:
+            self.test_loader = getattr(self.distribution, 'test_loader', None)
+        if self.test_loader is None:
+            self.test_loader = "setuptools.command.test:ScanningLoader"
+        if self.test_runner is None:
+            self.test_runner = getattr(self.distribution, 'test_runner', None)
+
+    @NonDataProperty
+    def test_args(self):
+        return list(self._test_args())
+
+    def _test_args(self):
+        if not self.test_suite and sys.version_info >= (2, 7):
+            yield 'discover'
+        if self.verbose:
+            yield '--verbose'
+        if self.test_suite:
+            yield self.test_suite
+
+    def with_project_on_sys_path(self, func):
+        """
+        Backward compatibility for project_on_sys_path context.
+        """
+        with self.project_on_sys_path():
+            func()
+
+    @contextlib.contextmanager
+    def project_on_sys_path(self, include_dists=[]):
+        with_2to3 = six.PY3 and getattr(self.distribution, 'use_2to3', False)
+
+        if with_2to3:
+            # If we run 2to3 we can not do this inplace:
+
+            # Ensure metadata is up-to-date
+            self.reinitialize_command('build_py', inplace=0)
+            self.run_command('build_py')
+            bpy_cmd = self.get_finalized_command("build_py")
+            build_path = normalize_path(bpy_cmd.build_lib)
+
+            # Build extensions
+            self.reinitialize_command('egg_info', egg_base=build_path)
+            self.run_command('egg_info')
+
+            self.reinitialize_command('build_ext', inplace=0)
+            self.run_command('build_ext')
+        else:
+            # Without 2to3 inplace works fine:
+            self.run_command('egg_info')
+
+            # Build extensions in-place
+            self.reinitialize_command('build_ext', inplace=1)
+            self.run_command('build_ext')
+
+        ei_cmd = self.get_finalized_command("egg_info")
+
+        old_path = sys.path[:]
+        old_modules = sys.modules.copy()
+
+        try:
+            project_path = normalize_path(ei_cmd.egg_base)
+            sys.path.insert(0, project_path)
+            working_set.__init__()
+            add_activation_listener(lambda dist: dist.activate())
+            require('%s==%s' % (ei_cmd.egg_name, ei_cmd.egg_version))
+            with self.paths_on_pythonpath([project_path]):
+                yield
+        finally:
+            sys.path[:] = old_path
+            sys.modules.clear()
+            sys.modules.update(old_modules)
+            working_set.__init__()
+
+    @staticmethod
+    @contextlib.contextmanager
+    def paths_on_pythonpath(paths):
+        """
+        Add the indicated paths to the head of the PYTHONPATH environment
+        variable so that subprocesses will also see the packages at
+        these paths.
+
+        Do this in a context that restores the value on exit.
+        """
+        nothing = object()
+        orig_pythonpath = os.environ.get('PYTHONPATH', nothing)
+        current_pythonpath = os.environ.get('PYTHONPATH', '')
+        try:
+            prefix = os.pathsep.join(paths)
+            to_join = filter(None, [prefix, current_pythonpath])
+            new_path = os.pathsep.join(to_join)
+            if new_path:
+                os.environ['PYTHONPATH'] = new_path
+            yield
+        finally:
+            if orig_pythonpath is nothing:
+                os.environ.pop('PYTHONPATH', None)
+            else:
+                os.environ['PYTHONPATH'] = orig_pythonpath
+
+    @staticmethod
+    def install_dists(dist):
+        """
+        Install the requirements indicated by self.distribution and
+        return an iterable of the dists that were built.
+        """
+        ir_d = dist.fetch_build_eggs(dist.install_requires)
+        tr_d = dist.fetch_build_eggs(dist.tests_require or [])
+        er_d = dist.fetch_build_eggs(
+            v for k, v in dist.extras_require.items()
+            if k.startswith(':') and evaluate_marker(k[1:])
+        )
+        return itertools.chain(ir_d, tr_d, er_d)
+
+    def run(self):
+        installed_dists = self.install_dists(self.distribution)
+
+        cmd = ' '.join(self._argv)
+        if self.dry_run:
+            self.announce('skipping "%s" (dry run)' % cmd)
+            return
+
+        self.announce('running "%s"' % cmd)
+
+        paths = map(operator.attrgetter('location'), installed_dists)
+        with self.paths_on_pythonpath(paths):
+            with self.project_on_sys_path():
+                self.run_tests()
+
+    def run_tests(self):
+        # Purge modules under test from sys.modules. The test loader will
+        # re-import them from the build location. Required when 2to3 is used
+        # with namespace packages.
+        if six.PY3 and getattr(self.distribution, 'use_2to3', False):
+            module = self.test_suite.split('.')[0]
+            if module in _namespace_packages:
+                del_modules = []
+                if module in sys.modules:
+                    del_modules.append(module)
+                module += '.'
+                for name in sys.modules:
+                    if name.startswith(module):
+                        del_modules.append(name)
+                list(map(sys.modules.__delitem__, del_modules))
+
+        test = unittest.main(
+            None, None, self._argv,
+            testLoader=self._resolve_as_ep(self.test_loader),
+            testRunner=self._resolve_as_ep(self.test_runner),
+            exit=False,
+        )
+        if not test.result.wasSuccessful():
+            msg = 'Test failed: %s' % test.result
+            self.announce(msg, log.ERROR)
+            raise DistutilsError(msg)
+
+    @property
+    def _argv(self):
+        return ['unittest'] + self.test_args
+
+    @staticmethod
+    def _resolve_as_ep(val):
+        """
+        Load the indicated attribute value, called, as if it were
+        specified as an entry point.
+        """
+        if val is None:
+            return
+        parsed = EntryPoint.parse("x=" + val)
+        return parsed.resolve()()
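+
+
+# (Editor's illustrative note: _resolve_as_ep("pkg.mod:obj") imports pkg.mod,
+# resolves obj and calls it -- which is how string values for --test-loader
+# and --test-runner, e.g. "setuptools.command.test:ScanningLoader", are
+# turned into loader/runner instances above.)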
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/upload.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/upload.py
new file mode 100644
index 0000000000000000000000000000000000000000..a44173a99abacdc75559bdbe8a1c09b5f192091b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/upload.py
@@ -0,0 +1,42 @@
+import getpass
+from distutils.command import upload as orig
+
+
+class upload(orig.upload):
+    """
+    Override default upload behavior to obtain password
+    in a variety of different ways.
+    """
+
+    def finalize_options(self):
+        orig.upload.finalize_options(self)
+        self.username = (
+            self.username or
+            getpass.getuser()
+        )
+        # Attempt to obtain password. Short circuit evaluation at the first
+        # sign of success.
+        self.password = (
+            self.password or
+            self._load_password_from_keyring() or
+            self._prompt_for_password()
+        )
+
+    def _load_password_from_keyring(self):
+        """
+        Attempt to load password from keyring. Suppress Exceptions.
+        """
+        try:
+            keyring = __import__('keyring')
+            return keyring.get_password(self.repository, self.username)
+        except Exception:
+            pass
+
+    def _prompt_for_password(self):
+        """
+        Prompt for a password on the tty. Suppress Exceptions.
+        """
+        try:
+            return getpass.getpass()
+        except (Exception, KeyboardInterrupt):
+            pass
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/upload_docs.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/upload_docs.py
new file mode 100644
index 0000000000000000000000000000000000000000..07aa564af451ce41d818d72f8ee93cb46887cecf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/command/upload_docs.py
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+"""upload_docs
+
+Implements a Distutils 'upload_docs' subcommand (upload documentation to
+PyPI's pythonhosted.org).
+"""
+
+from base64 import standard_b64encode
+from distutils import log
+from distutils.errors import DistutilsOptionError
+import os
+import socket
+import zipfile
+import tempfile
+import shutil
+import itertools
+import functools
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import http_client, urllib
+
+from pkg_resources import iter_entry_points
+from .upload import upload
+
+
+def _encode(s):
+    errors = 'surrogateescape' if six.PY3 else 'strict'
+    return s.encode('utf-8', errors)
+
+
+class upload_docs(upload):
+    # override the default repository as upload_docs isn't
+    # supported by Warehouse (and won't be).
+    DEFAULT_REPOSITORY = 'https://pypi.python.org/pypi/'
+
+    description = 'Upload documentation to PyPI'
+
+    user_options = [
+        ('repository=', 'r',
+         "url of repository [default: %s]" % upload.DEFAULT_REPOSITORY),
+        ('show-response', None,
+         'display full response text from server'),
+        ('upload-dir=', None, 'directory to upload'),
+    ]
+    boolean_options = upload.boolean_options
+
+    def has_sphinx(self):
+        if self.upload_dir is None:
+            for ep in iter_entry_points('distutils.commands', 'build_sphinx'):
+                return True
+
+    sub_commands = [('build_sphinx', has_sphinx)]
+
+    def initialize_options(self):
+        upload.initialize_options(self)
+        self.upload_dir = None
+        self.target_dir = None
+
+    def finalize_options(self):
+        upload.finalize_options(self)
+        if self.upload_dir is None:
+            if self.has_sphinx():
+                build_sphinx = self.get_finalized_command('build_sphinx')
+                self.target_dir = build_sphinx.builder_target_dir
+            else:
+                build = self.get_finalized_command('build')
+                self.target_dir = os.path.join(build.build_base, 'docs')
+        else:
+            self.ensure_dirname('upload_dir')
+            self.target_dir = self.upload_dir
+        if 'pypi.python.org' in self.repository:
+            log.warn("Upload_docs command is deprecated. Use RTD instead.")
+        self.announce('Using upload directory %s' % self.target_dir)
+
+    def create_zipfile(self, filename):
+        zip_file = zipfile.ZipFile(filename, "w")
+        try:
+            self.mkpath(self.target_dir)  # just in case
+            for root, dirs, files in os.walk(self.target_dir):
+                if root == self.target_dir and not files:
+                    tmpl = "no files found in upload directory '%s'"
+                    raise DistutilsOptionError(tmpl % self.target_dir)
+                for name in files:
+                    full = os.path.join(root, name)
+                    relative = root[len(self.target_dir):].lstrip(os.path.sep)
+                    dest = os.path.join(relative, name)
+                    zip_file.write(full, dest)
+        finally:
+            zip_file.close()
+
+    def run(self):
+        # Run sub commands
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
+
+        tmp_dir = tempfile.mkdtemp()
+        name = self.distribution.metadata.get_name()
+        zip_file = os.path.join(tmp_dir, "%s.zip" % name)
+        try:
+            self.create_zipfile(zip_file)
+            self.upload_file(zip_file)
+        finally:
+            shutil.rmtree(tmp_dir)
+
+    @staticmethod
+    def _build_part(item, sep_boundary):
+        key, values = item
+        title = '\nContent-Disposition: form-data; name="%s"' % key
+        # handle multiple entries for the same name
+        if not isinstance(values, list):
+            values = [values]
+        for value in values:
+            if isinstance(value, tuple):
+                title += '; filename="%s"' % value[0]
+                value = value[1]
+            else:
+                value = _encode(value)
+            yield sep_boundary
+            yield _encode(title)
+            yield b"\n\n"
+            yield value
+            if value and value[-1:] == b'\r':
+                yield b'\n'  # write an extra newline (lurve Macs)
+
+    @classmethod
+    def _build_multipart(cls, data):
+        """
+        Build up the MIME payload for the POST data
+        """
+        boundary = b'--------------GHSKFJDLGDS7543FJKLFHRE75642756743254'
+        sep_boundary = b'\n--' + boundary
+        end_boundary = sep_boundary + b'--'
+        end_items = end_boundary, b"\n",
+        builder = functools.partial(
+            cls._build_part,
+            sep_boundary=sep_boundary,
+        )
+        part_groups = map(builder, data.items())
+        parts = itertools.chain.from_iterable(part_groups)
+        body_items = itertools.chain(parts, end_items)
+        content_type = 'multipart/form-data; boundary=%s' % boundary.decode('ascii')
+        return b''.join(body_items), content_type
+
+    def upload_file(self, filename):
+        with open(filename, 'rb') as f:
+            content = f.read()
+        meta = self.distribution.metadata
+        data = {
+            ':action': 'doc_upload',
+            'name': meta.get_name(),
+            'content': (os.path.basename(filename), content),
+        }
+        # set up the authentication
+        credentials = _encode(self.username + ':' + self.password)
+        credentials = standard_b64encode(credentials)
+        if six.PY3:
+            credentials = credentials.decode('ascii')
+        auth = "Basic " + credentials
+
+        body, ct = self._build_multipart(data)
+
+        msg = "Submitting documentation to %s" % (self.repository)
+        self.announce(msg, log.INFO)
+
+        # build the Request
+        # We can't use urllib2 since we need to send the Basic
+        # auth right with the first request
+        schema, netloc, url, params, query, fragments = \
+            urllib.parse.urlparse(self.repository)
+        assert not params and not query and not fragments
+        if schema == 'http':
+            conn = http_client.HTTPConnection(netloc)
+        elif schema == 'https':
+            conn = http_client.HTTPSConnection(netloc)
+        else:
+            raise AssertionError("unsupported schema " + schema)
+
+        data = ''
+        try:
+            conn.connect()
+            conn.putrequest("POST", url)
+            content_type = ct
+            conn.putheader('Content-type', content_type)
+            conn.putheader('Content-length', str(len(body)))
+            conn.putheader('Authorization', auth)
+            conn.endheaders()
+            conn.send(body)
+        except socket.error as e:
+            self.announce(str(e), log.ERROR)
+            return
+
+        r = conn.getresponse()
+        if r.status == 200:
+            msg = 'Server response (%s): %s' % (r.status, r.reason)
+            self.announce(msg, log.INFO)
+        elif r.status == 301:
+            location = r.getheader('Location')
+            if location is None:
+                location = 'https://pythonhosted.org/%s/' % meta.get_name()
+            msg = 'Upload successful. Visit %s' % location
+            self.announce(msg, log.INFO)
+        else:
+            msg = 'Upload failed (%s): %s' % (r.status, r.reason)
+            self.announce(msg, log.ERROR)
+        if self.show_response:
+            print('-' * 75, r.read(), '-' * 75)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/config.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..8eddcae88acc86d274eb445dff6b8771ba6ecf86
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/config.py
@@ -0,0 +1,556 @@
+from __future__ import absolute_import, unicode_literals
+import io
+import os
+import sys
+from collections import defaultdict
+from functools import partial
+from importlib import import_module
+
+from distutils.errors import DistutilsOptionError, DistutilsFileError
+from setuptools.extern.six import string_types
+
+
+def read_configuration(
+        filepath, find_others=False, ignore_option_errors=False):
+    """Read given configuration file and returns options from it as a dict.
+
+    :param str|unicode filepath: Path to configuration file
+        to get options from.
+
+    :param bool find_others: Whether to search for other configuration files
+        which could be in various places.
+
+    :param bool ignore_option_errors: Whether to silently ignore
+        options, values of which could not be resolved (e.g. due to exceptions
+        in directives such as file:, attr:, etc.).
+        If False exceptions are propagated as expected.
+
+    :rtype: dict
+    """
+    from setuptools.dist import Distribution, _Distribution
+
+    filepath = os.path.abspath(filepath)
+
+    if not os.path.isfile(filepath):
+        raise DistutilsFileError(
+            'Configuration file %s does not exist.' % filepath)
+
+    current_directory = os.getcwd()
+    os.chdir(os.path.dirname(filepath))
+
+    try:
+        dist = Distribution()
+
+        filenames = dist.find_config_files() if find_others else []
+        if filepath not in filenames:
+            filenames.append(filepath)
+
+        _Distribution.parse_config_files(dist, filenames=filenames)
+
+        handlers = parse_configuration(
+            dist, dist.command_options,
+            ignore_option_errors=ignore_option_errors)
+
+    finally:
+        os.chdir(current_directory)
+
+    return configuration_to_dict(handlers)
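+
+# Illustrative usage (editor's sketch, assuming a setup.cfg in the current
+# directory):
+#
+#     conf = read_configuration('setup.cfg')
+#     version = conf['metadata'].get('version')
+#
+# The result is a plain dict keyed by section prefix ('metadata', 'options'),
+# as assembled by configuration_to_dict() below.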
+
+
+def configuration_to_dict(handlers):
+    """Returns configuration data gathered by given handlers as a dict.
+
+    :param list[ConfigHandler] handlers: Handlers list,
+        usually from parse_configuration()
+
+    :rtype: dict
+    """
+    config_dict = defaultdict(dict)
+
+    for handler in handlers:
+
+        obj_alias = handler.section_prefix
+        target_obj = handler.target_obj
+
+        for option in handler.set_options:
+            getter = getattr(target_obj, 'get_%s' % option, None)
+
+            if getter is None:
+                value = getattr(target_obj, option)
+
+            else:
+                value = getter()
+
+            config_dict[obj_alias][option] = value
+
+    return config_dict
+
+
+def parse_configuration(
+        distribution, command_options, ignore_option_errors=False):
+    """Performs additional parsing of configuration options
+    for a distribution.
+
+    Returns a tuple of the option handlers used.
+
+    :param Distribution distribution:
+    :param dict command_options:
+    :param bool ignore_option_errors: Whether to silently ignore
+        options whose values could not be resolved (e.g. due to exceptions
+        in directives such as file:, attr:, etc.).
+        If False, exceptions are propagated as expected.
+    :rtype: tuple
+    """
+    meta = ConfigMetadataHandler(
+        distribution.metadata, command_options, ignore_option_errors)
+    meta.parse()
+
+    options = ConfigOptionsHandler(
+        distribution, command_options, ignore_option_errors)
+    options.parse()
+
+    return meta, options
+
+
+class ConfigHandler(object):
+    """Handles metadata supplied in configuration files."""
+
+    section_prefix = None
+    """Prefix for config sections handled by this handler.
+    Must be provided by subclasses.
+
+    """
+
+    aliases = {}
+    """Options aliases.
+    For compatibility with various packages. E.g.: d2to1 and pbr.
+    Note: `-` in keys is replaced with `_` by config parser.
+
+    """
+
+    def __init__(self, target_obj, options, ignore_option_errors=False):
+        sections = {}
+
+        section_prefix = self.section_prefix
+        for section_name, section_options in options.items():
+            if not section_name.startswith(section_prefix):
+                continue
+
+            section_name = section_name.replace(section_prefix, '').strip('.')
+            sections[section_name] = section_options
+
+        self.ignore_option_errors = ignore_option_errors
+        self.target_obj = target_obj
+        self.sections = sections
+        self.set_options = []
+
+    @property
+    def parsers(self):
+        """Metadata item name to parser function mapping."""
+        raise NotImplementedError(
+            '%s must provide .parsers property' % self.__class__.__name__)
+
+    def __setitem__(self, option_name, value):
+        unknown = tuple()
+        target_obj = self.target_obj
+
+        # Translate alias into real name.
+        option_name = self.aliases.get(option_name, option_name)
+
+        current_value = getattr(target_obj, option_name, unknown)
+
+        if current_value is unknown:
+            raise KeyError(option_name)
+
+        if current_value:
+            # Already set. Skipping.
+            return
+
+        skip_option = False
+        parser = self.parsers.get(option_name)
+        if parser:
+            try:
+                value = parser(value)
+
+            except Exception:
+                skip_option = True
+                if not self.ignore_option_errors:
+                    raise
+
+        if skip_option:
+            return
+
+        setter = getattr(target_obj, 'set_%s' % option_name, None)
+        if setter is None:
+            setattr(target_obj, option_name, value)
+        else:
+            setter(value)
+
+        self.set_options.append(option_name)
+
+    @classmethod
+    def _parse_list(cls, value, separator=','):
+        """Represents value as a list.
+
+        Value is split either by separator (defaults to comma) or by lines.
+
+        :param value:
+        :param separator: List items separator character.
+        :rtype: list
+        """
+        if isinstance(value, list):  # _get_parser_compound case
+            return value
+
+        if '\n' in value:
+            value = value.splitlines()
+        else:
+            value = value.split(separator)
+
+        return [chunk.strip() for chunk in value if chunk.strip()]
+
+    @classmethod
+    def _parse_dict(cls, value):
+        """Represents value as a dict.
+
+        :param value:
+        :rtype: dict
+        """
+        separator = '='
+        result = {}
+        for line in cls._parse_list(value):
+            key, sep, val = line.partition(separator)
+            if sep != separator:
+                raise DistutilsOptionError(
+                    'Unable to parse option value to dict: %s' % value)
+            result[key.strip()] = val.strip()
+
+        return result
+
+    @classmethod
+    def _parse_bool(cls, value):
+        """Represents value as boolean.
+
+        :param value:
+        :rtype: bool
+        """
+        value = value.lower()
+        return value in ('1', 'true', 'yes')
+
+    @classmethod
+    def _parse_file(cls, value):
+        """Represents value as a string, allowing including text
+        from nearest files using `file:` directive.
+
+        Directive is sandboxed and won't reach anything outside
+        directory with setup.py.
+
+        Examples:
+            file: LICENSE
+            file: README.rst, CHANGELOG.md, src/file.txt
+
+        :param str value:
+        :rtype: str
+        """
+        include_directive = 'file:'
+
+        if not isinstance(value, string_types):
+            return value
+
+        if not value.startswith(include_directive):
+            return value
+
+        spec = value[len(include_directive):]
+        filepaths = (os.path.abspath(path.strip()) for path in spec.split(','))
+        return '\n'.join(
+            cls._read_file(path)
+            for path in filepaths
+            if (cls._assert_local(path) or True)
+            and os.path.isfile(path)
+        )
+
+    @staticmethod
+    def _assert_local(filepath):
+        if not filepath.startswith(os.getcwd()):
+            raise DistutilsOptionError(
+                '`file:` directive can not access %s' % filepath)
+
+    @staticmethod
+    def _read_file(filepath):
+        with io.open(filepath, encoding='utf-8') as f:
+            return f.read()
+
+    @classmethod
+    def _parse_attr(cls, value):
+        """Represents value as a module attribute.
+
+        Examples:
+            attr: package.attr
+            attr: package.module.attr
+
+        :param str value:
+        :rtype: str
+        """
+        attr_directive = 'attr:'
+        if not value.startswith(attr_directive):
+            return value
+
+        attrs_path = value.replace(attr_directive, '').strip().split('.')
+        attr_name = attrs_path.pop()
+
+        module_name = '.'.join(attrs_path)
+        module_name = module_name or '__init__'
+
+        sys.path.insert(0, os.getcwd())
+        try:
+            module = import_module(module_name)
+            value = getattr(module, attr_name)
+
+        finally:
+            sys.path = sys.path[1:]
+
+        return value
+
+    @classmethod
+    def _get_parser_compound(cls, *parse_methods):
+        """Returns parser function to represents value as a list.
+
+        Parses a value applying given methods one after another.
+
+        :param parse_methods:
+        :rtype: callable
+        """
+        def parse(value):
+            parsed = value
+
+            for method in parse_methods:
+                parsed = method(parsed)
+
+            return parsed
+
+        return parse
+
+    @classmethod
+    def _parse_section_to_dict(cls, section_options, values_parser=None):
+        """Parses section options into a dictionary.
+
+        Optionally applies a given parser to values.
+
+        :param dict section_options:
+        :param callable values_parser:
+        :rtype: dict
+        """
+        value = {}
+        values_parser = values_parser or (lambda val: val)
+        for key, (_, val) in section_options.items():
+            value[key] = values_parser(val)
+        return value
+
+    def parse_section(self, section_options):
+        """Parses configuration file section.
+
+        :param dict section_options:
+        """
+        for (name, (_, value)) in section_options.items():
+            try:
+                self[name] = value
+
+            except KeyError:
+                pass  # Keep silent, as a new option may appear at any time.
+
+    def parse(self):
+        """Parses configuration file items from one
+        or more related sections.
+
+        """
+        for section_name, section_options in self.sections.items():
+
+            method_postfix = ''
+            if section_name:  # [section.option] variant
+                method_postfix = '_%s' % section_name
+
+            section_parser_method = getattr(
+                self,
+                # Dots in section names are translated into double underscores.
+                ('parse_section%s' % method_postfix).replace('.', '__'),
+                None)
+
+            if section_parser_method is None:
+                raise DistutilsOptionError(
+                    'Unsupported distribution option section: [%s.%s]' % (
+                        self.section_prefix, section_name))
+
+            section_parser_method(section_options)
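+
+
+# A quick sketch of how the generic value parsers above treat the raw strings
+# read from setup.cfg (the inputs are purely illustrative):
+def _example_value_parsers():
+    assert ConfigHandler._parse_list('one, two, three') == ['one', 'two', 'three']
+    assert ConfigHandler._parse_list('one\ntwo\nthree') == ['one', 'two', 'three']
+    assert ConfigHandler._parse_dict('a = 1\nb = 2') == {'a': '1', 'b': '2'}
+    assert ConfigHandler._parse_bool('Yes') is True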
+
+
+class ConfigMetadataHandler(ConfigHandler):
+
+    section_prefix = 'metadata'
+
+    aliases = {
+        'home_page': 'url',
+        'summary': 'description',
+        'classifier': 'classifiers',
+        'platform': 'platforms',
+    }
+
+    strict_mode = False
+    """We need to keep it loose, to be partially compatible with
+    the `pbr` and `d2to1` packages, which also use the `metadata` section.
+
+    """
+
+    @property
+    def parsers(self):
+        """Metadata item name to parser function mapping."""
+        parse_list = self._parse_list
+        parse_file = self._parse_file
+        parse_dict = self._parse_dict
+
+        return {
+            'platforms': parse_list,
+            'keywords': parse_list,
+            'provides': parse_list,
+            'requires': parse_list,
+            'obsoletes': parse_list,
+            'classifiers': self._get_parser_compound(parse_file, parse_list),
+            'license': parse_file,
+            'description': parse_file,
+            'long_description': parse_file,
+            'version': self._parse_version,
+            'project_urls': parse_dict,
+        }
+
+    def _parse_version(self, value):
+        """Parses `version` option value.
+
+        :param value:
+        :rtype: str
+
+        """
+        version = self._parse_attr(value)
+
+        if callable(version):
+            version = version()
+
+        if not isinstance(version, string_types):
+            if hasattr(version, '__iter__'):
+                version = '.'.join(map(str, version))
+            else:
+                version = '%s' % version
+
+        return version
+
+
+class ConfigOptionsHandler(ConfigHandler):
+
+    section_prefix = 'options'
+
+    @property
+    def parsers(self):
+        """Metadata item name to parser function mapping."""
+        parse_list = self._parse_list
+        parse_list_semicolon = partial(self._parse_list, separator=';')
+        parse_bool = self._parse_bool
+        parse_dict = self._parse_dict
+
+        return {
+            'zip_safe': parse_bool,
+            'use_2to3': parse_bool,
+            'include_package_data': parse_bool,
+            'package_dir': parse_dict,
+            'use_2to3_fixers': parse_list,
+            'use_2to3_exclude_fixers': parse_list,
+            'convert_2to3_doctests': parse_list,
+            'scripts': parse_list,
+            'eager_resources': parse_list,
+            'dependency_links': parse_list,
+            'namespace_packages': parse_list,
+            'install_requires': parse_list_semicolon,
+            'setup_requires': parse_list_semicolon,
+            'tests_require': parse_list_semicolon,
+            'packages': self._parse_packages,
+            'entry_points': self._parse_file,
+            'py_modules': parse_list,
+        }
+
+    def _parse_packages(self, value):
+        """Parses `packages` option value.
+
+        :param value:
+        :rtype: list
+        """
+        find_directive = 'find:'
+
+        if not value.startswith(find_directive):
+            return self._parse_list(value)
+
+        # Read function arguments from a dedicated section.
+        find_kwargs = self.parse_section_packages__find(
+            self.sections.get('packages.find', {}))
+
+        from setuptools import find_packages
+
+        return find_packages(**find_kwargs)
+
+    def parse_section_packages__find(self, section_options):
+        """Parses `packages.find` configuration file section.
+
+        To be used in conjunction with _parse_packages().
+
+        :param dict section_options:
+        """
+        section_data = self._parse_section_to_dict(
+            section_options, self._parse_list)
+
+        valid_keys = ['where', 'include', 'exclude']
+
+        find_kwargs = dict(
+            [(k, v) for k, v in section_data.items() if k in valid_keys and v])
+
+        where = find_kwargs.get('where')
+        if where is not None:
+            find_kwargs['where'] = where[0]  # cast list to single val
+
+        return find_kwargs
+
+    def parse_section_entry_points(self, section_options):
+        """Parses `entry_points` configuration file section.
+
+        :param dict section_options:
+        """
+        parsed = self._parse_section_to_dict(section_options, self._parse_list)
+        self['entry_points'] = parsed
+
+    def _parse_package_data(self, section_options):
+        parsed = self._parse_section_to_dict(section_options, self._parse_list)
+
+        root = parsed.get('*')
+        if root:
+            parsed[''] = root
+            del parsed['*']
+
+        return parsed
+
+    def parse_section_package_data(self, section_options):
+        """Parses `package_data` configuration file section.
+
+        :param dict section_options:
+        """
+        self['package_data'] = self._parse_package_data(section_options)
+
+    def parse_section_exclude_package_data(self, section_options):
+        """Parses `exclude_package_data` configuration file section.
+
+        :param dict section_options:
+        """
+        self['exclude_package_data'] = self._parse_package_data(
+            section_options)
+
+    def parse_section_extras_require(self, section_options):
+        """Parses `extras_require` configuration file section.
+
+        :param dict section_options:
+        """
+        parse_list = partial(self._parse_list, separator=';')
+        self['extras_require'] = self._parse_section_to_dict(
+            section_options, parse_list)
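+
+
+# End-to-end sketch: write a throwaway setup.cfg into a temporary directory
+# and feed it through read_configuration(); the option names mirror the
+# parsers registered above and the file contents are purely illustrative.
+def _example_roundtrip():
+    import tempfile
+    import textwrap
+    cfg = textwrap.dedent("""\
+        [metadata]
+        name = demo
+        keywords = config, example
+
+        [options]
+        zip_safe = False
+        install_requires =
+            six
+            packaging
+        """)
+    with tempfile.TemporaryDirectory() as tmp:
+        path = os.path.join(tmp, 'setup.cfg')
+        with io.open(path, 'w', encoding='utf-8') as f:
+            f.write(cfg)
+        conf = read_configuration(path)
+    # e.g. conf['metadata']['keywords'] == ['config', 'example']
+    # and conf['options']['install_requires'] == ['six', 'packaging']
+    return conf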
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/dep_util.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/dep_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..2931c13ec35aa60b742ac4c46ceabd4ed32a5511
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/dep_util.py
@@ -0,0 +1,23 @@
+from distutils.dep_util import newer_group
+
+# Yes, this was almost entirely copy-pasted from 'newer_pairwise()';
+# it is just another convenience function.
+def newer_pairwise_group(sources_groups, targets):
+    """Walk both arguments in parallel, testing if each source group is newer
+    than its corresponding target. Returns a pair of lists (sources_groups,
+    targets) in which each source group is newer than its target, according
+    to the semantics of 'newer_group()'.
+    """
+    if len(sources_groups) != len(targets):
+        raise ValueError("'sources_group' and 'targets' must be the same length")
+
+    # build a pair of lists (sources_groups, targets) where the sources are newer
+    n_sources = []
+    n_targets = []
+    for i in range(len(sources_groups)):
+        if newer_group(sources_groups[i], targets[i]):
+            n_sources.append(sources_groups[i])
+            n_targets.append(targets[i])
+
+    return n_sources, n_targets
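+
+
+# Sketch of intended use (the paths are hypothetical): given Cython-style
+# source groups and their generated C targets, keep only the pairs whose
+# target is out of date with respect to its sources.
+def _example_newer_pairwise_group():
+    groups = [['a.pyx', 'a.pxd'], ['b.pyx']]
+    targets = ['a.c', 'b.c']
+    return newer_pairwise_group(groups, targets)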
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/depends.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/depends.py
new file mode 100644
index 0000000000000000000000000000000000000000..45e7052d8d8561a946f782a67dc6f83b6ad6ab99
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/depends.py
@@ -0,0 +1,186 @@
+import sys
+import imp
+import marshal
+from distutils.version import StrictVersion
+from imp import PKG_DIRECTORY, PY_COMPILED, PY_SOURCE, PY_FROZEN
+
+from .py33compat import Bytecode
+
+
+__all__ = [
+    'Require', 'find_module', 'get_module_constant', 'extract_constant'
+]
+
+
+class Require:
+    """A prerequisite to building or installing a distribution"""
+
+    def __init__(self, name, requested_version, module, homepage='',
+            attribute=None, format=None):
+
+        if format is None and requested_version is not None:
+            format = StrictVersion
+
+        if format is not None:
+            requested_version = format(requested_version)
+            if attribute is None:
+                attribute = '__version__'
+
+        self.__dict__.update(locals())
+        del self.self
+
+    def full_name(self):
+        """Return full package/distribution name, w/version"""
+        if self.requested_version is not None:
+            return '%s-%s' % (self.name, self.requested_version)
+        return self.name
+
+    def version_ok(self, version):
+        """Is 'version' sufficiently up-to-date?"""
+        return self.attribute is None or self.format is None or \
+            str(version) != "unknown" and version >= self.requested_version
+
+    def get_version(self, paths=None, default="unknown"):
+        """Get version number of installed module, 'None', or 'default'
+
+        Search 'paths' for module.  If not found, return 'None'.  If found,
+        return the extracted version attribute, or 'default' if no version
+        attribute was specified, or the value cannot be determined without
+        importing the module.  The version is formatted according to the
+        requirement's version format (if any), unless it is 'None' or the
+        supplied 'default'.
+        """
+
+        if self.attribute is None:
+            try:
+                f, p, i = find_module(self.module, paths)
+                if f:
+                    f.close()
+                return default
+            except ImportError:
+                return None
+
+        v = get_module_constant(self.module, self.attribute, default, paths)
+
+        if v is not None and v is not default and self.format is not None:
+            return self.format(v)
+
+        return v
+
+    def is_present(self, paths=None):
+        """Return true if dependency is present on 'paths'"""
+        return self.get_version(paths) is not None
+
+    def is_current(self, paths=None):
+        """Return true if dependency is present and up-to-date on 'paths'"""
+        version = self.get_version(paths)
+        if version is None:
+            return False
+        return self.version_ok(version)
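+
+
+# Sketch: declare a versioned dependency on a module exposing ``__version__``
+# ('six' and the minimum version used here are only illustrative).
+def _example_require():
+    req = Require('six', '1.10.0', 'six')
+    return req.full_name(), req.is_present(), req.is_current()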
+
+
+def find_module(module, paths=None):
+    """Just like 'imp.find_module()', but with package support"""
+
+    parts = module.split('.')
+
+    while parts:
+        part = parts.pop(0)
+        f, path, (suffix, mode, kind) = info = imp.find_module(part, paths)
+
+        if kind == PKG_DIRECTORY:
+            parts = parts or ['__init__']
+            paths = [path]
+
+        elif parts:
+            raise ImportError("Can't find %r in %s" % (parts, module))
+
+    return info
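+
+
+# Sketch: resolve a dotted name the same way Require does internally; for a
+# plain source module the first element is an open file that must be closed.
+def _example_find_module():
+    f, path, description = find_module('json.tool')
+    if f:
+        f.close()
+    return path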
+
+
+def get_module_constant(module, symbol, default=-1, paths=None):
+    """Find 'module' by searching 'paths', and extract 'symbol'
+
+    Return 'None' if 'module' does not exist on 'paths', or it does not define
+    'symbol'.  If the module defines 'symbol' as a constant, return the
+    constant.  Otherwise, return 'default'."""
+
+    try:
+        f, path, (suffix, mode, kind) = find_module(module, paths)
+    except ImportError:
+        # Module doesn't exist
+        return None
+
+    try:
+        if kind == PY_COMPILED:
+            f.read(8)  # skip magic & date
+            code = marshal.load(f)
+        elif kind == PY_FROZEN:
+            code = imp.get_frozen_object(module)
+        elif kind == PY_SOURCE:
+            code = compile(f.read(), path, 'exec')
+        else:
+            # Not something we can parse; we'll have to import it.  :(
+            if module not in sys.modules:
+                imp.load_module(module, f, path, (suffix, mode, kind))
+            return getattr(sys.modules[module], symbol, None)
+
+    finally:
+        if f:
+            f.close()
+
+    return extract_constant(code, symbol, default)
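+
+
+# Sketch: read a constant out of a module found on the default search path
+# without binding the import here ('six' is illustrative; None if absent).
+def _example_get_module_constant():
+    return get_module_constant('six', '__version__')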
+
+
+def extract_constant(code, symbol, default=-1):
+    """Extract the constant value of 'symbol' from 'code'
+
+    If the name 'symbol' is bound to a constant value by the Python code
+    object 'code', return that value.  If 'symbol' is bound to an expression,
+    return 'default'.  Otherwise, return 'None'.
+
+    Return value is based on the first assignment to 'symbol'.  'symbol' must
+    be a global, or at least a non-"fast" local in the code block.  That is,
+    only 'STORE_NAME' and 'STORE_GLOBAL' opcodes are checked, and 'symbol'
+    must be present in 'code.co_names'.
+    """
+    if symbol not in code.co_names:
+        # name's not there, can't possibly be an assignment
+        return None
+
+    name_idx = list(code.co_names).index(symbol)
+
+    STORE_NAME = 90
+    STORE_GLOBAL = 97
+    LOAD_CONST = 100
+
+    const = default
+
+    for byte_code in Bytecode(code):
+        op = byte_code.opcode
+        arg = byte_code.arg
+
+        if op == LOAD_CONST:
+            const = code.co_consts[arg]
+        elif arg == name_idx and (op == STORE_NAME or op == STORE_GLOBAL):
+            return const
+        else:
+            const = default
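+
+
+# Sketch: pull the first constant bound to a name straight out of a code
+# object without executing it ('__version__' below is just an example).
+def _example_extract_constant():
+    code = compile("__version__ = '1.2.3'\n", '<example>', 'exec')
+    return extract_constant(code, '__version__', default=None)  # -> '1.2.3'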
+
+
+def _update_globals():
+    """
+    Patch the globals to remove the objects not available on some platforms.
+
+    XXX it'd be better to test assertions about bytecode instead.
+    """
+
+    if not sys.platform.startswith('java') and sys.platform != 'cli':
+        return
+    incompatible = 'extract_constant', 'get_module_constant'
+    for name in incompatible:
+        del globals()[name]
+        __all__.remove(name)
+
+
+_update_globals()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/dist.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/dist.py
new file mode 100644
index 0000000000000000000000000000000000000000..5dc696ff486598e809cec647339fdfdbfe92498c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/dist.py
@@ -0,0 +1,1070 @@
+# -*- coding: utf-8 -*-
+__all__ = ['Distribution']
+
+import re
+import os
+import warnings
+import numbers
+import distutils.log
+import distutils.core
+import distutils.cmd
+import distutils.dist
+import itertools
+from collections import defaultdict
+from distutils.errors import (
+    DistutilsOptionError, DistutilsPlatformError, DistutilsSetupError,
+)
+from distutils.util import rfc822_escape
+from distutils.version import StrictVersion
+
+from setuptools.extern import six
+from setuptools.extern import packaging
+from setuptools.extern.six.moves import map, filter, filterfalse
+
+from setuptools.depends import Require
+from setuptools import windows_support
+from setuptools.monkey import get_unpatched
+from setuptools.config import parse_configuration
+import pkg_resources
+from .py36compat import Distribution_parse_config_files
+
+__import__('setuptools.extern.packaging.specifiers')
+__import__('setuptools.extern.packaging.version')
+
+
+def _get_unpatched(cls):
+    warnings.warn("Do not call this function", DeprecationWarning)
+    return get_unpatched(cls)
+
+
+def get_metadata_version(dist_md):
+    if dist_md.long_description_content_type or dist_md.provides_extras:
+        return StrictVersion('2.1')
+    elif (dist_md.maintainer is not None or
+          dist_md.maintainer_email is not None or
+          getattr(dist_md, 'python_requires', None) is not None):
+        return StrictVersion('1.2')
+    elif (dist_md.provides or dist_md.requires or dist_md.obsoletes or
+            dist_md.classifiers or dist_md.download_url):
+        return StrictVersion('1.1')
+
+    return StrictVersion('1.0')
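+
+
+# Sketch: the reported metadata version escalates as richer fields are used;
+# a bare Distribution reports 1.0 and moves to 1.1 once classifiers are set.
+def _example_get_metadata_version():
+    dist = Distribution()
+    plain = get_metadata_version(dist.metadata)
+    dist.metadata.classifiers = ['Programming Language :: Python']
+    richer = get_metadata_version(dist.metadata)
+    return str(plain), str(richer)  # ('1.0', '1.1')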
+
+
+# Based on Python 3.5 version
+def write_pkg_file(self, file):
+    """Write the PKG-INFO format data to a file object.
+    """
+    version = get_metadata_version(self)
+
+    file.write('Metadata-Version: %s\n' % version)
+    file.write('Name: %s\n' % self.get_name())
+    file.write('Version: %s\n' % self.get_version())
+    file.write('Summary: %s\n' % self.get_description())
+    file.write('Home-page: %s\n' % self.get_url())
+
+    if version < StrictVersion('1.2'):
+        file.write('Author: %s\n' % self.get_contact())
+        file.write('Author-email: %s\n' % self.get_contact_email())
+    else:
+        optional_fields = (
+            ('Author', 'author'),
+            ('Author-email', 'author_email'),
+            ('Maintainer', 'maintainer'),
+            ('Maintainer-email', 'maintainer_email'),
+        )
+
+        for field, attr in optional_fields:
+            attr_val = getattr(self, attr)
+            if six.PY2:
+                attr_val = self._encode_field(attr_val)
+
+            if attr_val is not None:
+                file.write('%s: %s\n' % (field, attr_val))
+
+    file.write('License: %s\n' % self.get_license())
+    if self.download_url:
+        file.write('Download-URL: %s\n' % self.download_url)
+    for project_url in self.project_urls.items():
+        file.write('Project-URL: %s, %s\n' % project_url)
+
+    long_desc = rfc822_escape(self.get_long_description())
+    file.write('Description: %s\n' % long_desc)
+
+    keywords = ','.join(self.get_keywords())
+    if keywords:
+        file.write('Keywords: %s\n' % keywords)
+
+    if version >= StrictVersion('1.2'):
+        for platform in self.get_platforms():
+            file.write('Platform: %s\n' % platform)
+    else:
+        self._write_list(file, 'Platform', self.get_platforms())
+
+    self._write_list(file, 'Classifier', self.get_classifiers())
+
+    # PEP 314
+    self._write_list(file, 'Requires', self.get_requires())
+    self._write_list(file, 'Provides', self.get_provides())
+    self._write_list(file, 'Obsoletes', self.get_obsoletes())
+
+    # Setuptools specific for PEP 345
+    if hasattr(self, 'python_requires'):
+        file.write('Requires-Python: %s\n' % self.python_requires)
+
+    # PEP 566
+    if self.long_description_content_type:
+        file.write(
+            'Description-Content-Type: %s\n' %
+            self.long_description_content_type
+        )
+    if self.provides_extras:
+        for extra in sorted(self.provides_extras):
+            file.write('Provides-Extra: %s\n' % extra)
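+
+
+# Sketch: render the PKG-INFO block for a bare distribution into a string
+# buffer (the name and version below are purely illustrative).
+def _example_write_pkg_file():
+    import io
+    dist = Distribution({'name': 'demo', 'version': '0.1'})
+    buf = io.StringIO()
+    write_pkg_file(dist.metadata, buf)
+    return buf.getvalue().splitlines()[0]  # 'Metadata-Version: 1.0'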
+
+
+# from Python 3.4
+def write_pkg_info(self, base_dir):
+    """Write the PKG-INFO file into the release tree.
+    """
+    with open(os.path.join(base_dir, 'PKG-INFO'), 'w',
+              encoding='UTF-8') as pkg_info:
+        self.write_pkg_file(pkg_info)
+
+
+sequence = tuple, list
+
+
+def check_importable(dist, attr, value):
+    try:
+        ep = pkg_resources.EntryPoint.parse('x=' + value)
+        assert not ep.extras
+    except (TypeError, ValueError, AttributeError, AssertionError):
+        raise DistutilsSetupError(
+            "%r must be importable 'module:attrs' string (got %r)"
+            % (attr, value)
+        )
+
+
+def assert_string_list(dist, attr, value):
+    """Verify that value is a string list or None"""
+    try:
+        assert ''.join(value) != value
+    except (TypeError, ValueError, AttributeError, AssertionError):
+        raise DistutilsSetupError(
+            "%r must be a list of strings (got %r)" % (attr, value)
+        )
+
+
+def check_nsp(dist, attr, value):
+    """Verify that namespace packages are valid"""
+    ns_packages = value
+    assert_string_list(dist, attr, ns_packages)
+    for nsp in ns_packages:
+        if not dist.has_contents_for(nsp):
+            raise DistutilsSetupError(
+                "Distribution contains no modules or packages for " +
+                "namespace package %r" % nsp
+            )
+        parent, sep, child = nsp.rpartition('.')
+        if parent and parent not in ns_packages:
+            distutils.log.warn(
+                "WARNING: %r is declared as a package namespace, but %r"
+                " is not: please correct this in setup.py", nsp, parent
+            )
+
+
+def check_extras(dist, attr, value):
+    """Verify that extras_require mapping is valid"""
+    try:
+        list(itertools.starmap(_check_extra, value.items()))
+    except (TypeError, ValueError, AttributeError):
+        raise DistutilsSetupError(
+            "'extras_require' must be a dictionary whose values are "
+            "strings or lists of strings containing valid project/version "
+            "requirement specifiers."
+        )
+
+
+def _check_extra(extra, reqs):
+    name, sep, marker = extra.partition(':')
+    if marker and pkg_resources.invalid_marker(marker):
+        raise DistutilsSetupError("Invalid environment marker: " + marker)
+    list(pkg_resources.parse_requirements(reqs))
+
+
+def assert_bool(dist, attr, value):
+    """Verify that value is True, False, 0, or 1"""
+    if bool(value) != value:
+        tmpl = "{attr!r} must be a boolean value (got {value!r})"
+        raise DistutilsSetupError(tmpl.format(attr=attr, value=value))
+
+
+def check_requirements(dist, attr, value):
+    """Verify that install_requires is a valid requirements list"""
+    try:
+        list(pkg_resources.parse_requirements(value))
+        if isinstance(value, (dict, set)):
+            raise TypeError("Unordered types are not allowed")
+    except (TypeError, ValueError) as error:
+        tmpl = (
+            "{attr!r} must be a string or list of strings "
+            "containing valid project/version requirement specifiers; {error}"
+        )
+        raise DistutilsSetupError(tmpl.format(attr=attr, error=error))
+
+
+def check_specifier(dist, attr, value):
+    """Verify that value is a valid version specifier"""
+    try:
+        packaging.specifiers.SpecifierSet(value)
+    except packaging.specifiers.InvalidSpecifier as error:
+        tmpl = (
+            "{attr!r} must be a string "
+            "containing valid version specifiers; {error}"
+        )
+        raise DistutilsSetupError(tmpl.format(attr=attr, error=error))
+
+
+def check_entry_points(dist, attr, value):
+    """Verify that entry_points map is parseable"""
+    try:
+        pkg_resources.EntryPoint.parse_map(value)
+    except ValueError as e:
+        raise DistutilsSetupError(e)
+
+
+def check_test_suite(dist, attr, value):
+    if not isinstance(value, six.string_types):
+        raise DistutilsSetupError("test_suite must be a string")
+
+
+def check_package_data(dist, attr, value):
+    """Verify that value is a dictionary of package names to glob lists"""
+    if isinstance(value, dict):
+        for k, v in value.items():
+            if not isinstance(k, str):
+                break
+            try:
+                iter(v)
+            except TypeError:
+                break
+        else:
+            return
+    raise DistutilsSetupError(
+        attr + " must be a dictionary mapping package names to lists of "
+        "wildcard patterns"
+    )
+
+
+def check_packages(dist, attr, value):
+    for pkgname in value:
+        if not re.match(r'\w+(\.\w+)*', pkgname):
+            distutils.log.warn(
+                "WARNING: %r not a valid package name; please use only "
+                ".-separated package names in setup.py", pkgname
+            )
+
+
+_Distribution = get_unpatched(distutils.core.Distribution)
+
+
+class Distribution(Distribution_parse_config_files, _Distribution):
+    """Distribution with support for features, tests, and package data
+
+    This is an enhanced version of 'distutils.dist.Distribution' that
+    effectively adds the following new optional keyword arguments to 'setup()':
+
+     'install_requires' -- a string or sequence of strings specifying project
+        versions that the distribution requires when installed, in the format
+        used by 'pkg_resources.require()'.  They will be installed
+        automatically when the package is installed.  If you wish to use
+        packages that are not available in PyPI, or want to give your users an
+        alternate download location, you can add a 'find_links' option to the
+        '[easy_install]' section of your project's 'setup.cfg' file, and then
+        setuptools will scan the listed web pages for links that satisfy the
+        requirements.
+
+     'extras_require' -- a dictionary mapping names of optional "extras" to the
+        additional requirement(s) that using those extras incurs. For example,
+        this::
+
+            extras_require = dict(reST = ["docutils>=0.3", "reSTedit"])
+
+        indicates that the distribution can optionally provide an extra
+        capability called "reST", but it can only be used if docutils and
+        reSTedit are installed.  If the user installs your package using
+        EasyInstall and requests one of your extras, the corresponding
+        additional requirements will be installed if needed.
+
+     'features' **deprecated** -- a dictionary mapping option names to
+        'setuptools.Feature'
+        objects.  Features are a portion of the distribution that can be
+        included or excluded based on user options, inter-feature dependencies,
+        and availability on the current system.  Excluded features are omitted
+        from all setup commands, including source and binary distributions, so
+        you can create multiple distributions from the same source tree.
+        Feature names should be valid Python identifiers, except that they may
+        contain the '-' (minus) sign.  Features can be included or excluded
+        via the command line options '--with-X' and '--without-X', where 'X' is
+        the name of the feature.  Whether a feature is included by default, and
+        whether you are allowed to control this from the command line, is
+        determined by the Feature object.  See the 'Feature' class for more
+        information.
+
+     'test_suite' -- the name of a test suite to run for the 'test' command.
+        If the user runs 'python setup.py test', the package will be installed,
+        and the named test suite will be run.  The format is the same as
+        would be used on a 'unittest.py' command line.  That is, it is the
+        dotted name of an object to import and call to generate a test suite.
+
+     'package_data' -- a dictionary mapping package names to lists of filenames
+        or globs to use to find data files contained in the named packages.
+        If the dictionary has filenames or globs listed under '""' (the empty
+        string), those names will be searched for in every package, in addition
+        to any names for the specific package.  Data files found using these
+        names/globs will be installed along with the package, in the same
+        location as the package.  Note that globs are allowed to reference
+        the contents of non-package subdirectories, as long as you use '/' as
+        a path separator.  (Globs are automatically converted to
+        platform-specific paths at runtime.)
+
+    In addition to these new keywords, this class also has several new methods
+    for manipulating the distribution's contents.  For example, the 'include()'
+    and 'exclude()' methods can be thought of as in-place add and subtract
+    commands that add or remove packages, modules, extensions, and so on from
+    the distribution.  They are used by the feature subsystem to configure the
+    distribution for the included and excluded features.
+    """
+
+    _patched_dist = None
+
+    def patch_missing_pkg_info(self, attrs):
+        # Fake up a replacement for the data that would normally come from
+        # PKG-INFO, but which might not yet be built if this is a fresh
+        # checkout.
+        #
+        if not attrs or 'name' not in attrs or 'version' not in attrs:
+            return
+        key = pkg_resources.safe_name(str(attrs['name'])).lower()
+        dist = pkg_resources.working_set.by_key.get(key)
+        if dist is not None and not dist.has_metadata('PKG-INFO'):
+            dist._version = pkg_resources.safe_version(str(attrs['version']))
+            self._patched_dist = dist
+
+    def __init__(self, attrs=None):
+        have_package_data = hasattr(self, "package_data")
+        if not have_package_data:
+            self.package_data = {}
+        attrs = attrs or {}
+        if 'features' in attrs or 'require_features' in attrs:
+            Feature.warn_deprecated()
+        self.require_features = []
+        self.features = {}
+        self.dist_files = []
+        self.src_root = attrs.pop("src_root", None)
+        self.patch_missing_pkg_info(attrs)
+        self.project_urls = attrs.get('project_urls', {})
+        self.dependency_links = attrs.pop('dependency_links', [])
+        self.setup_requires = attrs.pop('setup_requires', [])
+        for ep in pkg_resources.iter_entry_points('distutils.setup_keywords'):
+            vars(self).setdefault(ep.name, None)
+        _Distribution.__init__(self, attrs)
+
+        # The project_urls attribute may not be supported in distutils, so
+        # prime it here from our value if not automatically set
+        self.metadata.project_urls = getattr(
+            self.metadata, 'project_urls', self.project_urls)
+        self.metadata.long_description_content_type = attrs.get(
+            'long_description_content_type'
+        )
+        self.metadata.provides_extras = getattr(
+            self.metadata, 'provides_extras', set()
+        )
+
+        if isinstance(self.metadata.version, numbers.Number):
+            # Some people apparently take "version number" too literally :)
+            self.metadata.version = str(self.metadata.version)
+
+        if self.metadata.version is not None:
+            try:
+                ver = packaging.version.Version(self.metadata.version)
+                normalized_version = str(ver)
+                if self.metadata.version != normalized_version:
+                    warnings.warn(
+                        "Normalizing '%s' to '%s'" % (
+                            self.metadata.version,
+                            normalized_version,
+                        )
+                    )
+                    self.metadata.version = normalized_version
+            except (packaging.version.InvalidVersion, TypeError):
+                warnings.warn(
+                    "The version specified (%r) is an invalid version, this "
+                    "may not work as expected with newer versions of "
+                    "setuptools, pip, and PyPI. Please see PEP 440 for more "
+                    "details." % self.metadata.version
+                )
+        self._finalize_requires()
+
+    def _finalize_requires(self):
+        """
+        Set `metadata.python_requires` and fix environment markers
+        in `install_requires` and `extras_require`.
+        """
+        if getattr(self, 'python_requires', None):
+            self.metadata.python_requires = self.python_requires
+
+        if getattr(self, 'extras_require', None):
+            for extra in self.extras_require.keys():
+                # Since this gets called multiple times at points where the
+                # keys have become 'converted' extras, ensure that we are only
+                # truly adding extras we haven't seen before here.
+                extra = extra.split(':')[0]
+                if extra:
+                    self.metadata.provides_extras.add(extra)
+
+        self._convert_extras_requirements()
+        self._move_install_requirements_markers()
+
+    def _convert_extras_requirements(self):
+        """
+        Convert requirements in `extras_require` of the form
+        `"extra": ["barbazquux; {marker}"]` to
+        `"extra:{marker}": ["barbazquux"]`.
+        """
+        spec_ext_reqs = getattr(self, 'extras_require', None) or {}
+        self._tmp_extras_require = defaultdict(list)
+        for section, v in spec_ext_reqs.items():
+            # Do not strip empty sections.
+            self._tmp_extras_require[section]
+            for r in pkg_resources.parse_requirements(v):
+                suffix = self._suffix_for(r)
+                self._tmp_extras_require[section + suffix].append(r)
+
+    @staticmethod
+    def _suffix_for(req):
+        """
+        For a requirement, return the 'extras_require' suffix for
+        that requirement.
+        """
+        return ':' + str(req.marker) if req.marker else ''
+
+    def _move_install_requirements_markers(self):
+        """
+        Move requirements in `install_requires` that are using environment
+        markers to `extras_require`.
+        """
+
+        # divide the install_requires into two sets, simple ones still
+        # handled by install_requires and more complex ones handled
+        # by extras_require.
+
+        def is_simple_req(req):
+            return not req.marker
+
+        spec_inst_reqs = getattr(self, 'install_requires', None) or ()
+        inst_reqs = list(pkg_resources.parse_requirements(spec_inst_reqs))
+        simple_reqs = filter(is_simple_req, inst_reqs)
+        complex_reqs = filterfalse(is_simple_req, inst_reqs)
+        self.install_requires = list(map(str, simple_reqs))
+
+        for r in complex_reqs:
+            self._tmp_extras_require[':' + str(r.marker)].append(r)
+        self.extras_require = dict(
+            (k, [str(r) for r in map(self._clean_req, v)])
+            for k, v in self._tmp_extras_require.items()
+        )
+
+    def _clean_req(self, req):
+        """
+        Given a Requirement, remove environment markers and return it.
+        """
+        req.marker = None
+        return req
+
+    def parse_config_files(self, filenames=None, ignore_option_errors=False):
+        """Parses configuration files from various levels
+        and loads configuration.
+
+        """
+        _Distribution.parse_config_files(self, filenames=filenames)
+
+        parse_configuration(self, self.command_options,
+                            ignore_option_errors=ignore_option_errors)
+        self._finalize_requires()
+
+    def parse_command_line(self):
+        """Process features after parsing command line options"""
+        result = _Distribution.parse_command_line(self)
+        if self.features:
+            self._finalize_features()
+        return result
+
+    def _feature_attrname(self, name):
+        """Convert feature name to corresponding option attribute name"""
+        return 'with_' + name.replace('-', '_')
+
+    def fetch_build_eggs(self, requires):
+        """Resolve pre-setup requirements"""
+        resolved_dists = pkg_resources.working_set.resolve(
+            pkg_resources.parse_requirements(requires),
+            installer=self.fetch_build_egg,
+            replace_conflicting=True,
+        )
+        for dist in resolved_dists:
+            pkg_resources.working_set.add(dist, replace=True)
+        return resolved_dists
+
+    def finalize_options(self):
+        _Distribution.finalize_options(self)
+        if self.features:
+            self._set_global_opts_from_features()
+
+        for ep in pkg_resources.iter_entry_points('distutils.setup_keywords'):
+            value = getattr(self, ep.name, None)
+            if value is not None:
+                ep.require(installer=self.fetch_build_egg)
+                ep.load()(self, ep.name, value)
+        if getattr(self, 'convert_2to3_doctests', None):
+            # XXX may convert to set here when we can rely on set being builtin
+            self.convert_2to3_doctests = [
+                os.path.abspath(p)
+                for p in self.convert_2to3_doctests
+            ]
+        else:
+            self.convert_2to3_doctests = []
+
+    def get_egg_cache_dir(self):
+        egg_cache_dir = os.path.join(os.curdir, '.eggs')
+        if not os.path.exists(egg_cache_dir):
+            os.mkdir(egg_cache_dir)
+            windows_support.hide_file(egg_cache_dir)
+            readme_txt_filename = os.path.join(egg_cache_dir, 'README.txt')
+            with open(readme_txt_filename, 'w') as f:
+                f.write('This directory contains eggs that were downloaded '
+                        'by setuptools to build, test, and run plug-ins.\n\n')
+                f.write('This directory caches those eggs to prevent '
+                        'repeated downloads.\n\n')
+                f.write('However, it is safe to delete this directory.\n\n')
+
+        return egg_cache_dir
+
+    def fetch_build_egg(self, req):
+        """Fetch an egg needed for building"""
+        from setuptools.command.easy_install import easy_install
+        dist = self.__class__({'script_args': ['easy_install']})
+        opts = dist.get_option_dict('easy_install')
+        opts.clear()
+        opts.update(
+            (k, v)
+            for k, v in self.get_option_dict('easy_install').items()
+            if k in (
+                # don't use any other settings
+                'find_links', 'site_dirs', 'index_url',
+                'optimize', 'allow_hosts',
+            ))
+        if self.dependency_links:
+            links = self.dependency_links[:]
+            if 'find_links' in opts:
+                links = opts['find_links'][1] + links
+            opts['find_links'] = ('setup', links)
+        install_dir = self.get_egg_cache_dir()
+        cmd = easy_install(
+            dist, args=["x"], install_dir=install_dir,
+            exclude_scripts=True,
+            always_copy=False, build_directory=None, editable=False,
+            upgrade=False, multi_version=True, no_report=True, user=False
+        )
+        cmd.ensure_finalized()
+        return cmd.easy_install(req)
+
+    def _set_global_opts_from_features(self):
+        """Add --with-X/--without-X options based on optional features"""
+
+        go = []
+        no = self.negative_opt.copy()
+
+        for name, feature in self.features.items():
+            self._set_feature(name, None)
+            feature.validate(self)
+
+            if feature.optional:
+                descr = feature.description
+                incdef = ' (default)'
+                excdef = ''
+                if not feature.include_by_default():
+                    excdef, incdef = incdef, excdef
+
+                new = (
+                    ('with-' + name, None, 'include ' + descr + incdef),
+                    ('without-' + name, None, 'exclude ' + descr + excdef),
+                )
+                go.extend(new)
+                no['without-' + name] = 'with-' + name
+
+        self.global_options = self.feature_options = go + self.global_options
+        self.negative_opt = self.feature_negopt = no
+
+    def _finalize_features(self):
+        """Add/remove features and resolve dependencies between them"""
+
+        # First, flag all the enabled items (and thus their dependencies)
+        for name, feature in self.features.items():
+            enabled = self.feature_is_included(name)
+            if enabled or (enabled is None and feature.include_by_default()):
+                feature.include_in(self)
+                self._set_feature(name, 1)
+
+        # Then disable the rest, so that off-by-default features don't
+        # get flagged as errors when they're required by an enabled feature
+        for name, feature in self.features.items():
+            if not self.feature_is_included(name):
+                feature.exclude_from(self)
+                self._set_feature(name, 0)
+
+    def get_command_class(self, command):
+        """Pluggable version of get_command_class()"""
+        if command in self.cmdclass:
+            return self.cmdclass[command]
+
+        eps = pkg_resources.iter_entry_points('distutils.commands', command)
+        for ep in eps:
+            ep.require(installer=self.fetch_build_egg)
+            self.cmdclass[command] = cmdclass = ep.load()
+            return cmdclass
+        else:
+            return _Distribution.get_command_class(self, command)
+
+    def print_commands(self):
+        for ep in pkg_resources.iter_entry_points('distutils.commands'):
+            if ep.name not in self.cmdclass:
+                # don't require extras as the commands won't be invoked
+                cmdclass = ep.resolve()
+                self.cmdclass[ep.name] = cmdclass
+        return _Distribution.print_commands(self)
+
+    def get_command_list(self):
+        for ep in pkg_resources.iter_entry_points('distutils.commands'):
+            if ep.name not in self.cmdclass:
+                # don't require extras as the commands won't be invoked
+                cmdclass = ep.resolve()
+                self.cmdclass[ep.name] = cmdclass
+        return _Distribution.get_command_list(self)
+
+    def _set_feature(self, name, status):
+        """Set feature's inclusion status"""
+        setattr(self, self._feature_attrname(name), status)
+
+    def feature_is_included(self, name):
+        """Return 1 if feature is included, 0 if excluded, 'None' if unknown"""
+        return getattr(self, self._feature_attrname(name))
+
+    def include_feature(self, name):
+        """Request inclusion of feature named 'name'"""
+
+        if self.feature_is_included(name) == 0:
+            descr = self.features[name].description
+            raise DistutilsOptionError(
+                descr + " is required, but was excluded or is not available"
+            )
+        self.features[name].include_in(self)
+        self._set_feature(name, 1)
+
+    def include(self, **attrs):
+        """Add items to distribution that are named in keyword arguments
+
+        For example, 'dist.include(py_modules=["x"])' would add 'x' to
+        the distribution's 'py_modules' attribute, if it was not already
+        there.
+
+        Currently, this method only supports inclusion for attributes that are
+        lists or tuples.  If you need to add support for adding to other
+        attributes in this or a subclass, you can add an '_include_X' method,
+        where 'X' is the name of the attribute.  The method will be called with
+        the value passed to 'include()'.  So, 'dist.include(foo={"bar":"baz"})'
+        will try to call 'dist._include_foo({"bar":"baz"})', which can then
+        handle whatever special inclusion logic is needed.
+        """
+        for k, v in attrs.items():
+            include = getattr(self, '_include_' + k, None)
+            if include:
+                include(v)
+            else:
+                self._include_misc(k, v)
+
+    def exclude_package(self, package):
+        """Remove packages, modules, and extensions in named package"""
+
+        pfx = package + '.'
+        if self.packages:
+            self.packages = [
+                p for p in self.packages
+                if p != package and not p.startswith(pfx)
+            ]
+
+        if self.py_modules:
+            self.py_modules = [
+                p for p in self.py_modules
+                if p != package and not p.startswith(pfx)
+            ]
+
+        if self.ext_modules:
+            self.ext_modules = [
+                p for p in self.ext_modules
+                if p.name != package and not p.name.startswith(pfx)
+            ]
+
+    def has_contents_for(self, package):
+        """Return true if 'exclude_package(package)' would do something"""
+
+        pfx = package + '.'
+
+        for p in self.iter_distribution_names():
+            if p == package or p.startswith(pfx):
+                return True
+
+    def _exclude_misc(self, name, value):
+        """Handle 'exclude()' for list/tuple attrs without a special handler"""
+        if not isinstance(value, sequence):
+            raise DistutilsSetupError(
+                "%s: setting must be a list or tuple (%r)" % (name, value)
+            )
+        try:
+            old = getattr(self, name)
+        except AttributeError:
+            raise DistutilsSetupError(
+                "%s: No such distribution setting" % name
+            )
+        if old is not None and not isinstance(old, sequence):
+            raise DistutilsSetupError(
+                name + ": this setting cannot be changed via include/exclude"
+            )
+        elif old:
+            setattr(self, name, [item for item in old if item not in value])
+
+    def _include_misc(self, name, value):
+        """Handle 'include()' for list/tuple attrs without a special handler"""
+
+        if not isinstance(value, sequence):
+            raise DistutilsSetupError(
+                "%s: setting must be a list (%r)" % (name, value)
+            )
+        try:
+            old = getattr(self, name)
+        except AttributeError:
+            raise DistutilsSetupError(
+                "%s: No such distribution setting" % name
+            )
+        if old is None:
+            setattr(self, name, value)
+        elif not isinstance(old, sequence):
+            raise DistutilsSetupError(
+                name + ": this setting cannot be changed via include/exclude"
+            )
+        else:
+            new = [item for item in value if item not in old]
+            setattr(self, name, old + new)
+
+    def exclude(self, **attrs):
+        """Remove items from distribution that are named in keyword arguments
+
+        For example, 'dist.exclude(py_modules=["x"])' would remove 'x' from
+        the distribution's 'py_modules' attribute.  Excluding packages uses
+        the 'exclude_package()' method, so all of the package's contained
+        packages, modules, and extensions are also excluded.
+
+        Currently, this method only supports exclusion from attributes that are
+        lists or tuples.  If you need to add support for excluding from other
+        attributes in this or a subclass, you can add an '_exclude_X' method,
+        where 'X' is the name of the attribute.  The method will be called with
+        the value passed to 'exclude()'.  So, 'dist.exclude(foo={"bar":"baz"})'
+        will try to call 'dist._exclude_foo({"bar":"baz"})', which can then
+        handle whatever special exclusion logic is needed.
+        """
+        for k, v in attrs.items():
+            exclude = getattr(self, '_exclude_' + k, None)
+            if exclude:
+                exclude(v)
+            else:
+                self._exclude_misc(k, v)
+
+    def _exclude_packages(self, packages):
+        if not isinstance(packages, sequence):
+            raise DistutilsSetupError(
+                "packages: setting must be a list or tuple (%r)" % (packages,)
+            )
+        list(map(self.exclude_package, packages))
+
+    def _parse_command_opts(self, parser, args):
+        # Remove --with-X/--without-X options when processing command args
+        self.global_options = self.__class__.global_options
+        self.negative_opt = self.__class__.negative_opt
+
+        # First, expand any aliases
+        command = args[0]
+        aliases = self.get_option_dict('aliases')
+        while command in aliases:
+            src, alias = aliases[command]
+            del aliases[command]  # ensure each alias can expand only once!
+            import shlex
+            args[:1] = shlex.split(alias, True)
+            command = args[0]
+
+        nargs = _Distribution._parse_command_opts(self, parser, args)
+
+        # Handle commands that want to consume all remaining arguments
+        cmd_class = self.get_command_class(command)
+        if getattr(cmd_class, 'command_consumes_arguments', None):
+            self.get_option_dict(command)['args'] = ("command line", nargs)
+            if nargs is not None:
+                return []
+
+        return nargs
+
+    def get_cmdline_options(self):
+        """Return a '{cmd: {opt:val}}' map of all command-line options
+
+        Option names are all long, but do not include the leading '--', and
+        contain dashes rather than underscores.  If the option doesn't take
+        an argument (e.g. '--quiet'), the 'val' is 'None'.
+
+        Note that options provided by config files are intentionally excluded.
+        """
+
+        d = {}
+
+        for cmd, opts in self.command_options.items():
+
+            for opt, (src, val) in opts.items():
+
+                if src != "command line":
+                    continue
+
+                opt = opt.replace('_', '-')
+
+                if val == 0:
+                    cmdobj = self.get_command_obj(cmd)
+                    neg_opt = self.negative_opt.copy()
+                    neg_opt.update(getattr(cmdobj, 'negative_opt', {}))
+                    for neg, pos in neg_opt.items():
+                        if pos == opt:
+                            opt = neg
+                            val = None
+                            break
+                    else:
+                        raise AssertionError("Shouldn't be able to get here")
+
+                elif val == 1:
+                    val = None
+
+                d.setdefault(cmd, {})[opt] = val
+
+        return d
+
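+    # Sketch of the shape of the mapping returned above for a command line
+    # such as ``python setup.py build_ext --inplace --include-dirs=/opt/inc``
+    # (values are illustrative, not taken from a real run):
+    #
+    #     {'build_ext': {'inplace': None, 'include-dirs': '/opt/inc'}}
+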
+    def iter_distribution_names(self):
+        """Yield all packages, modules, and extension names in distribution"""
+
+        for pkg in self.packages or ():
+            yield pkg
+
+        for module in self.py_modules or ():
+            yield module
+
+        for ext in self.ext_modules or ():
+            if isinstance(ext, tuple):
+                name, buildinfo = ext
+            else:
+                name = ext.name
+            if name.endswith('module'):
+                name = name[:-6]
+            yield name
+
+    def handle_display_options(self, option_order):
+        """If there were any non-global "display-only" options
+        (--help-commands or the metadata display options) on the command
+        line, display the requested info and return true; else return
+        false.
+        """
+        import sys
+
+        if six.PY2 or self.help_commands:
+            return _Distribution.handle_display_options(self, option_order)
+
+        # Stdout may be StringIO (e.g. in tests)
+        import io
+        if not isinstance(sys.stdout, io.TextIOWrapper):
+            return _Distribution.handle_display_options(self, option_order)
+
+        # Don't wrap stdout if utf-8 is already the encoding. Provides
+        #  workaround for #334.
+        if sys.stdout.encoding.lower() in ('utf-8', 'utf8'):
+            return _Distribution.handle_display_options(self, option_order)
+
+        # Print metadata in UTF-8 no matter the platform
+        encoding = sys.stdout.encoding
+        errors = sys.stdout.errors
+        newline = '\n' if sys.platform != 'win32' else None
+        line_buffering = sys.stdout.line_buffering
+
+        sys.stdout = io.TextIOWrapper(
+            sys.stdout.detach(), 'utf-8', errors, newline, line_buffering)
+        try:
+            return _Distribution.handle_display_options(self, option_order)
+        finally:
+            sys.stdout = io.TextIOWrapper(
+                sys.stdout.detach(), encoding, errors, newline, line_buffering)
+
+
+class Feature:
+    """
+    **deprecated** -- The `Feature` facility was never completely implemented
+    or supported, `has reported issues
+    <https://github.com/pypa/setuptools/issues/58>`_ and will be removed in
+    a future version.
+
+    A subset of the distribution that can be excluded if unneeded/wanted
+
+    Features are created using these keyword arguments:
+
+      'description' -- a short, human readable description of the feature, to
+         be used in error messages, and option help messages.
+
+      'standard' -- if true, the feature is included by default if it is
+         available on the current system.  Otherwise, the feature is only
+         included if requested via a command line '--with-X' option, or if
+         another included feature requires it.  The default setting is 'False'.
+
+      'available' -- if true, the feature is available for installation on the
+         current system.  The default setting is 'True'.
+
+      'optional' -- if true, the feature's inclusion can be controlled from the
+         command line, using the '--with-X' or '--without-X' options.  If
+         false, the feature's inclusion status is determined automatically,
+         based on 'available', 'standard', and whether any other feature
+         requires it.  The default setting is 'True'.
+
+      'require_features' -- a string or sequence of strings naming features
+         that should also be included if this feature is included.  Defaults to
+         empty list.  May also contain 'Require' objects that should be
+         added/removed from the distribution.
+
+      'remove' -- a string or list of strings naming packages to be removed
+         from the distribution if this feature is *not* included.  If the
+         feature *is* included, this argument is ignored.  This argument exists
+         to support removing features that "crosscut" a distribution, such as
+         defining a 'tests' feature that removes all the 'tests' subpackages
+         provided by other features.  The default for this argument is an empty
+         list.  (Note: the named package(s) or modules must exist in the base
+         distribution when the 'setup()' function is initially called.)
+
+      other keywords -- any other keyword arguments are saved, and passed to
+         the distribution's 'include()' and 'exclude()' methods when the
+         feature is included or excluded, respectively.  So, for example, you
+         could pass 'packages=["a","b"]' to cause packages 'a' and 'b' to be
+         added or removed from the distribution as appropriate.
+
+    A feature must include at least one 'require_features', 'remove', or other
+    keyword argument.  Otherwise, it can't affect the distribution in any way.
+    Note also that you can subclass 'Feature' to create your own specialized
+    feature types that modify the distribution in other ways when included or
+    excluded.  See the docstrings for the various methods here for more detail.
+    Aside from the methods, the only feature attributes that distributions look
+    at are 'description' and 'optional'.
+    """
+
+    @staticmethod
+    def warn_deprecated():
+        msg = (
+            "Features are deprecated and will be removed in a future "
+            "version. See https://github.com/pypa/setuptools/issues/65."
+        )
+        warnings.warn(msg, DeprecationWarning, stacklevel=3)
+
+    def __init__(
+            self, description, standard=False, available=True,
+            optional=True, require_features=(), remove=(), **extras):
+        self.warn_deprecated()
+
+        self.description = description
+        self.standard = standard
+        self.available = available
+        self.optional = optional
+        if isinstance(require_features, (str, Require)):
+            require_features = require_features,
+
+        self.require_features = [
+            r for r in require_features if isinstance(r, str)
+        ]
+        er = [r for r in require_features if not isinstance(r, str)]
+        if er:
+            extras['require_features'] = er
+
+        if isinstance(remove, str):
+            remove = remove,
+        self.remove = remove
+        self.extras = extras
+
+        if not remove and not require_features and not extras:
+            raise DistutilsSetupError(
+                "Feature %s: must define 'require_features', 'remove', or "
+                "at least one of 'packages', 'py_modules', etc." % description
+            )
+
+    def include_by_default(self):
+        """Should this feature be included by default?"""
+        return self.available and self.standard
+
+    def include_in(self, dist):
+        """Ensure feature and its requirements are included in distribution
+
+        You may override this in a subclass to perform additional operations on
+        the distribution.  Note that this method may be called more than once
+        per feature, and so should be idempotent.
+
+        """
+
+        if not self.available:
+            raise DistutilsPlatformError(
+                self.description + " is required, "
+                "but is not available on this platform"
+            )
+
+        dist.include(**self.extras)
+
+        for f in self.require_features:
+            dist.include_feature(f)
+
+    def exclude_from(self, dist):
+        """Ensure feature is excluded from distribution
+
+        You may override this in a subclass to perform additional operations on
+        the distribution.  This method will be called at most once per
+        feature, and only after all included features have been asked to
+        include themselves.
+        """
+
+        dist.exclude(**self.extras)
+
+        if self.remove:
+            for item in self.remove:
+                dist.exclude_package(item)
+
+    def validate(self, dist):
+        """Verify that feature makes sense in context of distribution
+
+        This method is called by the distribution just before it parses its
+        command line.  It checks to ensure that the 'remove' attribute, if any,
+        contains only valid package/module names that are present in the base
+        distribution when 'setup()' is called.  You may override it in a
+        subclass to perform any other required validation of the feature
+        against a target distribution.
+        """
+
+        for item in self.remove:
+            if not dist.has_contents_for(item):
+                raise DistutilsSetupError(
+                    "%s wants to be able to remove %s, but the distribution"
+                    " doesn't contain any packages or modules under %s"
+                    % (self.description, item, item)
+                )
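+
+
+# A hedged sketch of how the (deprecated) Feature keywords documented above
+# were typically wired into a setup() call; the package names are
+# illustrative only:
+#
+#     from setuptools import setup
+#     from setuptools.dist import Feature
+#
+#     setup(
+#         name='mypkg',
+#         packages=['mypkg', 'mypkg.tests'],
+#         features={
+#             'tests': Feature(
+#                 "test suite", standard=False, remove=['mypkg.tests']),
+#         },
+#     )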
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/extension.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/extension.py
new file mode 100644
index 0000000000000000000000000000000000000000..29468894f828128f4c36660167dd1f9e68e584be
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/extension.py
@@ -0,0 +1,57 @@
+import re
+import functools
+import distutils.core
+import distutils.errors
+import distutils.extension
+
+from setuptools.extern.six.moves import map
+
+from .monkey import get_unpatched
+
+
+def _have_cython():
+    """
+    Return True if Cython can be imported.
+    """
+    cython_impl = 'Cython.Distutils.build_ext'
+    try:
+        # from (cython_impl) import build_ext
+        __import__(cython_impl, fromlist=['build_ext']).build_ext
+        return True
+    except Exception:
+        pass
+    return False
+
+
+# for compatibility
+have_pyrex = _have_cython
+
+_Extension = get_unpatched(distutils.core.Extension)
+
+
+class Extension(_Extension):
+    """Extension that uses '.c' files in place of '.pyx' files"""
+
+    def __init__(self, name, sources, *args, **kw):
+        # The *args is needed for compatibility as calls may use positional
+        # arguments. py_limited_api may be set only via keyword.
+        self.py_limited_api = kw.pop("py_limited_api", False)
+        _Extension.__init__(self, name, sources, *args, **kw)
+
+    def _convert_pyx_sources_to_lang(self):
+        """
+        Replace sources with .pyx extensions to sources with the target
+        language extension. This mechanism allows language authors to supply
+        pre-converted sources but to prefer the .pyx sources.
+        """
+        if _have_cython():
+            # the build has Cython, so allow it to compile the .pyx files
+            return
+        lang = self.language or ''
+        target_ext = '.cpp' if lang.lower() == 'c++' else '.c'
+        sub = functools.partial(re.sub, '.pyx$', target_ext)
+        self.sources = list(map(sub, self.sources))
+
+
+class Library(Extension):
+    """Just like a regular Extension, but built as a library instead"""
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/extern/__init__.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/extern/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..da3d668d997b4a6387d747639b749151207b3ccc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/extern/__init__.py
@@ -0,0 +1,73 @@
+import sys
+
+
+class VendorImporter:
+    """
+    A PEP 302 meta path importer for finding optionally-vendored
+    or otherwise naturally-installed packages from root_name.
+    """
+
+    def __init__(self, root_name, vendored_names=(), vendor_pkg=None):
+        self.root_name = root_name
+        self.vendored_names = set(vendored_names)
+        self.vendor_pkg = vendor_pkg or root_name.replace('extern', '_vendor')
+
+    @property
+    def search_path(self):
+        """
+        Search the vendor package first, then fall back to the naturally-installed package.
+        """
+        yield self.vendor_pkg + '.'
+        yield ''
+
+    def find_module(self, fullname, path=None):
+        """
+        Return self when fullname starts with root_name and the
+        target module is one vendored through this importer.
+        """
+        root, base, target = fullname.partition(self.root_name + '.')
+        if root:
+            return
+        if not any(map(target.startswith, self.vendored_names)):
+            return
+        return self
+
+    def load_module(self, fullname):
+        """
+        Iterate over the search path to locate and load fullname.
+        """
+        root, base, target = fullname.partition(self.root_name + '.')
+        for prefix in self.search_path:
+            try:
+                extant = prefix + target
+                __import__(extant)
+                mod = sys.modules[extant]
+                sys.modules[fullname] = mod
+                # mysterious hack:
+                # Remove the reference to the extant package/module
+                # on later Python versions to cause relative imports
+                # in the vendor package to resolve the same modules
+                # as those going through this importer.
+                if sys.version_info > (3, 3):
+                    del sys.modules[extant]
+                return mod
+            except ImportError:
+                pass
+        else:
+            raise ImportError(
+                "The '{target}' package is required; "
+                "normally this is bundled with this package so if you get "
+                "this warning, consult the packager of your "
+                "distribution.".format(**locals())
+            )
+
+    def install(self):
+        """
+        Install this importer into sys.meta_path if not already present.
+        """
+        if self not in sys.meta_path:
+            sys.meta_path.append(self)
+
+
+names = 'six', 'packaging', 'pyparsing',
+VendorImporter(__name__, names, 'setuptools._vendor').install()
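+
+# Effect, sketched: once install() has run, an import such as
+#
+#     from setuptools.extern import six
+#
+# resolves to the vendored setuptools._vendor.six when it is bundled, and
+# falls back to a top-level 'six' otherwise; in both cases the module is
+# registered in sys.modules under the 'setuptools.extern.six' name.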
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/glibc.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/glibc.py
new file mode 100644
index 0000000000000000000000000000000000000000..a134591c30547300e0e55c6898a3a1590b3f0171
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/glibc.py
@@ -0,0 +1,86 @@
+# This file originally from pip:
+# https://github.com/pypa/pip/blob/8f4f15a5a95d7d5b511ceaee9ed261176c181970/src/pip/_internal/utils/glibc.py
+from __future__ import absolute_import
+
+import ctypes
+import re
+import warnings
+
+
+def glibc_version_string():
+    "Returns glibc version string, or None if not using glibc."
+
+    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
+    # manpage says, "If filename is NULL, then the returned handle is for the
+    # main program". This way we can let the linker do the work to figure out
+    # which libc our process is actually using.
+    process_namespace = ctypes.CDLL(None)
+    try:
+        gnu_get_libc_version = process_namespace.gnu_get_libc_version
+    except AttributeError:
+        # Symbol doesn't exist -> therefore, we are not linked to
+        # glibc.
+        return None
+
+    # Call gnu_get_libc_version, which returns a string like "2.5"
+    gnu_get_libc_version.restype = ctypes.c_char_p
+    version_str = gnu_get_libc_version()
+    # py2 / py3 compatibility:
+    if not isinstance(version_str, str):
+        version_str = version_str.decode("ascii")
+
+    return version_str
+
+
+# Separated out from have_compatible_glibc for easier unit testing
+def check_glibc_version(version_str, required_major, minimum_minor):
+    # Parse string and check against requested version.
+    #
+    # We use a regexp instead of str.split because we want to discard any
+    # random junk that might come after the minor version -- this might happen
+    # in patched/forked versions of glibc (e.g. Linaro's version of glibc
+    # uses version strings like "2.20-2014.11"). See gh-3588.
+    m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
+    if not m:
+        warnings.warn("Expected glibc version with 2 components major.minor,"
+                      " got: %s" % version_str, RuntimeWarning)
+        return False
+    return (int(m.group("major")) == required_major and
+            int(m.group("minor")) >= minimum_minor)
+
+
+def have_compatible_glibc(required_major, minimum_minor):
+    version_str = glibc_version_string()
+    if version_str is None:
+        return False
+    return check_glibc_version(version_str, required_major, minimum_minor)
+
+
+# platform.libc_ver regularly returns completely nonsensical glibc
+# versions. E.g. on my computer, platform says:
+#
+#   ~$ python2.7 -c 'import platform; print(platform.libc_ver())'
+#   ('glibc', '2.7')
+#   ~$ python3.5 -c 'import platform; print(platform.libc_ver())'
+#   ('glibc', '2.9')
+#
+# But the truth is:
+#
+#   ~$ ldd --version
+#   ldd (Debian GLIBC 2.22-11) 2.22
+#
+# This is unfortunate, because it means that the linehaul data on libc
+# versions that was generated by pip 8.1.2 and earlier is useless and
+# misleading. Solution: instead of using platform, use our code that actually
+# works.
+def libc_ver():
+    """Try to determine the glibc version
+
+    Returns a tuple of strings (lib, version) which default to empty strings
+    in case the lookup fails.
+    """
+    glibc_version = glibc_version_string()
+    if glibc_version is None:
+        return ("", "")
+    else:
+        return ("glibc", glibc_version)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/glob.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/glob.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c781de349abbc8fe7d66c4b47d08a7ba961a74f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/glob.py
@@ -0,0 +1,176 @@
+"""
+Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
+
+Changes include:
+ * `yield from` and PEP3102 `*` removed.
+ * `bytes` changed to `six.binary_type`.
+ * Hidden files are not ignored.
+"""
+
+import os
+import re
+import fnmatch
+from setuptools.extern.six import binary_type
+
+__all__ = ["glob", "iglob", "escape"]
+
+
+def glob(pathname, recursive=False):
+    """Return a list of paths matching a pathname pattern.
+
+    The pattern may contain simple shell-style wildcards a la
+    fnmatch. Unlike the standard library glob, filenames starting
+    with a dot are not treated specially and are matched by '*'
+    and '?' patterns.
+
+    If recursive is true, the pattern '**' will match any files and
+    zero or more directories and subdirectories.
+    """
+    return list(iglob(pathname, recursive=recursive))
+
+
+def iglob(pathname, recursive=False):
+    """Return an iterator which yields the paths matching a pathname pattern.
+
+    The pattern may contain simple shell-style wildcards a la
+    fnmatch. Unlike the standard library glob, filenames starting
+    with a dot are not treated specially and are matched by '*'
+    and '?' patterns.
+
+    If recursive is true, the pattern '**' will match any files and
+    zero or more directories and subdirectories.
+    """
+    it = _iglob(pathname, recursive)
+    if recursive and _isrecursive(pathname):
+        s = next(it)  # skip empty string
+        assert not s
+    return it
+
+
+def _iglob(pathname, recursive):
+    dirname, basename = os.path.split(pathname)
+    if not has_magic(pathname):
+        if basename:
+            if os.path.lexists(pathname):
+                yield pathname
+        else:
+            # Patterns ending with a slash should match only directories
+            if os.path.isdir(dirname):
+                yield pathname
+        return
+    if not dirname:
+        if recursive and _isrecursive(basename):
+            for x in glob2(dirname, basename):
+                yield x
+        else:
+            for x in glob1(dirname, basename):
+                yield x
+        return
+    # `os.path.split()` returns the argument itself as a dirname if it is a
+    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
+    # contains magic characters (i.e. r'\\?\C:').
+    if dirname != pathname and has_magic(dirname):
+        dirs = _iglob(dirname, recursive)
+    else:
+        dirs = [dirname]
+    if has_magic(basename):
+        if recursive and _isrecursive(basename):
+            glob_in_dir = glob2
+        else:
+            glob_in_dir = glob1
+    else:
+        glob_in_dir = glob0
+    for dirname in dirs:
+        for name in glob_in_dir(dirname, basename):
+            yield os.path.join(dirname, name)
+
+
+# These 2 helper functions non-recursively glob inside a literal directory.
+# They return a list of basenames. `glob1` accepts a pattern while `glob0`
+# takes a literal basename (so it only has to check for its existence).
+
+
+def glob1(dirname, pattern):
+    if not dirname:
+        if isinstance(pattern, binary_type):
+            dirname = os.curdir.encode('ASCII')
+        else:
+            dirname = os.curdir
+    try:
+        names = os.listdir(dirname)
+    except OSError:
+        return []
+    return fnmatch.filter(names, pattern)
+
+
+def glob0(dirname, basename):
+    if not basename:
+        # `os.path.split()` returns an empty basename for paths ending with a
+        # directory separator.  'q*x/' should match only directories.
+        if os.path.isdir(dirname):
+            return [basename]
+    else:
+        if os.path.lexists(os.path.join(dirname, basename)):
+            return [basename]
+    return []
+
+
+# This helper function recursively yields relative pathnames inside a literal
+# directory.
+
+
+def glob2(dirname, pattern):
+    assert _isrecursive(pattern)
+    yield pattern[:0]
+    for x in _rlistdir(dirname):
+        yield x
+
+
+# Recursively yields relative pathnames inside a literal directory.
+def _rlistdir(dirname):
+    if not dirname:
+        if isinstance(dirname, binary_type):
+            dirname = binary_type(os.curdir, 'ASCII')
+        else:
+            dirname = os.curdir
+    try:
+        names = os.listdir(dirname)
+    except os.error:
+        return
+    for x in names:
+        yield x
+        path = os.path.join(dirname, x) if dirname else x
+        for y in _rlistdir(path):
+            yield os.path.join(x, y)
+
+
+magic_check = re.compile('([*?[])')
+magic_check_bytes = re.compile(b'([*?[])')
+
+
+def has_magic(s):
+    if isinstance(s, binary_type):
+        match = magic_check_bytes.search(s)
+    else:
+        match = magic_check.search(s)
+    return match is not None
+
+
+def _isrecursive(pattern):
+    if isinstance(pattern, binary_type):
+        return pattern == b'**'
+    else:
+        return pattern == '**'
+
+
+def escape(pathname):
+    """Escape all special characters.
+    """
+    # Escaping is done by wrapping any of "*?[" between square brackets.
+    # Metacharacters do not work in the drive part and shouldn't be escaped.
+    drive, pathname = os.path.splitdrive(pathname)
+    if isinstance(pathname, binary_type):
+        pathname = magic_check_bytes.sub(br'[\1]', pathname)
+    else:
+        pathname = magic_check.sub(r'[\1]', pathname)
+    return drive + pathname
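+
+
+# Usage sketch:
+#
+#     glob('src/**/*.py', recursive=True)  # every .py under src/, any depth
+#     escape('data[v1]/*.csv')             # -> 'data[[]v1]/[*].csv'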
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui-32.exe b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui-32.exe
new file mode 100644
index 0000000000000000000000000000000000000000..f8d3509653ba8f80ca7f3aa7f95616142ba83a94
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui-32.exe differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui-64.exe b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui-64.exe
new file mode 100644
index 0000000000000000000000000000000000000000..330c51a5dde15a0bb610a48cd0ca11770c914dae
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui-64.exe differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui.exe b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui.exe
new file mode 100644
index 0000000000000000000000000000000000000000..f8d3509653ba8f80ca7f3aa7f95616142ba83a94
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/gui.exe differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/launch.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/launch.py
new file mode 100644
index 0000000000000000000000000000000000000000..308283ea939ed9bced7b099eb8a1879aa9c203d4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/launch.py
@@ -0,0 +1,35 @@
+"""
+Launch the Python script on the command line after
+setuptools is bootstrapped via import.
+"""
+
+# Note that setuptools gets imported implicitly by the
+# invocation of this script using python -m setuptools.launch
+
+import tokenize
+import sys
+
+
+def run():
+    """
+    Run the script in sys.argv[1] as if it had
+    been invoked naturally.
+    """
+    __builtins__
+    script_name = sys.argv[1]
+    namespace = dict(
+        __file__=script_name,
+        __name__='__main__',
+        __doc__=None,
+    )
+    sys.argv[:] = sys.argv[1:]
+
+    open_ = getattr(tokenize, 'open', open)
+    script = open_(script_name).read()
+    norm_script = script.replace('\\r\\n', '\\n')
+    code = compile(norm_script, script_name, 'exec')
+    exec(code, namespace)
+
+
+if __name__ == '__main__':
+    run()
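+
+# Typical invocation, sketched:
+#
+#     python -m setuptools.launch setup.py bdist_wheel
+#
+# which executes setup.py as __main__ with setuptools already imported, so
+# even a plain distutils setup script picks up the setuptools machinery.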
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/lib2to3_ex.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/lib2to3_ex.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b1a73feb26fdad65bafdeb21f5ce6abfb905fc0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/lib2to3_ex.py
@@ -0,0 +1,62 @@
+"""
+Customized Mixin2to3 support:
+
+ - adds support for converting doctests
+
+
+This module raises an ImportError on Python 2.
+"""
+
+from distutils.util import Mixin2to3 as _Mixin2to3
+from distutils import log
+from lib2to3.refactor import RefactoringTool, get_fixers_from_package
+
+import setuptools
+
+
+class DistutilsRefactoringTool(RefactoringTool):
+    def log_error(self, msg, *args, **kw):
+        log.error(msg, *args)
+
+    def log_message(self, msg, *args):
+        log.info(msg, *args)
+
+    def log_debug(self, msg, *args):
+        log.debug(msg, *args)
+
+
+class Mixin2to3(_Mixin2to3):
+    def run_2to3(self, files, doctests=False):
+        # See if the distribution option has been set, otherwise check the
+        # setuptools default.
+        if self.distribution.use_2to3 is not True:
+            return
+        if not files:
+            return
+        log.info("Fixing " + " ".join(files))
+        self.__build_fixer_names()
+        self.__exclude_fixers()
+        if doctests:
+            if setuptools.run_2to3_on_doctests:
+                r = DistutilsRefactoringTool(self.fixer_names)
+                r.refactor(files, write=True, doctests_only=True)
+        else:
+            _Mixin2to3.run_2to3(self, files)
+
+    def __build_fixer_names(self):
+        if self.fixer_names:
+            return
+        self.fixer_names = []
+        for p in setuptools.lib2to3_fixer_packages:
+            self.fixer_names.extend(get_fixers_from_package(p))
+        if self.distribution.use_2to3_fixers is not None:
+            for p in self.distribution.use_2to3_fixers:
+                self.fixer_names.extend(get_fixers_from_package(p))
+
+    def __exclude_fixers(self):
+        excluded_fixers = getattr(self, 'exclude_fixers', [])
+        if self.distribution.use_2to3_exclude_fixers is not None:
+            excluded_fixers.extend(self.distribution.use_2to3_exclude_fixers)
+        for fixer_name in excluded_fixers:
+            if fixer_name in self.fixer_names:
+                self.fixer_names.remove(fixer_name)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/monkey.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/monkey.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9eb7d7b2946519137ec27ebf5dbf9e14ada6f81
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/monkey.py
@@ -0,0 +1,197 @@
+"""
+Monkey patching of distutils.
+"""
+
+import sys
+import distutils.filelist
+import platform
+import types
+import functools
+from importlib import import_module
+import inspect
+
+from setuptools.extern import six
+
+import setuptools
+
+__all__ = []
+"""
+Everything is private. Contact the project team
+if you think you need this functionality.
+"""
+
+
+def _get_mro(cls):
+    """
+    Returns the base classes for cls sorted by the MRO.
+
+    Works around an issue on Jython where inspect.getmro will not return all
+    base classes if multiple classes share the same name. Instead, this
+    function will return a tuple containing the class itself, and the contents
+    of cls.__bases__. See https://github.com/pypa/setuptools/issues/1024.
+    """
+    if platform.python_implementation() == "Jython":
+        return (cls,) + cls.__bases__
+    return inspect.getmro(cls)
+
+
+def get_unpatched(item):
+    lookup = (
+        get_unpatched_class if isinstance(item, six.class_types) else
+        get_unpatched_function if isinstance(item, types.FunctionType) else
+        lambda item: None
+    )
+    return lookup(item)
+
+
+def get_unpatched_class(cls):
+    """Protect against re-patching the distutils if reloaded
+
+    Also ensures that no other distutils extension monkeypatched the distutils
+    first.
+    """
+    external_bases = (
+        cls
+        for cls in _get_mro(cls)
+        if not cls.__module__.startswith('setuptools')
+    )
+    base = next(external_bases)
+    if not base.__module__.startswith('distutils'):
+        msg = "distutils has already been patched by %r" % cls
+        raise AssertionError(msg)
+    return base
+
+
+def patch_all():
+    # we can't patch distutils.cmd, alas
+    distutils.core.Command = setuptools.Command
+
+    has_issue_12885 = sys.version_info <= (3, 5, 3)
+
+    if has_issue_12885:
+        # fix findall bug in distutils (http://bugs.python.org/issue12885)
+        distutils.filelist.findall = setuptools.findall
+
+    needs_warehouse = (
+        sys.version_info < (2, 7, 13)
+        or
+        (3, 0) < sys.version_info < (3, 3, 7)
+        or
+        (3, 4) < sys.version_info < (3, 4, 6)
+        or
+        (3, 5) < sys.version_info <= (3, 5, 3)
+    )
+
+    if needs_warehouse:
+        warehouse = 'https://upload.pypi.org/legacy/'
+        distutils.config.PyPIRCCommand.DEFAULT_REPOSITORY = warehouse
+
+    _patch_distribution_metadata_write_pkg_file()
+    _patch_distribution_metadata_write_pkg_info()
+
+    # Install Distribution throughout the distutils
+    for module in distutils.dist, distutils.core, distutils.cmd:
+        module.Distribution = setuptools.dist.Distribution
+
+    # Install the patched Extension
+    distutils.core.Extension = setuptools.extension.Extension
+    distutils.extension.Extension = setuptools.extension.Extension
+    if 'distutils.command.build_ext' in sys.modules:
+        sys.modules['distutils.command.build_ext'].Extension = (
+            setuptools.extension.Extension
+        )
+
+    patch_for_msvc_specialized_compiler()
+
+
+def _patch_distribution_metadata_write_pkg_file():
+    """Patch write_pkg_file to also write Requires-Python/Requires-External"""
+    distutils.dist.DistributionMetadata.write_pkg_file = (
+        setuptools.dist.write_pkg_file
+    )
+
+
+def _patch_distribution_metadata_write_pkg_info():
+    """
+    Workaround issue #197 - Python 3 prior to 3.2.2 uses an environment-local
+    encoding to save the pkg_info. Monkey-patch its write_pkg_info method to
+    correct this undesirable behavior.
+    """
+    environment_local = (3,) <= sys.version_info[:3] < (3, 2, 2)
+    if not environment_local:
+        return
+
+    distutils.dist.DistributionMetadata.write_pkg_info = (
+        setuptools.dist.write_pkg_info
+    )
+
+
+def patch_func(replacement, target_mod, func_name):
+    """
+    Patch func_name in target_mod with replacement
+
+    Important - original must be resolved by name to avoid
+    patching an already patched function.
+    """
+    original = getattr(target_mod, func_name)
+
+    # set the 'unpatched' attribute on the replacement to
+    # point to the original.
+    vars(replacement).setdefault('unpatched', original)
+
+    # replace the function in the original module
+    setattr(target_mod, func_name, replacement)
+
+
+def get_unpatched_function(candidate):
+    return getattr(candidate, 'unpatched')
+
+
+def patch_for_msvc_specialized_compiler():
+    """
+    Patch functions in distutils to use standalone Microsoft Visual C++
+    compilers.
+    """
+    # import late to avoid circular imports on Python < 3.5
+    msvc = import_module('setuptools.msvc')
+
+    if platform.system() != 'Windows':
+        # Compilers are only available on Microsoft Windows
+        return
+
+    def patch_params(mod_name, func_name):
+        """
+        Prepare the parameters for patch_func to patch the indicated function.
+        """
+        repl_prefix = 'msvc9_' if 'msvc9' in mod_name else 'msvc14_'
+        repl_name = repl_prefix + func_name.lstrip('_')
+        repl = getattr(msvc, repl_name)
+        mod = import_module(mod_name)
+        if not hasattr(mod, func_name):
+            raise ImportError(func_name)
+        return repl, mod, func_name
+
+    # Python 2.7 to 3.4
+    msvc9 = functools.partial(patch_params, 'distutils.msvc9compiler')
+
+    # Python 3.5+
+    msvc14 = functools.partial(patch_params, 'distutils._msvccompiler')
+
+    try:
+        # Patch distutils.msvc9compiler
+        patch_func(*msvc9('find_vcvarsall'))
+        patch_func(*msvc9('query_vcvarsall'))
+    except ImportError:
+        pass
+
+    try:
+        # Patch distutils._msvccompiler._get_vc_env
+        patch_func(*msvc14('_get_vc_env'))
+    except ImportError:
+        pass
+
+    try:
+        # Patch distutils._msvccompiler.gen_lib_options for Numpy
+        patch_func(*msvc14('gen_lib_options'))
+    except ImportError:
+        pass
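+
+
+# Sketch of how callers recover the pre-patch objects once patch_all() has
+# run (this is the pattern setuptools itself uses elsewhere):
+#
+#     import distutils.core
+#     from setuptools.monkey import get_unpatched
+#
+#     _Distribution = get_unpatched(distutils.core.Distribution)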
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/msvc.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/msvc.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e20b3f1ddfaa9c4994c03d7bcff5140794018cc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/msvc.py
@@ -0,0 +1,1302 @@
+"""
+Improved support for Microsoft Visual C++ compilers.
+
+Known supported compilers:
+--------------------------
+Microsoft Visual C++ 9.0:
+    Microsoft Visual C++ Compiler for Python 2.7 (x86, amd64)
+    Microsoft Windows SDK 6.1 (x86, x64, ia64)
+    Microsoft Windows SDK 7.0 (x86, x64, ia64)
+
+Microsoft Visual C++ 10.0:
+    Microsoft Windows SDK 7.1 (x86, x64, ia64)
+
+Microsoft Visual C++ 14.0:
+    Microsoft Visual C++ Build Tools 2015 (x86, x64, arm)
+    Microsoft Visual Studio 2017 (x86, x64, arm, arm64)
+    Microsoft Visual Studio Build Tools 2017 (x86, x64, arm, arm64)
+"""
+
+import os
+import sys
+import platform
+import itertools
+import distutils.errors
+from setuptools.extern.packaging.version import LegacyVersion
+
+from setuptools.extern.six.moves import filterfalse
+
+from .monkey import get_unpatched
+
+if platform.system() == 'Windows':
+    from setuptools.extern.six.moves import winreg
+    safe_env = os.environ
+else:
+    """
+    Mock winreg and environ so the module can be imported
+    on this platform.
+    """
+
+    class winreg:
+        HKEY_USERS = None
+        HKEY_CURRENT_USER = None
+        HKEY_LOCAL_MACHINE = None
+        HKEY_CLASSES_ROOT = None
+
+    safe_env = dict()
+
+_msvc9_suppress_errors = (
+    # msvc9compiler isn't available on some platforms
+    ImportError,
+
+    # msvc9compiler raises DistutilsPlatformError in some
+    # environments. See #1118.
+    distutils.errors.DistutilsPlatformError,
+)
+
+try:
+    from distutils.msvc9compiler import Reg
+except _msvc9_suppress_errors:
+    pass
+
+
+def msvc9_find_vcvarsall(version):
+    """
+    Patched "distutils.msvc9compiler.find_vcvarsall" to use the standalone
+    compiler build for Python (VCForPython). Fall back to original behavior
+    when the standalone compiler is not available.
+
+    Redirect the path of "vcvarsall.bat".
+
+    Known supported compilers
+    -------------------------
+    Microsoft Visual C++ 9.0:
+        Microsoft Visual C++ Compiler for Python 2.7 (x86, amd64)
+
+    Parameters
+    ----------
+    version: float
+        Required Microsoft Visual C++ version.
+
+    Return
+    ------
+    vcvarsall.bat path: str
+    """
+    VC_BASE = r'Software\%sMicrosoft\DevDiv\VCForPython\%0.1f'
+    key = VC_BASE % ('', version)
+    try:
+        # Per-user installs register the compiler path here
+        productdir = Reg.get_value(key, "installdir")
+    except KeyError:
+        try:
+            # All-user installs on a 64-bit system register here
+            key = VC_BASE % ('Wow6432Node\\', version)
+            productdir = Reg.get_value(key, "installdir")
+        except KeyError:
+            productdir = None
+
+    if productdir:
+        vcvarsall = os.path.join(productdir, "vcvarsall.bat")
+        if os.path.isfile(vcvarsall):
+            return vcvarsall
+
+    return get_unpatched(msvc9_find_vcvarsall)(version)
+
+
+def msvc9_query_vcvarsall(ver, arch='x86', *args, **kwargs):
+    """
+    Patched "distutils.msvc9compiler.query_vcvarsall" for support extra
+    compilers.
+
+    Set environment without use of "vcvarsall.bat".
+
+    Known supported compilers
+    -------------------------
+    Microsoft Visual C++ 9.0:
+        Microsoft Visual C++ Compiler for Python 2.7 (x86, amd64)
+        Microsoft Windows SDK 6.1 (x86, x64, ia64)
+        Microsoft Windows SDK 7.0 (x86, x64, ia64)
+
+    Microsoft Visual C++ 10.0:
+        Microsoft Windows SDK 7.1 (x86, x64, ia64)
+
+    Parameters
+    ----------
+    ver: float
+        Required Microsoft Visual C++ version.
+    arch: str
+        Target architecture.
+
+    Return
+    ------
+    environment: dict
+    """
+    # Try to get the environment from vcvarsall.bat (classical way)
+    try:
+        orig = get_unpatched(msvc9_query_vcvarsall)
+        return orig(ver, arch, *args, **kwargs)
+    except distutils.errors.DistutilsPlatformError:
+        # Pass error if Vcvarsall.bat is missing
+        pass
+    except ValueError:
+        # Pass error if environment not set after executing vcvarsall.bat
+        pass
+
+    # If error, try to set environment directly
+    try:
+        return EnvironmentInfo(arch, ver).return_env()
+    except distutils.errors.DistutilsPlatformError as exc:
+        _augment_exception(exc, ver, arch)
+        raise
+
+
+def msvc14_get_vc_env(plat_spec):
+    """
+    Patched "distutils._msvccompiler._get_vc_env" for support extra
+    compilers.
+
+    Set environment without use of "vcvarsall.bat".
+
+    Known supported compilers
+    -------------------------
+    Microsoft Visual C++ 14.0:
+        Microsoft Visual C++ Build Tools 2015 (x86, x64, arm)
+        Microsoft Visual Studio 2017 (x86, x64, arm, arm64)
+        Microsoft Visual Studio Build Tools 2017 (x86, x64, arm, arm64)
+
+    Parameters
+    ----------
+    plat_spec: str
+        Target architecture.
+
+    Return
+    ------
+    environment: dict
+    """
+    # Try to get the environment from vcvarsall.bat (classical way)
+    try:
+        return get_unpatched(msvc14_get_vc_env)(plat_spec)
+    except distutils.errors.DistutilsPlatformError:
+        # Pass error if vcvarsall.bat is missing
+        pass
+
+    # If error, try to set environment directly
+    try:
+        return EnvironmentInfo(plat_spec, vc_min_ver=14.0).return_env()
+    except distutils.errors.DistutilsPlatformError as exc:
+        _augment_exception(exc, 14.0)
+        raise
+
+
+def msvc14_gen_lib_options(*args, **kwargs):
+    """
+    Patched "distutils._msvccompiler.gen_lib_options" for fix
+    compatibility between "numpy.distutils" and "distutils._msvccompiler"
+    (for Numpy < 1.11.2)
+    """
+    if "numpy.distutils" in sys.modules:
+        import numpy as np
+        if LegacyVersion(np.__version__) < LegacyVersion('1.11.2'):
+            return np.distutils.ccompiler.gen_lib_options(*args, **kwargs)
+    return get_unpatched(msvc14_gen_lib_options)(*args, **kwargs)
+
+
+def _augment_exception(exc, version, arch=''):
+    """
+    Add details to the exception message to help guide the user
+    as to what action will resolve it.
+    """
+    # Error if MSVC++ directory not found or environment not set
+    message = exc.args[0]
+
+    if "vcvarsall" in message.lower() or "visual c" in message.lower():
+        # Special error message if MSVC++ not installed
+        tmpl = 'Microsoft Visual C++ {version:0.1f} is required.'
+        message = tmpl.format(**locals())
+        msdownload = 'www.microsoft.com/download/details.aspx?id=%d'
+        if version == 9.0:
+            if arch.lower().find('ia64') > -1:
+                # For VC++ 9.0, if IA64 support is needed, redirect user
+                # to Windows SDK 7.0
+                message += ' Get it with "Microsoft Windows SDK 7.0": '
+                message += msdownload % 3138
+            else:
+                # For VC++ 9.0, redirect user to VC++ for Python 2.7:
+                # This redirection link is maintained by Microsoft.
+                # Contact vspython@microsoft.com if it needs updating.
+                message += ' Get it from http://aka.ms/vcpython27'
+        elif version == 10.0:
+            # For VC++ 10.0 Redirect user to Windows SDK 7.1
+            message += ' Get it with "Microsoft Windows SDK 7.1": '
+            message += msdownload % 8279
+        elif version >= 14.0:
+            # For VC++ 14.0 Redirect user to Visual C++ Build Tools
+            message += (' Get it with "Microsoft Visual C++ Build Tools": '
+                        r'http://landinghub.visualstudio.com/'
+                        'visual-cpp-build-tools')
+
+    exc.args = (message, )
+
+
+class PlatformInfo:
+    """
+    Current and target architecture information.
+
+    Parameters
+    ----------
+    arch: str
+        Target architecture.
+    """
+    current_cpu = safe_env.get('processor_architecture', '').lower()
+
+    def __init__(self, arch):
+        self.arch = arch.lower().replace('x64', 'amd64')
+
+    @property
+    def target_cpu(self):
+        return self.arch[self.arch.find('_') + 1:]
+
+    def target_is_x86(self):
+        return self.target_cpu == 'x86'
+
+    def current_is_x86(self):
+        return self.current_cpu == 'x86'
+
+    def current_dir(self, hidex86=False, x64=False):
+        """
+        Current platform specific subfolder.
+
+        Parameters
+        ----------
+        hidex86: bool
+            return '' and not '\x86' if architecture is x86.
+        x64: bool
+            return '\x64' and not '\amd64' if architecture is amd64.
+
+        Return
+        ------
+        subfolder: str
+            '\current', or '' (see hidex86 parameter)
+        """
+        return (
+            '' if (self.current_cpu == 'x86' and hidex86) else
+            r'\x64' if (self.current_cpu == 'amd64' and x64) else
+            r'\%s' % self.current_cpu
+        )
+
+    def target_dir(self, hidex86=False, x64=False):
+        r"""
+        Target platform specific subfolder.
+
+        Parameters
+        ----------
+        hidex86: bool
+            return '' and not '\x86' if architecture is x86.
+        x64: bool
+            return '\x64' and not '\amd64' if architecture is amd64.
+
+        Return
+        ------
+        subfolder: str
+            '\target', or '' (see hidex86 parameter)
+        """
+        return (
+            '' if (self.target_cpu == 'x86' and hidex86) else
+            r'\x64' if (self.target_cpu == 'amd64' and x64) else
+            r'\%s' % self.target_cpu
+        )
+
+    def cross_dir(self, forcex86=False):
+        r"""
+        Cross platform specific subfolder.
+
+        Parameters
+        ----------
+        forcex86: bool
+            Use 'x86' as current architecture even if the current architecture is
+            not x86.
+
+        Return
+        ------
+        subfolder: str
+            '' if target architecture is current architecture,
+            '\current_target' if not.
+        """
+        current = 'x86' if forcex86 else self.current_cpu
+        return (
+            '' if self.target_cpu == current else
+            self.target_dir().replace('\\', '\\%s_' % current)
+        )
+
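+    # Sketch, on an amd64 host building for 32-bit x86 (environment
+    # dependent; assumes PROCESSOR_ARCHITECTURE reports 'AMD64'):
+    #
+    #     pi = PlatformInfo('x86')
+    #     pi.target_dir()   # -> '\\x86'
+    #     pi.cross_dir()    # -> '\\amd64_x86'
+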
+
+class RegistryInfo:
+    """
+    Microsoft Visual Studio related registry information.
+
+    Parameters
+    ----------
+    platform_info: PlatformInfo
+        "PlatformInfo" instance.
+    """
+    HKEYS = (winreg.HKEY_USERS,
+             winreg.HKEY_CURRENT_USER,
+             winreg.HKEY_LOCAL_MACHINE,
+             winreg.HKEY_CLASSES_ROOT)
+
+    def __init__(self, platform_info):
+        self.pi = platform_info
+
+    @property
+    def visualstudio(self):
+        """
+        Microsoft Visual Studio root registry key.
+        """
+        return 'VisualStudio'
+
+    @property
+    def sxs(self):
+        """
+        Microsoft Visual Studio SxS registry key.
+        """
+        return os.path.join(self.visualstudio, 'SxS')
+
+    @property
+    def vc(self):
+        """
+        Microsoft Visual C++ VC7 registry key.
+        """
+        return os.path.join(self.sxs, 'VC7')
+
+    @property
+    def vs(self):
+        """
+        Microsoft Visual Studio VS7 registry key.
+        """
+        return os.path.join(self.sxs, 'VS7')
+
+    @property
+    def vc_for_python(self):
+        """
+        Microsoft Visual C++ for Python registry key.
+        """
+        return r'DevDiv\VCForPython'
+
+    @property
+    def microsoft_sdk(self):
+        """
+        Microsoft SDK registry key.
+        """
+        return 'Microsoft SDKs'
+
+    @property
+    def windows_sdk(self):
+        """
+        Microsoft Windows/Platform SDK registry key.
+        """
+        return os.path.join(self.microsoft_sdk, 'Windows')
+
+    @property
+    def netfx_sdk(self):
+        """
+        Microsoft .NET Framework SDK registry key.
+        """
+        return os.path.join(self.microsoft_sdk, 'NETFXSDK')
+
+    @property
+    def windows_kits_roots(self):
+        """
+        Microsoft Windows Kits Roots registry key.
+        """
+        return r'Windows Kits\Installed Roots'
+
+    def microsoft(self, key, x86=False):
+        """
+        Return key in Microsoft software registry.
+
+        Parameters
+        ----------
+        key: str
+            Registry key path where to look.
+        x86: bool
+            Force x86 software registry.
+
+        Return
+        ------
+        str: value
+        """
+        node64 = '' if self.pi.current_is_x86() or x86 else 'Wow6432Node'
+        return os.path.join('Software', node64, 'Microsoft', key)
+
+    def lookup(self, key, name):
+        """
+        Look for values in registry in Microsoft software registry.
+
+        Parameters
+        ----------
+        key: str
+            Registry key path where to look.
+        name: str
+            Value name to find.
+
+        Return
+        ------
+        str: value
+        """
+        KEY_READ = winreg.KEY_READ
+        openkey = winreg.OpenKey
+        ms = self.microsoft
+        for hkey in self.HKEYS:
+            try:
+                bkey = openkey(hkey, ms(key), 0, KEY_READ)
+            except (OSError, IOError):
+                if not self.pi.current_is_x86():
+                    try:
+                        bkey = openkey(hkey, ms(key, True), 0, KEY_READ)
+                    except (OSError, IOError):
+                        continue
+                else:
+                    continue
+            try:
+                return winreg.QueryValueEx(bkey, name)[0]
+            except (OSError, IOError):
+                pass
+
+
+class SystemInfo:
+    """
+    Microsoft Windows and Visual Studio related system information.
+
+    Parameters
+    ----------
+    registry_info: RegistryInfo
+        "RegistryInfo" instance.
+    vc_ver: float
+        Required Microsoft Visual C++ version.
+    """
+
+    # Variables and properties in this class use the original CamelCase
+    # variable names from Microsoft source files for easier comparison.
+    WinDir = safe_env.get('WinDir', '')
+    ProgramFiles = safe_env.get('ProgramFiles', '')
+    ProgramFilesx86 = safe_env.get('ProgramFiles(x86)', ProgramFiles)
+
+    def __init__(self, registry_info, vc_ver=None):
+        self.ri = registry_info
+        self.pi = self.ri.pi
+        self.vc_ver = vc_ver or self._find_latest_available_vc_ver()
+
+    def _find_latest_available_vc_ver(self):
+        try:
+            return self.find_available_vc_vers()[-1]
+        except IndexError:
+            err = 'No Microsoft Visual C++ version found'
+            raise distutils.errors.DistutilsPlatformError(err)
+
+    def find_available_vc_vers(self):
+        """
+        Find all available Microsoft Visual C++ versions.
+        """
+        ms = self.ri.microsoft
+        vckeys = (self.ri.vc, self.ri.vc_for_python, self.ri.vs)
+        vc_vers = []
+        for hkey in self.ri.HKEYS:
+            for key in vckeys:
+                try:
+                    bkey = winreg.OpenKey(hkey, ms(key), 0, winreg.KEY_READ)
+                except (OSError, IOError):
+                    continue
+                subkeys, values, _ = winreg.QueryInfoKey(bkey)
+                for i in range(values):
+                    try:
+                        ver = float(winreg.EnumValue(bkey, i)[0])
+                        if ver not in vc_vers:
+                            vc_vers.append(ver)
+                    except ValueError:
+                        pass
+                for i in range(subkeys):
+                    try:
+                        ver = float(winreg.EnumKey(bkey, i))
+                        if ver not in vc_vers:
+                            vc_vers.append(ver)
+                    except ValueError:
+                        pass
+        return sorted(vc_vers)
+
+    @property
+    def VSInstallDir(self):
+        """
+        Microsoft Visual Studio directory.
+        """
+        # Default path
+        name = 'Microsoft Visual Studio %0.1f' % self.vc_ver
+        default = os.path.join(self.ProgramFilesx86, name)
+
+        # Try to get path from registry, if fail use default path
+        return self.ri.lookup(self.ri.vs, '%0.1f' % self.vc_ver) or default
+
+    @property
+    def VCInstallDir(self):
+        """
+        Microsoft Visual C++ directory.
+        """
+        self.VSInstallDir
+
+        guess_vc = self._guess_vc() or self._guess_vc_legacy()
+
+        # Try to get "VC++ for Python" path from registry as default path
+        reg_path = os.path.join(self.ri.vc_for_python, '%0.1f' % self.vc_ver)
+        python_vc = self.ri.lookup(reg_path, 'installdir')
+        default_vc = os.path.join(python_vc, 'VC') if python_vc else guess_vc
+
+        # Try to get path from registry, if fail use default path
+        path = self.ri.lookup(self.ri.vc, '%0.1f' % self.vc_ver) or default_vc
+
+        if not os.path.isdir(path):
+            msg = 'Microsoft Visual C++ directory not found'
+            raise distutils.errors.DistutilsPlatformError(msg)
+
+        return path
+
+    def _guess_vc(self):
+        """
+        Locate Visual C for 2017
+        """
+        if self.vc_ver <= 14.0:
+            return
+
+        default = r'VC\Tools\MSVC'
+        guess_vc = os.path.join(self.VSInstallDir, default)
+        # Subdir with VC exact version as name
+        try:
+            vc_exact_ver = os.listdir(guess_vc)[-1]
+            return os.path.join(guess_vc, vc_exact_ver)
+        except (OSError, IOError, IndexError):
+            pass
+
+    def _guess_vc_legacy(self):
+        """
+        Locate Visual C for versions prior to 2017
+        """
+        default = r'Microsoft Visual Studio %0.1f\VC' % self.vc_ver
+        return os.path.join(self.ProgramFilesx86, default)
+
+    @property
+    def WindowsSdkVersion(self):
+        """
+        Microsoft Windows SDK versions for specified MSVC++ version.
+        """
+        if self.vc_ver <= 9.0:
+            return ('7.0', '6.1', '6.0a')
+        elif self.vc_ver == 10.0:
+            return ('7.1', '7.0a')
+        elif self.vc_ver == 11.0:
+            return ('8.0', '8.0a')
+        elif self.vc_ver == 12.0:
+            return ('8.1', '8.1a')
+        elif self.vc_ver >= 14.0:
+            return ('10.0', '8.1')
+
+    @property
+    def WindowsSdkLastVersion(self):
+        """
+        Microsoft Windows SDK last version
+        """
+        return self._use_last_dir_name(os.path.join(
+            self.WindowsSdkDir, 'lib'))
+
+    @property
+    def WindowsSdkDir(self):
+        """
+        Microsoft Windows SDK directory.
+        """
+        sdkdir = ''
+        for ver in self.WindowsSdkVersion:
+            # Try to get it from registry
+            loc = os.path.join(self.ri.windows_sdk, 'v%s' % ver)
+            sdkdir = self.ri.lookup(loc, 'installationfolder')
+            if sdkdir:
+                break
+        if not sdkdir or not os.path.isdir(sdkdir):
+            # Try to get "VC++ for Python" version from registry
+            path = os.path.join(self.ri.vc_for_python, '%0.1f' % self.vc_ver)
+            install_base = self.ri.lookup(path, 'installdir')
+            if install_base:
+                sdkdir = os.path.join(install_base, 'WinSDK')
+        if not sdkdir or not os.path.isdir(sdkdir):
+            # If fail, use default new path
+            for ver in self.WindowsSdkVersion:
+                intver = ver[:ver.rfind('.')]
+                path = r'Microsoft SDKs\Windows Kits\%s' % (intver)
+                d = os.path.join(self.ProgramFiles, path)
+                if os.path.isdir(d):
+                    sdkdir = d
+        if not sdkdir or not os.path.isdir(sdkdir):
+            # If fail, use default old path
+            for ver in self.WindowsSdkVersion:
+                path = r'Microsoft SDKs\Windows\v%s' % ver
+                d = os.path.join(self.ProgramFiles, path)
+                if os.path.isdir(d):
+                    sdkdir = d
+        if not sdkdir:
+            # If fail, use Platform SDK
+            sdkdir = os.path.join(self.VCInstallDir, 'PlatformSDK')
+        return sdkdir
+
+    @property
+    def WindowsSDKExecutablePath(self):
+        """
+        Microsoft Windows SDK executable directory.
+        """
+        # Find WinSDK NetFx Tools registry dir name
+        if self.vc_ver <= 11.0:
+            netfxver = 35
+            arch = ''
+        else:
+            netfxver = 40
+            hidex86 = self.vc_ver <= 12.0
+            arch = self.pi.current_dir(x64=True, hidex86=hidex86)
+        fx = 'WinSDK-NetFx%dTools%s' % (netfxver, arch.replace('\\', '-'))
+
+        # List all possible registry paths
+        regpaths = []
+        if self.vc_ver >= 14.0:
+            for ver in self.NetFxSdkVersion:
+                regpaths += [os.path.join(self.ri.netfx_sdk, ver, fx)]
+
+        for ver in self.WindowsSdkVersion:
+            regpaths += [os.path.join(self.ri.windows_sdk, 'v%sA' % ver, fx)]
+
+        # Return the installation folder from the most recent path
+        for path in regpaths:
+            execpath = self.ri.lookup(path, 'installationfolder')
+            if execpath:
+                break
+        return execpath
+
+    @property
+    def FSharpInstallDir(self):
+        """
+        Microsoft Visual F# directory.
+        """
+        path = r'%0.1f\Setup\F#' % self.vc_ver
+        path = os.path.join(self.ri.visualstudio, path)
+        return self.ri.lookup(path, 'productdir') or ''
+
+    @property
+    def UniversalCRTSdkDir(self):
+        """
+        Microsoft Universal CRT SDK directory.
+        """
+        # Set Kit Roots versions for specified MSVC++ version
+        if self.vc_ver >= 14.0:
+            vers = ('10', '81')
+        else:
+            vers = ()
+
+        # Find the path of the most recent Kit
+        for ver in vers:
+            sdkdir = self.ri.lookup(self.ri.windows_kits_roots,
+                                    'kitsroot%s' % ver)
+            if sdkdir:
+                break
+        return sdkdir or ''
+
+    @property
+    def UniversalCRTSdkLastVersion(self):
+        """
+        Microsoft Universal C Runtime SDK last version
+        """
+        return self._use_last_dir_name(os.path.join(
+            self.UniversalCRTSdkDir, 'lib'))
+
+    @property
+    def NetFxSdkVersion(self):
+        """
+        Microsoft .NET Framework SDK versions.
+        """
+        # Set FxSdk versions for specified MSVC++ version
+        if self.vc_ver >= 14.0:
+            return ('4.6.1', '4.6')
+        else:
+            return ()
+
+    @property
+    def NetFxSdkDir(self):
+        """
+        Microsoft .NET Framework SDK directory.
+        """
+        for ver in self.NetFxSdkVersion:
+            loc = os.path.join(self.ri.netfx_sdk, ver)
+            sdkdir = self.ri.lookup(loc, 'kitsinstallationfolder')
+            if sdkdir:
+                break
+        return sdkdir or ''
+
+    @property
+    def FrameworkDir32(self):
+        """
+        Microsoft .NET Framework 32bit directory.
+        """
+        # Default path
+        guess_fw = os.path.join(self.WinDir, r'Microsoft.NET\Framework')
+
+        # Try to get the path from the registry; if that fails, use the default path
+        return self.ri.lookup(self.ri.vc, 'frameworkdir32') or guess_fw
+
+    @property
+    def FrameworkDir64(self):
+        """
+        Microsoft .NET Framework 64bit directory.
+        """
+        # Default path
+        guess_fw = os.path.join(self.WinDir, r'Microsoft.NET\Framework64')
+
+        # Try to get the path from the registry; if that fails, use the default path
+        return self.ri.lookup(self.ri.vc, 'frameworkdir64') or guess_fw
+
+    @property
+    def FrameworkVersion32(self):
+        """
+        Microsoft .NET Framework 32bit versions.
+        """
+        return self._find_dot_net_versions(32)
+
+    @property
+    def FrameworkVersion64(self):
+        """
+        Microsoft .NET Framework 64bit versions.
+        """
+        return self._find_dot_net_versions(64)
+
+    def _find_dot_net_versions(self, bits):
+        """
+        Find Microsoft .NET Framework versions.
+
+        Parameters
+        ----------
+        bits: int
+            Platform number of bits: 32 or 64.
+        """
+        # Find actual .NET version in registry
+        reg_ver = self.ri.lookup(self.ri.vc, 'frameworkver%d' % bits)
+        dot_net_dir = getattr(self, 'FrameworkDir%d' % bits)
+        ver = reg_ver or self._use_last_dir_name(dot_net_dir, 'v') or ''
+
+        # Set .NET versions for specified MSVC++ version
+        if self.vc_ver >= 12.0:
+            frameworkver = (ver, 'v4.0')
+        elif self.vc_ver >= 10.0:
+            frameworkver = ('v4.0.30319' if ver.lower()[:2] != 'v4' else ver,
+                            'v3.5')
+        elif self.vc_ver == 9.0:
+            frameworkver = ('v3.5', 'v2.0.50727')
+        elif self.vc_ver == 8.0:
+            frameworkver = ('v3.0', 'v2.0.50727')
+        return frameworkver
+
+    def _use_last_dir_name(self, path, prefix=''):
+        """
+        Return name of the last dir in path or '' if no dir found.
+
+        Parameters
+        ----------
+        path: str
+            Use dirs in this path
+        prefix: str
+            Use only dirs starting with this prefix
+        """
+        matching_dirs = (
+            dir_name
+            for dir_name in reversed(os.listdir(path))
+            if os.path.isdir(os.path.join(path, dir_name)) and
+            dir_name.startswith(prefix)
+        )
+        return next(matching_dirs, None) or ''
+
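+    # Illustrative note (editorial sketch, not upstream code): with a layout
+    # such as C:\Kits\10\lib containing 10.0.10240.0 and 10.0.17763.0,
+    # _use_last_dir_name(r'C:\Kits\10\lib') returns the last entry reported by
+    # os.listdir() (in practice the highest version on Windows), and '' when
+    # no subdirectory matches the optional prefix.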
+
+class EnvironmentInfo:
+    """
+    Return environment variables for specified Microsoft Visual C++ version
+    and platform: Lib, Include, Path and libpath.
+
+    This class is compatible with Microsoft Visual C++ 9.0 to 14.0.
+
+    Script created by analysing Microsoft environment configuration files like
+    "vcvars[...].bat", "SetEnv.Cmd", "vcbuildtools.bat", ...
+
+    Parameters
+    ----------
+    arch: str
+        Target architecture.
+    vc_ver: float
+        Required Microsoft Visual C++ version. If not set, autodetect the last
+        version.
+    vc_min_ver: float
+        Minimum Microsoft Visual C++ version.
+    """
+
+    # Variables and properties in this class use original CamelCase variable
+    # names from Microsoft source files for easier comparison.
+
+    def __init__(self, arch, vc_ver=None, vc_min_ver=0):
+        self.pi = PlatformInfo(arch)
+        self.ri = RegistryInfo(self.pi)
+        self.si = SystemInfo(self.ri, vc_ver)
+
+        if self.vc_ver < vc_min_ver:
+            err = 'No suitable Microsoft Visual C++ version found'
+            raise distutils.errors.DistutilsPlatformError(err)
+
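+    # Editorial usage sketch (not part of upstream setuptools); assuming an
+    # amd64 host with a Visual C++ toolchain installed:
+    #
+    #     env_info = EnvironmentInfo('amd64', vc_min_ver=14.0)
+    #     env_info.vc_ver        # detected version, e.g. 14.0
+    #
+    # Construction raises DistutilsPlatformError when no Visual C++ version at
+    # least as recent as vc_min_ver can be found.
+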
+    @property
+    def vc_ver(self):
+        """
+        Microsoft Visual C++ version.
+        """
+        return self.si.vc_ver
+
+    @property
+    def VSTools(self):
+        """
+        Microsoft Visual Studio Tools
+        """
+        paths = [r'Common7\IDE', r'Common7\Tools']
+
+        if self.vc_ver >= 14.0:
+            arch_subdir = self.pi.current_dir(hidex86=True, x64=True)
+            paths += [r'Common7\IDE\CommonExtensions\Microsoft\TestWindow']
+            paths += [r'Team Tools\Performance Tools']
+            paths += [r'Team Tools\Performance Tools%s' % arch_subdir]
+
+        return [os.path.join(self.si.VSInstallDir, path) for path in paths]
+
+    @property
+    def VCIncludes(self):
+        """
+        Microsoft Visual C++ & Microsoft Foundation Class Includes
+        """
+        return [os.path.join(self.si.VCInstallDir, 'Include'),
+                os.path.join(self.si.VCInstallDir, r'ATLMFC\Include')]
+
+    @property
+    def VCLibraries(self):
+        """
+        Microsoft Visual C++ & Microsoft Foundation Class Libraries
+        """
+        if self.vc_ver >= 15.0:
+            arch_subdir = self.pi.target_dir(x64=True)
+        else:
+            arch_subdir = self.pi.target_dir(hidex86=True)
+        paths = ['Lib%s' % arch_subdir, r'ATLMFC\Lib%s' % arch_subdir]
+
+        if self.vc_ver >= 14.0:
+            paths += [r'Lib\store%s' % arch_subdir]
+
+        return [os.path.join(self.si.VCInstallDir, path) for path in paths]
+
+    @property
+    def VCStoreRefs(self):
+        """
+        Microsoft Visual C++ store references Libraries
+        """
+        if self.vc_ver < 14.0:
+            return []
+        return [os.path.join(self.si.VCInstallDir, r'Lib\store\references')]
+
+    @property
+    def VCTools(self):
+        """
+        Microsoft Visual C++ Tools
+        """
+        si = self.si
+        tools = [os.path.join(si.VCInstallDir, 'VCPackages')]
+
+        forcex86 = self.vc_ver <= 10.0
+        arch_subdir = self.pi.cross_dir(forcex86)
+        if arch_subdir:
+            tools += [os.path.join(si.VCInstallDir, 'Bin%s' % arch_subdir)]
+
+        if self.vc_ver == 14.0:
+            path = 'Bin%s' % self.pi.current_dir(hidex86=True)
+            tools += [os.path.join(si.VCInstallDir, path)]
+
+        elif self.vc_ver >= 15.0:
+            host_dir = (r'bin\HostX86%s' if self.pi.current_is_x86() else
+                        r'bin\HostX64%s')
+            tools += [os.path.join(
+                si.VCInstallDir, host_dir % self.pi.target_dir(x64=True))]
+
+            if self.pi.current_cpu != self.pi.target_cpu:
+                tools += [os.path.join(
+                    si.VCInstallDir, host_dir % self.pi.current_dir(x64=True))]
+
+        else:
+            tools += [os.path.join(si.VCInstallDir, 'Bin')]
+
+        return tools
+
+    @property
+    def OSLibraries(self):
+        """
+        Microsoft Windows SDK Libraries
+        """
+        if self.vc_ver <= 10.0:
+            arch_subdir = self.pi.target_dir(hidex86=True, x64=True)
+            return [os.path.join(self.si.WindowsSdkDir, 'Lib%s' % arch_subdir)]
+
+        else:
+            arch_subdir = self.pi.target_dir(x64=True)
+            lib = os.path.join(self.si.WindowsSdkDir, 'lib')
+            libver = self._sdk_subdir
+            return [os.path.join(lib, '%sum%s' % (libver, arch_subdir))]
+
+    @property
+    def OSIncludes(self):
+        """
+        Microsoft Windows SDK Include
+        """
+        include = os.path.join(self.si.WindowsSdkDir, 'include')
+
+        if self.vc_ver <= 10.0:
+            return [include, os.path.join(include, 'gl')]
+
+        else:
+            if self.vc_ver >= 14.0:
+                sdkver = self._sdk_subdir
+            else:
+                sdkver = ''
+            return [os.path.join(include, '%sshared' % sdkver),
+                    os.path.join(include, '%sum' % sdkver),
+                    os.path.join(include, '%swinrt' % sdkver)]
+
+    @property
+    def OSLibpath(self):
+        """
+        Microsoft Windows SDK Libraries Paths
+        """
+        ref = os.path.join(self.si.WindowsSdkDir, 'References')
+        libpath = []
+
+        if self.vc_ver <= 9.0:
+            libpath += self.OSLibraries
+
+        if self.vc_ver >= 11.0:
+            libpath += [os.path.join(ref, r'CommonConfiguration\Neutral')]
+
+        if self.vc_ver >= 14.0:
+            libpath += [
+                ref,
+                os.path.join(self.si.WindowsSdkDir, 'UnionMetadata'),
+                os.path.join(
+                    ref,
+                    'Windows.Foundation.UniversalApiContract',
+                    '1.0.0.0',
+                ),
+                os.path.join(
+                    ref,
+                    'Windows.Foundation.FoundationContract',
+                    '1.0.0.0',
+                ),
+                os.path.join(
+                    ref,
+                    'Windows.Networking.Connectivity.WwanContract',
+                    '1.0.0.0',
+                ),
+                os.path.join(
+                    self.si.WindowsSdkDir,
+                    'ExtensionSDKs',
+                    'Microsoft.VCLibs',
+                    '%0.1f' % self.vc_ver,
+                    'References',
+                    'CommonConfiguration',
+                    'neutral',
+                ),
+            ]
+        return libpath
+
+    @property
+    def SdkTools(self):
+        """
+        Microsoft Windows SDK Tools
+        """
+        return list(self._sdk_tools())
+
+    def _sdk_tools(self):
+        """
+        Microsoft Windows SDK Tools paths generator
+        """
+        if self.vc_ver < 15.0:
+            bin_dir = 'Bin' if self.vc_ver <= 11.0 else r'Bin\x86'
+            yield os.path.join(self.si.WindowsSdkDir, bin_dir)
+
+        if not self.pi.current_is_x86():
+            arch_subdir = self.pi.current_dir(x64=True)
+            path = 'Bin%s' % arch_subdir
+            yield os.path.join(self.si.WindowsSdkDir, path)
+
+        if self.vc_ver == 10.0 or self.vc_ver == 11.0:
+            if self.pi.target_is_x86():
+                arch_subdir = ''
+            else:
+                arch_subdir = self.pi.current_dir(hidex86=True, x64=True)
+            path = r'Bin\NETFX 4.0 Tools%s' % arch_subdir
+            yield os.path.join(self.si.WindowsSdkDir, path)
+
+        elif self.vc_ver >= 15.0:
+            path = os.path.join(self.si.WindowsSdkDir, 'Bin')
+            arch_subdir = self.pi.current_dir(x64=True)
+            sdkver = self.si.WindowsSdkLastVersion
+            yield os.path.join(path, '%s%s' % (sdkver, arch_subdir))
+
+        if self.si.WindowsSDKExecutablePath:
+            yield self.si.WindowsSDKExecutablePath
+
+    @property
+    def _sdk_subdir(self):
+        """
+        Microsoft Windows SDK version subdir
+        """
+        ucrtver = self.si.WindowsSdkLastVersion
+        return ('%s\\' % ucrtver) if ucrtver else ''
+
+    @property
+    def SdkSetup(self):
+        """
+        Microsoft Windows SDK Setup
+        """
+        if self.vc_ver > 9.0:
+            return []
+
+        return [os.path.join(self.si.WindowsSdkDir, 'Setup')]
+
+    @property
+    def FxTools(self):
+        """
+        Microsoft .NET Framework Tools
+        """
+        pi = self.pi
+        si = self.si
+
+        if self.vc_ver <= 10.0:
+            include32 = True
+            include64 = not pi.target_is_x86() and not pi.current_is_x86()
+        else:
+            include32 = pi.target_is_x86() or pi.current_is_x86()
+            include64 = pi.current_cpu == 'amd64' or pi.target_cpu == 'amd64'
+
+        tools = []
+        if include32:
+            tools += [os.path.join(si.FrameworkDir32, ver)
+                      for ver in si.FrameworkVersion32]
+        if include64:
+            tools += [os.path.join(si.FrameworkDir64, ver)
+                      for ver in si.FrameworkVersion64]
+        return tools
+
+    @property
+    def NetFxSDKLibraries(self):
+        """
+        Microsoft .NET Framework SDK Libraries
+        """
+        if self.vc_ver < 14.0 or not self.si.NetFxSdkDir:
+            return []
+
+        arch_subdir = self.pi.target_dir(x64=True)
+        return [os.path.join(self.si.NetFxSdkDir, r'lib\um%s' % arch_subdir)]
+
+    @property
+    def NetFxSDKIncludes(self):
+        """
+        Microsoft .NET Framework SDK Includes
+        """
+        if self.vc_ver < 14.0 or not self.si.NetFxSdkDir:
+            return []
+
+        return [os.path.join(self.si.NetFxSdkDir, r'include\um')]
+
+    @property
+    def VsTDb(self):
+        """
+        Microsoft Visual Studio Team System Database
+        """
+        return [os.path.join(self.si.VSInstallDir, r'VSTSDB\Deploy')]
+
+    @property
+    def MSBuild(self):
+        """
+        Microsoft Build Engine
+        """
+        if self.vc_ver < 12.0:
+            return []
+        elif self.vc_ver < 15.0:
+            base_path = self.si.ProgramFilesx86
+            arch_subdir = self.pi.current_dir(hidex86=True)
+        else:
+            base_path = self.si.VSInstallDir
+            arch_subdir = ''
+
+        path = r'MSBuild\%0.1f\bin%s' % (self.vc_ver, arch_subdir)
+        build = [os.path.join(base_path, path)]
+
+        if self.vc_ver >= 15.0:
+            # Add Roslyn C# & Visual Basic Compiler
+            build += [os.path.join(base_path, path, 'Roslyn')]
+
+        return build
+
+    @property
+    def HTMLHelpWorkshop(self):
+        """
+        Microsoft HTML Help Workshop
+        """
+        if self.vc_ver < 11.0:
+            return []
+
+        return [os.path.join(self.si.ProgramFilesx86, 'HTML Help Workshop')]
+
+    @property
+    def UCRTLibraries(self):
+        """
+        Microsoft Universal C Runtime SDK Libraries
+        """
+        if self.vc_ver < 14.0:
+            return []
+
+        arch_subdir = self.pi.target_dir(x64=True)
+        lib = os.path.join(self.si.UniversalCRTSdkDir, 'lib')
+        ucrtver = self._ucrt_subdir
+        return [os.path.join(lib, '%sucrt%s' % (ucrtver, arch_subdir))]
+
+    @property
+    def UCRTIncludes(self):
+        """
+        Microsoft Universal C Runtime SDK Include
+        """
+        if self.vc_ver < 14.0:
+            return []
+
+        include = os.path.join(self.si.UniversalCRTSdkDir, 'include')
+        return [os.path.join(include, '%sucrt' % self._ucrt_subdir)]
+
+    @property
+    def _ucrt_subdir(self):
+        """
+        Microsoft Universal C Runtime SDK version subdir
+        """
+        ucrtver = self.si.UniversalCRTSdkLastVersion
+        return ('%s\\' % ucrtver) if ucrtver else ''
+
+    @property
+    def FSharp(self):
+        """
+        Microsoft Visual F#
+        """
+        if self.vc_ver < 11.0 or self.vc_ver > 12.0:
+            return []
+
+        return self.si.FSharpInstallDir
+
+    @property
+    def VCRuntimeRedist(self):
+        """
+        Microsoft Visual C++ runtime redistributable dll
+        """
+        arch_subdir = self.pi.target_dir(x64=True)
+        if self.vc_ver < 15:
+            redist_path = self.si.VCInstallDir
+            vcruntime = 'redist%s\\Microsoft.VC%d0.CRT\\vcruntime%d0.dll'
+        else:
+            redist_path = self.si.VCInstallDir.replace('\\Tools', '\\Redist')
+            vcruntime = 'onecore%s\\Microsoft.VC%d0.CRT\\vcruntime%d0.dll'
+
+        # Visual Studio 2017 is still Visual C++ 14.0
+        dll_ver = 14.0 if self.vc_ver == 15 else self.vc_ver
+
+        vcruntime = vcruntime % (arch_subdir, self.vc_ver, dll_ver)
+        return os.path.join(redist_path, vcruntime)
+
+    def return_env(self, exists=True):
+        """
+        Return environment dict.
+
+        Parameters
+        ----------
+        exists: bool
+            If True, only return existing paths.
+        """
+        env = dict(
+            include=self._build_paths('include',
+                                      [self.VCIncludes,
+                                       self.OSIncludes,
+                                       self.UCRTIncludes,
+                                       self.NetFxSDKIncludes],
+                                      exists),
+            lib=self._build_paths('lib',
+                                  [self.VCLibraries,
+                                   self.OSLibraries,
+                                   self.FxTools,
+                                   self.UCRTLibraries,
+                                   self.NetFxSDKLibraries],
+                                  exists),
+            libpath=self._build_paths('libpath',
+                                      [self.VCLibraries,
+                                       self.FxTools,
+                                       self.VCStoreRefs,
+                                       self.OSLibpath],
+                                      exists),
+            path=self._build_paths('path',
+                                   [self.VCTools,
+                                    self.VSTools,
+                                    self.VsTDb,
+                                    self.SdkTools,
+                                    self.SdkSetup,
+                                    self.FxTools,
+                                    self.MSBuild,
+                                    self.HTMLHelpWorkshop,
+                                    self.FSharp],
+                                   exists),
+        )
+        if self.vc_ver >= 14 and os.path.isfile(self.VCRuntimeRedist):
+            env['py_vcruntime_redist'] = self.VCRuntimeRedist
+        return env
+
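+    # Editorial usage sketch (not upstream code): the mapping returned by
+    # return_env() can seed a compiler environment, e.g.
+    #
+    #     env = EnvironmentInfo('amd64', vc_min_ver=14.0).return_env()
+    #     os.environ['include'] = env['include']   # pathsep-joined directories
+    #     os.environ['lib'] = env['lib']
+    #
+    # Keys are 'include', 'lib', 'libpath' and 'path', plus
+    # 'py_vcruntime_redist' when the runtime DLL is found on disk.
+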
+    def _build_paths(self, name, spec_path_lists, exists):
+        """
+        Given an environment variable name and specified paths,
+        return a pathsep-separated string of paths containing
+        unique, extant, directories from those paths and from
+        the environment variable. Raise an error if no paths
+        are resolved.
+        """
+        # flatten spec_path_lists
+        spec_paths = itertools.chain.from_iterable(spec_path_lists)
+        env_paths = safe_env.get(name, '').split(os.pathsep)
+        paths = itertools.chain(spec_paths, env_paths)
+        extant_paths = list(filter(os.path.isdir, paths)) if exists else paths
+        if not extant_paths:
+            msg = "%s environment variable is empty" % name.upper()
+            raise distutils.errors.DistutilsPlatformError(msg)
+        unique_paths = self._unique_everseen(extant_paths)
+        return os.pathsep.join(unique_paths)
+
+    # from Python docs
+    def _unique_everseen(self, iterable, key=None):
+        """
+        List unique elements, preserving order.
+        Remember all elements ever seen.
+
+        _unique_everseen('AAAABBBCCDAABBB') --> A B C D
+
+        _unique_everseen('ABBCcAD', str.lower) --> A B C D
+        """
+        seen = set()
+        seen_add = seen.add
+        if key is None:
+            for element in filterfalse(seen.__contains__, iterable):
+                seen_add(element)
+                yield element
+        else:
+            for element in iterable:
+                k = key(element)
+                if k not in seen:
+                    seen_add(k)
+                    yield element
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/namespaces.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/namespaces.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc16106d3dc7048a160129745756bbc9b1fb51d9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/namespaces.py
@@ -0,0 +1,107 @@
+import os
+from distutils import log
+import itertools
+
+from setuptools.extern.six.moves import map
+
+
+flatten = itertools.chain.from_iterable
+
+
+class Installer:
+
+    nspkg_ext = '-nspkg.pth'
+
+    def install_namespaces(self):
+        nsp = self._get_all_ns_packages()
+        if not nsp:
+            return
+        filename, ext = os.path.splitext(self._get_target())
+        filename += self.nspkg_ext
+        self.outputs.append(filename)
+        log.info("Installing %s", filename)
+        lines = map(self._gen_nspkg_line, nsp)
+
+        if self.dry_run:
+            # always generate the lines, even in dry run
+            list(lines)
+            return
+
+        with open(filename, 'wt') as f:
+            f.writelines(lines)
+
+    def uninstall_namespaces(self):
+        filename, ext = os.path.splitext(self._get_target())
+        filename += self.nspkg_ext
+        if not os.path.exists(filename):
+            return
+        log.info("Removing %s", filename)
+        os.remove(filename)
+
+    def _get_target(self):
+        return self.target
+
+    _nspkg_tmpl = (
+        "import sys, types, os",
+        "has_mfs = sys.version_info > (3, 5)",
+        "p = os.path.join(%(root)s, *%(pth)r)",
+        "importlib = has_mfs and __import__('importlib.util')",
+        "has_mfs and __import__('importlib.machinery')",
+        "m = has_mfs and "
+            "sys.modules.setdefault(%(pkg)r, "
+                "importlib.util.module_from_spec("
+                    "importlib.machinery.PathFinder.find_spec(%(pkg)r, "
+                        "[os.path.dirname(p)])))",
+        "m = m or "
+            "sys.modules.setdefault(%(pkg)r, types.ModuleType(%(pkg)r))",
+        "mp = (m or []) and m.__dict__.setdefault('__path__',[])",
+        "(p not in mp) and mp.append(p)",
+    )
+    "lines for the namespace installer"
+
+    _nspkg_tmpl_multi = (
+        'm and setattr(sys.modules[%(parent)r], %(child)r, m)',
+    )
+    "additional line(s) when a parent package is indicated"
+
+    def _get_root(self):
+        return "sys._getframe(1).f_locals['sitedir']"
+
+    def _gen_nspkg_line(self, pkg):
+        # ensure pkg is not a unicode string under Python 2.7
+        pkg = str(pkg)
+        pth = tuple(pkg.split('.'))
+        root = self._get_root()
+        tmpl_lines = self._nspkg_tmpl
+        parent, sep, child = pkg.rpartition('.')
+        if parent:
+            tmpl_lines += self._nspkg_tmpl_multi
+        return ';'.join(tmpl_lines) % locals() + '\n'
+
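+    # Editorial sketch (not upstream code): for the namespace package 'zope',
+    # _gen_nspkg_line('zope') emits one ';'-joined line of Python for the
+    # *-nspkg.pth file; on the non-importlib path it is roughly equivalent to:
+    #
+    #     import sys, types, os
+    #     p = os.path.join(sys._getframe(1).f_locals['sitedir'], 'zope')
+    #     m = sys.modules.setdefault('zope', types.ModuleType('zope'))
+    #     m.__dict__.setdefault('__path__', []).append(p)
+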
+    def _get_all_ns_packages(self):
+        """Return sorted list of all package namespaces"""
+        pkgs = self.distribution.namespace_packages or []
+        return sorted(flatten(map(self._pkg_names, pkgs)))
+
+    @staticmethod
+    def _pkg_names(pkg):
+        """
+        Given a namespace package, yield the components of that
+        package.
+
+        >>> names = Installer._pkg_names('a.b.c')
+        >>> set(names) == set(['a', 'a.b', 'a.b.c'])
+        True
+        """
+        parts = pkg.split('.')
+        while parts:
+            yield '.'.join(parts)
+            parts.pop()
+
+
+class DevelopInstaller(Installer):
+    def _get_root(self):
+        return repr(str(self.egg_path))
+
+    def _get_target(self):
+        return self.egg_link
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/package_index.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/package_index.py
new file mode 100644
index 0000000000000000000000000000000000000000..914b5e6159e7f6be0c1721299d5bcffd0f487c9b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/package_index.py
@@ -0,0 +1,1119 @@
+"""PyPI and direct package downloading"""
+import sys
+import os
+import re
+import shutil
+import socket
+import base64
+import hashlib
+import itertools
+from functools import wraps
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import urllib, http_client, configparser, map
+
+import setuptools
+from pkg_resources import (
+    CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
+    Environment, find_distributions, safe_name, safe_version,
+    to_filename, Requirement, DEVELOP_DIST, EGG_DIST,
+)
+from setuptools import ssl_support
+from distutils import log
+from distutils.errors import DistutilsError
+from fnmatch import translate
+from setuptools.py27compat import get_all_headers
+from setuptools.py33compat import unescape
+from setuptools.wheel import Wheel
+
+EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
+HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
+# this is here to fix emacs' cruddy broken syntax highlighting
+PYPI_MD5 = re.compile(
+    '<a href="([^"#]+)">([^<]+)</a>\n\\s+\\(<a (?:title="MD5 hash"\n\\s+)'
+    'href="[^?]+\\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
+)
+URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
+EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
+
+__all__ = [
+    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
+    'interpret_distro_name',
+]
+
+_SOCKET_TIMEOUT = 15
+
+_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
+user_agent = _tmpl.format(py_major=sys.version[:3], setuptools=setuptools)
+
+
+def parse_requirement_arg(spec):
+    try:
+        return Requirement.parse(spec)
+    except ValueError:
+        raise DistutilsError(
+            "Not a URL, existing file, or requirement spec: %r" % (spec,)
+        )
+
+
+def parse_bdist_wininst(name):
+    """Return (base,pyversion) or (None,None) for possible .exe name"""
+
+    lower = name.lower()
+    base, py_ver, plat = None, None, None
+
+    if lower.endswith('.exe'):
+        if lower.endswith('.win32.exe'):
+            base = name[:-10]
+            plat = 'win32'
+        elif lower.startswith('.win32-py', -16):
+            py_ver = name[-7:-4]
+            base = name[:-16]
+            plat = 'win32'
+        elif lower.endswith('.win-amd64.exe'):
+            base = name[:-14]
+            plat = 'win-amd64'
+        elif lower.startswith('.win-amd64-py', -20):
+            py_ver = name[-7:-4]
+            base = name[:-20]
+            plat = 'win-amd64'
+    return base, py_ver, plat
+
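+# Editorial examples (not upstream code) of parse_bdist_wininst():
+#
+#     parse_bdist_wininst('foo-1.0.win32.exe')        -> ('foo-1.0', None, 'win32')
+#     parse_bdist_wininst('foo-1.0.win32-py2.7.exe')  -> ('foo-1.0', '2.7', 'win32')
+#     parse_bdist_wininst('foo-1.0.win-amd64.exe')    -> ('foo-1.0', None, 'win-amd64')
+#     parse_bdist_wininst('foo-1.0.tar.gz')           -> (None, None, None)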
+
+def egg_info_for_url(url):
+    parts = urllib.parse.urlparse(url)
+    scheme, server, path, parameters, query, fragment = parts
+    base = urllib.parse.unquote(path.split('/')[-1])
+    if server == 'sourceforge.net' and base == 'download':  # XXX Yuck
+        base = urllib.parse.unquote(path.split('/')[-2])
+    if '#' in base:
+        base, fragment = base.split('#', 1)
+    return base, fragment
+
+
+def distros_for_url(url, metadata=None):
+    """Yield egg or source distribution objects that might be found at a URL"""
+    base, fragment = egg_info_for_url(url)
+    for dist in distros_for_location(url, base, metadata):
+        yield dist
+    if fragment:
+        match = EGG_FRAGMENT.match(fragment)
+        if match:
+            for dist in interpret_distro_name(
+                url, match.group(1), metadata, precedence=CHECKOUT_DIST
+            ):
+                yield dist
+
+
+def distros_for_location(location, basename, metadata=None):
+    """Yield egg or source distribution objects based on basename"""
+    if basename.endswith('.egg.zip'):
+        basename = basename[:-4]  # strip the .zip
+    if basename.endswith('.egg') and '-' in basename:
+        # only one, unambiguous interpretation
+        return [Distribution.from_location(location, basename, metadata)]
+    if basename.endswith('.whl') and '-' in basename:
+        wheel = Wheel(basename)
+        if not wheel.is_compatible():
+            return []
+        return [Distribution(
+            location=location,
+            project_name=wheel.project_name,
+            version=wheel.version,
+            # Increase priority over eggs.
+            precedence=EGG_DIST + 1,
+        )]
+    if basename.endswith('.exe'):
+        win_base, py_ver, platform = parse_bdist_wininst(basename)
+        if win_base is not None:
+            return interpret_distro_name(
+                location, win_base, metadata, py_ver, BINARY_DIST, platform
+            )
+    # Try source distro extensions (.zip, .tgz, etc.)
+    #
+    for ext in EXTENSIONS:
+        if basename.endswith(ext):
+            basename = basename[:-len(ext)]
+            return interpret_distro_name(location, basename, metadata)
+    return []  # no extension matched
+
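+# Editorial sketch (not upstream code): distros_for_location() dispatches on the
+# basename, e.g.
+#
+#     distros_for_location(url, 'pkg-1.0.egg')     # single, unambiguous Distribution
+#     distros_for_location(url, 'pkg-1.0.tar.gz')  # alternative name/version splits
+#     distros_for_location(url, 'README.txt')      # no match -> []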
+
+def distros_for_filename(filename, metadata=None):
+    """Yield possible egg or source distribution objects based on a filename"""
+    return distros_for_location(
+        normalize_path(filename), os.path.basename(filename), metadata
+    )
+
+
+def interpret_distro_name(
+        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
+        platform=None
+):
+    """Generate alternative interpretations of a source distro name
+
+    Note: if `location` is a filesystem filename, you should call
+    ``pkg_resources.normalize_path()`` on it before passing it to this
+    routine!
+    """
+    # Generate alternative interpretations of a source distro name
+    # Because some packages are ambiguous as to name/versions split
+    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
+    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0",
+    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
+    # the spurious interpretations should be ignored, because in the event
+    # there's also an "adns" package, the spurious "python-1.1.0" version will
+    # compare lower than any numeric version number, and is therefore unlikely
+    # to match a request for it.  It's still a potential problem, though, and
+    # in the long run PyPI and the distutils should go for "safe" names and
+    # versions in distribution archive names (sdist and bdist).
+
+    parts = basename.split('-')
+    if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]):
+        # it is a bdist_dumb, not an sdist -- bail out
+        return
+
+    for p in range(1, len(parts) + 1):
+        yield Distribution(
+            location, metadata, '-'.join(parts[:p]), '-'.join(parts[p:]),
+            py_version=py_version, precedence=precedence,
+            platform=platform
+        )
+
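+# Editorial note (not upstream code): for the ambiguous basename
+# 'adns-python-1.1.0' the generator above yields Distributions whose
+# (project_name, version) pairs are ('adns', 'python-1.1.0'),
+# ('adns-python', '1.1.0') and ('adns-python-1.1.0', '').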
+
+# From Python 2.7 docs
+def unique_everseen(iterable, key=None):
+    "List unique elements, preserving order. Remember all elements ever seen."
+    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
+    # unique_everseen('ABBCcAD', str.lower) --> A B C D
+    seen = set()
+    seen_add = seen.add
+    if key is None:
+        for element in six.moves.filterfalse(seen.__contains__, iterable):
+            seen_add(element)
+            yield element
+    else:
+        for element in iterable:
+            k = key(element)
+            if k not in seen:
+                seen_add(k)
+                yield element
+
+
+def unique_values(func):
+    """
+    Wrap a function returning an iterable such that the resulting iterable
+    only ever yields unique items.
+    """
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        return unique_everseen(func(*args, **kwargs))
+
+    return wrapper
+
+
+REL = re.compile(r"""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
+# this line is here to fix emacs' cruddy broken syntax highlighting
+
+
+@unique_values
+def find_external_links(url, page):
+    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
+
+    for match in REL.finditer(page):
+        tag, rel = match.groups()
+        rels = set(map(str.strip, rel.lower().split(',')))
+        if 'homepage' in rels or 'download' in rels:
+            for match in HREF.finditer(tag):
+                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
+
+    for tag in ("<th>Home Page", "<th>Download URL"):
+        pos = page.find(tag)
+        if pos != -1:
+            match = HREF.search(page, pos)
+            if match:
+                yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
+
+
+class ContentChecker(object):
+    """
+    A null content checker that defines the interface for checking content
+    """
+
+    def feed(self, block):
+        """
+        Feed a block of data to the hash.
+        """
+        return
+
+    def is_valid(self):
+        """
+        Check the hash. Return False if validation fails.
+        """
+        return True
+
+    def report(self, reporter, template):
+        """
+        Call reporter with information about the checker (hash name)
+        substituted into the template.
+        """
+        return
+
+
+class HashChecker(ContentChecker):
+    pattern = re.compile(
+        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
+        r'(?P<expected>[a-f0-9]+)'
+    )
+
+    def __init__(self, hash_name, expected):
+        self.hash_name = hash_name
+        self.hash = hashlib.new(hash_name)
+        self.expected = expected
+
+    @classmethod
+    def from_url(cls, url):
+        "Construct a (possibly null) ContentChecker from a URL"
+        fragment = urllib.parse.urlparse(url)[-1]
+        if not fragment:
+            return ContentChecker()
+        match = cls.pattern.search(fragment)
+        if not match:
+            return ContentChecker()
+        return cls(**match.groupdict())
+
+    def feed(self, block):
+        self.hash.update(block)
+
+    def is_valid(self):
+        return self.hash.hexdigest() == self.expected
+
+    def report(self, reporter, template):
+        msg = template % self.hash_name
+        return reporter(msg)
+
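+# Editorial sketch (not upstream code): HashChecker.from_url() keys off the URL
+# fragment, so a download link of the form
+#
+#     https://example.invalid/pkg-1.0.tar.gz#sha256=<hex digest>
+#
+# yields a checker that validates the downloaded bytes against that digest,
+# while a URL without a recognized fragment falls back to the no-op
+# ContentChecker.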
+
+class PackageIndex(Environment):
+    """A distribution index that scans web pages for download URLs"""
+
+    def __init__(
+            self, index_url="https://pypi.python.org/simple", hosts=('*',),
+            ca_bundle=None, verify_ssl=True, *args, **kw
+    ):
+        Environment.__init__(self, *args, **kw)
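+        # Ensure index_url ends with exactly one trailing '/': the slice below
+        # keeps the '/' only when index_url does not already end with it.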
+        self.index_url = index_url + "/" [:not index_url.endswith('/')]
+        self.scanned_urls = {}
+        self.fetched_urls = {}
+        self.package_pages = {}
+        self.allows = re.compile('|'.join(map(translate, hosts))).match
+        self.to_scan = []
+        use_ssl = (
+            verify_ssl
+            and ssl_support.is_available
+            and (ca_bundle or ssl_support.find_ca_bundle())
+        )
+        if use_ssl:
+            self.opener = ssl_support.opener_for(ca_bundle)
+        else:
+            self.opener = urllib.request.urlopen
+
+    def process_url(self, url, retrieve=False):
+        """Evaluate a URL as a possible download, and maybe retrieve it"""
+        if url in self.scanned_urls and not retrieve:
+            return
+        self.scanned_urls[url] = True
+        if not URL_SCHEME(url):
+            self.process_filename(url)
+            return
+        else:
+            dists = list(distros_for_url(url))
+            if dists:
+                if not self.url_ok(url):
+                    return
+                self.debug("Found link: %s", url)
+
+        if dists or not retrieve or url in self.fetched_urls:
+            list(map(self.add, dists))
+            return  # don't need the actual page
+
+        if not self.url_ok(url):
+            self.fetched_urls[url] = True
+            return
+
+        self.info("Reading %s", url)
+        self.fetched_urls[url] = True  # prevent multiple fetch attempts
+        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
+        f = self.open_url(url, tmpl % url)
+        if f is None:
+            return
+        self.fetched_urls[f.url] = True
+        if 'html' not in f.headers.get('content-type', '').lower():
+            f.close()  # not html, we can't process it
+            return
+
+        base = f.url  # handle redirects
+        page = f.read()
+        if not isinstance(page, str):
+            # On Python 3 we got bytes but want str.
+            if isinstance(f, urllib.error.HTTPError):
+                # Errors have no charset, assume latin1:
+                charset = 'latin-1'
+            else:
+                charset = f.headers.get_param('charset') or 'latin-1'
+            page = page.decode(charset, "ignore")
+        f.close()
+        for match in HREF.finditer(page):
+            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
+            self.process_url(link)
+        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
+            page = self.process_index(url, page)
+
+    def process_filename(self, fn, nested=False):
+        # process filenames or directories
+        if not os.path.exists(fn):
+            self.warn("Not found: %s", fn)
+            return
+
+        if os.path.isdir(fn) and not nested:
+            path = os.path.realpath(fn)
+            for item in os.listdir(path):
+                self.process_filename(os.path.join(path, item), True)
+
+        dists = distros_for_filename(fn)
+        if dists:
+            self.debug("Found: %s", fn)
+            list(map(self.add, dists))
+
+    def url_ok(self, url, fatal=False):
+        s = URL_SCHEME(url)
+        is_file = s and s.group(1).lower() == 'file'
+        if is_file or self.allows(urllib.parse.urlparse(url)[1]):
+            return True
+        msg = (
+            "\nNote: Bypassing %s (disallowed host; see "
+            "http://bit.ly/2hrImnY for details).\n")
+        if fatal:
+            raise DistutilsError(msg % url)
+        else:
+            self.warn(msg, url)
+
+    def scan_egg_links(self, search_path):
+        dirs = filter(os.path.isdir, search_path)
+        egg_links = (
+            (path, entry)
+            for path in dirs
+            for entry in os.listdir(path)
+            if entry.endswith('.egg-link')
+        )
+        list(itertools.starmap(self.scan_egg_link, egg_links))
+
+    def scan_egg_link(self, path, entry):
+        with open(os.path.join(path, entry)) as raw_lines:
+            # filter non-empty lines
+            lines = list(filter(None, map(str.strip, raw_lines)))
+
+        if len(lines) != 2:
+            # format is not recognized; punt
+            return
+
+        egg_path, setup_path = lines
+
+        for dist in find_distributions(os.path.join(path, egg_path)):
+            dist.location = os.path.join(path, *lines)
+            dist.precedence = SOURCE_DIST
+            self.add(dist)
+
+    def process_index(self, url, page):
+        """Process the contents of a PyPI page"""
+
+        def scan(link):
+            # Process a URL to see if it's for a package page
+            if link.startswith(self.index_url):
+                parts = list(map(
+                    urllib.parse.unquote, link[len(self.index_url):].split('/')
+                ))
+                if len(parts) == 2 and '#' not in parts[1]:
+                    # it's a package page, sanitize and index it
+                    pkg = safe_name(parts[0])
+                    ver = safe_version(parts[1])
+                    self.package_pages.setdefault(pkg.lower(), {})[link] = True
+                    return to_filename(pkg), to_filename(ver)
+            return None, None
+
+        # process an index page into the package-page index
+        for match in HREF.finditer(page):
+            try:
+                scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
+            except ValueError:
+                pass
+
+        pkg, ver = scan(url)  # ensure this page is in the page index
+        if pkg:
+            # process individual package page
+            for new_url in find_external_links(url, page):
+                # Process the found URL
+                base, frag = egg_info_for_url(new_url)
+                if base.endswith('.py') and not frag:
+                    if ver:
+                        new_url += '#egg=%s-%s' % (pkg, ver)
+                    else:
+                        self.need_version_info(url)
+                self.scan_url(new_url)
+
+            return PYPI_MD5.sub(
+                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
+            )
+        else:
+            return ""  # no sense double-scanning non-package pages
+
+    def need_version_info(self, url):
+        self.scan_all(
+            "Page at %s links to .py file(s) without version info; an index "
+            "scan is required.", url
+        )
+
+    def scan_all(self, msg=None, *args):
+        if self.index_url not in self.fetched_urls:
+            if msg:
+                self.warn(msg, *args)
+            self.info(
+                "Scanning index of all packages (this may take a while)"
+            )
+        self.scan_url(self.index_url)
+
+    def find_packages(self, requirement):
+        self.scan_url(self.index_url + requirement.unsafe_name + '/')
+
+        if not self.package_pages.get(requirement.key):
+            # Fall back to safe version of the name
+            self.scan_url(self.index_url + requirement.project_name + '/')
+
+        if not self.package_pages.get(requirement.key):
+            # We couldn't find the target package, so search the index page too
+            self.not_found_in_index(requirement)
+
+        for url in list(self.package_pages.get(requirement.key, ())):
+            # scan each page that might be related to the desired package
+            self.scan_url(url)
+
+    def obtain(self, requirement, installer=None):
+        self.prescan()
+        self.find_packages(requirement)
+        for dist in self[requirement.key]:
+            if dist in requirement:
+                return dist
+            self.debug("%s does not match %s", requirement, dist)
+        return super(PackageIndex, self).obtain(requirement, installer)
+
+    def check_hash(self, checker, filename, tfp):
+        """
+        checker is a ContentChecker
+        """
+        checker.report(
+            self.debug,
+            "Validating %%s checksum for %s" % filename)
+        if not checker.is_valid():
+            tfp.close()
+            os.unlink(filename)
+            raise DistutilsError(
+                "%s validation failed for %s; "
+                "possible download problem?"
+                % (checker.hash.name, os.path.basename(filename))
+            )
+
+    def add_find_links(self, urls):
+        """Add `urls` to the list that will be prescanned for searches"""
+        for url in urls:
+            if (
+                self.to_scan is None  # if we have already "gone online"
+                or not URL_SCHEME(url)  # or it's a local file/directory
+                or url.startswith('file:')
+                or list(distros_for_url(url))  # or a direct package link
+            ):
+                # then go ahead and process it now
+                self.scan_url(url)
+            else:
+                # otherwise, defer retrieval till later
+                self.to_scan.append(url)
+
+    def prescan(self):
+        """Scan urls scheduled for prescanning (e.g. --find-links)"""
+        if self.to_scan:
+            list(map(self.scan_url, self.to_scan))
+        self.to_scan = None  # from now on, go ahead and process immediately
+
+    def not_found_in_index(self, requirement):
+        if self[requirement.key]:  # we've seen at least one distro
+            meth, msg = self.info, "Couldn't retrieve index page for %r"
+        else:  # no distros seen for this name, might be misspelled
+            meth, msg = (
+                self.warn,
+                "Couldn't find index page for %r (maybe misspelled?)")
+        meth(msg, requirement.unsafe_name)
+        self.scan_all()
+
+    def download(self, spec, tmpdir):
+        """Locate and/or download `spec` to `tmpdir`, returning a local path
+
+        `spec` may be a ``Requirement`` object, or a string containing a URL,
+        an existing local filename, or a project/version requirement spec
+        (i.e. the string form of a ``Requirement`` object).  If it is the URL
+        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
+        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
+        automatically created alongside the downloaded file.
+
+        If `spec` is a ``Requirement`` object or a string containing a
+        project/version requirement spec, this method returns the location of
+        a matching distribution (possibly after downloading it to `tmpdir`).
+        If `spec` is a locally existing file or directory name, it is simply
+        returned unchanged.  If `spec` is a URL, it is downloaded to a subpath
+        of `tmpdir`, and the local filename is returned.  Various errors may be
+        raised if a problem occurs during downloading.
+        """
+        if not isinstance(spec, Requirement):
+            scheme = URL_SCHEME(spec)
+            if scheme:
+                # It's a url, download it to tmpdir
+                found = self._download_url(scheme.group(1), spec, tmpdir)
+                base, fragment = egg_info_for_url(spec)
+                if base.endswith('.py'):
+                    found = self.gen_setup(found, fragment, tmpdir)
+                return found
+            elif os.path.exists(spec):
+                # Existing file or directory, just return it
+                return spec
+            else:
+                spec = parse_requirement_arg(spec)
+        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)
+
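+    # Editorial usage sketch (not upstream code) of the accepted `spec` forms:
+    #
+    #     index = PackageIndex()
+    #     index.download('https://example.invalid/pkg-1.0.tar.gz', tmpdir)
+    #     index.download('/local/path/pkg-1.0.tar.gz', tmpdir)  # returned as-is
+    #     index.download('pkg>=1.0', tmpdir)  # resolved via the index
+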
+    def fetch_distribution(
+            self, requirement, tmpdir, force_scan=False, source=False,
+            develop_ok=False, local_index=None):
+        """Obtain a distribution suitable for fulfilling `requirement`
+
+        `requirement` must be a ``pkg_resources.Requirement`` instance.
+        If necessary, or if the `force_scan` flag is set, the requirement is
+        searched for in the (online) package index as well as the locally
+        installed packages.  If a distribution matching `requirement` is found,
+        the returned distribution's ``location`` is the value you would have
+        gotten from calling the ``download()`` method with the matching
+        distribution's URL or filename.  If no matching distribution is found,
+        ``None`` is returned.
+
+        If the `source` flag is set, only source distributions and source
+        checkout links will be considered.  Unless the `develop_ok` flag is
+        set, development and system eggs (i.e., those using the ``.egg-info``
+        format) will be ignored.
+        """
+        # process a Requirement
+        self.info("Searching for %s", requirement)
+        skipped = {}
+        dist = None
+
+        def find(req, env=None):
+            if env is None:
+                env = self
+            # Find a matching distribution; may be called more than once
+
+            for dist in env[req.key]:
+
+                if dist.precedence == DEVELOP_DIST and not develop_ok:
+                    if dist not in skipped:
+                        self.warn(
+                            "Skipping development or system egg: %s", dist,
+                        )
+                        skipped[dist] = 1
+                    continue
+
+                test = (
+                    dist in req
+                    and (dist.precedence <= SOURCE_DIST or not source)
+                )
+                if test:
+                    loc = self.download(dist.location, tmpdir)
+                    dist.download_location = loc
+                    if os.path.exists(dist.download_location):
+                        return dist
+
+        if force_scan:
+            self.prescan()
+            self.find_packages(requirement)
+            dist = find(requirement)
+
+        if not dist and local_index is not None:
+            dist = find(requirement, local_index)
+
+        if dist is None:
+            if self.to_scan is not None:
+                self.prescan()
+            dist = find(requirement)
+
+        if dist is None and not force_scan:
+            self.find_packages(requirement)
+            dist = find(requirement)
+
+        if dist is None:
+            self.warn(
+                "No local packages or working download links found for %s%s",
+                (source and "a source distribution of " or ""),
+                requirement,
+            )
+        else:
+            self.info("Best match: %s", dist)
+            return dist.clone(location=dist.download_location)
+
+    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
+        """Obtain a file suitable for fulfilling `requirement`
+
+        DEPRECATED; use the ``fetch_distribution()`` method now instead.  For
+        backward compatibility, this routine is identical but returns the
+        ``location`` of the downloaded distribution instead of a distribution
+        object.
+        """
+        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
+        if dist is not None:
+            return dist.location
+        return None
+
+    def gen_setup(self, filename, fragment, tmpdir):
+        match = EGG_FRAGMENT.match(fragment)
+        dists = match and [
+            d for d in
+            interpret_distro_name(filename, match.group(1), None) if d.version
+        ] or []
+
+        if len(dists) == 1:  # unambiguous ``#egg`` fragment
+            basename = os.path.basename(filename)
+
+            # Make sure the file has been downloaded to the temp dir.
+            if os.path.dirname(filename) != tmpdir:
+                dst = os.path.join(tmpdir, basename)
+                from setuptools.command.easy_install import samefile
+                if not samefile(filename, dst):
+                    shutil.copy2(filename, dst)
+                    filename = dst
+
+            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
+                file.write(
+                    "from setuptools import setup\n"
+                    "setup(name=%r, version=%r, py_modules=[%r])\n"
+                    % (
+                        dists[0].project_name, dists[0].version,
+                        os.path.splitext(basename)[0]
+                    )
+                )
+            return filename
+
+        elif match:
+            raise DistutilsError(
+                "Can't unambiguously interpret project/version identifier %r; "
+                "any dashes in the name or version should be escaped using "
+                "underscores. %r" % (fragment, dists)
+            )
+        else:
+            raise DistutilsError(
+                "Can't process plain .py files without an '#egg=name-version'"
+                " suffix to enable automatic setup script generation."
+            )
+
+    dl_blocksize = 8192
+
+    def _download_to(self, url, filename):
+        self.info("Downloading %s", url)
+        # Download the file
+        fp = None
+        try:
+            checker = HashChecker.from_url(url)
+            fp = self.open_url(url)
+            if isinstance(fp, urllib.error.HTTPError):
+                raise DistutilsError(
+                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
+                )
+            headers = fp.info()
+            blocknum = 0
+            bs = self.dl_blocksize
+            size = -1
+            if "content-length" in headers:
+                # Some servers return multiple Content-Length headers :(
+                sizes = get_all_headers(headers, 'Content-Length')
+                size = max(map(int, sizes))
+                self.reporthook(url, filename, blocknum, bs, size)
+            with open(filename, 'wb') as tfp:
+                while True:
+                    block = fp.read(bs)
+                    if block:
+                        checker.feed(block)
+                        tfp.write(block)
+                        blocknum += 1
+                        self.reporthook(url, filename, blocknum, bs, size)
+                    else:
+                        break
+                self.check_hash(checker, filename, tfp)
+            return headers
+        finally:
+            if fp:
+                fp.close()
+
+    def reporthook(self, url, filename, blocknum, blksize, size):
+        pass  # no-op
+
+    def open_url(self, url, warning=None):
+        if url.startswith('file:'):
+            return local_open(url)
+        try:
+            return open_with_auth(url, self.opener)
+        except (ValueError, http_client.InvalidURL) as v:
+            msg = ' '.join([str(arg) for arg in v.args])
+            if warning:
+                self.warn(warning, msg)
+            else:
+                raise DistutilsError('%s %s' % (url, msg))
+        except urllib.error.HTTPError as v:
+            return v
+        except urllib.error.URLError as v:
+            if warning:
+                self.warn(warning, v.reason)
+            else:
+                raise DistutilsError("Download error for %s: %s"
+                                     % (url, v.reason))
+        except http_client.BadStatusLine as v:
+            if warning:
+                self.warn(warning, v.line)
+            else:
+                raise DistutilsError(
+                    '%s returned a bad status line. The server might be '
+                    'down, %s' %
+                    (url, v.line)
+                )
+        except (http_client.HTTPException, socket.error) as v:
+            if warning:
+                self.warn(warning, v)
+            else:
+                raise DistutilsError("Download error for %s: %s"
+                                     % (url, v))
+
+    def _download_url(self, scheme, url, tmpdir):
+        # Determine download filename
+        #
+        name, fragment = egg_info_for_url(url)
+        if name:
+            while '..' in name:
+                name = name.replace('..', '.').replace('\\', '_')
+        else:
+            name = "__downloaded__"  # default if URL has no path contents
+
+        if name.endswith('.egg.zip'):
+            name = name[:-4]  # strip the extra .zip before download
+
+        filename = os.path.join(tmpdir, name)
+
+        # Download the file
+        #
+        if scheme == 'svn' or scheme.startswith('svn+'):
+            return self._download_svn(url, filename)
+        elif scheme == 'git' or scheme.startswith('git+'):
+            return self._download_git(url, filename)
+        elif scheme.startswith('hg+'):
+            return self._download_hg(url, filename)
+        elif scheme == 'file':
+            return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
+        else:
+            self.url_ok(url, True)  # raises error if not allowed
+            return self._attempt_download(url, filename)
+
+    def scan_url(self, url):
+        self.process_url(url, True)
+
+    def _attempt_download(self, url, filename):
+        headers = self._download_to(url, filename)
+        if 'html' in headers.get('content-type', '').lower():
+            return self._download_html(url, headers, filename)
+        else:
+            return filename
+
+    def _download_html(self, url, headers, filename):
+        file = open(filename)
+        for line in file:
+            if line.strip():
+                # Check for a subversion index page
+                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
+                    # it's a subversion index page:
+                    file.close()
+                    os.unlink(filename)
+                    return self._download_svn(url, filename)
+                break  # not an index page
+        file.close()
+        os.unlink(filename)
+        raise DistutilsError("Unexpected HTML page found at " + url)
+
+    def _download_svn(self, url, filename):
+        url = url.split('#', 1)[0]  # remove any fragment for svn's sake
+        creds = ''
+        if url.lower().startswith('svn:') and '@' in url:
+            scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
+            if not netloc and path.startswith('//') and '/' in path[2:]:
+                netloc, path = path[2:].split('/', 1)
+                auth, host = urllib.parse.splituser(netloc)
+                if auth:
+                    if ':' in auth:
+                        user, pw = auth.split(':', 1)
+                        creds = " --username=%s --password=%s" % (user, pw)
+                    else:
+                        creds = " --username=" + auth
+                    netloc = host
+                    # rebuild the URL with the credentials stripped out
+                    parts = scheme, netloc, path, p, q, f
+                    url = urllib.parse.urlunparse(parts)
+        self.info("Doing subversion checkout from %s to %s", url, filename)
+        os.system("svn checkout%s -q %s %s" % (creds, url, filename))
+        return filename
+
+    @staticmethod
+    def _vcs_split_rev_from_url(url, pop_prefix=False):
+        scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)
+
+        scheme = scheme.split('+', 1)[-1]
+
+        # Defensively strip any fragment marker that survived urlsplit
+        path = path.split('#', 1)[0]
+
+        rev = None
+        if '@' in path:
+            path, rev = path.rsplit('@', 1)
+
+        # Also, discard fragment
+        url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))
+
+        return url, rev
+
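+    # Illustrative behaviour (a sketch, with a hypothetical URL):
+    #   _vcs_split_rev_from_url('git+https://example.com/repo.git@v1.0')
+    #   returns ('https://example.com/repo.git', 'v1.0')
+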
+    def _download_git(self, url, filename):
+        filename = filename.split('#', 1)[0]
+        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
+
+        self.info("Doing git clone from %s to %s", url, filename)
+        os.system("git clone --quiet %s %s" % (url, filename))
+
+        if rev is not None:
+            self.info("Checking out %s", rev)
+            os.system("(cd %s && git checkout --quiet %s)" % (
+                filename,
+                rev,
+            ))
+
+        return filename
+
+    def _download_hg(self, url, filename):
+        filename = filename.split('#', 1)[0]
+        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)
+
+        self.info("Doing hg clone from %s to %s", url, filename)
+        os.system("hg clone --quiet %s %s" % (url, filename))
+
+        if rev is not None:
+            self.info("Updating to %s", rev)
+            os.system("(cd %s && hg up -C -r %s -q)" % (
+                filename,
+                rev,
+            ))
+
+        return filename
+
+    def debug(self, msg, *args):
+        log.debug(msg, *args)
+
+    def info(self, msg, *args):
+        log.info(msg, *args)
+
+    def warn(self, msg, *args):
+        log.warn(msg, *args)
+
+
+# This pattern matches a character entity reference (a decimal numeric
+# reference, a hexadecimal numeric reference, or a named reference).
+entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
+
+
+def decode_entity(match):
+    # pass the full match (with '&' and ';') so unescape() can decode it
+    what = match.group(0)
+    return unescape(what)
+
+
+def htmldecode(text):
+    """Decode HTML entities in the given text."""
+    return entity_sub(decode_entity, text)
+
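+# Illustrative behaviour (sketch): with the entity pattern above,
+# htmldecode('Fran&ccedil;ois &amp; C&#111;') returns 'François & Co'.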
+
+def socket_timeout(timeout=15):
+    def _socket_timeout(func):
+        def _socket_timeout(*args, **kwargs):
+            old_timeout = socket.getdefaulttimeout()
+            socket.setdefaulttimeout(timeout)
+            try:
+                return func(*args, **kwargs)
+            finally:
+                socket.setdefaulttimeout(old_timeout)
+
+        return _socket_timeout
+
+    return _socket_timeout
+
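+# Illustrative usage (sketch): socket_timeout is a decorator factory, so it
+# can be applied directly, e.g.
+#
+#     @socket_timeout(30)
+#     def fetch(url): ...
+#
+# or after the fact, as is done for open_with_auth near the end of this file.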
+
+def _encode_auth(auth):
+    """
+    A function compatible with Python 2.3-3.3 that will encode
+    auth from a URL into a value suitable for an HTTP header.
+    >>> str(_encode_auth('username%3Apassword'))
+    'dXNlcm5hbWU6cGFzc3dvcmQ='
+
+    Long auth strings should not cause a newline to be inserted.
+    >>> long_auth = 'username:' + 'password'*10
+    >>> chr(10) in str(_encode_auth(long_auth))
+    False
+    """
+    auth_s = urllib.parse.unquote(auth)
+    # convert to bytes
+    auth_bytes = auth_s.encode()
+    # use the legacy interface for Python 2.3 support
+    encoded_bytes = base64.encodestring(auth_bytes)
+    # convert back to a string
+    encoded = encoded_bytes.decode()
+    # strip the trailing newline
+    return encoded.replace('\n', '')
+
+
+class Credential(object):
+    """
+    A username/password pair. Use like a namedtuple.
+    """
+
+    def __init__(self, username, password):
+        self.username = username
+        self.password = password
+
+    def __iter__(self):
+        yield self.username
+        yield self.password
+
+    def __str__(self):
+        return '%(username)s:%(password)s' % vars(self)
+
+
+class PyPIConfig(configparser.RawConfigParser):
+    def __init__(self):
+        """
+        Load from ~/.pypirc
+        """
+        defaults = dict.fromkeys(['username', 'password', 'repository'], '')
+        configparser.RawConfigParser.__init__(self, defaults)
+
+        rc = os.path.join(os.path.expanduser('~'), '.pypirc')
+        if os.path.exists(rc):
+            self.read(rc)
+
+    @property
+    def creds_by_repository(self):
+        sections_with_repositories = [
+            section for section in self.sections()
+            if self.get(section, 'repository').strip()
+        ]
+
+        return dict(map(self._get_repo_cred, sections_with_repositories))
+
+    def _get_repo_cred(self, section):
+        repo = self.get(section, 'repository').strip()
+        return repo, Credential(
+            self.get(section, 'username').strip(),
+            self.get(section, 'password').strip(),
+        )
+
+    def find_credential(self, url):
+        """
+        If the URL indicated appears to be a repository defined in this
+        config, return the credential for that repository.
+        """
+        for repository, cred in self.creds_by_repository.items():
+            if url.startswith(repository):
+                return cred
+
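+# Illustrative input (sketch, hypothetical credentials): given a ~/.pypirc
+# containing
+#
+#     [pypi]
+#     repository = https://upload.pypi.org/legacy/
+#     username = alice
+#     password = hunter2
+#
+# PyPIConfig().find_credential('https://upload.pypi.org/legacy/') returns a
+# Credential whose str() is 'alice:hunter2'.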
+
+def open_with_auth(url, opener=urllib.request.urlopen):
+    """Open a urllib2 request, handling HTTP authentication"""
+
+    scheme, netloc, path, params, query, frag = urllib.parse.urlparse(url)
+
+    # Double scheme does not raise on Mac OS X as revealed by a
+    # failing test. We would expect "nonnumeric port". Refs #20.
+    if netloc.endswith(':'):
+        raise http_client.InvalidURL("nonnumeric port: ''")
+
+    if scheme in ('http', 'https'):
+        auth, host = urllib.parse.splituser(netloc)
+    else:
+        auth = None
+
+    if not auth:
+        cred = PyPIConfig().find_credential(url)
+        if cred:
+            auth = str(cred)
+            info = cred.username, url
+            log.info('Authenticating as %s for %s (from .pypirc)', *info)
+
+    if auth:
+        auth = "Basic " + _encode_auth(auth)
+        parts = scheme, host, path, params, query, frag
+        new_url = urllib.parse.urlunparse(parts)
+        request = urllib.request.Request(new_url)
+        request.add_header("Authorization", auth)
+    else:
+        request = urllib.request.Request(url)
+
+    request.add_header('User-Agent', user_agent)
+    fp = opener(request)
+
+    if auth:
+        # Put authentication info back into request URL if same host,
+        # so that links found on the page will work
+        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
+        if s2 == scheme and h2 == host:
+            parts = s2, netloc, path2, param2, query2, frag2
+            fp.url = urllib.parse.urlunparse(parts)
+
+    return fp
+
+
+# adding a timeout to avoid freezing package_index
+open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
+
+
+def fix_sf_url(url):
+    return url  # backward compatibility
+
+
+def local_open(url):
+    """Read a local path, with special support for directories"""
+    scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
+    filename = urllib.request.url2pathname(path)
+    if os.path.isfile(filename):
+        return urllib.request.urlopen(url)
+    elif path.endswith('/') and os.path.isdir(filename):
+        files = []
+        for f in os.listdir(filename):
+            filepath = os.path.join(filename, f)
+            if f == 'index.html':
+                with open(filepath, 'r') as fp:
+                    body = fp.read()
+                break
+            elif os.path.isdir(filepath):
+                f += '/'
+            files.append('<a href="{name}">{name}</a>'.format(name=f))
+        else:
+            tmpl = (
+                "<html><head><title>{url}</title>"
+                "</head><body>{files}</body></html>")
+            body = tmpl.format(url=url, files='\n'.join(files))
+        status, message = 200, "OK"
+    else:
+        status, message, body = 404, "Path not found", "Not found"
+
+    headers = {'content-type': 'text/html'}
+    body_stream = six.StringIO(body)
+    return urllib.error.HTTPError(url, status, message, headers, body_stream)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/pep425tags.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/pep425tags.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfe55d587a870dda328796e2a72815763326e26c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/pep425tags.py
@@ -0,0 +1,316 @@
+# This file originally from pip:
+# https://github.com/pypa/pip/blob/8f4f15a5a95d7d5b511ceaee9ed261176c181970/src/pip/_internal/pep425tags.py
+"""Generate and work with PEP 425 Compatibility Tags."""
+from __future__ import absolute_import
+
+import distutils.util
+import platform
+import re
+import sys
+import sysconfig
+import warnings
+from collections import OrderedDict
+
+from . import glibc
+
+_osx_arch_pat = re.compile(r'(.+)_(\d+)_(\d+)_(.+)')
+
+
+def get_config_var(var):
+    try:
+        return sysconfig.get_config_var(var)
+    except IOError as e:  # Issue #1074
+        warnings.warn("{}".format(e), RuntimeWarning)
+        return None
+
+
+def get_abbr_impl():
+    """Return abbreviated implementation name."""
+    if hasattr(sys, 'pypy_version_info'):
+        pyimpl = 'pp'
+    elif sys.platform.startswith('java'):
+        pyimpl = 'jy'
+    elif sys.platform == 'cli':
+        pyimpl = 'ip'
+    else:
+        pyimpl = 'cp'
+    return pyimpl
+
+
+def get_impl_ver():
+    """Return implementation version."""
+    impl_ver = get_config_var("py_version_nodot")
+    if not impl_ver or get_abbr_impl() == 'pp':
+        impl_ver = ''.join(map(str, get_impl_version_info()))
+    return impl_ver
+
+
+def get_impl_version_info():
+    """Return sys.version_info-like tuple for use in decrementing the minor
+    version."""
+    if get_abbr_impl() == 'pp':
+        # as per https://github.com/pypa/pip/issues/2882
+        return (sys.version_info[0], sys.pypy_version_info.major,
+                sys.pypy_version_info.minor)
+    else:
+        return sys.version_info[0], sys.version_info[1]
+
+
+def get_impl_tag():
+    """
+    Returns the Tag for this specific implementation.
+    """
+    return "{}{}".format(get_abbr_impl(), get_impl_ver())
+
+
+def get_flag(var, fallback, expected=True, warn=True):
+    """Use a fallback method for determining SOABI flags if the needed config
+    var is unset or unavailable."""
+    val = get_config_var(var)
+    if val is None:
+        if warn:
+            warnings.warn("Config variable '{0}' is unset, Python ABI tag may "
+                          "be incorrect".format(var), RuntimeWarning, 2)
+        return fallback()
+    return val == expected
+
+
+def get_abi_tag():
+    """Return the ABI tag based on SOABI (if available) or emulate SOABI
+    (CPython 2, PyPy)."""
+    soabi = get_config_var('SOABI')
+    impl = get_abbr_impl()
+    if not soabi and impl in {'cp', 'pp'} and hasattr(sys, 'maxunicode'):
+        d = ''
+        m = ''
+        u = ''
+        if get_flag('Py_DEBUG',
+                    lambda: hasattr(sys, 'gettotalrefcount'),
+                    warn=(impl == 'cp')):
+            d = 'd'
+        if get_flag('WITH_PYMALLOC',
+                    lambda: impl == 'cp',
+                    warn=(impl == 'cp')):
+            m = 'm'
+        if get_flag('Py_UNICODE_SIZE',
+                    lambda: sys.maxunicode == 0x10ffff,
+                    expected=4,
+                    warn=(impl == 'cp' and
+                          sys.version_info < (3, 3))) \
+                and sys.version_info < (3, 3):
+            u = 'u'
+        abi = '%s%s%s%s%s' % (impl, get_impl_ver(), d, m, u)
+    elif soabi and soabi.startswith('cpython-'):
+        abi = 'cp' + soabi.split('-')[1]
+    elif soabi:
+        abi = soabi.replace('.', '_').replace('-', '_')
+    else:
+        abi = None
+    return abi
+
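+# Illustrative values (sketch): on the CPython 3.6 interpreter this file is
+# vendored for, get_impl_tag() would typically be 'cp36' and get_abi_tag()
+# 'cp36m' (derived from an SOABI of the form 'cpython-36m-...').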
+
+def _is_running_32bit():
+    return sys.maxsize == 2147483647
+
+
+def get_platform():
+    """Return our platform name 'win32', 'linux_x86_64'"""
+    if sys.platform == 'darwin':
+        # distutils.util.get_platform() returns the release based on the value
+        # of MACOSX_DEPLOYMENT_TARGET on which Python was built, which may
+        # be significantly older than the user's current machine.
+        release, _, machine = platform.mac_ver()
+        split_ver = release.split('.')
+
+        if machine == "x86_64" and _is_running_32bit():
+            machine = "i386"
+        elif machine == "ppc64" and _is_running_32bit():
+            machine = "ppc"
+
+        return 'macosx_{}_{}_{}'.format(split_ver[0], split_ver[1], machine)
+
+    # XXX remove distutils dependency
+    result = distutils.util.get_platform().replace('.', '_').replace('-', '_')
+    if result == "linux_x86_64" and _is_running_32bit():
+        # 32 bit Python program (running on a 64 bit Linux): pip should only
+        # install and run 32 bit compiled extensions in that case.
+        result = "linux_i686"
+
+    return result
+
+
+def is_manylinux1_compatible():
+    # Only Linux, and only x86-64 / i686
+    if get_platform() not in {"linux_x86_64", "linux_i686"}:
+        return False
+
+    # Check for presence of _manylinux module
+    try:
+        import _manylinux
+        return bool(_manylinux.manylinux1_compatible)
+    except (ImportError, AttributeError):
+        # Fall through to heuristic check below
+        pass
+
+    # Check glibc version. CentOS 5 uses glibc 2.5.
+    return glibc.have_compatible_glibc(2, 5)
+
+
+def get_darwin_arches(major, minor, machine):
+    """Return a list of supported arches (including group arches) for
+    the given major, minor and machine architecture of a macOS machine.
+    """
+    arches = []
+
+    def _supports_arch(major, minor, arch):
+        # Looking at the application support for macOS versions in the chart
+        # provided by https://en.wikipedia.org/wiki/OS_X#Versions it appears
+        # our timeline looks roughly like:
+        #
+        # 10.0 - Introduces ppc support.
+        # 10.4 - Introduces ppc64, i386, and x86_64 support, however the ppc64
+        #        and x86_64 support is CLI only, and cannot be used for GUI
+        #        applications.
+        # 10.5 - Extends ppc64 and x86_64 support to cover GUI applications.
+        # 10.6 - Drops support for ppc64
+        # 10.7 - Drops support for ppc
+        #
+        # Given that we do not know if we're installing a CLI or a GUI
+        # application, we must be conservative and assume it might be a GUI
+        # application and behave as if ppc64 and x86_64 support did not occur
+        # until 10.5.
+        #
+        # Note: The above information is taken from the "Application support"
+        #       column in the chart not the "Processor support" since I believe
+        #       that we care about what instruction sets an application can use
+        #       not which processors the OS supports.
+        if arch == 'ppc':
+            return (major, minor) <= (10, 5)
+        if arch == 'ppc64':
+            return (major, minor) == (10, 5)
+        if arch == 'i386':
+            return (major, minor) >= (10, 4)
+        if arch == 'x86_64':
+            return (major, minor) >= (10, 5)
+        if arch in groups:
+            for garch in groups[arch]:
+                if _supports_arch(major, minor, garch):
+                    return True
+        return False
+
+    groups = OrderedDict([
+        ("fat", ("i386", "ppc")),
+        ("intel", ("x86_64", "i386")),
+        ("fat64", ("x86_64", "ppc64")),
+        ("fat32", ("x86_64", "i386", "ppc")),
+    ])
+
+    if _supports_arch(major, minor, machine):
+        arches.append(machine)
+
+    for garch in groups:
+        if machine in groups[garch] and _supports_arch(major, minor, garch):
+            arches.append(garch)
+
+    arches.append('universal')
+
+    return arches
+
+
+def get_supported(versions=None, noarch=False, platform=None,
+                  impl=None, abi=None):
+    """Return a list of supported tags for each version specified in
+    `versions`.
+
+    :param versions: a list of string versions, of the form ["33", "32"],
+        or None. The first version will be assumed to support our ABI.
+    :param platform: specify the exact platform you want valid
+        tags for, or None. If None, use the local system platform.
+    :param impl: specify the exact implementation you want valid
+        tags for, or None. If None, use the local interpreter impl.
+    :param abi: specify the exact abi you want valid
+        tags for, or None. If None, use the local interpreter abi.
+    """
+    supported = []
+
+    # Versions must be listed in order of preference (newest first)
+    if versions is None:
+        versions = []
+        version_info = get_impl_version_info()
+        major = version_info[:-1]
+        # Support all previous minor Python versions.
+        for minor in range(version_info[-1], -1, -1):
+            versions.append(''.join(map(str, major + (minor,))))
+
+    impl = impl or get_abbr_impl()
+
+    abis = []
+
+    abi = abi or get_abi_tag()
+    if abi:
+        abis[0:0] = [abi]
+
+    abi3s = set()
+    import imp
+    for suffix in imp.get_suffixes():
+        if suffix[0].startswith('.abi'):
+            abi3s.add(suffix[0].split('.', 2)[1])
+
+    abis.extend(sorted(list(abi3s)))
+
+    abis.append('none')
+
+    if not noarch:
+        arch = platform or get_platform()
+        if arch.startswith('macosx'):
+            # support macosx-10.6-intel on macosx-10.9-x86_64
+            match = _osx_arch_pat.match(arch)
+            if match:
+                name, major, minor, actual_arch = match.groups()
+                tpl = '{}_{}_%i_%s'.format(name, major)
+                arches = []
+                for m in reversed(range(int(minor) + 1)):
+                    for a in get_darwin_arches(int(major), m, actual_arch):
+                        arches.append(tpl % (m, a))
+            else:
+                # arch pattern didn't match (?!)
+                arches = [arch]
+        elif platform is None and is_manylinux1_compatible():
+            arches = [arch.replace('linux', 'manylinux1'), arch]
+        else:
+            arches = [arch]
+
+        # Current version, current API (built specifically for our Python):
+        for abi in abis:
+            for arch in arches:
+                supported.append(('%s%s' % (impl, versions[0]), abi, arch))
+
+        # abi3 modules compatible with older version of Python
+        for version in versions[1:]:
+            # abi3 was introduced in Python 3.2
+            if version in {'31', '30'}:
+                break
+            for abi in abi3s:   # empty set if not Python 3
+                for arch in arches:
+                    supported.append(("%s%s" % (impl, version), abi, arch))
+
+        # Has binaries, does not use the Python API:
+        for arch in arches:
+            supported.append(('py%s' % (versions[0][0]), 'none', arch))
+
+    # No abi / arch, but requires our implementation:
+    supported.append(('%s%s' % (impl, versions[0]), 'none', 'any'))
+    # Tagged specifically as being cross-version compatible
+    # (with just the major version specified)
+    supported.append(('%s%s' % (impl, versions[0][0]), 'none', 'any'))
+
+    # No abi / arch, generic Python
+    for i, version in enumerate(versions):
+        supported.append(('py%s' % (version,), 'none', 'any'))
+        if i == 0:
+            supported.append(('py%s' % (version[0]), 'none', 'any'))
+
+    return supported
+
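+# Illustrative output (sketch): on a manylinux1-compatible CPython 3.6, the
+# list returned by get_supported() would typically start with entries such as
+# ('cp36', 'cp36m', 'manylinux1_x86_64') and ('cp36', 'cp36m', 'linux_x86_64'),
+# followed later by generic ones such as ('py3', 'none', 'any').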
+
+implementation_tag = get_impl_tag()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py27compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py27compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..2985011b92f7449e784f5f056dd24ffd991d24bd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py27compat.py
@@ -0,0 +1,28 @@
+"""
+Compatibility Support for Python 2.7 and earlier
+"""
+
+import platform
+
+from setuptools.extern import six
+
+
+def get_all_headers(message, key):
+    """
+    Given an HTTPMessage, return all headers matching a given key.
+    """
+    return message.get_all(key)
+
+
+if six.PY2:
+    def get_all_headers(message, key):
+        return message.getheaders(key)
+
+
+linux_py2_ascii = (
+    platform.system() == 'Linux' and
+    six.PY2
+)
+
+rmtree_safe = str if linux_py2_ascii else lambda x: x
+"""Workaround for http://bugs.python.org/issue24672"""
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py31compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py31compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ea953201f48a7049f70eab4ea0c32b06f16e6eb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py31compat.py
@@ -0,0 +1,41 @@
+__all__ = ['get_config_vars', 'get_path']
+
+try:
+    # Python 2.7 or >=3.2
+    from sysconfig import get_config_vars, get_path
+except ImportError:
+    from distutils.sysconfig import get_config_vars, get_python_lib
+
+    def get_path(name):
+        if name not in ('platlib', 'purelib'):
+            raise ValueError("Name must be purelib or platlib")
+        return get_python_lib(name == 'platlib')
+
+
+try:
+    # Python >=3.2
+    from tempfile import TemporaryDirectory
+except ImportError:
+    import shutil
+    import tempfile
+
+    class TemporaryDirectory(object):
+        """
+        Very simple temporary directory context manager.
+        Will try to delete afterward, but will also ignore OS and similar
+        errors on deletion.
+        """
+
+        def __init__(self):
+            self.name = None  # Handle mkdtemp raising an exception
+            self.name = tempfile.mkdtemp()
+
+        def __enter__(self):
+            return self.name
+
+        def __exit__(self, exctype, excvalue, exctrace):
+            try:
+                shutil.rmtree(self.name, True)
+            except OSError:  # removal errors are not the only possible
+                pass
+            self.name = None
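+
+
+# Illustrative use (sketch): both the stdlib class and this fallback support
+#
+#     with TemporaryDirectory() as tmp_dir:
+#         ...  # work under tmp_dir; cleanup is attempted on exit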
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py33compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py33compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a73ebb3c7a34af961fc45f8eb437113dc55c2bc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py33compat.py
@@ -0,0 +1,54 @@
+import dis
+import array
+import collections
+
+try:
+    import html
+except ImportError:
+    html = None
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import html_parser
+
+
+OpArg = collections.namedtuple('OpArg', 'opcode arg')
+
+
+class Bytecode_compat(object):
+    def __init__(self, code):
+        self.code = code
+
+    def __iter__(self):
+        """Yield '(op,arg)' pair for each operation in code object 'code'"""
+
+        bytes = array.array('b', self.code.co_code)
+        eof = len(self.code.co_code)
+
+        ptr = 0
+        extended_arg = 0
+
+        while ptr < eof:
+
+            op = bytes[ptr]
+
+            if op >= dis.HAVE_ARGUMENT:
+
+                arg = bytes[ptr + 1] + bytes[ptr + 2] * 256 + extended_arg
+                ptr += 3
+
+                if op == dis.EXTENDED_ARG:
+                    long_type = six.integer_types[-1]
+                    extended_arg = arg * long_type(65536)
+                    continue
+
+            else:
+                arg = None
+                ptr += 1
+
+            yield OpArg(op, arg)
+
+
+Bytecode = getattr(dis, 'Bytecode', Bytecode_compat)
+
+
+unescape = getattr(html, 'unescape', html_parser.HTMLParser().unescape)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py36compat.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py36compat.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5279696457b690024f9bd0456293f027056bcf1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/py36compat.py
@@ -0,0 +1,82 @@
+import sys
+from distutils.errors import DistutilsOptionError
+from distutils.util import strtobool
+from distutils.debug import DEBUG
+
+
+class Distribution_parse_config_files:
+    """
+    Mix-in providing forward-compatibility for functionality to be
+    included by default on Python 3.7.
+
+    Do not edit the code in this class except to update functionality
+    as implemented in distutils.
+    """
+    def parse_config_files(self, filenames=None):
+        from configparser import ConfigParser
+
+        # Ignore install directory options if we have a venv
+        if sys.prefix != sys.base_prefix:
+            ignore_options = [
+                'install-base', 'install-platbase', 'install-lib',
+                'install-platlib', 'install-purelib', 'install-headers',
+                'install-scripts', 'install-data', 'prefix', 'exec-prefix',
+                'home', 'user', 'root']
+        else:
+            ignore_options = []
+
+        ignore_options = frozenset(ignore_options)
+
+        if filenames is None:
+            filenames = self.find_config_files()
+
+        if DEBUG:
+            self.announce("Distribution.parse_config_files():")
+
+        parser = ConfigParser(interpolation=None)
+        for filename in filenames:
+            if DEBUG:
+                self.announce("  reading %s" % filename)
+            parser.read(filename)
+            for section in parser.sections():
+                options = parser.options(section)
+                opt_dict = self.get_option_dict(section)
+
+                for opt in options:
+                    if opt != '__name__' and opt not in ignore_options:
+                        val = parser.get(section, opt)
+                        opt = opt.replace('-', '_')
+                        opt_dict[opt] = (filename, val)
+
+            # Make the ConfigParser forget everything (so we retain
+            # the original filenames that options come from)
+            parser.__init__()
+
+        # If there was a "global" section in the config file, use it
+        # to set Distribution options.
+
+        if 'global' in self.command_options:
+            for (opt, (src, val)) in self.command_options['global'].items():
+                alias = self.negative_opt.get(opt)
+                try:
+                    if alias:
+                        setattr(self, alias, not strtobool(val))
+                    elif opt in ('verbose', 'dry_run'): # ugh!
+                        setattr(self, opt, strtobool(val))
+                    else:
+                        setattr(self, opt, val)
+                except ValueError as msg:
+                    raise DistutilsOptionError(msg)
+
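+# Illustrative input (sketch, hypothetical file): a setup.cfg containing
+#
+#     [bdist_wheel]
+#     universal = 1
+#
+# ends up in self.get_option_dict('bdist_wheel') as
+# {'universal': ('setup.cfg', '1')}.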
+
+if sys.version_info < (3,):
+    # Python 2 behavior is sufficient
+    class Distribution_parse_config_files:
+        pass
+
+
+if False:
+    # When updated behavior is available upstream,
+    # disable override here.
+    class Distribution_parse_config_files:
+        pass
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/sandbox.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/sandbox.py
new file mode 100644
index 0000000000000000000000000000000000000000..685f3f72e3611a5fa99c999e233ffd179c431a6d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/sandbox.py
@@ -0,0 +1,491 @@
+import os
+import sys
+import tempfile
+import operator
+import functools
+import itertools
+import re
+import contextlib
+import pickle
+import textwrap
+
+from setuptools.extern import six
+from setuptools.extern.six.moves import builtins, map
+
+import pkg_resources.py31compat
+
+if sys.platform.startswith('java'):
+    import org.python.modules.posix.PosixModule as _os
+else:
+    _os = sys.modules[os.name]
+try:
+    _file = file
+except NameError:
+    _file = None
+_open = open
+from distutils.errors import DistutilsError
+from pkg_resources import working_set
+
+
+__all__ = [
+    "AbstractSandbox", "DirectorySandbox", "SandboxViolation", "run_setup",
+]
+
+
+def _execfile(filename, globals, locals=None):
+    """
+    Python 3 implementation of execfile.
+    """
+    mode = 'rb'
+    with open(filename, mode) as stream:
+        script = stream.read()
+    if locals is None:
+        locals = globals
+    code = compile(script, filename, 'exec')
+    exec(code, globals, locals)
+
+
+@contextlib.contextmanager
+def save_argv(repl=None):
+    saved = sys.argv[:]
+    if repl is not None:
+        sys.argv[:] = repl
+    try:
+        yield saved
+    finally:
+        sys.argv[:] = saved
+
+
+@contextlib.contextmanager
+def save_path():
+    saved = sys.path[:]
+    try:
+        yield saved
+    finally:
+        sys.path[:] = saved
+
+
+@contextlib.contextmanager
+def override_temp(replacement):
+    """
+    Monkey-patch tempfile.tempdir with replacement, ensuring it exists
+    """
+    pkg_resources.py31compat.makedirs(replacement, exist_ok=True)
+
+    saved = tempfile.tempdir
+
+    tempfile.tempdir = replacement
+
+    try:
+        yield
+    finally:
+        tempfile.tempdir = saved
+
+
+@contextlib.contextmanager
+def pushd(target):
+    saved = os.getcwd()
+    os.chdir(target)
+    try:
+        yield saved
+    finally:
+        os.chdir(saved)
+
+
+class UnpickleableException(Exception):
+    """
+    An exception representing another Exception that could not be pickled.
+    """
+
+    @staticmethod
+    def dump(type, exc):
+        """
+        Always return a dumped (pickled) type and exc. If exc can't be pickled,
+        wrap it in UnpickleableException first.
+        """
+        try:
+            return pickle.dumps(type), pickle.dumps(exc)
+        except Exception:
+            # get UnpickleableException inside the sandbox
+            from setuptools.sandbox import UnpickleableException as cls
+            return cls.dump(cls, cls(repr(exc)))
+
+
+class ExceptionSaver:
+    """
+    A Context Manager that will save an exception, serialized, and restore it
+    later.
+    """
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, exc, tb):
+        if not exc:
+            return
+
+        # dump the exception
+        self._saved = UnpickleableException.dump(type, exc)
+        self._tb = tb
+
+        # suppress the exception
+        return True
+
+    def resume(self):
+        "restore and re-raise any exception"
+
+        if '_saved' not in vars(self):
+            return
+
+        type, exc = map(pickle.loads, self._saved)
+        six.reraise(type, exc, self._tb)
+
+
+@contextlib.contextmanager
+def save_modules():
+    """
+    Context in which imported modules are saved.
+
+    Translates exceptions internal to the context into the equivalent exception
+    outside the context.
+    """
+    saved = sys.modules.copy()
+    with ExceptionSaver() as saved_exc:
+        yield saved
+
+    sys.modules.update(saved)
+    # remove any modules imported since
+    del_modules = (
+        mod_name for mod_name in sys.modules
+        if mod_name not in saved
+        # exclude any encodings modules. See #285
+        and not mod_name.startswith('encodings.')
+    )
+    _clear_modules(del_modules)
+
+    saved_exc.resume()
+
+
+def _clear_modules(module_names):
+    for mod_name in list(module_names):
+        del sys.modules[mod_name]
+
+
+@contextlib.contextmanager
+def save_pkg_resources_state():
+    saved = pkg_resources.__getstate__()
+    try:
+        yield saved
+    finally:
+        pkg_resources.__setstate__(saved)
+
+
+@contextlib.contextmanager
+def setup_context(setup_dir):
+    temp_dir = os.path.join(setup_dir, 'temp')
+    with save_pkg_resources_state():
+        with save_modules():
+            hide_setuptools()
+            with save_path():
+                with save_argv():
+                    with override_temp(temp_dir):
+                        with pushd(setup_dir):
+                            # ensure setuptools commands are available
+                            __import__('setuptools')
+                            yield
+
+
+def _needs_hiding(mod_name):
+    """
+    >>> _needs_hiding('setuptools')
+    True
+    >>> _needs_hiding('pkg_resources')
+    True
+    >>> _needs_hiding('setuptools_plugin')
+    False
+    >>> _needs_hiding('setuptools.__init__')
+    True
+    >>> _needs_hiding('distutils')
+    True
+    >>> _needs_hiding('os')
+    False
+    >>> _needs_hiding('Cython')
+    True
+    """
+    pattern = re.compile(r'(setuptools|pkg_resources|distutils|Cython)(\.|$)')
+    return bool(pattern.match(mod_name))
+
+
+def hide_setuptools():
+    """
+    Remove references to setuptools' modules from sys.modules to allow the
+    invocation to import the most appropriate setuptools. This technique is
+    necessary to avoid issues such as #315 where setuptools upgrading itself
+    would fail to find a function declared in the metadata.
+    """
+    modules = filter(_needs_hiding, sys.modules)
+    _clear_modules(modules)
+
+
+def run_setup(setup_script, args):
+    """Run a distutils setup script, sandboxed in its directory"""
+    setup_dir = os.path.abspath(os.path.dirname(setup_script))
+    with setup_context(setup_dir):
+        try:
+            sys.argv[:] = [setup_script] + list(args)
+            sys.path.insert(0, setup_dir)
+            # reset to include setup dir, w/clean callback list
+            working_set.__init__()
+            working_set.callbacks.append(lambda dist: dist.activate())
+
+            # __file__ should be a byte string on Python 2 (#712)
+            dunder_file = (
+                setup_script
+                if isinstance(setup_script, str) else
+                setup_script.encode(sys.getfilesystemencoding())
+            )
+
+            with DirectorySandbox(setup_dir):
+                ns = dict(__file__=dunder_file, __name__='__main__')
+                _execfile(setup_script, ns)
+        except SystemExit as v:
+            if v.args and v.args[0]:
+                raise
+            # Normal exit, just return
+
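+# Illustrative call (sketch, hypothetical path): a build is driven roughly via
+#
+#     run_setup('/tmp/pkg/setup.py', ['bdist_egg'])
+#
+# with the DirectorySandbox below confining writes to the setup directory.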
+
+class AbstractSandbox:
+    """Wrap 'os' module and 'open()' builtin for virtualizing setup scripts"""
+
+    _active = False
+
+    def __init__(self):
+        self._attrs = [
+            name for name in dir(_os)
+            if not name.startswith('_') and hasattr(self, name)
+        ]
+
+    def _copy(self, source):
+        for name in self._attrs:
+            setattr(os, name, getattr(source, name))
+
+    def __enter__(self):
+        self._copy(self)
+        if _file:
+            builtins.file = self._file
+        builtins.open = self._open
+        self._active = True
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self._active = False
+        if _file:
+            builtins.file = _file
+        builtins.open = _open
+        self._copy(_os)
+
+    def run(self, func):
+        """Run 'func' under os sandboxing"""
+        with self:
+            return func()
+
+    def _mk_dual_path_wrapper(name):
+        original = getattr(_os, name)
+
+        def wrap(self, src, dst, *args, **kw):
+            if self._active:
+                src, dst = self._remap_pair(name, src, dst, *args, **kw)
+            return original(src, dst, *args, **kw)
+
+        return wrap
+
+    for name in ["rename", "link", "symlink"]:
+        if hasattr(_os, name):
+            locals()[name] = _mk_dual_path_wrapper(name)
+
+    def _mk_single_path_wrapper(name, original=None):
+        original = original or getattr(_os, name)
+
+        def wrap(self, path, *args, **kw):
+            if self._active:
+                path = self._remap_input(name, path, *args, **kw)
+            return original(path, *args, **kw)
+
+        return wrap
+
+    if _file:
+        _file = _mk_single_path_wrapper('file', _file)
+    _open = _mk_single_path_wrapper('open', _open)
+    for name in [
+        "stat", "listdir", "chdir", "open", "chmod", "chown", "mkdir",
+        "remove", "unlink", "rmdir", "utime", "lchown", "chroot", "lstat",
+        "startfile", "mkfifo", "mknod", "pathconf", "access"
+    ]:
+        if hasattr(_os, name):
+            locals()[name] = _mk_single_path_wrapper(name)
+
+    def _mk_single_with_return(name):
+        original = getattr(_os, name)
+
+        def wrap(self, path, *args, **kw):
+            if self._active:
+                path = self._remap_input(name, path, *args, **kw)
+                return self._remap_output(name, original(path, *args, **kw))
+            return original(path, *args, **kw)
+
+        return wrap
+
+    for name in ['readlink', 'tempnam']:
+        if hasattr(_os, name):
+            locals()[name] = _mk_single_with_return(name)
+
+    def _mk_query(name):
+        original = getattr(_os, name)
+
+        def wrap(self, *args, **kw):
+            retval = original(*args, **kw)
+            if self._active:
+                return self._remap_output(name, retval)
+            return retval
+
+        return wrap
+
+    for name in ['getcwd', 'tmpnam']:
+        if hasattr(_os, name):
+            locals()[name] = _mk_query(name)
+
+    def _validate_path(self, path):
+        """Called to remap or validate any path, whether input or output"""
+        return path
+
+    def _remap_input(self, operation, path, *args, **kw):
+        """Called for path inputs"""
+        return self._validate_path(path)
+
+    def _remap_output(self, operation, path):
+        """Called for path outputs"""
+        return self._validate_path(path)
+
+    def _remap_pair(self, operation, src, dst, *args, **kw):
+        """Called for path pairs like rename, link, and symlink operations"""
+        return (
+            self._remap_input(operation + '-from', src, *args, **kw),
+            self._remap_input(operation + '-to', dst, *args, **kw)
+        )
+
+
+if hasattr(os, 'devnull'):
+    _EXCEPTIONS = [os.devnull,]
+else:
+    _EXCEPTIONS = []
+
+
+class DirectorySandbox(AbstractSandbox):
+    """Restrict operations to a single subdirectory - pseudo-chroot"""
+
+    write_ops = dict.fromkeys([
+        "open", "chmod", "chown", "mkdir", "remove", "unlink", "rmdir",
+        "utime", "lchown", "chroot", "mkfifo", "mknod", "tempnam",
+    ])
+
+    _exception_patterns = [
+        # Allow lib2to3 to attempt to save a pickled grammar object (#121)
+        r'.*lib2to3.*\.pickle$',
+    ]
+    "exempt writing to paths that match the pattern"
+
+    def __init__(self, sandbox, exceptions=_EXCEPTIONS):
+        self._sandbox = os.path.normcase(os.path.realpath(sandbox))
+        self._prefix = os.path.join(self._sandbox, '')
+        self._exceptions = [
+            os.path.normcase(os.path.realpath(path))
+            for path in exceptions
+        ]
+        AbstractSandbox.__init__(self)
+
+    def _violation(self, operation, *args, **kw):
+        from setuptools.sandbox import SandboxViolation
+        raise SandboxViolation(operation, args, kw)
+
+    if _file:
+
+        def _file(self, path, mode='r', *args, **kw):
+            if mode not in ('r', 'rt', 'rb', 'rU', 'U') and not self._ok(path):
+                self._violation("file", path, mode, *args, **kw)
+            return _file(path, mode, *args, **kw)
+
+    def _open(self, path, mode='r', *args, **kw):
+        if mode not in ('r', 'rt', 'rb', 'rU', 'U') and not self._ok(path):
+            self._violation("open", path, mode, *args, **kw)
+        return _open(path, mode, *args, **kw)
+
+    def tmpnam(self):
+        self._violation("tmpnam")
+
+    def _ok(self, path):
+        active = self._active
+        try:
+            self._active = False
+            realpath = os.path.normcase(os.path.realpath(path))
+            return (
+                self._exempted(realpath)
+                or realpath == self._sandbox
+                or realpath.startswith(self._prefix)
+            )
+        finally:
+            self._active = active
+
+    def _exempted(self, filepath):
+        start_matches = (
+            filepath.startswith(exception)
+            for exception in self._exceptions
+        )
+        pattern_matches = (
+            re.match(pattern, filepath)
+            for pattern in self._exception_patterns
+        )
+        candidates = itertools.chain(start_matches, pattern_matches)
+        return any(candidates)
+
+    def _remap_input(self, operation, path, *args, **kw):
+        """Called for path inputs"""
+        if operation in self.write_ops and not self._ok(path):
+            self._violation(operation, os.path.realpath(path), *args, **kw)
+        return path
+
+    def _remap_pair(self, operation, src, dst, *args, **kw):
+        """Called for path pairs like rename, link, and symlink operations"""
+        if not self._ok(src) or not self._ok(dst):
+            self._violation(operation, src, dst, *args, **kw)
+        return (src, dst)
+
+    def open(self, file, flags, mode=0o777, *args, **kw):
+        """Called for low-level os.open()"""
+        if flags & WRITE_FLAGS and not self._ok(file):
+            self._violation("os.open", file, flags, mode, *args, **kw)
+        return _os.open(file, flags, mode, *args, **kw)
+
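+# Illustrative behaviour (sketch, hypothetical paths): inside
+# DirectorySandbox('/tmp/build'), open('/tmp/build/out.txt', 'w') is allowed,
+# open('/etc/passwd', 'w') raises SandboxViolation, and read-only modes are
+# not blocked.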
+
+WRITE_FLAGS = functools.reduce(
+    operator.or_, [getattr(_os, a, 0) for a in
+        "O_WRONLY O_RDWR O_APPEND O_CREAT O_TRUNC O_TEMPORARY".split()]
+)
+
+
+class SandboxViolation(DistutilsError):
+    """A setup script attempted to modify the filesystem outside the sandbox"""
+
+    tmpl = textwrap.dedent("""
+        SandboxViolation: {cmd}{args!r} {kwargs}
+
+        The package setup script has attempted to modify files on your system
+        that are not within the EasyInstall build area, and has been aborted.
+
+        This package cannot be safely installed by EasyInstall, and may not
+        support alternate installation locations even if you run its setup
+        script by hand.  Please inform the package's author and the EasyInstall
+        maintainers to find out if a fix or workaround is available.
+        """).lstrip()
+
+    def __str__(self):
+        cmd, args, kwargs = self.args
+        return self.tmpl.format(**locals())
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/script (dev).tmpl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/script (dev).tmpl
new file mode 100644
index 0000000000000000000000000000000000000000..d58b1bb5bfa58964966863bef1c8102a729fef79
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/script (dev).tmpl	
@@ -0,0 +1,5 @@
+# EASY-INSTALL-DEV-SCRIPT: %(spec)r,%(script_name)r
+__requires__ = %(spec)r
+__import__('pkg_resources').require(%(spec)r)
+__file__ = %(dev_path)r
+exec(compile(open(__file__).read(), __file__, 'exec'))
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/script.tmpl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/script.tmpl
new file mode 100644
index 0000000000000000000000000000000000000000..ff5efbcab3b58063dd84787181c26a95fb663d94
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/script.tmpl
@@ -0,0 +1,3 @@
+# EASY-INSTALL-SCRIPT: %(spec)r,%(script_name)r
+__requires__ = %(spec)r
+__import__('pkg_resources').run_script(%(spec)r, %(script_name)r)
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/site-patch.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/site-patch.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d2d2ff8da3960ecdaa6591fcee836c186fb8c91
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/site-patch.py
@@ -0,0 +1,74 @@
+def __boot():
+    import sys
+    import os
+    PYTHONPATH = os.environ.get('PYTHONPATH')
+    if PYTHONPATH is None or (sys.platform == 'win32' and not PYTHONPATH):
+        PYTHONPATH = []
+    else:
+        PYTHONPATH = PYTHONPATH.split(os.pathsep)
+
+    pic = getattr(sys, 'path_importer_cache', {})
+    stdpath = sys.path[len(PYTHONPATH):]
+    mydir = os.path.dirname(__file__)
+
+    for item in stdpath:
+        if item == mydir or not item:
+            continue  # skip the current-dir entry (Windows) or my own directory
+        importer = pic.get(item)
+        if importer is not None:
+            loader = importer.find_module('site')
+            if loader is not None:
+                # This should actually reload the current module
+                loader.load_module('site')
+                break
+        else:
+            try:
+                import imp  # Avoid import loop in Python >= 3.3
+                stream, path, descr = imp.find_module('site', [item])
+            except ImportError:
+                continue
+            if stream is None:
+                continue
+            try:
+                # This should actually reload the current module
+                imp.load_module('site', stream, path, descr)
+            finally:
+                stream.close()
+            break
+    else:
+        raise ImportError("Couldn't find the real 'site' module")
+
+    known_paths = dict([(makepath(item)[1], 1) for item in sys.path])  # 2.2 comp
+
+    oldpos = getattr(sys, '__egginsert', 0)  # save old insertion position
+    sys.__egginsert = 0  # and reset the current one
+
+    for item in PYTHONPATH:
+        addsitedir(item)
+
+    sys.__egginsert += oldpos  # restore effective old position
+
+    d, nd = makepath(stdpath[0])
+    insert_at = None
+    new_path = []
+
+    for item in sys.path:
+        p, np = makepath(item)
+
+        if np == nd and insert_at is None:
+            # We've hit the first 'system' path entry, so added entries go here
+            insert_at = len(new_path)
+
+        if np in known_paths or insert_at is None:
+            new_path.append(item)
+        else:
+            # new path after the insert point, back-insert it
+            new_path.insert(insert_at, item)
+            insert_at += 1
+
+    sys.path[:] = new_path
+
+
+if __name__ == 'site':
+    __boot()
+    del __boot
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/ssl_support.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/ssl_support.py
new file mode 100644
index 0000000000000000000000000000000000000000..6362f1f426910ab36f8b26d1382adf08f43ba992
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/ssl_support.py
@@ -0,0 +1,260 @@
+import os
+import socket
+import atexit
+import re
+import functools
+
+from setuptools.extern.six.moves import urllib, http_client, map, filter
+
+from pkg_resources import ResolutionError, ExtractionError
+
+try:
+    import ssl
+except ImportError:
+    ssl = None
+
+__all__ = [
+    'VerifyingHTTPSHandler', 'find_ca_bundle', 'is_available', 'cert_paths',
+    'opener_for'
+]
+
+cert_paths = """
+/etc/pki/tls/certs/ca-bundle.crt
+/etc/ssl/certs/ca-certificates.crt
+/usr/share/ssl/certs/ca-bundle.crt
+/usr/local/share/certs/ca-root.crt
+/etc/ssl/cert.pem
+/System/Library/OpenSSL/certs/cert.pem
+/usr/local/share/certs/ca-root-nss.crt
+/etc/ssl/ca-bundle.pem
+""".strip().split()
+
+try:
+    HTTPSHandler = urllib.request.HTTPSHandler
+    HTTPSConnection = http_client.HTTPSConnection
+except AttributeError:
+    HTTPSHandler = HTTPSConnection = object
+
+is_available = ssl is not None and object not in (HTTPSHandler, HTTPSConnection)
+
+
+try:
+    from ssl import CertificateError, match_hostname
+except ImportError:
+    try:
+        from backports.ssl_match_hostname import CertificateError
+        from backports.ssl_match_hostname import match_hostname
+    except ImportError:
+        CertificateError = None
+        match_hostname = None
+
+if not CertificateError:
+
+    class CertificateError(ValueError):
+        pass
+
+
+if not match_hostname:
+
+    def _dnsname_match(dn, hostname, max_wildcards=1):
+        """Matching according to RFC 6125, section 6.4.3
+
+        http://tools.ietf.org/html/rfc6125#section-6.4.3
+        """
+        pats = []
+        if not dn:
+            return False
+
+        # Ported from python3-syntax:
+        # leftmost, *remainder = dn.split(r'.')
+        parts = dn.split(r'.')
+        leftmost = parts[0]
+        remainder = parts[1:]
+
+        wildcards = leftmost.count('*')
+        if wildcards > max_wildcards:
+            # Issue #17980: avoid denials of service by refusing more
+            # than one wildcard per fragment.  A survey of established
+            # policy among SSL implementations showed it to be a
+            # reasonable choice.
+            raise CertificateError(
+                "too many wildcards in certificate DNS name: " + repr(dn))
+
+        # speed up common case w/o wildcards
+        if not wildcards:
+            return dn.lower() == hostname.lower()
+
+        # RFC 6125, section 6.4.3, subitem 1.
+        # The client SHOULD NOT attempt to match a presented identifier in which
+        # the wildcard character comprises a label other than the left-most label.
+        if leftmost == '*':
+            # When '*' is a fragment by itself, it matches a non-empty dotless
+            # fragment.
+            pats.append('[^.]+')
+        elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
+            # RFC 6125, section 6.4.3, subitem 3.
+            # The client SHOULD NOT attempt to match a presented identifier
+            # where the wildcard character is embedded within an A-label or
+            # U-label of an internationalized domain name.
+            pats.append(re.escape(leftmost))
+        else:
+            # Otherwise, '*' matches any dotless string, e.g. www*
+            pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+
+        # add the remaining fragments, ignore any wildcards
+        for frag in remainder:
+            pats.append(re.escape(frag))
+
+        pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
+        return pat.match(hostname)
+
+    def match_hostname(cert, hostname):
+        """Verify that *cert* (in decoded format as returned by
+        SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
+        rules are followed, but IP addresses are not accepted for *hostname*.
+
+        CertificateError is raised on failure. On success, the function
+        returns nothing.
+        """
+        if not cert:
+            raise ValueError("empty or no certificate")
+        dnsnames = []
+        san = cert.get('subjectAltName', ())
+        for key, value in san:
+            if key == 'DNS':
+                if _dnsname_match(value, hostname):
+                    return
+                dnsnames.append(value)
+        if not dnsnames:
+            # The subject is only checked when there is no dNSName entry
+            # in subjectAltName
+            for sub in cert.get('subject', ()):
+                for key, value in sub:
+                    # XXX according to RFC 2818, the most specific Common Name
+                    # must be used.
+                    if key == 'commonName':
+                        if _dnsname_match(value, hostname):
+                            return
+                        dnsnames.append(value)
+        if len(dnsnames) > 1:
+            raise CertificateError("hostname %r "
+                "doesn't match either of %s"
+                % (hostname, ', '.join(map(repr, dnsnames))))
+        elif len(dnsnames) == 1:
+            raise CertificateError("hostname %r "
+                "doesn't match %r"
+                % (hostname, dnsnames[0]))
+        else:
+            raise CertificateError("no appropriate commonName or "
+                "subjectAltName fields were found")
+
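+# Illustrative check (sketch, using the reserved example.com domain): with the
+# fallback above, _dnsname_match('*.example.com', 'www.example.com') matches,
+# while 'a.b.example.com' does not, since the wildcard spans a single label.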
+
+class VerifyingHTTPSHandler(HTTPSHandler):
+    """Simple verifying handler: no auth, subclasses, timeouts, etc."""
+
+    def __init__(self, ca_bundle):
+        self.ca_bundle = ca_bundle
+        HTTPSHandler.__init__(self)
+
+    def https_open(self, req):
+        return self.do_open(
+            lambda host, **kw: VerifyingHTTPSConn(host, self.ca_bundle, **kw), req
+        )
+
+
+class VerifyingHTTPSConn(HTTPSConnection):
+    """Simple verifying connection: no auth, subclasses, timeouts, etc."""
+
+    def __init__(self, host, ca_bundle, **kw):
+        HTTPSConnection.__init__(self, host, **kw)
+        self.ca_bundle = ca_bundle
+
+    def connect(self):
+        sock = socket.create_connection(
+            (self.host, self.port), getattr(self, 'source_address', None)
+        )
+
+        # Handle the socket if a (proxy) tunnel is present
+        if hasattr(self, '_tunnel') and getattr(self, '_tunnel_host', None):
+            self.sock = sock
+            self._tunnel()
+            # http://bugs.python.org/issue7776: Python>=3.4.1 and >=2.7.7
+            # change self.host to mean the proxy server host when tunneling is
+            # being used. Adapt, since we are interested in the destination
+            # host for the match_hostname() comparison.
+            actual_host = self._tunnel_host
+        else:
+            actual_host = self.host
+
+        if hasattr(ssl, 'create_default_context'):
+            ctx = ssl.create_default_context(cafile=self.ca_bundle)
+            self.sock = ctx.wrap_socket(sock, server_hostname=actual_host)
+        else:
+            # fallback for Python < 2.7.9 and < 3.4, which lack
+            # ssl.create_default_context()
+            self.sock = ssl.wrap_socket(
+                sock, cert_reqs=ssl.CERT_REQUIRED, ca_certs=self.ca_bundle
+            )
+        try:
+            match_hostname(self.sock.getpeercert(), actual_host)
+        except CertificateError:
+            self.sock.shutdown(socket.SHUT_RDWR)
+            self.sock.close()
+            raise
+
+
+def opener_for(ca_bundle=None):
+    """Get a urlopen() replacement that uses ca_bundle for verification"""
+    return urllib.request.build_opener(
+        VerifyingHTTPSHandler(ca_bundle or find_ca_bundle())
+    ).open
+
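+# Illustrative sketch (not part of the original module): opener_for() returns
+# a drop-in replacement for urllib.request.urlopen that verifies server
+# certificates against the given (or auto-discovered) CA bundle, e.g.:
+#
+#   open_https = opener_for()          # falls back to find_ca_bundle()
+#   page = open_https('https://pypi.org/simple/').read()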
+
+# from jaraco.functools
+def once(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if not hasattr(func, 'always_returns'):
+            func.always_returns = func(*args, **kwargs)
+        return func.always_returns
+    return wrapper
+
+
+@once
+def get_win_certfile():
+    try:
+        import wincertstore
+    except ImportError:
+        return None
+
+    class CertFile(wincertstore.CertFile):
+        def __init__(self):
+            super(CertFile, self).__init__()
+            atexit.register(self.close)
+
+        def close(self):
+            try:
+                super(CertFile, self).close()
+            except OSError:
+                pass
+
+    _wincerts = CertFile()
+    _wincerts.addstore('CA')
+    _wincerts.addstore('ROOT')
+    return _wincerts.name
+
+
+def find_ca_bundle():
+    """Return an existing CA bundle path, or None"""
+    extant_cert_paths = filter(os.path.isfile, cert_paths)
+    return (
+        get_win_certfile()
+        or next(extant_cert_paths, None)
+        or _certifi_where()
+    )
+
+
+def _certifi_where():
+    try:
+        return __import__('certifi').where()
+    except (ImportError, ResolutionError, ExtractionError):
+        pass
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/unicode_utils.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/unicode_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c63efd20b350358ab25c079166dbb00ef49f8d2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/unicode_utils.py
@@ -0,0 +1,44 @@
+import unicodedata
+import sys
+
+from setuptools.extern import six
+
+
+# HFS Plus uses decomposed UTF-8
+def decompose(path):
+    if isinstance(path, six.text_type):
+        return unicodedata.normalize('NFD', path)
+    try:
+        path = path.decode('utf-8')
+        path = unicodedata.normalize('NFD', path)
+        path = path.encode('utf-8')
+    except UnicodeError:
+        pass  # Not UTF-8
+    return path
+
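+# Illustrative sketch (not part of the original module): HFS Plus stores file
+# names in decomposed (NFD) form, so a precomposed character such as
+# u'\u00e9' becomes 'e' plus a combining acute accent:
+#
+#   decompose(u'caf\u00e9') == u'cafe\u0301'   # True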
+
+def filesys_decode(path):
+    """
+    Ensure that the given path is decoded; return
+    None when no expected encoding works.
+    """
+
+    if isinstance(path, six.text_type):
+        return path
+
+    fs_enc = sys.getfilesystemencoding() or 'utf-8'
+    candidates = fs_enc, 'utf-8'
+
+    for enc in candidates:
+        try:
+            return path.decode(enc)
+        except UnicodeDecodeError:
+            continue
+
+
+def try_encode(string, enc):
+    "turn unicode encoding into a functional routine"
+    try:
+        return string.encode(enc)
+    except UnicodeEncodeError:
+        return None
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/version.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..95e1869658566aac3060562d8cd5a6b647887d1e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/version.py
@@ -0,0 +1,6 @@
+import pkg_resources
+
+try:
+    __version__ = pkg_resources.get_distribution('setuptools').version
+except Exception:
+    __version__ = 'unknown'
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/wheel.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/wheel.py
new file mode 100644
index 0000000000000000000000000000000000000000..37dfa53103ec1261b2b7c8c0c8d72994eaf2d984
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/wheel.py
@@ -0,0 +1,163 @@
+'''Wheels support.'''
+
+from distutils.util import get_platform
+import email
+import itertools
+import os
+import re
+import zipfile
+
+from pkg_resources import Distribution, PathMetadata, parse_version
+from setuptools.extern.six import PY3
+from setuptools import Distribution as SetuptoolsDistribution
+from setuptools import pep425tags
+from setuptools.command.egg_info import write_requirements
+
+
+WHEEL_NAME = re.compile(
+    r"""^(?P<project_name>.+?)-(?P<version>\d.*?)
+    ((-(?P<build>\d.*?))?-(?P<py_version>.+?)-(?P<abi>.+?)-(?P<platform>.+?)
+    )\.whl$""",
+    re.VERBOSE).match
+
+NAMESPACE_PACKAGE_INIT = '''\
+try:
+    __import__('pkg_resources').declare_namespace(__name__)
+except ImportError:
+    __path__ = __import__('pkgutil').extend_path(__path__, __name__)
+'''
+
+
+def unpack(src_dir, dst_dir):
+    '''Move everything under `src_dir` to `dst_dir`, and delete the former.'''
+    for dirpath, dirnames, filenames in os.walk(src_dir):
+        subdir = os.path.relpath(dirpath, src_dir)
+        for f in filenames:
+            src = os.path.join(dirpath, f)
+            dst = os.path.join(dst_dir, subdir, f)
+            os.renames(src, dst)
+        for n, d in reversed(list(enumerate(dirnames))):
+            src = os.path.join(dirpath, d)
+            dst = os.path.join(dst_dir, subdir, d)
+            if not os.path.exists(dst):
+                # Directory does not exist in destination,
+                # rename it and prune it from os.walk list.
+                os.renames(src, dst)
+                del dirnames[n]
+    # Cleanup.
+    for dirpath, dirnames, filenames in os.walk(src_dir, topdown=True):
+        assert not filenames
+        os.rmdir(dirpath)
+
+
+class Wheel(object):
+
+    def __init__(self, filename):
+        match = WHEEL_NAME(os.path.basename(filename))
+        if match is None:
+            raise ValueError('invalid wheel name: %r' % filename)
+        self.filename = filename
+        for k, v in match.groupdict().items():
+            setattr(self, k, v)
+
+    def tags(self):
+        '''List tags (py_version, abi, platform) supported by this wheel.'''
+        return itertools.product(self.py_version.split('.'),
+                                 self.abi.split('.'),
+                                 self.platform.split('.'))
+
+    def is_compatible(self):
+        '''Is the wheel compatible with the current platform?'''
+        supported_tags = pep425tags.get_supported()
+        return next((True for t in self.tags() if t in supported_tags), False)
+
+    def egg_name(self):
+        return Distribution(
+            project_name=self.project_name, version=self.version,
+            platform=(None if self.platform == 'any' else get_platform()),
+        ).egg_name() + '.egg'
+
+    def install_as_egg(self, destination_eggdir):
+        '''Install wheel as an egg directory.'''
+        with zipfile.ZipFile(self.filename) as zf:
+            dist_basename = '%s-%s' % (self.project_name, self.version)
+            dist_info = '%s.dist-info' % dist_basename
+            dist_data = '%s.data' % dist_basename
+            def get_metadata(name):
+                with zf.open('%s/%s' % (dist_info, name)) as fp:
+                    value = fp.read().decode('utf-8') if PY3 else fp.read()
+                    return email.parser.Parser().parsestr(value)
+            wheel_metadata = get_metadata('WHEEL')
+            dist_metadata = get_metadata('METADATA')
+            # Check wheel format version is supported.
+            wheel_version = parse_version(wheel_metadata.get('Wheel-Version'))
+            if not parse_version('1.0') <= wheel_version < parse_version('2.0dev0'):
+                raise ValueError('unsupported wheel format version: %s' % wheel_version)
+            # Extract to target directory.
+            os.mkdir(destination_eggdir)
+            zf.extractall(destination_eggdir)
+            # Convert metadata.
+            dist_info = os.path.join(destination_eggdir, dist_info)
+            dist = Distribution.from_location(
+                destination_eggdir, dist_info,
+                metadata=PathMetadata(destination_eggdir, dist_info)
+            )
+            # Note: we need to evaluate and strip markers now,
+            # as we can't easily convert back from the syntax:
+            # foobar; "linux" in sys_platform and extra == 'test'
+            def raw_req(req):
+                req.marker = None
+                return str(req)
+            install_requires = list(sorted(map(raw_req, dist.requires())))
+            extras_require = {
+                extra: list(sorted(
+                    req
+                    for req in map(raw_req, dist.requires((extra,)))
+                    if req not in install_requires
+                ))
+                for extra in dist.extras
+            }
+            egg_info = os.path.join(destination_eggdir, 'EGG-INFO')
+            os.rename(dist_info, egg_info)
+            os.rename(os.path.join(egg_info, 'METADATA'),
+                      os.path.join(egg_info, 'PKG-INFO'))
+            setup_dist = SetuptoolsDistribution(attrs=dict(
+                install_requires=install_requires,
+                extras_require=extras_require,
+            ))
+            write_requirements(setup_dist.get_command_obj('egg_info'),
+                               None, os.path.join(egg_info, 'requires.txt'))
+            # Move data entries to their correct location.
+            dist_data = os.path.join(destination_eggdir, dist_data)
+            dist_data_scripts = os.path.join(dist_data, 'scripts')
+            if os.path.exists(dist_data_scripts):
+                egg_info_scripts = os.path.join(destination_eggdir,
+                                                'EGG-INFO', 'scripts')
+                os.mkdir(egg_info_scripts)
+                for entry in os.listdir(dist_data_scripts):
+                    # Remove bytecode, as it's not properly handled
+                    # during easy_install scripts install phase.
+                    if entry.endswith('.pyc'):
+                        os.unlink(os.path.join(dist_data_scripts, entry))
+                    else:
+                        os.rename(os.path.join(dist_data_scripts, entry),
+                                  os.path.join(egg_info_scripts, entry))
+                os.rmdir(dist_data_scripts)
+            for subdir in filter(os.path.exists, (
+                os.path.join(dist_data, d)
+                for d in ('data', 'headers', 'purelib', 'platlib')
+            )):
+                unpack(subdir, destination_eggdir)
+            if os.path.exists(dist_data):
+                os.rmdir(dist_data)
+            # Fix namespace packages.
+            namespace_packages = os.path.join(egg_info, 'namespace_packages.txt')
+            if os.path.exists(namespace_packages):
+                with open(namespace_packages) as fp:
+                    namespace_packages = fp.read().split()
+                for mod in namespace_packages:
+                    mod_dir = os.path.join(destination_eggdir, *mod.split('.'))
+                    mod_init = os.path.join(mod_dir, '__init__.py')
+                    if os.path.exists(mod_dir) and not os.path.exists(mod_init):
+                        with open(mod_init, 'w') as fp:
+                            fp.write(NAMESPACE_PACKAGE_INIT)
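+
+
+# Illustrative sketch (not part of the original module): parsing a wheel file
+# name and deriving the matching egg name. The file name is hypothetical and
+# the egg suffix depends on the running interpreter.
+#
+#   w = Wheel('example_pkg-1.0-py2.py3-none-any.whl')
+#   (w.project_name, w.version)   # ('example_pkg', '1.0')
+#   w.is_compatible()             # True for a pure-Python wheel on most platforms
+#   w.egg_name()                  # e.g. 'example_pkg-1.0-py3.6.egg'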
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/windows_support.py b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/windows_support.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb977cff9545ef5d48ad7cf13f2cbe1ebc3e7cd0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib/python3.6/site-packages/setuptools/windows_support.py
@@ -0,0 +1,29 @@
+import platform
+import ctypes
+
+
+def windows_only(func):
+    if platform.system() != 'Windows':
+        return lambda *args, **kwargs: None
+    return func
+
+
+@windows_only
+def hide_file(path):
+    """
+    Set the hidden attribute on a file or directory.
+
+    From http://stackoverflow.com/questions/19622133/
+
+    `path` must be text.
+    """
+    __import__('ctypes.wintypes')
+    SetFileAttributes = ctypes.windll.kernel32.SetFileAttributesW
+    SetFileAttributes.argtypes = ctypes.wintypes.LPWSTR, ctypes.wintypes.DWORD
+    SetFileAttributes.restype = ctypes.wintypes.BOOL
+
+    FILE_ATTRIBUTE_HIDDEN = 0x02
+
+    ret = SetFileAttributes(path, FILE_ATTRIBUTE_HIDDEN)
+    if not ret:
+        raise ctypes.WinError()
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib64 b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib64
new file mode 120000
index 0000000000000000000000000000000000000000..7951405f85a569efbacc12fccfee529ef1866602
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/lib64
@@ -0,0 +1 @@
+lib
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/pyvenv.cfg b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/pyvenv.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..7f0d9af4cba706101ca8c0bd6f2b3f570aa2ba9e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/pyvenv.cfg
@@ -0,0 +1,3 @@
+home = /usr/bin
+include-system-site-packages = false
+version = 3.6.8
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/CacheControl-0.11.7-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/CacheControl-0.11.7-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..36659e5fb7b65436e89d778eda8f3909bed7d4b9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/CacheControl-0.11.7-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/appdirs-1.4.3-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/appdirs-1.4.3-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..4c1abd7ef53f59c2f974ecf2d09b84e31b55f523
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/appdirs-1.4.3-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/certifi-2018.1.18-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/certifi-2018.1.18-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..144996e06cc8ba53f4bdea554705883e120be146
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/certifi-2018.1.18-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/chardet-3.0.4-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/chardet-3.0.4-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..8f911ef8972202b4f3fb3ab1d26ce44c23bab513
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/chardet-3.0.4-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/colorama-0.3.7-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/colorama-0.3.7-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..75345fcb6c5bc0991ede4e3455948968fe022294
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/colorama-0.3.7-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/distlib-0.2.6-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/distlib-0.2.6-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..161b4c21bb114ba04a707b1da0b0d9d69aba56c8
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/distlib-0.2.6-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/distro-1.0.1-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/distro-1.0.1-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..ac59d49fccdb435f849848c9e0a6d989e99a6301
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/distro-1.0.1-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/html5lib-0.999999999-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/html5lib-0.999999999-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..d9fda41f3a6a2142d85b014814c1b67937204256
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/html5lib-0.999999999-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/idna-2.6-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/idna-2.6-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..ab09a80bbe43144c1ef9137bfcba5bfe33f9cf77
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/idna-2.6-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/ipaddress-0.0.0-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/ipaddress-0.0.0-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..aa2a948b7174e381ffdb27805917503a08356df4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/ipaddress-0.0.0-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/lockfile-0.12.2-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/lockfile-0.12.2-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..c91f5425d5af3f889f74599ab46418b6554dade9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/lockfile-0.12.2-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/packaging-17.1-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/packaging-17.1-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..f42c38c4ee8e8bdef3d53cea356d6907e0918651
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/packaging-17.1-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pip-9.0.1-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pip-9.0.1-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..0e229b5d71d7597bb2b9500800069958de10499c
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pip-9.0.1-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pkg_resources-0.0.0-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pkg_resources-0.0.0-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..386ab77a6e7117fe3504b26d0ff7986c90dc618a
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pkg_resources-0.0.0-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/progress-1.2-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/progress-1.2-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..df711803849fccd7438961472d741fadc954bd8e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/progress-1.2-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pyparsing-2.2.0-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pyparsing-2.2.0-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..1411d0cf9a81b5ad9113b1b2e4b1ffc477e91abc
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/pyparsing-2.2.0-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/requests-2.18.4-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/requests-2.18.4-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..d14ee550d1767059e21a843584809ec965c89e1b
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/requests-2.18.4-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/retrying-1.3.3-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/retrying-1.3.3-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..1f717e62ecfee1ec25fa48d6312e6161699e3016
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/retrying-1.3.3-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/setuptools-39.0.1-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/setuptools-39.0.1-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..7f6a1773023271e72b621721a74d7739e991a422
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/setuptools-39.0.1-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/six-1.11.0-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/six-1.11.0-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..40c26d83dbc95c3fe3738298a2b67eb07343ac02
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/six-1.11.0-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/urllib3-1.22-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/urllib3-1.22-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..62fca28e15e00b19c91293eec7edc1324f0102fe
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/urllib3-1.22-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/webencodings-0.5-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/webencodings-0.5-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..9bbaed0c4f2e61af4b2bd830b59b96d98a2557b1
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/webencodings-0.5-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/wheel-0.30.0-py2.py3-none-any.whl b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/wheel-0.30.0-py2.py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..527fefd066e5a5ba17e4efa50cea2b5f911bda37
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/nxcals-python3-env/share/python-wheels/wheel-0.30.0-py2.py3-none-any.whl differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/.coveragerc b/spark-2.4.0-bin-hadoop2.7/python/.coveragerc
new file mode 100644
index 0000000000000000000000000000000000000000..b3339cd356a6ed7aa51f2ce31ada37db9ba92e65
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/.coveragerc
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[run]
+branch = true
+parallel = true
+data_file = ${COVERAGE_DIR}/coverage_data/coverage
diff --git a/spark-2.4.0-bin-hadoop2.7/python/.gitignore b/spark-2.4.0-bin-hadoop2.7/python/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..52128cf844a79ef582b386ecc870d7e4e414f0c2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/.gitignore
@@ -0,0 +1,5 @@
+*.pyc
+docs/_build/
+pyspark.egg-info
+build/
+dist/
diff --git a/spark-2.4.0-bin-hadoop2.7/python/MANIFEST.in b/spark-2.4.0-bin-hadoop2.7/python/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..40f1fb2f1ee7ebba29406be545edc24d56384d2e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/MANIFEST.in
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+global-exclude *.py[cod] __pycache__ .DS_Store
+recursive-include deps/jars *.jar
+graft deps/bin
+recursive-include deps/data *.data *.txt
+recursive-include deps/licenses *.txt
+recursive-include deps/examples *.py
+recursive-include lib *.zip
+include README.md
diff --git a/spark-2.4.0-bin-hadoop2.7/python/README.md b/spark-2.4.0-bin-hadoop2.7/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c020d84b01ffd48c40860649eacae5abad007f3f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/README.md
@@ -0,0 +1,32 @@
+# Apache Spark
+
+Spark is a fast and general cluster computing system for Big Data. It provides
+high-level APIs in Scala, Java, Python, and R, and an optimized engine that
+supports general computation graphs for data analysis. It also supports a
+rich set of higher-level tools including Spark SQL for SQL and DataFrames,
+MLlib for machine learning, GraphX for graph processing,
+and Spark Streaming for stream processing.
+
+<http://spark.apache.org/>
+
+## Online Documentation
+
+You can find the latest Spark documentation, including a programming
+guide, on the [project web page](http://spark.apache.org/documentation.html)
+
+
+## Python Packaging
+
+This README file only contains basic information related to pip-installed PySpark.
+This packaging is currently experimental and may change in future versions (although we will do our best to keep compatibility).
+Using PySpark requires the Spark JARs, and if you are building this from source, please see the builder instructions at
+["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html).
+
+The Python packaging for Spark is not intended to replace all of the other use cases. This Python-packaged version of Spark is suitable for interacting with an existing cluster (be it Spark standalone, YARN, or Mesos), but it does not contain the tools required to set up your own standalone Spark cluster. You can download the full version of Spark from the [Apache Spark downloads page](http://spark.apache.org/downloads.html).
+
+
+**NOTE:** If you are using this with a Spark standalone cluster, you must ensure that the version (including the minor version) matches, or you may experience odd errors.
+
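+As a minimal sketch (the master URL below is a placeholder for your own
+cluster, not a default shipped with this package), a pip-installed PySpark
+can talk to an existing cluster like this:
+
+```python
+from pyspark.sql import SparkSession
+
+# Replace the master URL with that of your standalone/YARN/Mesos cluster.
+spark = (SparkSession.builder
+         .master("spark://my-master-host:7077")
+         .appName("pip-installed-pyspark-example")
+         .getOrCreate())
+
+spark.range(10).show()
+spark.stop()
+```
+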
+## Python Requirements
+
+At its core, PySpark depends on Py4J (currently version 0.10.7), but some additional sub-packages have their own extra requirements for some features (including numpy, pandas, and pyarrow).
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/Makefile b/spark-2.4.0-bin-hadoop2.7/python/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..1ed1f33af2326a72c55059a49560f7c44393fbe3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/Makefile
@@ -0,0 +1,204 @@
+# Makefile for Sphinx documentation
+#
+
+ifndef SPHINXBUILD
+ifndef SPHINXPYTHON
+SPHINXBUILD = sphinx-build
+endif
+endif
+
+ifdef SPHINXBUILD
+# User-friendly check for sphinx-build.
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+else
+# Note that there is an issue with Python version and Sphinx in PySpark documentation generation.
+# Please remove this check below when this issue is fixed. See SPARK-24530 for more details.
+PYTHON_VERSION_CHECK = $(shell $(SPHINXPYTHON) -c 'import sys; print(sys.version_info < (3, 0, 0))')
+ifeq ($(PYTHON_VERSION_CHECK), True)
+$(error Note that Python 3 is required to generate PySpark documentation correctly for now. The current Python executable is older than Python 3. See SPARK-24530. To force Sphinx to use a specific Python executable, please set SPHINXPYTHON to point to the Python 3 executable.)
+endif
+# Check if Sphinx is installed.
+ifeq ($(shell $(SPHINXPYTHON) -c 'import sphinx' >/dev/null 2>&1; echo $$?), 1)
+$(error Python executable '$(SPHINXPYTHON)' did not have Sphinx installed. Make sure you have Sphinx installed, then set the SPHINXPYTHON environment variable to point to the Python executable having Sphinx installed. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+# Use 'SPHINXPYTHON -msphinx' instead of 'sphinx-build'. See https://github.com/sphinx-doc/sphinx/pull/3523 for more details.
+SPHINXBUILD = $(SPHINXPYTHON) -msphinx
+endif
+
+# You can set these variables from the command line.
+SPHINXOPTS    ?=
+PAPER         ?=
+BUILDDIR      ?= _build
+# You can set SPHINXBUILD to specify the Sphinx build executable, or SPHINXPYTHON to specify the Python executable used to run Sphinx.
+# Precedence is as follows (see the sample invocations below):
+#   1. If only SPHINXPYTHON is set, Sphinx is run through that Python executable; if only SPHINXBUILD is set, that sphinx-build is used.
+#   2. If both are set, SPHINXBUILD takes priority over SPHINXPYTHON.
+#   3. By default, SPHINXBUILD is 'sphinx-build'.
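+#
+# Sample invocations (illustrative; any Python 3 installation with Sphinx works):
+#   SPHINXPYTHON=python3 make html
+#   SPHINXBUILD=sphinx-build make html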
+
+export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.7-src.zip)
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  xml        to make Docutils-native XML files"
+	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	rm -rf $(BUILDDIR)/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyspark.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyspark.qhc"
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/pyspark"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pyspark"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through platex and dvipdfmx..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	make -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+xml:
+	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+	@echo
+	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+	@echo
+	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/_static/pyspark.css b/spark-2.4.0-bin-hadoop2.7/python/docs/_static/pyspark.css
new file mode 100644
index 0000000000000000000000000000000000000000..41106f2f6e26dad855c9e9a5ad691637bfdeb132
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/_static/pyspark.css
@@ -0,0 +1,90 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+body {
+    background-color: #ffffff;
+}
+
+div.sphinxsidebar {
+    width: 274px;
+}
+
+div.bodywrapper {
+    margin: 0 0 0 274px;
+}
+
+div.sphinxsidebar ul {
+    margin-right: 10px;
+}
+
+div.sphinxsidebar li a {
+    word-break: break-all;
+}
+
+span.pys-tag {
+    font-size: 11px;
+    font-weight: bold;
+    margin: 0 0 0 2px;
+    padding: 1px 3px 1px 3px;
+    -moz-border-radius: 3px;
+    -webkit-border-radius: 3px;
+    border-radius: 3px;
+    text-align: center;
+    text-decoration: none;
+}
+
+span.pys-tag-experimental {
+    background-color: rgb(37, 112, 128);
+    color: rgb(255, 255, 255);
+}
+
+span.pys-tag-deprecated {
+    background-color: rgb(238, 238, 238);
+    color: rgb(62, 67, 73);
+}
+
+div.pys-note-experimental {
+    background-color: rgb(88, 151, 165);
+    border-color: rgb(59, 115, 127);
+    color: rgb(255, 255, 255);
+}
+
+div.pys-note-deprecated {
+}
+
+.hasTooltip {
+    position:relative;
+}
+.hasTooltip span {
+    display:none;
+}
+
+.hasTooltip:hover span.tooltip {
+    display: inline-block;
+    -moz-border-radius: 2px;
+    -webkit-border-radius: 2px;
+    border-radius: 2px;
+    background-color: rgb(250, 250, 250);
+    color: rgb(68, 68, 68);
+    font-weight: normal;
+    box-shadow: 1px 1px 3px rgb(127, 127, 127);
+    position: absolute;
+    padding: 0 3px 0 3px;
+    top: 1.3em;
+    left: 14px;
+    z-index: 9999
+}
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/_static/pyspark.js b/spark-2.4.0-bin-hadoop2.7/python/docs/_static/pyspark.js
new file mode 100644
index 0000000000000000000000000000000000000000..75e4c42492a480de9761b0d1bcce2bf7646d877a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/_static/pyspark.js
@@ -0,0 +1,99 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+$(function (){
+
+    function startsWith(s, prefix) {
+        return s && s.indexOf(prefix) === 0;
+    }
+
+    function buildSidebarLinkMap() {
+        var linkMap = {};
+        $('div.sphinxsidebar a.reference.internal').each(function (i,a)  {
+            var href = $(a).attr('href');
+            if (startsWith(href, '#module-')) {
+                var id = href.substr(8);
+                linkMap[id] = [$(a), null];
+            }
+        })
+        return linkMap;
+    };
+
+    function getAdNoteDivs(dd) {
+        var noteDivs = {};
+        dd.find('> div.admonition.note > p.last').each(function (i, p) {
+            var text = $(p).text();
+            if (!noteDivs.experimental && startsWith(text, 'Experimental')) {
+                noteDivs.experimental = $(p).parent();
+            }
+            if (!noteDivs.deprecated && startsWith(text, 'Deprecated')) {
+                noteDivs.deprecated = $(p).parent();
+            }
+        });
+        return noteDivs;
+    }
+
+    function getParentId(name) {
+        var last_idx = name.lastIndexOf('.');
+        return last_idx == -1? '': name.substr(0, last_idx);
+    }
+
+    function buildTag(text, cls, tooltip) {
+        return '<span class="pys-tag ' + cls + ' hasTooltip">' + text + '<span class="tooltip">'
+            + tooltip + '</span></span>'
+    }
+
+
+    var sidebarLinkMap = buildSidebarLinkMap();
+
+    $('dl.class, dl.function').each(function (i,dl)  {
+
+        dl = $(dl);
+        dt = dl.children('dt').eq(0);
+        dd = dl.children('dd').eq(0);
+        var id = dt.attr('id');
+        var desc = dt.find('> .descname').text();
+        var adNoteDivs = getAdNoteDivs(dd);
+
+        if (id) {
+            var parent_id = getParentId(id);
+
+            var r = sidebarLinkMap[parent_id];
+            if (r) {
+                if (r[1] === null) {
+                    r[1] = $('<ul/>');
+                    r[0].parent().append(r[1]);
+                }
+                var tags = '';
+                if (adNoteDivs.experimental) {
+                    tags += buildTag('E', 'pys-tag-experimental', 'Experimental');
+                    adNoteDivs.experimental.addClass('pys-note pys-note-experimental');
+                }
+                if (adNoteDivs.deprecated) {
+                    tags += buildTag('D', 'pys-tag-deprecated', 'Deprecated');
+                    adNoteDivs.deprecated.addClass('pys-note pys-note-deprecated');
+                }
+                var li = $('<li/>');
+                var a = $('<a href="#' + id + '">' + desc + '</a>');
+                li.append(a);
+                li.append(tags);
+                r[1].append(li);
+                sidebarLinkMap[id] = [a, null];
+            }
+        }
+    });
+});
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/_templates/layout.html b/spark-2.4.0-bin-hadoop2.7/python/docs/_templates/layout.html
new file mode 100644
index 0000000000000000000000000000000000000000..ab36ebababf88f43303d8eb6f7459e56a273ad05
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/_templates/layout.html
@@ -0,0 +1,6 @@
+{% extends "!layout.html" %}
+{% set script_files = script_files + ["_static/pyspark.js"] %}
+{% set css_files = css_files + ['_static/pyspark.css'] %}
+{% block rootrellink %}
+    {{ super() }}
+{% endblock %}
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/conf.py b/spark-2.4.0-bin-hadoop2.7/python/docs/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..50fb3175a7dc4efbb641a583484cdde994ed9958
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/conf.py
@@ -0,0 +1,340 @@
+# -*- coding: utf-8 -*-
+#
+# pyspark documentation build configuration file, created by
+# sphinx-quickstart on Thu Aug 28 15:17:47 2014.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+needs_sphinx = '1.2'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.viewcode',
+    'epytext',
+    'sphinx.ext.mathjax',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'PySpark'
+copyright = u''
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = 'master'
+# The full version, including alpha/beta/rc tags.
+release = os.environ.get('RELEASE_VERSION', version)
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+# -- Options for autodoc --------------------------------------------------
+
+# Look at the first line of the docstring for function and method signatures.
+autodoc_docstring_signature = True
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'nature'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+html_logo = "../../docs/img/spark-logo-hd.png"
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+html_domain_indices = False
+
+# If false, no index is generated.
+html_use_index = False
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'pysparkdoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+  ('index', 'pyspark.tex', u'pyspark Documentation',
+   u'Author', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    ('index', 'pyspark', u'pyspark Documentation',
+     [u'Author'], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+  ('index', 'pyspark', u'pyspark Documentation',
+   u'Author', 'pyspark', 'One line description of project.',
+   'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
+
+
+# -- Options for Epub output ----------------------------------------------
+
+# Bibliographic Dublin Core info.
+epub_title = u'pyspark'
+epub_author = u'Author'
+epub_publisher = u'Author'
+epub_copyright = u'2014, Author'
+
+# The basename for the epub file. It defaults to the project name.
+#epub_basename = u'pyspark'
+
+# The HTML theme for the epub output. Since the default themes are not optimized
+# for small screen space, using the same theme for HTML and epub output is
+# usually not wise. This defaults to 'epub', a theme designed to save visual
+# space.
+#epub_theme = 'epub'
+
+# The language of the text. It defaults to the language option
+# or en if the language is not set.
+#epub_language = ''
+
+# The scheme of the identifier. Typical schemes are ISBN or URL.
+#epub_scheme = ''
+
+# The unique identifier of the text. This can be a ISBN number
+# or the project homepage.
+#epub_identifier = ''
+
+# A unique identification for the text.
+#epub_uid = ''
+
+# A tuple containing the cover image and cover page html template filenames.
+#epub_cover = ()
+
+# A sequence of (type, uri, title) tuples for the guide element of content.opf.
+#epub_guide = ()
+
+# HTML files that should be inserted before the pages created by sphinx.
+# The format is a list of tuples containing the path and title.
+#epub_pre_files = []
+
+# HTML files that should be inserted after the pages created by sphinx.
+# The format is a list of tuples containing the path and title.
+#epub_post_files = []
+
+# A list of files that should not be packed into the epub file.
+epub_exclude_files = ['search.html']
+
+# The depth of the table of contents in toc.ncx.
+#epub_tocdepth = 3
+
+# Allow duplicate toc entries.
+#epub_tocdup = True
+
+# Choose between 'default' and 'includehidden'.
+#epub_tocscope = 'default'
+
+# Fix unsupported image types using the PIL.
+#epub_fix_images = False
+
+# Scale large images.
+#epub_max_image_width = 0
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#epub_show_urls = 'inline'
+
+# If false, no index is generated.
+#epub_use_index = True
+
+# Skip sample endpoint link (not expected to resolve)
+linkcheck_ignore = [r'https://kinesis.us-east-1.amazonaws.com']
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/epytext.py b/spark-2.4.0-bin-hadoop2.7/python/docs/epytext.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bbbf650a13e97e56d99b42707e8b2a0c52245e6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/epytext.py
@@ -0,0 +1,30 @@
+import re
+
+RULES = (
+    (r"<(!BLANKLINE)[\w.]+>", r""),
+    (r"L{([\w.()]+)}", r":class:`\1`"),
+    (r"[LC]{(\w+\.\w+)\(\)}", r":func:`\1`"),
+    (r"C{([\w.()]+)}", r":class:`\1`"),
+    (r"[IBCM]{([^}]+)}", r"`\1`"),
+    ('pyspark.rdd.RDD', 'RDD'),
+)
+
+
+def _convert_epytext(line):
+    """
+    >>> _convert_epytext("L{A}")
+    ':class:`A`'
+    """
+    line = line.replace('@', ':')
+    for p, sub in RULES:
+        line = re.sub(p, sub, line)
+    return line
+
+
+def _process_docstring(app, what, name, obj, options, lines):
+    for i in range(len(lines)):
+        lines[i] = _convert_epytext(lines[i])
+
+
+def setup(app):
+    app.connect("autodoc-process-docstring", _process_docstring)
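To make the effect of the regex rules above concrete, a small hedged sketch of the conversion (it assumes this `epytext` module is importable, e.g. when run from python/docs); the docstring fragments are made-up examples of the epytext markup found in older PySpark docstrings:

    from epytext import _convert_epytext

    # '@' becomes ':' and L{...} becomes a :class: role
    print(_convert_epytext("@param rdd: the L{RDD} to cache"))
    # -> :param rdd: the :class:`RDD` to cache

    # C{...}() with a dotted name becomes a :func: role
    print(_convert_epytext("see C{SparkContext.broadcast()}"))
    # -> see :func:`SparkContext.broadcast`

    # remaining I{...}/B{...} markup falls back to plain literals
    print(_convert_epytext("B{Note:} this API is I{experimental}"))
    # -> `Note:` this API is `experimental`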
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/index.rst b/spark-2.4.0-bin-hadoop2.7/python/docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..421c8de86a3cc1b895bfcf5b2159d2b0bcc51658
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/index.rst
@@ -0,0 +1,52 @@
+.. pyspark documentation master file, created by
+   sphinx-quickstart on Thu Aug 28 15:17:47 2014.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Spark Python API Docs!
+===================================
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   pyspark
+   pyspark.sql
+   pyspark.streaming
+   pyspark.ml
+   pyspark.mllib
+
+
+Core classes:
+---------------
+
+    :class:`pyspark.SparkContext`
+
+    Main entry point for Spark functionality.
+
+    :class:`pyspark.RDD`
+
+    A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
+
+    :class:`pyspark.streaming.StreamingContext`
+
+    Main entry point for Spark Streaming functionality.
+
+    :class:`pyspark.streaming.DStream`
+
+    A Discretized Stream (DStream), the basic abstraction in Spark Streaming.
+
+    :class:`pyspark.sql.SQLContext`
+
+    Main entry point for DataFrame and SQL functionality.
+
+    :class:`pyspark.sql.DataFrame`
+
+    A distributed collection of data grouped into named columns.
+
+
+Indices and tables
+==================
+
+* :ref:`search`
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/make.bat b/spark-2.4.0-bin-hadoop2.7/python/docs/make.bat
new file mode 100644
index 0000000000000000000000000000000000000000..cc29acdc196866b0403822261904a2046ea650e4
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/make.bat
@@ -0,0 +1,6 @@
+@ECHO OFF
+
+rem This is the entry point for running Sphinx documentation. To avoid polluting the
+rem environment, it just launches a new cmd to do the real work.
+
+cmd /V /E /C %~dp0make2.bat %*
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/make2.bat b/spark-2.4.0-bin-hadoop2.7/python/docs/make2.bat
new file mode 100644
index 0000000000000000000000000000000000000000..05d22eb5cdd23e8ea0a5d51b60cde2bd870dd5a3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/make2.bat
@@ -0,0 +1,243 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set BUILDDIR=_build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
+set I18NSPHINXOPTS=%SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+	:help
+	echo.Please use `make ^<target^>` where ^<target^> is one of
+	echo.  html       to make standalone HTML files
+	echo.  dirhtml    to make HTML files named index.html in directories
+	echo.  singlehtml to make a single large HTML file
+	echo.  pickle     to make pickle files
+	echo.  json       to make JSON files
+	echo.  htmlhelp   to make HTML files and a HTML help project
+	echo.  qthelp     to make HTML files and a qthelp project
+	echo.  devhelp    to make HTML files and a Devhelp project
+	echo.  epub       to make an epub
+	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+	echo.  text       to make text files
+	echo.  man        to make manual pages
+	echo.  texinfo    to make Texinfo files
+	echo.  gettext    to make PO message catalogs
+	echo.  changes    to make an overview over all changed/added/deprecated items
+	echo.  xml        to make Docutils-native XML files
+	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
+	echo.  linkcheck  to check all external links for integrity
+	echo.  doctest    to run all doctests embedded in the documentation if enabled
+	goto end
+)
+
+if "%1" == "clean" (
+	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+	del /q /s %BUILDDIR%\*
+	goto end
+)
+
+
+%SPHINXBUILD% 2> nul
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "html" (
+	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+	goto end
+)
+
+if "%1" == "dirhtml" (
+	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+	goto end
+)
+
+if "%1" == "singlehtml" (
+	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+	goto end
+)
+
+if "%1" == "pickle" (
+	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can process the pickle files.
+	goto end
+)
+
+if "%1" == "json" (
+	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can process the JSON files.
+	goto end
+)
+
+if "%1" == "htmlhelp" (
+	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+	goto end
+)
+
+if "%1" == "qthelp" (
+	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pyspark.qhcp
+	echo.To view the help file:
+	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pyspark.qhc
+	goto end
+)
+
+if "%1" == "devhelp" (
+	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished.
+	goto end
+)
+
+if "%1" == "epub" (
+	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The epub file is in %BUILDDIR%/epub.
+	goto end
+)
+
+if "%1" == "latex" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "latexpdf" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	cd %BUILDDIR%/latex
+	make all-pdf
+	cd %BUILDDIR%/..
+	echo.
+	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "latexpdfja" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	cd %BUILDDIR%/latex
+	make all-pdf-ja
+	cd %BUILDDIR%/..
+	echo.
+	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "text" (
+	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The text files are in %BUILDDIR%/text.
+	goto end
+)
+
+if "%1" == "man" (
+	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The manual pages are in %BUILDDIR%/man.
+	goto end
+)
+
+if "%1" == "texinfo" (
+	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
+	goto end
+)
+
+if "%1" == "gettext" (
+	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
+	goto end
+)
+
+if "%1" == "changes" (
+	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.The overview file is in %BUILDDIR%/changes.
+	goto end
+)
+
+if "%1" == "linkcheck" (
+	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+	goto end
+)
+
+if "%1" == "doctest" (
+	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+	goto end
+)
+
+if "%1" == "xml" (
+	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The XML files are in %BUILDDIR%/xml.
+	goto end
+)
+
+if "%1" == "pseudoxml" (
+	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
+	if errorlevel 1 exit /b 1
+	echo.
+	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
+	goto end
+)
+
+:end
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.ml.rst b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.ml.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6a5d81706f071bd7ff27c43c16d49d6b81166a52
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.ml.rst
@@ -0,0 +1,114 @@
+pyspark.ml package
+==================
+
+ML Pipeline APIs
+----------------
+
+.. automodule:: pyspark.ml
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.param module
+-----------------------
+
+.. automodule:: pyspark.ml.param
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.feature module
+-------------------------
+
+.. automodule:: pyspark.ml.feature
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.classification module
+--------------------------------
+
+.. automodule:: pyspark.ml.classification
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.clustering module
+----------------------------
+
+.. automodule:: pyspark.ml.clustering
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.linalg module
+----------------------------
+
+.. automodule:: pyspark.ml.linalg
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.recommendation module
+--------------------------------
+
+.. automodule:: pyspark.ml.recommendation
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.regression module
+----------------------------
+
+.. automodule:: pyspark.ml.regression
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.stat module
+----------------------
+
+.. automodule:: pyspark.ml.stat
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.tuning module
+------------------------
+
+.. automodule:: pyspark.ml.tuning
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.evaluation module
+----------------------------
+
+.. automodule:: pyspark.ml.evaluation
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.fpm module
+----------------------------
+
+.. automodule:: pyspark.ml.fpm
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.image module
+----------------------------
+
+.. automodule:: pyspark.ml.image
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.ml.util module
+----------------------------
+
+.. automodule:: pyspark.ml.util
+    :members:
+    :undoc-members:
+    :inherited-members:
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.mllib.rst b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.mllib.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2d54ab118b94b37514f2510e27d464b50e5b70b9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.mllib.rst
@@ -0,0 +1,99 @@
+pyspark.mllib package
+=====================
+
+pyspark.mllib.classification module
+-----------------------------------
+
+.. automodule:: pyspark.mllib.classification
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.mllib.clustering module
+-------------------------------
+
+.. automodule:: pyspark.mllib.clustering
+    :members:
+    :undoc-members:
+
+pyspark.mllib.evaluation module
+-------------------------------
+
+.. automodule:: pyspark.mllib.evaluation
+    :members:
+    :undoc-members:
+
+pyspark.mllib.feature module
+-------------------------------
+
+.. automodule:: pyspark.mllib.feature
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+pyspark.mllib.fpm module
+------------------------
+
+.. automodule:: pyspark.mllib.fpm
+    :members:
+    :undoc-members:
+
+pyspark.mllib.linalg module
+---------------------------
+
+.. automodule:: pyspark.mllib.linalg
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+pyspark.mllib.linalg.distributed module
+---------------------------------------
+
+.. automodule:: pyspark.mllib.linalg.distributed
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+pyspark.mllib.random module
+---------------------------
+
+.. automodule:: pyspark.mllib.random
+    :members:
+    :undoc-members:
+
+pyspark.mllib.recommendation module
+-----------------------------------
+
+.. automodule:: pyspark.mllib.recommendation
+    :members:
+    :undoc-members:
+
+pyspark.mllib.regression module
+-------------------------------
+
+.. automodule:: pyspark.mllib.regression
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.mllib.stat module
+-------------------------
+
+.. automodule:: pyspark.mllib.stat
+    :members:
+    :undoc-members:
+
+pyspark.mllib.tree module
+-------------------------
+
+.. automodule:: pyspark.mllib.tree
+    :members:
+    :undoc-members:
+    :inherited-members:
+
+pyspark.mllib.util module
+-------------------------
+
+.. automodule:: pyspark.mllib.util
+    :members:
+    :undoc-members:
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.rst b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.rst
new file mode 100644
index 0000000000000000000000000000000000000000..0df12c49ad033f6e24142bc862bdf1ea7c849731
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.rst
@@ -0,0 +1,20 @@
+pyspark package
+===============
+
+Subpackages
+-----------
+
+.. toctree::
+    :maxdepth: 1
+
+    pyspark.sql
+    pyspark.streaming
+    pyspark.ml
+    pyspark.mllib
+
+Contents
+--------
+
+.. automodule:: pyspark
+    :members:
+    :undoc-members:
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.sql.rst b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.sql.rst
new file mode 100644
index 0000000000000000000000000000000000000000..5c3b7e274857a8e7ce920fbb3a3e9a9ff0bebab1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.sql.rst
@@ -0,0 +1,30 @@
+pyspark.sql module
+==================
+
+Module Context
+--------------
+
+.. automodule:: pyspark.sql
+    :members:
+    :undoc-members:
+    :exclude-members: builder
+.. We need `exclude-members` to prevent default description generation
+   as a workaround for old Sphinx (< 1.6.6).
+
+pyspark.sql.types module
+------------------------
+.. automodule:: pyspark.sql.types
+    :members:
+    :undoc-members:
+
+pyspark.sql.functions module
+----------------------------
+.. automodule:: pyspark.sql.functions
+    :members:
+    :undoc-members:
+
+pyspark.sql.streaming module
+----------------------------
+.. automodule:: pyspark.sql.streaming
+    :members:
+    :undoc-members:
diff --git a/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.streaming.rst b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.streaming.rst
new file mode 100644
index 0000000000000000000000000000000000000000..25ceabac0a541c83a1a248eee1f9bd923612b23c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/docs/pyspark.streaming.rst
@@ -0,0 +1,31 @@
+pyspark.streaming module
+========================
+
+Module contents
+---------------
+
+.. automodule:: pyspark.streaming
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+pyspark.streaming.kafka module
+------------------------------
+.. automodule:: pyspark.streaming.kafka
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+pyspark.streaming.kinesis module
+--------------------------------
+.. automodule:: pyspark.streaming.kinesis
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+pyspark.streaming.flume module
+------------------------------
+.. automodule:: pyspark.streaming.flume
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/spark-2.4.0-bin-hadoop2.7/python/lib/PY4J_LICENSE.txt b/spark-2.4.0-bin-hadoop2.7/python/lib/PY4J_LICENSE.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a70279ca14ae3cdfd3f166871eb888f4d6e112ac
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/lib/PY4J_LICENSE.txt
@@ -0,0 +1,27 @@
+
+Copyright (c) 2009-2011, Barthelemy Dagenais All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+- The name of the author may not be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/spark-2.4.0-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip b/spark-2.4.0-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip
new file mode 100644
index 0000000000000000000000000000000000000000..128e321078793f41154613544ab7016878d5617e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/lib/py4j-0.10.7-src.zip differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/lib/pyspark.zip b/spark-2.4.0-bin-hadoop2.7/python/lib/pyspark.zip
new file mode 100644
index 0000000000000000000000000000000000000000..399c02b6726625bd3999685a8ee5f42ac8f09cea
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/lib/pyspark.zip differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pylintrc b/spark-2.4.0-bin-hadoop2.7/python/pylintrc
new file mode 100644
index 0000000000000000000000000000000000000000..6a675770da69a89510ab366dd1de0b1c1dacb12b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pylintrc
@@ -0,0 +1,404 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+profile=no
+
+# Add files or directories to the blacklist. They should be base names, not
+# paths.
+ignore=pyspark.heapq3
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Use multiple processes to speed up Pylint.
+jobs=1
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+# A comma-separated list of package or module names from where C extensions may
+# be loaded. Extensions are loaded into the active Python interpreter and may
+# run arbitrary code.
+extension-pkg-whitelist=
+
+# Allow optimization of some AST trees. This will activate a peephole AST
+# optimizer, which will apply various small optimizations. For instance, it can
+# be used to obtain the result of joining multiple strings with the addition
+# operator. Joining a lot of strings can lead to a maximum recursion error in
+# Pylint, and this flag can prevent that. It has one side effect: the resulting
+# AST will be different from the real one.
+optimize-ast=no
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
+confidence=
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times. See also the "--disable" option for examples.
+enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W"
+
+# These errors are arranged in order of the number of warnings given by pylint.
+# If you would like to improve the code quality of pyspark, remove any of these disabled errors,
+# run ./dev/lint-python, and see if the errors raised by pylint can be fixed.
+
+disable=invalid-name,missing-docstring,protected-access,unused-argument,no-member,unused-wildcard-import,redefined-builtin,too-many-arguments,unused-variable,too-few-public-methods,bad-continuation,duplicate-code,redefined-outer-name,too-many-ancestors,import-error,superfluous-parens,unused-import,line-too-long,no-name-in-module,unnecessary-lambda,import-self,no-self-use,unidiomatic-typecheck,fixme,too-many-locals,cyclic-import,too-many-branches,bare-except,wildcard-import,dangerous-default-value,broad-except,too-many-public-methods,deprecated-lambda,anomalous-backslash-in-string,too-many-lines,reimported,too-many-statements,bad-whitespace,unpacking-non-sequence,too-many-instance-attributes,abstract-method,old-style-class,global-statement,attribute-defined-outside-init,arguments-differ,undefined-all-variable,no-init,useless-else-on-loop,super-init-not-called,notimplemented-raised,too-many-return-statements,pointless-string-statement,global-variable-undefined,bad-classmethod-argument,too-many-format-args,parse-error,no-self-argument,pointless-statement,undefined-variable,undefined-loop-variable
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, eg
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file named "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=no
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables errors, warning, statement, which
+# respectively contain the number of errors / warning messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (RP0004).
+comment=no
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details
+#msg-template=
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# List of built-in function names that should not be used, separated by a comma
+bad-functions=
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=baz,toto,tutu,tata
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Include a hint for the correct naming format with invalid-name
+include-naming-hint=no
+
+# Regular expression matching correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Naming hint for function names
+function-name-hint=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression matching correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Naming hint for variable names
+variable-name-hint=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression matching correct constant names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+
+# Naming hint for constant names
+const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+
+# Regular expression matching correct attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Naming hint for attribute names
+attr-name-hint=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression matching correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Naming hint for argument names
+argument-name-hint=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression matching correct class attribute names
+class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
+
+# Naming hint for class attribute names
+class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$
+
+# Regular expression matching correct inline iteration names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Naming hint for inline iteration names
+inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$
+
+# Regular expression matching correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Naming hint for class names
+class-name-hint=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression matching correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Naming hint for module names
+module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression matching correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Naming hint for method names
+method-name-hint=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=__.*__
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=-1
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=100
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+# List of optional constructs for which whitespace checking is disabled
+no-space-check=trailing-comma,dict-separator
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+# Ignore imports when computing similarities.
+ignore-imports=no
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the name of dummy variables (i.e. expectedly
+# not used).
+dummy-variables-rgx=_$|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+
+
+[SPELLING]
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the python-enchant package.
+spelling-dict=
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the indicated private dictionary in
+# --spelling-private-dict-file option instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[LOGGING]
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format
+logging-modules=logging
+
+
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis
+ignored-modules=
+
+# List of classes names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=SQLObject
+
+# When zope mode is activated, add a predefined set of Zope acquired attributes
+# to generated-members.
+zope=no
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E0201 when accessed. Python regular
+# expressions are accepted.
+generated-members=REQUEST,acl_users,aq_parent
+
+
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to avoid checking methods defined in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,_fields,_replace,_source,_make
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,TERMIOS,Bastion,rexec
+
+# Create a graph of all (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Argument names that match this expression will be ignored. Defaults to names
+# with a leading underscore.
+ignored-argument-names=_.*
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branch for function / method body
+max-branches=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=Exception
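The comment in the MESSAGES CONTROL section above points contributors at ./dev/lint-python. Outside that script, the same configuration can be exercised directly through pylint's standard --rcfile flag; a hedged sketch (paths are illustrative and assume the Spark source checkout layout):

    # Illustrative only; normally this is driven through ./dev/lint-python.
    import subprocess

    subprocess.call([
        "pylint",
        "--rcfile=python/pylintrc",   # the configuration file above
        "python/pyspark/rdd.py",      # any module to lint (illustrative target)
    ])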
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee153af18c88cd85c2630f566b22a6d806020aeb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/__init__.py
@@ -0,0 +1,122 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+PySpark is the Python API for Spark.
+
+Public classes:
+
+  - :class:`SparkContext`:
+      Main entry point for Spark functionality.
+  - :class:`RDD`:
+      A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
+  - :class:`Broadcast`:
+      A broadcast variable that gets reused across tasks.
+  - :class:`Accumulator`:
+      An "add-only" shared variable that tasks can only add values to.
+  - :class:`SparkConf`:
+      For configuring Spark.
+  - :class:`SparkFiles`:
+      Access files shipped with jobs.
+  - :class:`StorageLevel`:
+      Finer-grained cache persistence levels.
+  - :class:`TaskContext`:
+      Information about the currently running task, available on the workers and experimental.
+  - :class:`RDDBarrier`:
+      Wraps an RDD under a barrier stage for barrier execution.
+  - :class:`BarrierTaskContext`:
+      A :class:`TaskContext` that provides extra info and tooling for barrier execution.
+  - :class:`BarrierTaskInfo`:
+      Information about a barrier task.
+"""
+
+from functools import wraps
+import types
+
+from pyspark.conf import SparkConf
+from pyspark.context import SparkContext
+from pyspark.rdd import RDD, RDDBarrier
+from pyspark.files import SparkFiles
+from pyspark.storagelevel import StorageLevel
+from pyspark.accumulators import Accumulator, AccumulatorParam
+from pyspark.broadcast import Broadcast
+from pyspark.serializers import MarshalSerializer, PickleSerializer
+from pyspark.status import *
+from pyspark.taskcontext import TaskContext, BarrierTaskContext, BarrierTaskInfo
+from pyspark.profiler import Profiler, BasicProfiler
+from pyspark.version import __version__
+from pyspark._globals import _NoValue
+
+
+def since(version):
+    """
+    A decorator that annotates a function to append the version of Spark in which the function was added.
+    """
+    import re
+    indent_p = re.compile(r'\n( +)')
+
+    def deco(f):
+        indents = indent_p.findall(f.__doc__)
+        indent = ' ' * (min(len(m) for m in indents) if indents else 0)
+        f.__doc__ = f.__doc__.rstrip() + "\n\n%s.. versionadded:: %s" % (indent, version)
+        return f
+    return deco
+
+
+def copy_func(f, name=None, sinceversion=None, doc=None):
+    """
+    Returns a function with the same code, globals, defaults, closure, and
+    name (or a new name, if provided).
+    """
+    # See
+    # http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python
+    fn = types.FunctionType(f.__code__, f.__globals__, name or f.__name__, f.__defaults__,
+                            f.__closure__)
+    # in case f was given attrs (note this dict is a shallow copy):
+    fn.__dict__.update(f.__dict__)
+    if doc is not None:
+        fn.__doc__ = doc
+    if sinceversion is not None:
+        fn = since(sinceversion)(fn)
+    return fn
+
+
+def keyword_only(func):
+    """
+    A decorator that forces keyword arguments in the wrapped method
+    and saves actual input keyword arguments in `_input_kwargs`.
+
+    .. note:: Should only be used to wrap a method where the first arg is `self`
+    """
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        if len(args) > 0:
+            raise TypeError("Method %s forces keyword arguments." % func.__name__)
+        self._input_kwargs = kwargs
+        return func(self, **kwargs)
+    return wrapper
+
+
+# for back compatibility
+from pyspark.sql import SQLContext, HiveContext, Row
+
+__all__ = [
+    "SparkConf", "SparkContext", "SparkFiles", "RDD", "StorageLevel", "Broadcast",
+    "Accumulator", "AccumulatorParam", "MarshalSerializer", "PickleSerializer",
+    "StatusTracker", "SparkJobInfo", "SparkStageInfo", "Profiler", "BasicProfiler", "TaskContext",
+    "RDDBarrier", "BarrierTaskContext", "BarrierTaskInfo",
+]
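A minimal hedged sketch of how the `since` and `keyword_only` decorators defined above compose; `Greeter` and `setParams` are made-up names, not PySpark APIs, and only a PySpark installation on the Python path is assumed (no running SparkContext is needed):

    from pyspark import keyword_only, since


    class Greeter(object):

        @keyword_only
        @since("2.4.0")
        def setParams(self, name=None):
            """Sets the parameters of this greeter.
            """
            # keyword_only stores the actual keyword arguments here
            self._params = self._input_kwargs
            return self


    g = Greeter().setParams(name="spark")   # keyword arguments are accepted
    print(Greeter.setParams.__doc__)        # docstring now ends with ".. versionadded:: 2.4.0"
    # Greeter().setParams("spark")          # would raise TypeError: forces keyword arguments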
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/_globals.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/_globals.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e6099db099639e28ecbe330297248e753783f22
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/_globals.py
@@ -0,0 +1,70 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Module defining global singleton classes.
+
+This module raises a RuntimeError if an attempt to reload it is made. In that
+way the identities of the classes defined here are fixed and will remain so
+even if pyspark itself is reloaded. In particular, a function like the following
+will still work correctly after pyspark is reloaded:
+
+    def foo(arg=pyspark._NoValue):
+        if arg is pyspark._NoValue:
+            ...
+
+See gh-7844 for a discussion of the reload problem that motivated this module.
+
+Note that this approach is adapted from NumPy.
+"""
+
+__ALL__ = ['_NoValue']
+
+
+# Disallow reloading this module so as to preserve the identities of the
+# classes defined here.
+if '_is_loaded' in globals():
+    raise RuntimeError('Reloading pyspark._globals is not allowed')
+_is_loaded = True
+
+
+class _NoValueType(object):
+    """Special keyword value.
+
+    The instance of this class may be used as the default value assigned to a
+    deprecated keyword in order to check if it has been given a user defined
+    value.
+
+    This class was copied from NumPy.
+    """
+    __instance = None
+
+    def __new__(cls):
+        # ensure that only one instance exists
+        if not cls.__instance:
+            cls.__instance = super(_NoValueType, cls).__new__(cls)
+        return cls.__instance
+
+    # needed for python 2 to preserve identity through a pickle
+    def __reduce__(self):
+        return (self.__class__, ())
+
+    def __repr__(self):
+        return "<no value>"
+
+
+_NoValue = _NoValueType()
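A hedged sketch of the sentinel pattern this module exists to support; `fillna_default` is a made-up function, not a PySpark API:

    from pyspark._globals import _NoValue


    def fillna_default(value=_NoValue):
        # None may be a legitimate user-supplied value, so the singleton's
        # identity is what distinguishes "argument omitted" from "passed None".
        if value is _NoValue:
            return "argument omitted"
        return "user passed %r" % (value,)


    print(fillna_default())        # argument omitted
    print(fillna_default(None))    # user passed None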
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/accumulators.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/accumulators.py
new file mode 100644
index 0000000000000000000000000000000000000000..00ec094e7e3b4b1821804fb8ab4efc28b874ad70
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/accumulators.py
@@ -0,0 +1,301 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+>>> from pyspark.context import SparkContext
+>>> sc = SparkContext('local', 'test')
+>>> a = sc.accumulator(1)
+>>> a.value
+1
+>>> a.value = 2
+>>> a.value
+2
+>>> a += 5
+>>> a.value
+7
+
+>>> sc.accumulator(1.0).value
+1.0
+
+>>> sc.accumulator(1j).value
+1j
+
+>>> rdd = sc.parallelize([1,2,3])
+>>> def f(x):
+...     global a
+...     a += x
+>>> rdd.foreach(f)
+>>> a.value
+13
+
+>>> b = sc.accumulator(0)
+>>> def g(x):
+...     b.add(x)
+>>> rdd.foreach(g)
+>>> b.value
+6
+
+>>> from pyspark.accumulators import AccumulatorParam
+>>> class VectorAccumulatorParam(AccumulatorParam):
+...     def zero(self, value):
+...         return [0.0] * len(value)
+...     def addInPlace(self, val1, val2):
+...         for i in range(len(val1)):
+...              val1[i] += val2[i]
+...         return val1
+>>> va = sc.accumulator([1.0, 2.0, 3.0], VectorAccumulatorParam())
+>>> va.value
+[1.0, 2.0, 3.0]
+>>> def g(x):
+...     global va
+...     va += [x] * 3
+>>> rdd.foreach(g)
+>>> va.value
+[7.0, 8.0, 9.0]
+
+>>> rdd.map(lambda x: a.value).collect() # doctest: +IGNORE_EXCEPTION_DETAIL
+Traceback (most recent call last):
+    ...
+Py4JJavaError:...
+
+>>> def h(x):
+...     global a
+...     a.value = 7
+>>> rdd.foreach(h) # doctest: +IGNORE_EXCEPTION_DETAIL
+Traceback (most recent call last):
+    ...
+Py4JJavaError:...
+
+>>> sc.accumulator([1.0, 2.0, 3.0]) # doctest: +IGNORE_EXCEPTION_DETAIL
+Traceback (most recent call last):
+    ...
+TypeError:...
+"""
+
+import sys
+import select
+import struct
+if sys.version < '3':
+    import SocketServer
+else:
+    import socketserver as SocketServer
+import threading
+from pyspark.serializers import read_int, PickleSerializer
+
+
+__all__ = ['Accumulator', 'AccumulatorParam']
+
+
+pickleSer = PickleSerializer()
+
+# Holds accumulators registered on the current machine, keyed by ID. This is then used to send
+# the local accumulator updates back to the driver program at the end of a task.
+_accumulatorRegistry = {}
+
+
+def _deserialize_accumulator(aid, zero_value, accum_param):
+    from pyspark.accumulators import _accumulatorRegistry
+    # If this accumulator has already been deserialized here, reuse it instead of overwriting it.
+    if aid in _accumulatorRegistry:
+        return _accumulatorRegistry[aid]
+    else:
+        accum = Accumulator(aid, zero_value, accum_param)
+        accum._deserialized = True
+        _accumulatorRegistry[aid] = accum
+        return accum
+
+
+class Accumulator(object):
+
+    """
+    A shared variable that can be accumulated, i.e., has a commutative and associative "add"
+    operation. Worker tasks on a Spark cluster can add values to an Accumulator with the C{+=}
+    operator, but only the driver program is allowed to access its value, using C{value}.
+    Updates from the workers get propagated automatically to the driver program.
+
+    While C{SparkContext} supports accumulators for primitive data types like C{int} and
+    C{float}, users can also define accumulators for custom types by providing a custom
+    L{AccumulatorParam} object. Refer to the doctest of this module for an example.
+    """
+
+    def __init__(self, aid, value, accum_param):
+        """Create a new Accumulator with a given initial value and AccumulatorParam object"""
+        from pyspark.accumulators import _accumulatorRegistry
+        self.aid = aid
+        self.accum_param = accum_param
+        self._value = value
+        self._deserialized = False
+        _accumulatorRegistry[aid] = self
+
+    def __reduce__(self):
+        """Custom serialization; saves the zero value from our AccumulatorParam"""
+        param = self.accum_param
+        return (_deserialize_accumulator, (self.aid, param.zero(self._value), param))
+
+    @property
+    def value(self):
+        """Get the accumulator's value; only usable in driver program"""
+        if self._deserialized:
+            raise Exception("Accumulator.value cannot be accessed inside tasks")
+        return self._value
+
+    @value.setter
+    def value(self, value):
+        """Sets the accumulator's value; only usable in driver program"""
+        if self._deserialized:
+            raise Exception("Accumulator.value cannot be accessed inside tasks")
+        self._value = value
+
+    def add(self, term):
+        """Adds a term to this accumulator's value"""
+        self._value = self.accum_param.addInPlace(self._value, term)
+
+    def __iadd__(self, term):
+        """The += operator; adds a term to this accumulator's value"""
+        self.add(term)
+        return self
+
+    def __str__(self):
+        return str(self._value)
+
+    def __repr__(self):
+        return "Accumulator<id=%i, value=%s>" % (self.aid, self._value)
+
+
+class AccumulatorParam(object):
+
+    """
+    Helper object that defines how to accumulate values of a given type.
+    """
+
+    def zero(self, value):
+        """
+        Provide a "zero value" for the type, compatible in dimensions with the
+        provided C{value} (e.g., a zero vector)
+        """
+        raise NotImplementedError
+
+    def addInPlace(self, value1, value2):
+        """
+        Add two values of the accumulator's data type, returning a new value;
+        for efficiency, can also update C{value1} in place and return it.
+        """
+        raise NotImplementedError
+
+
+class AddingAccumulatorParam(AccumulatorParam):
+
+    """
+    An AccumulatorParam that uses the + operators to add values. Designed for simple types
+    such as integers, floats, and lists. Requires the zero value for the underlying type
+    as a parameter.
+    """
+
+    def __init__(self, zero_value):
+        self.zero_value = zero_value
+
+    def zero(self, value):
+        return self.zero_value
+
+    def addInPlace(self, value1, value2):
+        value1 += value2
+        return value1
+
+
+# Singleton accumulator params for some standard types
+INT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0)
+FLOAT_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0)
+COMPLEX_ACCUMULATOR_PARAM = AddingAccumulatorParam(0.0j)
+
+
+class _UpdateRequestHandler(SocketServer.StreamRequestHandler):
+
+    """
+    This handler will keep polling updates from the same socket until the
+    server is shutdown.
+    """
+
+    def handle(self):
+        from pyspark.accumulators import _accumulatorRegistry
+        auth_token = self.server.auth_token
+
+        def poll(func):
+            while not self.server.server_shutdown:
+                # Poll every 1 second for new data -- don't block in case of shutdown.
+                r, _, _ = select.select([self.rfile], [], [], 1)
+                if self.rfile in r:
+                    if func():
+                        break
+
+        def accum_updates():
+            num_updates = read_int(self.rfile)
+            for _ in range(num_updates):
+                (aid, update) = pickleSer._read_with_length(self.rfile)
+                _accumulatorRegistry[aid] += update
+            # Write a byte in acknowledgement
+            self.wfile.write(struct.pack("!b", 1))
+            return False
+
+        def authenticate_and_accum_updates():
+            received_token = self.rfile.read(len(auth_token))
+            if isinstance(received_token, bytes):
+                received_token = received_token.decode("utf-8")
+            if (received_token == auth_token):
+                accum_updates()
+                # we've authenticated, we can break out of the first loop now
+                return True
+            else:
+                raise Exception(
+                    "The value of the provided token to the AccumulatorServer is not correct.")
+
+        # first we keep polling till we've received the authentication token
+        poll(authenticate_and_accum_updates)
+        # now we've authenticated, don't need to check for the token anymore
+        poll(accum_updates)
+
+
+class AccumulatorServer(SocketServer.TCPServer):
+
+    """
+    A simple TCP server that intercepts shutdown() in order to interrupt
+    our continuous polling on the handler.
+    """
+
+    server_shutdown = False
+
+    def __init__(self, server_address, RequestHandlerClass, auth_token):
+        SocketServer.TCPServer.__init__(self, server_address, RequestHandlerClass)
+        self.auth_token = auth_token
+
+    def shutdown(self):
+        self.server_shutdown = True
+        SocketServer.TCPServer.shutdown(self)
+        self.server_close()
+
+
+def _start_update_server(auth_token):
+    """Start a TCP server to receive accumulator updates in a daemon thread, and returns it"""
+    server = AccumulatorServer(("localhost", 0), _UpdateRequestHandler, auth_token)
+    thread = threading.Thread(target=server.serve_forever)
+    thread.daemon = True
+    thread.start()
+    return server
+
+if __name__ == "__main__":
+    import doctest
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/broadcast.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/broadcast.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c7f2a7418df07327ea494ba467c8d5a4648f515
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/broadcast.py
@@ -0,0 +1,193 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import gc
+import os
+import socket
+import sys
+from tempfile import NamedTemporaryFile
+import threading
+
+from pyspark.cloudpickle import print_exec
+from pyspark.java_gateway import local_connect_and_auth
+from pyspark.serializers import ChunkedStream
+from pyspark.util import _exception_message
+
+if sys.version < '3':
+    import cPickle as pickle
+else:
+    import pickle
+    unicode = str
+
+__all__ = ['Broadcast']
+
+
+# Holds broadcasted data received from Java, keyed by its id.
+_broadcastRegistry = {}
+
+
+def _from_id(bid):
+    from pyspark.broadcast import _broadcastRegistry
+    if bid not in _broadcastRegistry:
+        raise Exception("Broadcast variable '%s' not loaded!" % bid)
+    return _broadcastRegistry[bid]
+
+
+class Broadcast(object):
+
+    """
+    A broadcast variable created with L{SparkContext.broadcast()}.
+    Access its value through C{.value}.
+
+    Examples:
+
+    >>> from pyspark.context import SparkContext
+    >>> sc = SparkContext('local', 'test')
+    >>> b = sc.broadcast([1, 2, 3, 4, 5])
+    >>> b.value
+    [1, 2, 3, 4, 5]
+    >>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect()
+    [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
+    >>> b.unpersist()
+
+    >>> large_broadcast = sc.broadcast(range(10000))
+    """
+
+    def __init__(self, sc=None, value=None, pickle_registry=None, path=None,
+                 sock_file=None):
+        """
+        Should not be called directly by users -- use L{SparkContext.broadcast()}
+        instead.
+        """
+        if sc is not None:
+            # we're on the driver.  We want the pickled data to end up in a file (maybe encrypted)
+            f = NamedTemporaryFile(delete=False, dir=sc._temp_dir)
+            self._path = f.name
+            python_broadcast = sc._jvm.PythonRDD.setupBroadcast(self._path)
+            if sc._encryption_enabled:
+                # with encryption, we ask the jvm to do the encryption for us, we send it data
+                # over a socket
+                port, auth_secret = python_broadcast.setupEncryptionServer()
+                (encryption_sock_file, _) = local_connect_and_auth(port, auth_secret)
+                broadcast_out = ChunkedStream(encryption_sock_file, 8192)
+            else:
+                # no encryption, we can just write pickled data directly to the file from python
+                broadcast_out = f
+            self.dump(value, broadcast_out)
+            if sc._encryption_enabled:
+                python_broadcast.waitTillDataReceived()
+            self._jbroadcast = sc._jsc.broadcast(python_broadcast)
+            self._pickle_registry = pickle_registry
+        else:
+            # we're on an executor
+            self._jbroadcast = None
+            if sock_file is not None:
+                # the jvm is doing decryption for us.  Read the value
+                # immediately from the sock_file
+                self._value = self.load(sock_file)
+            else:
+                # the jvm just dumps the pickled data in path -- we'll unpickle lazily when
+                # the value is requested
+                assert(path is not None)
+                self._path = path
+
+    def dump(self, value, f):
+        try:
+            pickle.dump(value, f, 2)
+        except pickle.PickleError:
+            raise
+        except Exception as e:
+            msg = "Could not serialize broadcast: %s: %s" \
+                  % (e.__class__.__name__, _exception_message(e))
+            print_exec(sys.stderr)
+            raise pickle.PicklingError(msg)
+        f.close()
+
+    def load_from_path(self, path):
+        with open(path, 'rb', 1 << 20) as f:
+            return self.load(f)
+
+    def load(self, file):
+        # "file" could also be a socket
+        gc.disable()
+        try:
+            return pickle.load(file)
+        finally:
+            gc.enable()
+
+    @property
+    def value(self):
+        """ Return the broadcasted value
+        """
+        if not hasattr(self, "_value") and self._path is not None:
+            self._value = self.load_from_path(self._path)
+        return self._value
+
+    def unpersist(self, blocking=False):
+        """
+        Delete cached copies of this broadcast on the executors. If the
+        broadcast is used after this is called, it will need to be
+        re-sent to each executor.
+
+        :param blocking: Whether to block until unpersisting has completed
+        """
+        if self._jbroadcast is None:
+            raise Exception("Broadcast can only be unpersisted in driver")
+        self._jbroadcast.unpersist(blocking)
+
+    def destroy(self):
+        """
+        Destroy all data and metadata related to this broadcast variable.
+        Use this with caution; once a broadcast variable has been destroyed,
+        it cannot be used again. This method blocks until destroy has
+        completed.
+        """
+        if self._jbroadcast is None:
+            raise Exception("Broadcast can only be destroyed in driver")
+        self._jbroadcast.destroy()
+        os.unlink(self._path)
+
+    def __reduce__(self):
+        if self._jbroadcast is None:
+            raise Exception("Broadcast can only be serialized in driver")
+        self._pickle_registry.add(self)
+        return _from_id, (self._jbroadcast.id(),)
+
+
+class BroadcastPickleRegistry(threading.local):
+    """ Thread-local registry for broadcast variables that have been pickled
+    """
+
+    def __init__(self):
+        self.__dict__.setdefault("_registry", set())
+
+    def __iter__(self):
+        for bcast in self._registry:
+            yield bcast
+
+    def add(self, bcast):
+        self._registry.add(bcast)
+
+    def clear(self):
+        self._registry.clear()
+
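+
+# Illustrative sketch, not part of the upstream file: how the pieces above fit
+# together on the executor side. The worker registers each received broadcast
+# in _broadcastRegistry under its id, which is what lets _from_id() resolve the
+# reference produced by Broadcast.__reduce__(). The helper below is hypothetical.
+def _example_registry_lookup(bid, bcast):
+    _broadcastRegistry[bid] = bcast  # normally done by the worker before a task runs
+    return _from_id(bid) is bcast    # True once the id has been registered
+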
+
+if __name__ == "__main__":
+    import doctest
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/cloudpickle.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/cloudpickle.py
new file mode 100644
index 0000000000000000000000000000000000000000..88519d7311fccf7df21c4d77b3e2af21bc907ce8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/cloudpickle.py
@@ -0,0 +1,1078 @@
+"""
+This class is defined to override standard pickle functionality
+
+The goals of it follow:
+-Serialize lambdas and nested functions to compiled byte code
+-Deal with main module correctly
+-Deal with other non-serializable objects
+
+It does not include an unpickler, as standard python unpickling suffices.
+
+This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
+<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
+
+Copyright (c) 2012, Regents of the University of California.
+Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the University of California, Berkeley nor the
+      names of its contributors may be used to endorse or promote
+      products derived from this software without specific prior written
+      permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+from __future__ import print_function
+
+import dis
+from functools import partial
+import imp
+import io
+import itertools
+import logging
+import opcode
+import operator
+import pickle
+import struct
+import sys
+import traceback
+import types
+import weakref
+
+
+if sys.version < '3':
+    from pickle import Pickler
+    try:
+        from cStringIO import StringIO
+    except ImportError:
+        from StringIO import StringIO
+    PY3 = False
+else:
+    types.ClassType = type
+    from pickle import _Pickler as Pickler
+    from io import BytesIO as StringIO
+    PY3 = True
+
+
+def _make_cell_set_template_code():
+    """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF
+
+    Notes
+    -----
+    In Python 3, we could use an easier function:
+
+    .. code-block:: python
+
+       def f():
+           cell = None
+
+           def _stub(value):
+               nonlocal cell
+               cell = value
+
+           return _stub
+
+        _cell_set_template_code = f()
+
+    This function is _only_ a LOAD_FAST(arg); STORE_DEREF, but that is
+    invalid syntax on Python 2. If we use this function we also don't need
+    to do the weird freevars/cellvars swap below
+    """
+    def inner(value):
+        lambda: cell  # make ``cell`` a closure so that we get a STORE_DEREF
+        cell = value
+
+    co = inner.__code__
+
+    # NOTE: we are marking the cell variable as a free variable intentionally
+    # so that we simulate an inner function instead of the outer function. This
+    # is what gives us the ``nonlocal`` behavior in a Python 2 compatible way.
+    if not PY3:
+        return types.CodeType(
+            co.co_argcount,
+            co.co_nlocals,
+            co.co_stacksize,
+            co.co_flags,
+            co.co_code,
+            co.co_consts,
+            co.co_names,
+            co.co_varnames,
+            co.co_filename,
+            co.co_name,
+            co.co_firstlineno,
+            co.co_lnotab,
+            co.co_cellvars,  # this is the trickery
+            (),
+        )
+    else:
+        return types.CodeType(
+            co.co_argcount,
+            co.co_kwonlyargcount,
+            co.co_nlocals,
+            co.co_stacksize,
+            co.co_flags,
+            co.co_code,
+            co.co_consts,
+            co.co_names,
+            co.co_varnames,
+            co.co_filename,
+            co.co_name,
+            co.co_firstlineno,
+            co.co_lnotab,
+            co.co_cellvars,  # this is the trickery
+            (),
+        )
+
+
+_cell_set_template_code = _make_cell_set_template_code()
+
+
+def cell_set(cell, value):
+    """Set the value of a closure cell.
+    """
+    return types.FunctionType(
+        _cell_set_template_code,
+        {},
+        '_cell_set_inner',
+        (),
+        (cell,),
+    )(value)
+
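+
+# Illustrative sketch, not part of the upstream file: cell_set() overwrites a
+# closure cell in place, which is what _fill_function() relies on further down
+# when it rehydrates a function's closure. The helper below is hypothetical.
+def _example_cell_set():
+    def make_reader():
+        n = 0
+
+        def read():
+            return n
+        return read
+
+    fn = make_reader()
+    cell_set(fn.__closure__[0], 41)
+    return fn()  # 41, because the cell behind ``n`` has been replaced
+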
+
+#relevant opcodes
+STORE_GLOBAL = opcode.opmap['STORE_GLOBAL']
+DELETE_GLOBAL = opcode.opmap['DELETE_GLOBAL']
+LOAD_GLOBAL = opcode.opmap['LOAD_GLOBAL']
+GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
+HAVE_ARGUMENT = dis.HAVE_ARGUMENT
+EXTENDED_ARG = dis.EXTENDED_ARG
+
+
+def islambda(func):
+    return getattr(func, '__name__') == '<lambda>'
+
+
+_BUILTIN_TYPE_NAMES = {}
+for k, v in types.__dict__.items():
+    if type(v) is type:
+        _BUILTIN_TYPE_NAMES[v] = k
+
+
+def _builtin_type(name):
+    return getattr(types, name)
+
+
+def _make__new__factory(type_):
+    def _factory():
+        return type_.__new__
+    return _factory
+
+
+# NOTE: These need to be module globals so that they're pickleable as globals.
+_get_dict_new = _make__new__factory(dict)
+_get_frozenset_new = _make__new__factory(frozenset)
+_get_list_new = _make__new__factory(list)
+_get_set_new = _make__new__factory(set)
+_get_tuple_new = _make__new__factory(tuple)
+_get_object_new = _make__new__factory(object)
+
+# Pre-defined set of builtin_function_or_method instances that can be
+# serialized.
+_BUILTIN_TYPE_CONSTRUCTORS = {
+    dict.__new__: _get_dict_new,
+    frozenset.__new__: _get_frozenset_new,
+    set.__new__: _get_set_new,
+    list.__new__: _get_list_new,
+    tuple.__new__: _get_tuple_new,
+    object.__new__: _get_object_new,
+}
+
+
+if sys.version_info < (3, 4):
+    def _walk_global_ops(code):
+        """
+        Yield (opcode, argument number) tuples for all
+        global-referencing instructions in *code*.
+        """
+        code = getattr(code, 'co_code', b'')
+        if not PY3:
+            code = map(ord, code)
+
+        n = len(code)
+        i = 0
+        extended_arg = 0
+        while i < n:
+            op = code[i]
+            i += 1
+            if op >= HAVE_ARGUMENT:
+                oparg = code[i] + code[i + 1] * 256 + extended_arg
+                extended_arg = 0
+                i += 2
+                if op == EXTENDED_ARG:
+                    extended_arg = oparg * 65536
+                if op in GLOBAL_OPS:
+                    yield op, oparg
+
+else:
+    def _walk_global_ops(code):
+        """
+        Yield (opcode, argument number) tuples for all
+        global-referencing instructions in *code*.
+        """
+        for instr in dis.get_instructions(code):
+            op = instr.opcode
+            if op in GLOBAL_OPS:
+                yield op, instr.arg
+
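+
+# Illustrative sketch, not part of the upstream file: pairing the opargs yielded
+# by _walk_global_ops() with co_names recovers the global names a code object
+# references, which is what CloudPickler.extract_code_globals() does below.
+# The helper is hypothetical.
+def _example_global_names(func):
+    code = func.__code__
+    return set(code.co_names[oparg] for _, oparg in _walk_global_ops(code))
+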
+
+class CloudPickler(Pickler):
+
+    dispatch = Pickler.dispatch.copy()
+
+    def __init__(self, file, protocol=None):
+        Pickler.__init__(self, file, protocol)
+        # set of modules to unpickle
+        self.modules = set()
+        # map ids to dictionaries; used to ensure that functions can share a global env
+        self.globals_ref = {}
+
+    def dump(self, obj):
+        self.inject_addons()
+        try:
+            return Pickler.dump(self, obj)
+        except RuntimeError as e:
+            if 'recursion' in e.args[0]:
+                msg = """Could not pickle object as excessively deep recursion required."""
+                raise pickle.PicklingError(msg)
+            else:
+                raise
+
+    def save_memoryview(self, obj):
+        self.save(obj.tobytes())
+    dispatch[memoryview] = save_memoryview
+
+    if not PY3:
+        def save_buffer(self, obj):
+            self.save(str(obj))
+        dispatch[buffer] = save_buffer  # noqa: F821 'buffer' was removed in Python 3
+
+    def save_unsupported(self, obj):
+        raise pickle.PicklingError("Cannot pickle objects of type %s" % type(obj))
+    dispatch[types.GeneratorType] = save_unsupported
+
+    # itertools objects do not pickle!
+    for v in itertools.__dict__.values():
+        if type(v) is type:
+            dispatch[v] = save_unsupported
+
+    def save_module(self, obj):
+        """
+        Save a module as an import
+        """
+        mod_name = obj.__name__
+        # If module is successfully found then it is not a dynamically created module
+        if hasattr(obj, '__file__'):
+            is_dynamic = False
+        else:
+            try:
+                _find_module(mod_name)
+                is_dynamic = False
+            except ImportError:
+                is_dynamic = True
+
+        self.modules.add(obj)
+        if is_dynamic:
+            self.save_reduce(dynamic_subimport, (obj.__name__, vars(obj)), obj=obj)
+        else:
+            self.save_reduce(subimport, (obj.__name__,), obj=obj)
+    dispatch[types.ModuleType] = save_module
+
+    def save_codeobject(self, obj):
+        """
+        Save a code object
+        """
+        if PY3:
+            args = (
+                obj.co_argcount, obj.co_kwonlyargcount, obj.co_nlocals, obj.co_stacksize,
+                obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, obj.co_varnames,
+                obj.co_filename, obj.co_name, obj.co_firstlineno, obj.co_lnotab, obj.co_freevars,
+                obj.co_cellvars
+            )
+        else:
+            args = (
+                obj.co_argcount, obj.co_nlocals, obj.co_stacksize, obj.co_flags, obj.co_code,
+                obj.co_consts, obj.co_names, obj.co_varnames, obj.co_filename, obj.co_name,
+                obj.co_firstlineno, obj.co_lnotab, obj.co_freevars, obj.co_cellvars
+            )
+        self.save_reduce(types.CodeType, args, obj=obj)
+    dispatch[types.CodeType] = save_codeobject
+
+    def save_function(self, obj, name=None):
+        """ Registered with the dispatch to handle all function types.
+
+        Determines what kind of function obj is (e.g. lambda, defined at
+        interactive prompt, etc) and handles the pickling appropriately.
+        """
+        try:
+            should_special_case = obj in _BUILTIN_TYPE_CONSTRUCTORS
+        except TypeError:
+            # Methods of builtin types aren't hashable in python 2.
+            should_special_case = False
+
+        if should_special_case:
+            # We keep a special-cased cache of built-in type constructors at
+            # global scope, because these functions are structured very
+            # differently in different python versions and implementations (for
+            # example, they're instances of types.BuiltinFunctionType in
+            # CPython, but they're ordinary types.FunctionType instances in
+            # PyPy).
+            #
+            # If the function we've received is in that cache, we just
+            # serialize it as a lookup into the cache.
+            return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj)
+
+        write = self.write
+
+        if name is None:
+            name = obj.__name__
+        try:
+            # whichmodule() could fail, see
+            # https://bitbucket.org/gutworth/six/issues/63/importing-six-breaks-pickling
+            modname = pickle.whichmodule(obj, name)
+        except Exception:
+            modname = None
+        # print('which gives %s %s %s' % (modname, obj, name))
+        try:
+            themodule = sys.modules[modname]
+        except KeyError:
+            # eval'd items such as namedtuple give invalid items for their function __module__
+            modname = '__main__'
+
+        if modname == '__main__':
+            themodule = None
+
+        if themodule:
+            self.modules.add(themodule)
+            if getattr(themodule, name, None) is obj:
+                return self.save_global(obj, name)
+
+        # a builtin_function_or_method which comes in as an attribute of some
+        # object (e.g., itertools.chain.from_iterable) will end
+        # up with modname "__main__" and so end up here. But these functions
+        # have no __code__ attribute in CPython, so the handling for
+        # user-defined functions below will fail.
+        # So we pickle them here using save_reduce; have to do it differently
+        # for different python versions.
+        if not hasattr(obj, '__code__'):
+            if PY3:
+                rv = obj.__reduce_ex__(self.proto)
+            else:
+                if hasattr(obj, '__self__'):
+                    rv = (getattr, (obj.__self__, name))
+                else:
+                    raise pickle.PicklingError("Can't pickle %r" % obj)
+            return self.save_reduce(obj=obj, *rv)
+
+        # if func is lambda, def'ed at prompt, is in main, or is nested, then
+        # we'll pickle the actual function object rather than simply saving a
+        # reference (as is done in default pickler), via save_function_tuple.
+        if (islambda(obj)
+                or getattr(obj.__code__, 'co_filename', None) == '<stdin>'
+                or themodule is None):
+            self.save_function_tuple(obj)
+            return
+        else:
+            # func is nested
+            klass = getattr(themodule, name, None)
+            if klass is None or klass is not obj:
+                self.save_function_tuple(obj)
+                return
+
+        if obj.__dict__:
+            # essentially save_reduce, but workaround needed to avoid recursion
+            self.save(_restore_attr)
+            write(pickle.MARK + pickle.GLOBAL + modname + '\n' + name + '\n')
+            self.memoize(obj)
+            self.save(obj.__dict__)
+            write(pickle.TUPLE + pickle.REDUCE)
+        else:
+            write(pickle.GLOBAL + modname + '\n' + name + '\n')
+            self.memoize(obj)
+    dispatch[types.FunctionType] = save_function
+
+    def _save_subimports(self, code, top_level_dependencies):
+        """
+        Ensure de-pickler imports any package child-modules that
+        are needed by the function
+        """
+        # check if any known dependency is an imported package
+        for x in top_level_dependencies:
+            if isinstance(x, types.ModuleType) and hasattr(x, '__package__') and x.__package__:
+                # check if the package has any currently loaded sub-imports
+                prefix = x.__name__ + '.'
+                for name, module in sys.modules.items():
+                    # Older versions of pytest will add a "None" module to sys.modules.
+                    if name is not None and name.startswith(prefix):
+                        # check whether the function can address the sub-module
+                        tokens = set(name[len(prefix):].split('.'))
+                        if not tokens - set(code.co_names):
+                            # ensure unpickler executes this import
+                            self.save(module)
+                            # then discards the reference to it
+                            self.write(pickle.POP)
+
+    def save_dynamic_class(self, obj):
+        """
+        Save a class that can't be stored as module global.
+
+        This method is used to serialize classes that are defined inside
+        functions, or that otherwise can't be serialized as attribute lookups
+        from global modules.
+        """
+        clsdict = dict(obj.__dict__)  # copy dict proxy to a dict
+        clsdict.pop('__weakref__', None)
+
+        # On PyPy, __doc__ is a readonly attribute, so we need to include it in
+        # the initial skeleton class.  This is safe because we know that the
+        # doc can't participate in a cycle with the original class.
+        type_kwargs = {'__doc__': clsdict.pop('__doc__', None)}
+
+        # If type overrides __dict__ as a property, include it in the type kwargs.
+        # In Python 2, we can't set this attribute after construction.
+        __dict__ = clsdict.pop('__dict__', None)
+        if isinstance(__dict__, property):
+            type_kwargs['__dict__'] = __dict__
+
+        save = self.save
+        write = self.write
+
+        # We write pickle instructions explicitly here to handle the
+        # possibility that the type object participates in a cycle with its own
+        # __dict__. We first write an empty "skeleton" version of the class and
+        # memoize it before writing the class' __dict__ itself. We then write
+        # instructions to "rehydrate" the skeleton class by restoring the
+        # attributes from the __dict__.
+        #
+        # A type can appear in a cycle with its __dict__ if an instance of the
+        # type appears in the type's __dict__ (which happens for the stdlib
+        # Enum class), or if the type defines methods that close over the name
+        # of the type, (which is common for Python 2-style super() calls).
+
+        # Push the rehydration function.
+        save(_rehydrate_skeleton_class)
+
+        # Mark the start of the args tuple for the rehydration function.
+        write(pickle.MARK)
+
+        # Create and memoize a skeleton class with obj's name and bases.
+        tp = type(obj)
+        self.save_reduce(tp, (obj.__name__, obj.__bases__, type_kwargs), obj=obj)
+
+        # Now save the rest of obj's __dict__. Any references to obj
+        # encountered while saving will point to the skeleton class.
+        save(clsdict)
+
+        # Write a tuple of (skeleton_class, clsdict).
+        write(pickle.TUPLE)
+
+        # Call _rehydrate_skeleton_class(skeleton_class, clsdict)
+        write(pickle.REDUCE)
+
+    def save_function_tuple(self, func):
+        """  Pickles an actual func object.
+
+        A func comprises: code, globals, defaults, closure, and dict.  We
+        extract and save these, injecting reducing functions at certain points
+        to recreate the func object.  Keep in mind that some of these pieces
+        can contain a ref to the func itself.  Thus, a naive save on these
+        pieces could trigger an infinite loop of save's.  To get around that,
+        we first create a skeleton func object using just the code (this is
+        safe, since this won't contain a ref to the func), and memoize it as
+        soon as it's created.  The other stuff can then be filled in later.
+        """
+        if is_tornado_coroutine(func):
+            self.save_reduce(_rebuild_tornado_coroutine, (func.__wrapped__,),
+                             obj=func)
+            return
+
+        save = self.save
+        write = self.write
+
+        code, f_globals, defaults, closure_values, dct, base_globals = self.extract_func_data(func)
+
+        save(_fill_function)  # skeleton function updater
+        write(pickle.MARK)    # beginning of tuple that _fill_function expects
+
+        self._save_subimports(
+            code,
+            itertools.chain(f_globals.values(), closure_values or ()),
+        )
+
+        # create a skeleton function object and memoize it
+        save(_make_skel_func)
+        save((
+            code,
+            len(closure_values) if closure_values is not None else -1,
+            base_globals,
+        ))
+        write(pickle.REDUCE)
+        self.memoize(func)
+
+        # save the rest of the func data needed by _fill_function
+        state = {
+            'globals': f_globals,
+            'defaults': defaults,
+            'dict': dct,
+            'module': func.__module__,
+            'closure_values': closure_values,
+        }
+        if hasattr(func, '__qualname__'):
+            state['qualname'] = func.__qualname__
+        save(state)
+        write(pickle.TUPLE)
+        write(pickle.REDUCE)  # applies _fill_function on the tuple
+
+    _extract_code_globals_cache = (
+        weakref.WeakKeyDictionary()
+        if not hasattr(sys, "pypy_version_info")
+        else {})
+
+    @classmethod
+    def extract_code_globals(cls, co):
+        """
+        Find all global names read or written to by the code block co
+        """
+        out_names = cls._extract_code_globals_cache.get(co)
+        if out_names is None:
+            try:
+                names = co.co_names
+            except AttributeError:
+                # PyPy "builtin-code" object
+                out_names = set()
+            else:
+                out_names = set(names[oparg]
+                                for op, oparg in _walk_global_ops(co))
+
+                # see if nested functions have any global refs
+                if co.co_consts:
+                    for const in co.co_consts:
+                        if type(const) is types.CodeType:
+                            out_names |= cls.extract_code_globals(const)
+
+            cls._extract_code_globals_cache[co] = out_names
+
+        return out_names
+
+    def extract_func_data(self, func):
+        """
+        Turn the function into a tuple of data necessary to recreate it:
+            code, globals, defaults, closure_values, dict
+        """
+        code = func.__code__
+
+        # extract all global ref's
+        func_global_refs = self.extract_code_globals(code)
+
+        # process all variables referenced by global environment
+        f_globals = {}
+        for var in func_global_refs:
+            if var in func.__globals__:
+                f_globals[var] = func.__globals__[var]
+
+        # defaults requires no processing
+        defaults = func.__defaults__
+
+        # process closure
+        closure = (
+            list(map(_get_cell_contents, func.__closure__))
+            if func.__closure__ is not None
+            else None
+        )
+
+        # save the dict
+        dct = func.__dict__
+
+        base_globals = self.globals_ref.get(id(func.__globals__), {})
+        self.globals_ref[id(func.__globals__)] = base_globals
+
+        return (code, f_globals, defaults, closure, dct, base_globals)
+
+    def save_builtin_function(self, obj):
+        if obj.__module__ == "__builtin__":
+            return self.save_global(obj)
+        return self.save_function(obj)
+    dispatch[types.BuiltinFunctionType] = save_builtin_function
+
+    def save_global(self, obj, name=None, pack=struct.pack):
+        """
+        Save a "global".
+
+        The name of this method is somewhat misleading: all types get
+        dispatched here.
+        """
+        if obj.__module__ == "__main__":
+            return self.save_dynamic_class(obj)
+
+        try:
+            return Pickler.save_global(self, obj, name=name)
+        except Exception:
+            if obj.__module__ == "__builtin__" or obj.__module__ == "builtins":
+                if obj in _BUILTIN_TYPE_NAMES:
+                    return self.save_reduce(
+                        _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj)
+
+            typ = type(obj)
+            if typ is not obj and isinstance(obj, (type, types.ClassType)):
+                return self.save_dynamic_class(obj)
+
+            raise
+
+    dispatch[type] = save_global
+    dispatch[types.ClassType] = save_global
+
+    def save_instancemethod(self, obj):
+        # Memoization is rarely useful here because of Python's method binding
+        if obj.__self__ is None:
+            self.save_reduce(getattr, (obj.im_class, obj.__name__))
+        else:
+            if PY3:
+                self.save_reduce(types.MethodType, (obj.__func__, obj.__self__), obj=obj)
+            else:
+                self.save_reduce(types.MethodType,
+                                 (obj.__func__, obj.__self__, obj.__self__.__class__),
+                                 obj=obj)
+    dispatch[types.MethodType] = save_instancemethod
+
+    def save_inst(self, obj):
+        """Inner logic to save instance. Based off pickle.save_inst"""
+        cls = obj.__class__
+
+        # Try the dispatch table (pickle module doesn't do it)
+        f = self.dispatch.get(cls)
+        if f:
+            f(self, obj)  # Call unbound method with explicit self
+            return
+
+        memo = self.memo
+        write = self.write
+        save = self.save
+
+        if hasattr(obj, '__getinitargs__'):
+            args = obj.__getinitargs__()
+            len(args)  # XXX Assert it's a sequence
+            pickle._keep_alive(args, memo)
+        else:
+            args = ()
+
+        write(pickle.MARK)
+
+        if self.bin:
+            save(cls)
+            for arg in args:
+                save(arg)
+            write(pickle.OBJ)
+        else:
+            for arg in args:
+                save(arg)
+            write(pickle.INST + cls.__module__ + '\n' + cls.__name__ + '\n')
+
+        self.memoize(obj)
+
+        try:
+            getstate = obj.__getstate__
+        except AttributeError:
+            stuff = obj.__dict__
+        else:
+            stuff = getstate()
+            pickle._keep_alive(stuff, memo)
+        save(stuff)
+        write(pickle.BUILD)
+
+    if not PY3:
+        dispatch[types.InstanceType] = save_inst
+
+    def save_property(self, obj):
+        # property objects are not saved correctly by the default pickler
+        self.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), obj=obj)
+    dispatch[property] = save_property
+
+    def save_classmethod(self, obj):
+        orig_func = obj.__func__
+        self.save_reduce(type(obj), (orig_func,), obj=obj)
+    dispatch[classmethod] = save_classmethod
+    dispatch[staticmethod] = save_classmethod
+
+    def save_itemgetter(self, obj):
+        """itemgetter serializer (needed for namedtuple support)"""
+        class Dummy:
+            def __getitem__(self, item):
+                return item
+        items = obj(Dummy())
+        if not isinstance(items, tuple):
+            items = (items, )
+        return self.save_reduce(operator.itemgetter, items)
+
+    if type(operator.itemgetter) is type:
+        dispatch[operator.itemgetter] = save_itemgetter
+
+    def save_attrgetter(self, obj):
+        """attrgetter serializer"""
+        class Dummy(object):
+            def __init__(self, attrs, index=None):
+                self.attrs = attrs
+                self.index = index
+            def __getattribute__(self, item):
+                attrs = object.__getattribute__(self, "attrs")
+                index = object.__getattribute__(self, "index")
+                if index is None:
+                    index = len(attrs)
+                    attrs.append(item)
+                else:
+                    attrs[index] = ".".join([attrs[index], item])
+                return type(self)(attrs, index)
+        attrs = []
+        obj(Dummy(attrs))
+        return self.save_reduce(operator.attrgetter, tuple(attrs))
+
+    if type(operator.attrgetter) is type:
+        dispatch[operator.attrgetter] = save_attrgetter
+
+    def save_file(self, obj):
+        """Save a file"""
+        try:
+            import StringIO as pystringIO  # we can't use cStringIO as it lacks the name attribute
+        except ImportError:
+            import io as pystringIO
+
+        if not hasattr(obj, 'name') or not hasattr(obj, 'mode'):
+            raise pickle.PicklingError("Cannot pickle files that do not map to an actual file")
+        if obj is sys.stdout:
+            return self.save_reduce(getattr, (sys,'stdout'), obj=obj)
+        if obj is sys.stderr:
+            return self.save_reduce(getattr, (sys,'stderr'), obj=obj)
+        if obj is sys.stdin:
+            raise pickle.PicklingError("Cannot pickle standard input")
+        if obj.closed:
+            raise pickle.PicklingError("Cannot pickle closed files")
+        if hasattr(obj, 'isatty') and obj.isatty():
+            raise pickle.PicklingError("Cannot pickle files that map to tty objects")
+        if 'r' not in obj.mode and '+' not in obj.mode:
+            raise pickle.PicklingError("Cannot pickle files that are not opened for reading: %s" % obj.mode)
+
+        name = obj.name
+
+        retval = pystringIO.StringIO()
+
+        try:
+            # Read the whole file
+            curloc = obj.tell()
+            obj.seek(0)
+            contents = obj.read()
+            obj.seek(curloc)
+        except IOError:
+            raise pickle.PicklingError("Cannot pickle file %s as it cannot be read" % name)
+        retval.write(contents)
+        retval.seek(curloc)
+
+        retval.name = name
+        self.save(retval)
+        self.memoize(obj)
+
+    def save_ellipsis(self, obj):
+        self.save_reduce(_gen_ellipsis, ())
+
+    def save_not_implemented(self, obj):
+        self.save_reduce(_gen_not_implemented, ())
+
+    try:               # Python 2
+        dispatch[file] = save_file
+    except NameError:  # Python 3
+        dispatch[io.TextIOWrapper] = save_file
+
+    dispatch[type(Ellipsis)] = save_ellipsis
+    dispatch[type(NotImplemented)] = save_not_implemented
+
+    def save_weakset(self, obj):
+        self.save_reduce(weakref.WeakSet, (list(obj),))
+
+    dispatch[weakref.WeakSet] = save_weakset
+
+    def save_logger(self, obj):
+        self.save_reduce(logging.getLogger, (obj.name,), obj=obj)
+
+    dispatch[logging.Logger] = save_logger
+
+    def save_root_logger(self, obj):
+        self.save_reduce(logging.getLogger, (), obj=obj)
+
+    dispatch[logging.RootLogger] = save_root_logger
+
+    """Special functions for Add-on libraries"""
+    def inject_addons(self):
+        """Plug in system. Register additional pickling functions if modules already loaded"""
+        pass
+
+
+# Tornado support
+
+def is_tornado_coroutine(func):
+    """
+    Return whether *func* is a Tornado coroutine function.
+    Running coroutines are not supported.
+    """
+    if 'tornado.gen' not in sys.modules:
+        return False
+    gen = sys.modules['tornado.gen']
+    if not hasattr(gen, "is_coroutine_function"):
+        # Tornado version is too old
+        return False
+    return gen.is_coroutine_function(func)
+
+def _rebuild_tornado_coroutine(func):
+    from tornado import gen
+    return gen.coroutine(func)
+
+
+# Shorthands for legacy support
+
+def dump(obj, file, protocol=2):
+    CloudPickler(file, protocol).dump(obj)
+
+
+def dumps(obj, protocol=2):
+    file = StringIO()
+    try:
+        cp = CloudPickler(file, protocol)
+        cp.dump(obj)
+        return file.getvalue()
+    finally:
+        file.close()
+
+# including pickles unloading functions in this namespace
+load = pickle.load
+loads = pickle.loads
+
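+
+# Illustrative sketch, not part of the upstream file: the round trip that plain
+# pickle refuses but CloudPickler handles -- serializing a lambda by value and
+# restoring it with the standard unpickler. The helper below is hypothetical.
+def _example_lambda_roundtrip():
+    add_one = lambda x: x + 1
+    payload = dumps(add_one)   # serialized via CloudPickler
+    restored = loads(payload)  # ordinary pickle.loads is sufficient
+    return restored(41)        # 42
+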
+
+#hack for __import__ not working as desired
+def subimport(name):
+    __import__(name)
+    return sys.modules[name]
+
+
+def dynamic_subimport(name, vars):
+    mod = imp.new_module(name)
+    mod.__dict__.update(vars)
+    sys.modules[name] = mod
+    return mod
+
+# restores function attributes
+def _restore_attr(obj, attr):
+    for key, val in attr.items():
+        setattr(obj, key, val)
+    return obj
+
+
+def _get_module_builtins():
+    return pickle.__builtins__
+
+
+def print_exec(stream):
+    ei = sys.exc_info()
+    traceback.print_exception(ei[0], ei[1], ei[2], None, stream)
+
+
+def _modules_to_main(modList):
+    """Force every module in modList to be placed into main"""
+    if not modList:
+        return
+
+    main = sys.modules['__main__']
+    for modname in modList:
+        if type(modname) is str:
+            try:
+                mod = __import__(modname)
+            except Exception as e:
+                sys.stderr.write('warning: could not import %s.\n'
+                                 'Your function may fail unexpectedly because this import '
+                                 'failed; a version mismatch is likely. Specific error was:\n'
+                                 % modname)
+                print_exec(sys.stderr)
+            else:
+                setattr(main, mod.__name__, mod)
+
+
+#object generators:
+def _genpartial(func, args, kwds):
+    if not args:
+        args = ()
+    if not kwds:
+        kwds = {}
+    return partial(func, *args, **kwds)
+
+def _gen_ellipsis():
+    return Ellipsis
+
+def _gen_not_implemented():
+    return NotImplemented
+
+
+def _get_cell_contents(cell):
+    try:
+        return cell.cell_contents
+    except ValueError:
+        # sentinel used by ``_fill_function`` which will leave the cell empty
+        return _empty_cell_value
+
+
+def instance(cls):
+    """Create a new instance of a class.
+
+    Parameters
+    ----------
+    cls : type
+        The class to create an instance of.
+
+    Returns
+    -------
+    instance : cls
+        A new instance of ``cls``.
+    """
+    return cls()
+
+
+@instance
+class _empty_cell_value(object):
+    """sentinel for empty closures
+    """
+    @classmethod
+    def __reduce__(cls):
+        return cls.__name__
+
+
+def _fill_function(*args):
+    """Fills in the rest of function data into the skeleton function object
+
+    The skeleton itself is created by _make_skel_func().
+    """
+    if len(args) == 2:
+        func = args[0]
+        state = args[1]
+    elif len(args) == 5:
+        # Backwards compat for cloudpickle v0.4.0, after which the `module`
+        # argument was introduced
+        func = args[0]
+        keys = ['globals', 'defaults', 'dict', 'closure_values']
+        state = dict(zip(keys, args[1:]))
+    elif len(args) == 6:
+        # Backwards compat for cloudpickle v0.4.1, after which the function
+        # state was passed as a dict to the _fill_function it-self.
+        func = args[0]
+        keys = ['globals', 'defaults', 'dict', 'module', 'closure_values']
+        state = dict(zip(keys, args[1:]))
+    else:
+        raise ValueError('Unexpected _fill_function arguments: %r' % (args,))
+
+    func.__globals__.update(state['globals'])
+    func.__defaults__ = state['defaults']
+    func.__dict__ = state['dict']
+    if 'module' in state:
+        func.__module__ = state['module']
+    if 'qualname' in state:
+        func.__qualname__ = state['qualname']
+
+    cells = func.__closure__
+    if cells is not None:
+        for cell, value in zip(cells, state['closure_values']):
+            if value is not _empty_cell_value:
+                cell_set(cell, value)
+
+    return func
+
+
+def _make_empty_cell():
+    if False:
+        # trick the compiler into creating an empty cell in our lambda
+        cell = None
+        raise AssertionError('this route should not be executed')
+
+    return (lambda: cell).__closure__[0]
+
+
+def _make_skel_func(code, cell_count, base_globals=None):
+    """ Creates a skeleton function object that contains just the provided
+        code and the correct number of cells in func_closure.  All other
+        func attributes (e.g. func_globals) are empty.
+    """
+    if base_globals is None:
+        base_globals = {}
+    base_globals['__builtins__'] = __builtins__
+
+    closure = (
+        tuple(_make_empty_cell() for _ in range(cell_count))
+        if cell_count >= 0 else
+        None
+    )
+    return types.FunctionType(code, base_globals, None, None, closure)
+
+
+def _rehydrate_skeleton_class(skeleton_class, class_dict):
+    """Put attributes from `class_dict` back on `skeleton_class`.
+
+    See CloudPickler.save_dynamic_class for more info.
+    """
+    for attrname, attr in class_dict.items():
+        setattr(skeleton_class, attrname, attr)
+    return skeleton_class
+
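+
+# Illustrative sketch, not part of the upstream file: rehydration simply sets
+# every saved attribute back onto the freshly built type, which is why classes
+# whose __dict__ refers back to the class itself can be pickled safely.
+# The example below is hypothetical.
+def _example_rehydrate():
+    skeleton = type('Point', (object,), {})
+    rebuilt = _rehydrate_skeleton_class(skeleton, {'dims': 2, 'origin': (0, 0)})
+    return rebuilt.dims, rebuilt.origin  # (2, (0, 0))
+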
+
+def _find_module(mod_name):
+    """
+    Iterate over each part instead of calling imp.find_module directly.
+    This function is able to find submodules (e.g. sklearn.tree).
+    """
+    path = None
+    for part in mod_name.split('.'):
+        if path is not None:
+            path = [path]
+        file, path, description = imp.find_module(part, path)
+        if file is not None:
+            file.close()
+    return path, description
+
+"""Constructors for 3rd party libraries
+Note: These can never be renamed due to client compatibility issues"""
+
+def _getobject(modname, attribute):
+    mod = __import__(modname, fromlist=[attribute])
+    return mod.__dict__[attribute]
+
+
+""" Use copy_reg to extend global pickle definitions """
+
+if sys.version_info < (3, 4):
+    method_descriptor = type(str.upper)
+
+    def _reduce_method_descriptor(obj):
+        return (getattr, (obj.__objclass__, obj.__name__))
+
+    try:
+        import copy_reg as copyreg
+    except ImportError:
+        import copyreg
+    copyreg.pickle(method_descriptor, _reduce_method_descriptor)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/conf.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab429d9ab10dedd0b349f9ba078fdf0e20cd21af
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/conf.py
@@ -0,0 +1,224 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+>>> from pyspark.conf import SparkConf
+>>> from pyspark.context import SparkContext
+>>> conf = SparkConf()
+>>> conf.setMaster("local").setAppName("My app")
+<pyspark.conf.SparkConf object at ...>
+>>> conf.get("spark.master")
+u'local'
+>>> conf.get("spark.app.name")
+u'My app'
+>>> sc = SparkContext(conf=conf)
+>>> sc.master
+u'local'
+>>> sc.appName
+u'My app'
+>>> sc.sparkHome is None
+True
+
+>>> conf = SparkConf(loadDefaults=False)
+>>> conf.setSparkHome("/path")
+<pyspark.conf.SparkConf object at ...>
+>>> conf.get("spark.home")
+u'/path'
+>>> conf.setExecutorEnv("VAR1", "value1")
+<pyspark.conf.SparkConf object at ...>
+>>> conf.setExecutorEnv(pairs = [("VAR3", "value3"), ("VAR4", "value4")])
+<pyspark.conf.SparkConf object at ...>
+>>> conf.get("spark.executorEnv.VAR1")
+u'value1'
+>>> print(conf.toDebugString())
+spark.executorEnv.VAR1=value1
+spark.executorEnv.VAR3=value3
+spark.executorEnv.VAR4=value4
+spark.home=/path
+>>> sorted(conf.getAll(), key=lambda p: p[0])
+[(u'spark.executorEnv.VAR1', u'value1'), (u'spark.executorEnv.VAR3', u'value3'), \
+(u'spark.executorEnv.VAR4', u'value4'), (u'spark.home', u'/path')]
+>>> conf._jconf.setExecutorEnv("VAR5", "value5")
+JavaObject id...
+>>> print(conf.toDebugString())
+spark.executorEnv.VAR1=value1
+spark.executorEnv.VAR3=value3
+spark.executorEnv.VAR4=value4
+spark.executorEnv.VAR5=value5
+spark.home=/path
+"""
+
+__all__ = ['SparkConf']
+
+import sys
+import re
+
+if sys.version > '3':
+    unicode = str
+    __doc__ = re.sub(r"(\W|^)[uU](['])", r'\1\2', __doc__)
+
+
+class SparkConf(object):
+
+    """
+    Configuration for a Spark application. Used to set various Spark
+    parameters as key-value pairs.
+
+    Most of the time, you would create a SparkConf object with
+    C{SparkConf()}, which will load values from C{spark.*} Java system
+    properties as well. In this case, any parameters you set directly on
+    the C{SparkConf} object take priority over system properties.
+
+    For unit tests, you can also call C{SparkConf(false)} to skip
+    loading external settings and get the same configuration no matter
+    what the system properties are.
+
+    All setter methods in this class support chaining. For example,
+    you can write C{conf.setMaster("local").setAppName("My app")}.
+
+    .. note:: Once a SparkConf object is passed to Spark, it is cloned
+        and can no longer be modified by the user.
+    """
+
+    def __init__(self, loadDefaults=True, _jvm=None, _jconf=None):
+        """
+        Create a new Spark configuration.
+
+        :param loadDefaults: whether to load values from Java system
+               properties (True by default)
+        :param _jvm: internal parameter used to pass a handle to the
+               Java VM; does not need to be set by users
+        :param _jconf: Optionally pass in an existing SparkConf handle
+               to use its parameters
+        """
+        if _jconf:
+            self._jconf = _jconf
+        else:
+            from pyspark.context import SparkContext
+            _jvm = _jvm or SparkContext._jvm
+
+            if _jvm is not None:
+                # JVM is created, so create self._jconf directly through JVM
+                self._jconf = _jvm.SparkConf(loadDefaults)
+                self._conf = None
+            else:
+                # JVM is not created, so store data in self._conf first
+                self._jconf = None
+                self._conf = {}
+
+    def set(self, key, value):
+        """Set a configuration property."""
+        # Try to set self._jconf first if JVM is created, set self._conf if JVM is not created yet.
+        if self._jconf is not None:
+            self._jconf.set(key, unicode(value))
+        else:
+            self._conf[key] = unicode(value)
+        return self
+
+    def setIfMissing(self, key, value):
+        """Set a configuration property, if not already set."""
+        if self.get(key) is None:
+            self.set(key, value)
+        return self
+
+    def setMaster(self, value):
+        """Set master URL to connect to."""
+        self.set("spark.master", value)
+        return self
+
+    def setAppName(self, value):
+        """Set application name."""
+        self.set("spark.app.name", value)
+        return self
+
+    def setSparkHome(self, value):
+        """Set path where Spark is installed on worker nodes."""
+        self.set("spark.home", value)
+        return self
+
+    def setExecutorEnv(self, key=None, value=None, pairs=None):
+        """Set an environment variable to be passed to executors."""
+        if (key is not None and pairs is not None) or (key is None and pairs is None):
+            raise Exception("Either pass one key-value pair or a list of pairs")
+        elif key is not None:
+            self.set("spark.executorEnv." + key, value)
+        elif pairs is not None:
+            for (k, v) in pairs:
+                self.set("spark.executorEnv." + k, v)
+        return self
+
+    def setAll(self, pairs):
+        """
+        Set multiple parameters, passed as a list of key-value pairs.
+
+        :param pairs: list of key-value pairs to set
+        """
+        for (k, v) in pairs:
+            self.set(k, v)
+        return self
+
+    def get(self, key, defaultValue=None):
+        """Get the configured value for some key, or return a default otherwise."""
+        if defaultValue is None:   # Py4J doesn't call the right get() if we pass None
+            if self._jconf is not None:
+                if not self._jconf.contains(key):
+                    return None
+                return self._jconf.get(key)
+            else:
+                if key not in self._conf:
+                    return None
+                return self._conf[key]
+        else:
+            if self._jconf is not None:
+                return self._jconf.get(key, defaultValue)
+            else:
+                return self._conf.get(key, defaultValue)
+
+    def getAll(self):
+        """Get all values as a list of key-value pairs."""
+        if self._jconf is not None:
+            return [(elem._1(), elem._2()) for elem in self._jconf.getAll()]
+        else:
+            return self._conf.items()
+
+    def contains(self, key):
+        """Does this configuration contain a given key?"""
+        if self._jconf is not None:
+            return self._jconf.contains(key)
+        else:
+            return key in self._conf
+
+    def toDebugString(self):
+        """
+        Returns a printable version of the configuration, as a list of
+        key=value pairs, one per line.
+        """
+        if self._jconf is not None:
+            return self._jconf.toDebugString()
+        else:
+            return '\n'.join('%s=%s' % (k, v) for k, v in self._conf.items())
+
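+
+# Illustrative sketch, not part of the upstream file: a SparkConf can be built
+# and inspected before any JVM exists; until a SparkContext is created the
+# values live in the plain dict self._conf. The helper below is hypothetical.
+def _example_offline_conf():
+    conf = SparkConf(loadDefaults=False)
+    conf.setMaster("local[2]").setAppName("example")
+    return conf.get("spark.app.name"), conf.contains("spark.master")  # ('example', True)
+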
+
+def _test():
+    import doctest
+    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/context.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/context.py
new file mode 100644
index 0000000000000000000000000000000000000000..0924d3d95f0447afcae4517731efbd03f392eee2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/context.py
@@ -0,0 +1,1092 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import os
+import shutil
+import signal
+import sys
+import threading
+import warnings
+from threading import RLock
+from tempfile import NamedTemporaryFile
+
+from py4j.protocol import Py4JError
+
+from pyspark import accumulators
+from pyspark.accumulators import Accumulator
+from pyspark.broadcast import Broadcast, BroadcastPickleRegistry
+from pyspark.conf import SparkConf
+from pyspark.files import SparkFiles
+from pyspark.java_gateway import launch_gateway, local_connect_and_auth
+from pyspark.serializers import PickleSerializer, BatchedSerializer, UTF8Deserializer, \
+    PairDeserializer, AutoBatchedSerializer, NoOpSerializer, ChunkedStream
+from pyspark.storagelevel import StorageLevel
+from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix
+from pyspark.traceback_utils import CallSite, first_spark_call
+from pyspark.status import StatusTracker
+from pyspark.profiler import ProfilerCollector, BasicProfiler
+
+if sys.version > '3':
+    xrange = range
+
+
+__all__ = ['SparkContext']
+
+
+# These are special default configs for PySpark; they will override the
+# corresponding Spark defaults unless the user has configured them explicitly.
+DEFAULT_CONFIGS = {
+    "spark.serializer.objectStreamReset": 100,
+    "spark.rdd.compress": True,
+}
+
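+
+# Illustrative sketch, not part of the upstream file: how the defaults above are
+# folded into a user-supplied conf -- explicitly set keys win, and anything
+# missing falls back to the PySpark defaults (this mirrors _do_init below).
+# The helper is hypothetical.
+def _example_apply_default_configs(conf):
+    for key, value in DEFAULT_CONFIGS.items():
+        conf.setIfMissing(key, value)
+    return conf
+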
+
+class SparkContext(object):
+
+    """
+    Main entry point for Spark functionality. A SparkContext represents the
+    connection to a Spark cluster, and can be used to create L{RDD} and
+    broadcast variables on that cluster.
+    """
+
+    _gateway = None
+    _jvm = None
+    _next_accum_id = 0
+    _active_spark_context = None
+    _lock = RLock()
+    _python_includes = None  # zip and egg files that need to be added to PYTHONPATH
+
+    PACKAGE_EXTENSIONS = ('.zip', '.egg', '.jar')
+
+    def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
+                 environment=None, batchSize=0, serializer=PickleSerializer(), conf=None,
+                 gateway=None, jsc=None, profiler_cls=BasicProfiler):
+        """
+        Create a new SparkContext. At least the master and app name should be set,
+        either through the named parameters here or through C{conf}.
+
+        :param master: Cluster URL to connect to
+               (e.g. mesos://host:port, spark://host:port, local[4]).
+        :param appName: A name for your job, to display on the cluster web UI.
+        :param sparkHome: Location where Spark is installed on cluster nodes.
+        :param pyFiles: Collection of .zip or .py files to send to the cluster
+               and add to PYTHONPATH.  These can be paths on the local file
+               system or HDFS, HTTP, HTTPS, or FTP URLs.
+        :param environment: A dictionary of environment variables to set on
+               worker nodes.
+        :param batchSize: The number of Python objects represented as a single
+               Java object. Set 1 to disable batching, 0 to automatically choose
+               the batch size based on object sizes, or -1 to use an unlimited
+               batch size
+        :param serializer: The serializer for RDDs.
+        :param conf: A L{SparkConf} object setting Spark properties.
+        :param gateway: Use an existing gateway and JVM, otherwise a new JVM
+               will be instantiated.
+        :param jsc: The JavaSparkContext instance (optional).
+        :param profiler_cls: A class of custom Profiler used to do profiling
+               (default is pyspark.profiler.BasicProfiler).
+
+
+        >>> from pyspark.context import SparkContext
+        >>> sc = SparkContext('local', 'test')
+
+        >>> sc2 = SparkContext('local', 'test2') # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+            ...
+        ValueError:...
+        """
+        self._callsite = first_spark_call() or CallSite(None, None, None)
+        SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
+        try:
+            self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
+                          conf, jsc, profiler_cls)
+        except:
+            # If an error occurs, clean up in order to allow future SparkContext creation:
+            self.stop()
+            raise
+
+    def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
+                 conf, jsc, profiler_cls):
+        self.environment = environment or {}
+        # java gateway must have been launched at this point.
+        if conf is not None and conf._jconf is not None:
+            # conf has been initialized in JVM properly, so use conf directly. This represents the
+            # scenario that JVM has been launched before SparkConf is created (e.g. SparkContext is
+            # created and then stopped, and we create a new SparkConf and new SparkContext again)
+            self._conf = conf
+        else:
+            self._conf = SparkConf(_jvm=SparkContext._jvm)
+            if conf is not None:
+                for k, v in conf.getAll():
+                    self._conf.set(k, v)
+
+        self._batchSize = batchSize  # -1 represents an unlimited batch size
+        self._unbatched_serializer = serializer
+        if batchSize == 0:
+            self.serializer = AutoBatchedSerializer(self._unbatched_serializer)
+        else:
+            self.serializer = BatchedSerializer(self._unbatched_serializer,
+                                                batchSize)
+
+        # Set any parameters passed directly to us on the conf
+        if master:
+            self._conf.setMaster(master)
+        if appName:
+            self._conf.setAppName(appName)
+        if sparkHome:
+            self._conf.setSparkHome(sparkHome)
+        if environment:
+            for key, value in environment.items():
+                self._conf.setExecutorEnv(key, value)
+        for key, value in DEFAULT_CONFIGS.items():
+            self._conf.setIfMissing(key, value)
+
+        # Check that we have at least the required parameters
+        if not self._conf.contains("spark.master"):
+            raise Exception("A master URL must be set in your configuration")
+        if not self._conf.contains("spark.app.name"):
+            raise Exception("An application name must be set in your configuration")
+
+        # Read back our properties from the conf in case we loaded some of them from
+        # the classpath or an external config file
+        self.master = self._conf.get("spark.master")
+        self.appName = self._conf.get("spark.app.name")
+        self.sparkHome = self._conf.get("spark.home", None)
+
+        for (k, v) in self._conf.getAll():
+            if k.startswith("spark.executorEnv."):
+                varName = k[len("spark.executorEnv."):]
+                self.environment[varName] = v
+
+        self.environment["PYTHONHASHSEED"] = os.environ.get("PYTHONHASHSEED", "0")
+
+        # Create the Java SparkContext through Py4J
+        self._jsc = jsc or self._initialize_context(self._conf._jconf)
+        # Reset the SparkConf to the one actually used by the SparkContext in JVM.
+        self._conf = SparkConf(_jconf=self._jsc.sc().conf())
+
+        # Create a single Accumulator in Java that we'll send all our updates through;
+        # they will be passed back to us through a TCP server
+        auth_token = self._gateway.gateway_parameters.auth_token
+        self._accumulatorServer = accumulators._start_update_server(auth_token)
+        (host, port) = self._accumulatorServer.server_address
+        self._javaAccumulator = self._jvm.PythonAccumulatorV2(host, port, auth_token)
+        self._jsc.sc().register(self._javaAccumulator)
+
+        # If encryption is enabled, we need to setup a server in the jvm to read broadcast
+        # data via a socket.
+        # scala's mangled names w/ $ in them require special treatment.
+        self._encryption_enabled = self._jvm.PythonUtils.getEncryptionEnabled(self._jsc)
+
+        self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python')
+        self.pythonVer = "%d.%d" % sys.version_info[:2]
+
+        # Broadcast's __reduce__ method stores Broadcast instances here.
+        # This allows other code to determine which Broadcast instances have
+        # been pickled, so it can determine which Java broadcast objects to
+        # send.
+        self._pickled_broadcast_vars = BroadcastPickleRegistry()
+
+        SparkFiles._sc = self
+        root_dir = SparkFiles.getRootDirectory()
+        sys.path.insert(1, root_dir)
+
+        # Deploy any code dependencies specified in the constructor
+        self._python_includes = list()
+        for path in (pyFiles or []):
+            self.addPyFile(path)
+
+        # Deploy code dependencies set by spark-submit; these will already have been added
+        # with SparkContext.addFile, so we just need to add them to the PYTHONPATH
+        for path in self._conf.get("spark.submit.pyFiles", "").split(","):
+            if path != "":
+                (dirname, filename) = os.path.split(path)
+                try:
+                    filepath = os.path.join(SparkFiles.getRootDirectory(), filename)
+                    if not os.path.exists(filepath):
+                        # In case of YARN with shell mode, 'spark.submit.pyFiles' files are
+                        # not added via SparkContext.addFile. Here we check if the file exists,
+                        # try to copy and then add it to the path. See SPARK-21945.
+                        shutil.copyfile(path, filepath)
+                    if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
+                        self._python_includes.append(filename)
+                        sys.path.insert(1, filepath)
+                except Exception:
+                    warnings.warn(
+                        "Failed to add file [%s] specified in 'spark.submit.pyFiles' to "
+                        "Python path:\n  %s" % (path, "\n  ".join(sys.path)),
+                        RuntimeWarning)
+
+        # Create a temporary directory inside spark.local.dir:
+        local_dir = self._jvm.org.apache.spark.util.Utils.getLocalDir(self._jsc.sc().conf())
+        self._temp_dir = \
+            self._jvm.org.apache.spark.util.Utils.createTempDir(local_dir, "pyspark") \
+                .getAbsolutePath()
+
+        # profiling stats collected for each PythonRDD
+        if self._conf.get("spark.python.profile", "false") == "true":
+            dump_path = self._conf.get("spark.python.profile.dump", None)
+            self.profiler_collector = ProfilerCollector(profiler_cls, dump_path)
+        else:
+            self.profiler_collector = None
+
+        # create a signal handler which would be invoked on receiving SIGINT
+        def signal_handler(signal, frame):
+            self.cancelAllJobs()
+            raise KeyboardInterrupt()
+
+        # see http://stackoverflow.com/questions/23206787/
+        if isinstance(threading.current_thread(), threading._MainThread):
+            signal.signal(signal.SIGINT, signal_handler)
+
+    def __repr__(self):
+        return "<SparkContext master={master} appName={appName}>".format(
+            master=self.master,
+            appName=self.appName,
+        )
+
+    def _repr_html_(self):
+        return """
+        <div>
+            <p><b>SparkContext</b></p>
+
+            <p><a href="{sc.uiWebUrl}">Spark UI</a></p>
+
+            <dl>
+              <dt>Version</dt>
+                <dd><code>v{sc.version}</code></dd>
+              <dt>Master</dt>
+                <dd><code>{sc.master}</code></dd>
+              <dt>AppName</dt>
+                <dd><code>{sc.appName}</code></dd>
+            </dl>
+        </div>
+        """.format(
+            sc=self
+        )
+
+    def _initialize_context(self, jconf):
+        """
+        Initialize SparkContext in function to allow subclass specific initialization
+        """
+        return self._jvm.JavaSparkContext(jconf)
+
+    @classmethod
+    def _ensure_initialized(cls, instance=None, gateway=None, conf=None):
+        """
+        Checks whether a SparkContext is initialized or not.
+        Throws an error if a different SparkContext is already running.
+        """
+        with SparkContext._lock:
+            if not SparkContext._gateway:
+                SparkContext._gateway = gateway or launch_gateway(conf)
+                SparkContext._jvm = SparkContext._gateway.jvm
+
+            if instance:
+                if (SparkContext._active_spark_context and
+                        SparkContext._active_spark_context != instance):
+                    currentMaster = SparkContext._active_spark_context.master
+                    currentAppName = SparkContext._active_spark_context.appName
+                    callsite = SparkContext._active_spark_context._callsite
+
+                    # Raise error if there is already a running Spark context
+                    raise ValueError(
+                        "Cannot run multiple SparkContexts at once; "
+                        "existing SparkContext(app=%s, master=%s)"
+                        " created by %s at %s:%s "
+                        % (currentAppName, currentMaster,
+                            callsite.function, callsite.file, callsite.linenum))
+                else:
+                    SparkContext._active_spark_context = instance
+
+    def __getnewargs__(self):
+        # This method is called when attempting to pickle SparkContext, which is always an error:
+        raise Exception(
+            "It appears that you are attempting to reference SparkContext from a broadcast "
+            "variable, action, or transformation. SparkContext can only be used on the driver, "
+            "not in code that runs on workers. For more information, see SPARK-5063."
+        )
+
+    def __enter__(self):
+        """
+        Enable 'with SparkContext(...) as sc: app(sc)' syntax.
+        """
+        return self
+
+    def __exit__(self, type, value, trace):
+        """
+        Enable 'with SparkContext(...) as sc: app' syntax.
+
+        Specifically stop the context on exit of the with block.
+        """
+        self.stop()
+
+    @classmethod
+    def getOrCreate(cls, conf=None):
+        """
+        Get or instantiate a SparkContext and register it as a singleton object.
+
+        :param conf: SparkConf (optional)
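+
+        A minimal sketch, assuming an active context ``sc`` as in this module's
+        doctests:
+
+        >>> SparkContext.getOrCreate() is sc
+        True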
+        """
+        with SparkContext._lock:
+            if SparkContext._active_spark_context is None:
+                SparkContext(conf=conf or SparkConf())
+            return SparkContext._active_spark_context
+
+    def setLogLevel(self, logLevel):
+        """
+        Control our logLevel. This overrides any user-defined log settings.
+        Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
+        """
+        self._jsc.setLogLevel(logLevel)
+
+    @classmethod
+    def setSystemProperty(cls, key, value):
+        """
+        Set a Java system property, such as spark.executor.memory. This must
+        be invoked before instantiating SparkContext.
+        """
+        SparkContext._ensure_initialized()
+        SparkContext._jvm.java.lang.System.setProperty(key, value)
+
+    @property
+    def version(self):
+        """
+        The version of Spark on which this application is running.
+        """
+        return self._jsc.version()
+
+    @property
+    @ignore_unicode_prefix
+    def applicationId(self):
+        """
+        A unique identifier for the Spark application.
+        Its format depends on the scheduler implementation.
+
+        * in case of local spark app something like 'local-1433865536131'
+        * in case of YARN something like 'application_1433865536131_34483'
+
+        >>> sc.applicationId  # doctest: +ELLIPSIS
+        u'local-...'
+        """
+        return self._jsc.sc().applicationId()
+
+    @property
+    def uiWebUrl(self):
+        """Return the URL of the SparkUI instance started by this SparkContext"""
+        return self._jsc.sc().uiWebUrl().get()
+
+    @property
+    def startTime(self):
+        """Return the epoch time when the Spark Context was started."""
+        return self._jsc.startTime()
+
+    @property
+    def defaultParallelism(self):
+        """
+        Default level of parallelism to use when not given by user (e.g. for
+        reduce tasks)
+        """
+        return self._jsc.sc().defaultParallelism()
+
+    @property
+    def defaultMinPartitions(self):
+        """
+        Default min number of partitions for Hadoop RDDs when not given by user
+        """
+        return self._jsc.sc().defaultMinPartitions()
+
+    def stop(self):
+        """
+        Shut down the SparkContext.
+        """
+        if getattr(self, "_jsc", None):
+            try:
+                self._jsc.stop()
+            except Py4JError:
+                # Case: SPARK-18523
+                warnings.warn(
+                    'Unable to cleanly shut down Spark JVM process.'
+                    ' It is possible that the process has crashed,'
+                    ' been killed or may also be in a zombie state.',
+                    RuntimeWarning
+                )
+                pass
+            finally:
+                self._jsc = None
+        if getattr(self, "_accumulatorServer", None):
+            self._accumulatorServer.shutdown()
+            self._accumulatorServer = None
+        with SparkContext._lock:
+            SparkContext._active_spark_context = None
+
+    def emptyRDD(self):
+        """
+        Create an RDD that has no partitions or elements.
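+
+        A quick check of the behavior:
+
+        >>> sc.emptyRDD().count()
+        0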
+        """
+        return RDD(self._jsc.emptyRDD(), self, NoOpSerializer())
+
+    def range(self, start, end=None, step=1, numSlices=None):
+        """
+        Create a new RDD of int containing elements from `start` to `end`
+        (exclusive), incremented by `step` for each element. Can be called the same
+        way as python's built-in range() function. If called with a single argument,
+        the argument is interpreted as `end`, and `start` is set to 0.
+
+        :param start: the start value
+        :param end: the end value (exclusive)
+        :param step: the incremental step (default: 1)
+        :param numSlices: the number of partitions of the new RDD
+        :return: An RDD of int
+
+        >>> sc.range(5).collect()
+        [0, 1, 2, 3, 4]
+        >>> sc.range(2, 4).collect()
+        [2, 3]
+        >>> sc.range(1, 7, 2).collect()
+        [1, 3, 5]
+        """
+        if end is None:
+            end = start
+            start = 0
+
+        return self.parallelize(xrange(start, end, step), numSlices)
+
+    def parallelize(self, c, numSlices=None):
+        """
+        Distribute a local Python collection to form an RDD. Using xrange
+        is recommended for performance if the input represents a range.
+
+        >>> sc.parallelize([0, 2, 3, 4, 6], 5).glom().collect()
+        [[0], [2], [3], [4], [6]]
+        >>> sc.parallelize(xrange(0, 6, 2), 5).glom().collect()
+        [[], [0], [], [2], [4]]
+        """
+        numSlices = int(numSlices) if numSlices is not None else self.defaultParallelism
+        if isinstance(c, xrange):
+            size = len(c)
+            if size == 0:
+                return self.parallelize([], numSlices)
+            step = c[1] - c[0] if size > 1 else 1
+            start0 = c[0]
+
+            def getStart(split):
+                return start0 + int((split * size / numSlices)) * step
+
+            def f(split, iterator):
+                return xrange(getStart(split), getStart(split + 1), step)
+
+            return self.parallelize([], numSlices).mapPartitionsWithIndex(f)
+
+        # Make sure we distribute data evenly if it's smaller than self.batchSize
+        if "__len__" not in dir(c):
+            c = list(c)    # Make it a list so we can compute its length
+        batchSize = max(1, min(len(c) // numSlices, self._batchSize or 1024))
+        serializer = BatchedSerializer(self._unbatched_serializer, batchSize)
+
+        def reader_func(temp_filename):
+            return self._jvm.PythonRDD.readRDDFromFile(self._jsc, temp_filename, numSlices)
+
+        def createRDDServer():
+            return self._jvm.PythonParallelizeServer(self._jsc.sc(), numSlices)
+
+        jrdd = self._serialize_to_jvm(c, serializer, reader_func, createRDDServer)
+        return RDD(jrdd, self, serializer)
+
+    def _serialize_to_jvm(self, data, serializer, reader_func, createRDDServer):
+        """
+        Using py4j to send a large dataset to the jvm is really slow, so we either write the
+        data to a temporary file and read it back in the jvm, or stream it over a socket when
+        encryption is enabled.
+
+        :param data: the local Python collection (or iterator) to serialize
+        :param serializer: the serializer to use when writing the data out
+        :param reader_func:  A function which takes a filename and reads in the data in the jvm and
+                returns a JavaRDD. Only used when encryption is disabled.
+        :param createRDDServer:  A function which creates a PythonRDDServer in the jvm to
+               accept the serialized data, for use when encryption is enabled.
+        :return: a JavaRDD built from the serialized data
+        """
+        if self._encryption_enabled:
+            # with encryption, we open a server in java and send the data directly
+            server = createRDDServer()
+            (sock_file, _) = local_connect_and_auth(server.port(), server.secret())
+            chunked_out = ChunkedStream(sock_file, 8192)
+            serializer.dump_stream(data, chunked_out)
+            chunked_out.close()
+            # this call will block until the server has read all the data and processed it (or
+            # throws an exception)
+            r = server.getResult()
+            return r
+        else:
+            # without encryption, we serialize to a file, and we read the file in java and
+            # parallelize from there.
+            tempFile = NamedTemporaryFile(delete=False, dir=self._temp_dir)
+            try:
+                try:
+                    serializer.dump_stream(data, tempFile)
+                finally:
+                    tempFile.close()
+                return reader_func(tempFile.name)
+            finally:
+                # we eagerly read the file so we can delete it right after.
+                os.unlink(tempFile.name)
+
+    def pickleFile(self, name, minPartitions=None):
+        """
+        Load an RDD previously saved using L{RDD.saveAsPickleFile} method.
+
+        >>> tmpFile = NamedTemporaryFile(delete=True)
+        >>> tmpFile.close()
+        >>> sc.parallelize(range(10)).saveAsPickleFile(tmpFile.name, 5)
+        >>> sorted(sc.pickleFile(tmpFile.name, 3).collect())
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        """
+        minPartitions = minPartitions or self.defaultMinPartitions
+        return RDD(self._jsc.objectFile(name, minPartitions), self)
+
+    @ignore_unicode_prefix
+    def textFile(self, name, minPartitions=None, use_unicode=True):
+        """
+        Read a text file from HDFS, a local file system (available on all
+        nodes), or any Hadoop-supported file system URI, and return it as an
+        RDD of Strings.
+
+        If use_unicode is False, the strings will be kept as `str` (encoded
+        as `utf-8`), which is faster and smaller than unicode. (Added in
+        Spark 1.2)
+
+        >>> path = os.path.join(tempdir, "sample-text.txt")
+        >>> with open(path, "w") as testFile:
+        ...    _ = testFile.write("Hello world!")
+        >>> textFile = sc.textFile(path)
+        >>> textFile.collect()
+        [u'Hello world!']
+        """
+        minPartitions = minPartitions or min(self.defaultParallelism, 2)
+        return RDD(self._jsc.textFile(name, minPartitions), self,
+                   UTF8Deserializer(use_unicode))
+
+    @ignore_unicode_prefix
+    def wholeTextFiles(self, path, minPartitions=None, use_unicode=True):
+        """
+        Read a directory of text files from HDFS, a local file system
+        (available on all nodes), or any Hadoop-supported file system
+        URI. Each file is read as a single record and returned in a
+        key-value pair, where the key is the path of each file, the
+        value is the content of each file.
+
+        If use_unicode is False, the strings will be kept as `str` (encoded
+        as `utf-8`), which is faster and smaller than unicode. (Added in
+        Spark 1.2)
+
+        For example, if you have the following files::
+
+          hdfs://a-hdfs-path/part-00000
+          hdfs://a-hdfs-path/part-00001
+          ...
+          hdfs://a-hdfs-path/part-nnnnn
+
+        Do C{rdd = sparkContext.wholeTextFiles("hdfs://a-hdfs-path")},
+        then C{rdd} contains::
+
+          (a-hdfs-path/part-00000, its content)
+          (a-hdfs-path/part-00001, its content)
+          ...
+          (a-hdfs-path/part-nnnnn, its content)
+
+        .. note:: Small files are preferred, as each file will be loaded
+            fully in memory.
+
+        >>> dirPath = os.path.join(tempdir, "files")
+        >>> os.mkdir(dirPath)
+        >>> with open(os.path.join(dirPath, "1.txt"), "w") as file1:
+        ...    _ = file1.write("1")
+        >>> with open(os.path.join(dirPath, "2.txt"), "w") as file2:
+        ...    _ = file2.write("2")
+        >>> textFiles = sc.wholeTextFiles(dirPath)
+        >>> sorted(textFiles.collect())
+        [(u'.../1.txt', u'1'), (u'.../2.txt', u'2')]
+        """
+        minPartitions = minPartitions or self.defaultMinPartitions
+        return RDD(self._jsc.wholeTextFiles(path, minPartitions), self,
+                   PairDeserializer(UTF8Deserializer(use_unicode), UTF8Deserializer(use_unicode)))
+
+    def binaryFiles(self, path, minPartitions=None):
+        """
+        .. note:: Experimental
+
+        Read a directory of binary files from HDFS, a local file system
+        (available on all nodes), or any Hadoop-supported file system URI
+        as a byte array. Each file is read as a single record and returned
+        in a key-value pair, where the key is the path of each file, the
+        value is the content of each file.
+
+        .. note:: Small files are preferred; large files are also allowed, but
+            may cause bad performance.
+        """
+        minPartitions = minPartitions or self.defaultMinPartitions
+        return RDD(self._jsc.binaryFiles(path, minPartitions), self,
+                   PairDeserializer(UTF8Deserializer(), NoOpSerializer()))
+
+    def binaryRecords(self, path, recordLength):
+        """
+        .. note:: Experimental
+
+        Load data from a flat binary file, assuming each record is a set of numbers
+        with the specified numerical format (see ByteBuffer), and the number of
+        bytes per record is constant.
+
+        :param path: path to the input data files (a directory or a single file)
+        :param recordLength: The length at which to split the records
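+
+        A hedged sketch that writes three little-endian 32-bit integers and reads
+        them back as fixed-length records (``tempdir`` as in this module's doctests):
+
+        >>> import struct
+        >>> path = os.path.join(tempdir, "int-records.bin")
+        >>> with open(path, "wb") as f:
+        ...    _ = f.write(b"".join(struct.pack("<i", i) for i in [1, 2, 3]))
+        >>> sorted(struct.unpack("<i", r)[0] for r in sc.binaryRecords(path, 4).collect())
+        [1, 2, 3]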
+        """
+        return RDD(self._jsc.binaryRecords(path, recordLength), self, NoOpSerializer())
+
+    def _dictToJavaMap(self, d):
+        jm = self._jvm.java.util.HashMap()
+        if not d:
+            d = {}
+        for k, v in d.items():
+            jm[k] = v
+        return jm
+
+    def sequenceFile(self, path, keyClass=None, valueClass=None, keyConverter=None,
+                     valueConverter=None, minSplits=None, batchSize=0):
+        """
+        Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS,
+        a local file system (available on all nodes), or any Hadoop-supported file system URI.
+        The mechanism is as follows:
+
+            1. A Java RDD is created from the SequenceFile or other InputFormat, and the key
+               and value Writable classes
+            2. Serialization is attempted via Pyrolite pickling
+            3. If this fails, the fallback is to call 'toString' on each key and value
+            4. C{PickleSerializer} is used to deserialize pickled objects on the Python side
+
+        :param path: path to sequencefile
+        :param keyClass: fully qualified classname of key Writable class
+               (e.g. "org.apache.hadoop.io.Text")
+        :param valueClass: fully qualified classname of value Writable class
+               (e.g. "org.apache.hadoop.io.LongWritable")
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        :param minSplits: minimum splits in dataset
+               (default min(2, sc.defaultParallelism))
+        :param batchSize: The number of Python objects represented as a single
+               Java object. (default 0, choose batchSize automatically)
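+
+        A minimal round-trip sketch (``tempdir`` as in this module's doctests):
+
+        >>> seqPath = os.path.join(tempdir, "seq-file")
+        >>> sc.parallelize([("a", 1), ("b", 2)]).saveAsSequenceFile(seqPath)
+        >>> sorted(sc.sequenceFile(seqPath).values().collect())
+        [1, 2]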
+        """
+        minSplits = minSplits or min(self.defaultParallelism, 2)
+        jrdd = self._jvm.PythonRDD.sequenceFile(self._jsc, path, keyClass, valueClass,
+                                                keyConverter, valueConverter, minSplits, batchSize)
+        return RDD(jrdd, self)
+
+    def newAPIHadoopFile(self, path, inputFormatClass, keyClass, valueClass, keyConverter=None,
+                         valueConverter=None, conf=None, batchSize=0):
+        """
+        Read a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS,
+        a local file system (available on all nodes), or any Hadoop-supported file system URI.
+        The mechanism is the same as for sc.sequenceFile.
+
+        A Hadoop configuration can be passed in as a Python dict. This will be converted into a
+        Configuration in Java
+
+        :param path: path to Hadoop file
+        :param inputFormatClass: fully qualified classname of Hadoop InputFormat
+               (e.g. "org.apache.hadoop.mapreduce.lib.input.TextInputFormat")
+        :param keyClass: fully qualified classname of key Writable class
+               (e.g. "org.apache.hadoop.io.Text")
+        :param valueClass: fully qualified classname of value Writable class
+               (e.g. "org.apache.hadoop.io.LongWritable")
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        :param conf: Hadoop configuration, passed in as a dict
+               (None by default)
+        :param batchSize: The number of Python objects represented as a single
+               Java object. (default 0, choose batchSize automatically)
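+
+        For example, to read a plain text file through the new Hadoop API (a hedged
+        sketch; the path is a placeholder and the classnames are the ones shown above)::
+
+            rdd = sc.newAPIHadoopFile(
+                "hdfs://some/path",
+                "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
+                "org.apache.hadoop.io.LongWritable",
+                "org.apache.hadoop.io.Text")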
+        """
+        jconf = self._dictToJavaMap(conf)
+        jrdd = self._jvm.PythonRDD.newAPIHadoopFile(self._jsc, path, inputFormatClass, keyClass,
+                                                    valueClass, keyConverter, valueConverter,
+                                                    jconf, batchSize)
+        return RDD(jrdd, self)
+
+    def newAPIHadoopRDD(self, inputFormatClass, keyClass, valueClass, keyConverter=None,
+                        valueConverter=None, conf=None, batchSize=0):
+        """
+        Read a 'new API' Hadoop InputFormat with arbitrary key and value class, from an arbitrary
+        Hadoop configuration, which is passed in as a Python dict.
+        This will be converted into a Configuration in Java.
+        The mechanism is the same as for sc.sequenceFile.
+
+        :param inputFormatClass: fully qualified classname of Hadoop InputFormat
+               (e.g. "org.apache.hadoop.mapreduce.lib.input.TextInputFormat")
+        :param keyClass: fully qualified classname of key Writable class
+               (e.g. "org.apache.hadoop.io.Text")
+        :param valueClass: fully qualified classname of value Writable class
+               (e.g. "org.apache.hadoop.io.LongWritable")
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        :param conf: Hadoop configuration, passed in as a dict
+               (None by default)
+        :param batchSize: The number of Python objects represented as a single
+               Java object. (default 0, choose batchSize automatically)
+        """
+        jconf = self._dictToJavaMap(conf)
+        jrdd = self._jvm.PythonRDD.newAPIHadoopRDD(self._jsc, inputFormatClass, keyClass,
+                                                   valueClass, keyConverter, valueConverter,
+                                                   jconf, batchSize)
+        return RDD(jrdd, self)
+
+    def hadoopFile(self, path, inputFormatClass, keyClass, valueClass, keyConverter=None,
+                   valueConverter=None, conf=None, batchSize=0):
+        """
+        Read an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS,
+        a local file system (available on all nodes), or any Hadoop-supported file system URI.
+        The mechanism is the same as for sc.sequenceFile.
+
+        A Hadoop configuration can be passed in as a Python dict. This will be converted into a
+        Configuration in Java.
+
+        :param path: path to Hadoop file
+        :param inputFormatClass: fully qualified classname of Hadoop InputFormat
+               (e.g. "org.apache.hadoop.mapred.TextInputFormat")
+        :param keyClass: fully qualified classname of key Writable class
+               (e.g. "org.apache.hadoop.io.Text")
+        :param valueClass: fully qualified classname of value Writable class
+               (e.g. "org.apache.hadoop.io.LongWritable")
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        :param conf: Hadoop configuration, passed in as a dict
+               (None by default)
+        :param batchSize: The number of Python objects represented as a single
+               Java object. (default 0, choose batchSize automatically)
+        """
+        jconf = self._dictToJavaMap(conf)
+        jrdd = self._jvm.PythonRDD.hadoopFile(self._jsc, path, inputFormatClass, keyClass,
+                                              valueClass, keyConverter, valueConverter,
+                                              jconf, batchSize)
+        return RDD(jrdd, self)
+
+    def hadoopRDD(self, inputFormatClass, keyClass, valueClass, keyConverter=None,
+                  valueConverter=None, conf=None, batchSize=0):
+        """
+        Read an 'old' Hadoop InputFormat with arbitrary key and value class, from an arbitrary
+        Hadoop configuration, which is passed in as a Python dict.
+        This will be converted into a Configuration in Java.
+        The mechanism is the same as for sc.sequenceFile.
+
+        :param inputFormatClass: fully qualified classname of Hadoop InputFormat
+               (e.g. "org.apache.hadoop.mapred.TextInputFormat")
+        :param keyClass: fully qualified classname of key Writable class
+               (e.g. "org.apache.hadoop.io.Text")
+        :param valueClass: fully qualified classname of value Writable class
+               (e.g. "org.apache.hadoop.io.LongWritable")
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        :param conf: Hadoop configuration, passed in as a dict
+               (None by default)
+        :param batchSize: The number of Python objects represented as a single
+               Java object. (default 0, choose batchSize automatically)
+        """
+        jconf = self._dictToJavaMap(conf)
+        jrdd = self._jvm.PythonRDD.hadoopRDD(self._jsc, inputFormatClass, keyClass,
+                                             valueClass, keyConverter, valueConverter,
+                                             jconf, batchSize)
+        return RDD(jrdd, self)
+
+    def _checkpointFile(self, name, input_deserializer):
+        jrdd = self._jsc.checkpointFile(name)
+        return RDD(jrdd, self, input_deserializer)
+
+    @ignore_unicode_prefix
+    def union(self, rdds):
+        """
+        Build the union of a list of RDDs.
+
+        This supports unions() of RDDs with different serialized formats,
+        although this forces them to be reserialized using the default
+        serializer:
+
+        >>> path = os.path.join(tempdir, "union-text.txt")
+        >>> with open(path, "w") as testFile:
+        ...    _ = testFile.write("Hello")
+        >>> textFile = sc.textFile(path)
+        >>> textFile.collect()
+        [u'Hello']
+        >>> parallelized = sc.parallelize(["World!"])
+        >>> sorted(sc.union([textFile, parallelized]).collect())
+        [u'Hello', 'World!']
+        """
+        first_jrdd_deserializer = rdds[0]._jrdd_deserializer
+        if any(x._jrdd_deserializer != first_jrdd_deserializer for x in rdds):
+            rdds = [x._reserialize() for x in rdds]
+        first = rdds[0]._jrdd
+        rest = [x._jrdd for x in rdds[1:]]
+        return RDD(self._jsc.union(first, rest), self, rdds[0]._jrdd_deserializer)
+
+    def broadcast(self, value):
+        """
+        Broadcast a read-only variable to the cluster, returning a
+        L{Broadcast<pyspark.broadcast.Broadcast>}
+        object for reading it in distributed functions. The variable will
+        be sent to each cluster only once.
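+
+        A small usage sketch (``sc`` as in this module's doctests):
+
+        >>> b = sc.broadcast([1, 2, 3])
+        >>> b.value
+        [1, 2, 3]
+        >>> sc.parallelize([0, 0]).flatMap(lambda x: b.value).collect()
+        [1, 2, 3, 1, 2, 3]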
+        """
+        return Broadcast(self, value, self._pickled_broadcast_vars)
+
+    def accumulator(self, value, accum_param=None):
+        """
+        Create an L{Accumulator} with the given initial value, using a given
+        L{AccumulatorParam} helper object to define how to add values of the
+        data type if provided. Default AccumulatorParams are used for integers
+        and floating-point numbers if you do not provide one. For other types,
+        a custom AccumulatorParam can be used.
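+
+        A minimal sketch of the common integer case (``sc`` as in this module's doctests):
+
+        >>> acc = sc.accumulator(0)
+        >>> sc.parallelize([1, 2, 3, 4]).foreach(lambda x: acc.add(x))
+        >>> acc.value
+        10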
+        """
+        if accum_param is None:
+            if isinstance(value, int):
+                accum_param = accumulators.INT_ACCUMULATOR_PARAM
+            elif isinstance(value, float):
+                accum_param = accumulators.FLOAT_ACCUMULATOR_PARAM
+            elif isinstance(value, complex):
+                accum_param = accumulators.COMPLEX_ACCUMULATOR_PARAM
+            else:
+                raise TypeError("No default accumulator param for type %s" % type(value))
+        SparkContext._next_accum_id += 1
+        return Accumulator(SparkContext._next_accum_id - 1, value, accum_param)
+
+    def addFile(self, path, recursive=False):
+        """
+        Add a file to be downloaded with this Spark job on every node.
+        The C{path} passed can be either a local file, a file in HDFS
+        (or other Hadoop-supported filesystems), or an HTTP, HTTPS or
+        FTP URI.
+
+        To access the file in Spark jobs, use
+        L{SparkFiles.get(fileName)<pyspark.files.SparkFiles.get>} with the
+        filename to find its download location.
+
+        A directory can be given if the recursive option is set to True.
+        Currently directories are only supported for Hadoop-supported filesystems.
+
+        .. note:: A path can be added only once. Subsequent additions of the same path are ignored.
+
+        >>> from pyspark import SparkFiles
+        >>> path = os.path.join(tempdir, "test.txt")
+        >>> with open(path, "w") as testFile:
+        ...    _ = testFile.write("100")
+        >>> sc.addFile(path)
+        >>> def func(iterator):
+        ...    with open(SparkFiles.get("test.txt")) as testFile:
+        ...        fileVal = int(testFile.readline())
+        ...        return [x * fileVal for x in iterator]
+        >>> sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect()
+        [100, 200, 300, 400]
+        """
+        self._jsc.sc().addFile(path, recursive)
+
+    def addPyFile(self, path):
+        """
+        Add a .py or .zip dependency for all tasks to be executed on this
+        SparkContext in the future.  The C{path} passed can be either a local
+        file, a file in HDFS (or other Hadoop-supported filesystems), or an
+        HTTP, HTTPS or FTP URI.
+
+        .. note:: A path can be added only once. Subsequent additions of the same path are ignored.
+        """
+        self.addFile(path)
+        (dirname, filename) = os.path.split(path)  # dirname may be directory or HDFS/S3 prefix
+        if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
+            self._python_includes.append(filename)
+            # for tests in local mode
+            sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
+        if sys.version > '3':
+            import importlib
+            importlib.invalidate_caches()
+
+    def setCheckpointDir(self, dirName):
+        """
+        Set the directory under which RDDs are going to be checkpointed. The
+        directory must be an HDFS path if running on a cluster.
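+
+        A small sketch (``tempdir`` as in this module's doctests):
+
+        >>> sc.setCheckpointDir(os.path.join(tempdir, "checkpoints"))
+        >>> rdd = sc.parallelize(range(5))
+        >>> rdd.checkpoint()
+        >>> rdd.count()
+        5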
+        """
+        self._jsc.sc().setCheckpointDir(dirName)
+
+    def _getJavaStorageLevel(self, storageLevel):
+        """
+        Returns a Java StorageLevel based on a pyspark.StorageLevel.
+        """
+        if not isinstance(storageLevel, StorageLevel):
+            raise Exception("storageLevel must be of type pyspark.StorageLevel")
+
+        newStorageLevel = self._jvm.org.apache.spark.storage.StorageLevel
+        return newStorageLevel(storageLevel.useDisk,
+                               storageLevel.useMemory,
+                               storageLevel.useOffHeap,
+                               storageLevel.deserialized,
+                               storageLevel.replication)
+
+    def setJobGroup(self, groupId, description, interruptOnCancel=False):
+        """
+        Assigns a group ID to all the jobs started by this thread until the group ID is set to a
+        different value or cleared.
+
+        Often, a unit of execution in an application consists of multiple Spark actions or jobs.
+        Application programmers can use this method to group all those jobs together and give a
+        group description. Once set, the Spark web UI will associate such jobs with this group.
+
+        The application can use L{SparkContext.cancelJobGroup} to cancel all
+        running jobs in this group.
+
+        >>> import threading
+        >>> from time import sleep
+        >>> result = "Not Set"
+        >>> lock = threading.Lock()
+        >>> def map_func(x):
+        ...     sleep(100)
+        ...     raise Exception("Task should have been cancelled")
+        >>> def start_job(x):
+        ...     global result
+        ...     try:
+        ...         sc.setJobGroup("job_to_cancel", "some description")
+        ...         result = sc.parallelize(range(x)).map(map_func).collect()
+        ...     except Exception as e:
+        ...         result = "Cancelled"
+        ...     lock.release()
+        >>> def stop_job():
+        ...     sleep(5)
+        ...     sc.cancelJobGroup("job_to_cancel")
+        >>> suppress = lock.acquire()
+        >>> suppress = threading.Thread(target=start_job, args=(10,)).start()
+        >>> suppress = threading.Thread(target=stop_job).start()
+        >>> suppress = lock.acquire()
+        >>> print(result)
+        Cancelled
+
+        If interruptOnCancel is set to true for the job group, then job cancellation will result
+        in Thread.interrupt() being called on the job's executor threads. This is useful to help
+        ensure that the tasks are actually stopped in a timely manner, but is off by default due
+        to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead.
+        """
+        self._jsc.setJobGroup(groupId, description, interruptOnCancel)
+
+    def setLocalProperty(self, key, value):
+        """
+        Set a local property that affects jobs submitted from this thread, such as the
+        Spark fair scheduler pool.
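+
+        A minimal sketch, using the fair scheduler pool property:
+
+        >>> sc.setLocalProperty("spark.scheduler.pool", "production")
+        >>> str(sc.getLocalProperty("spark.scheduler.pool"))
+        'production'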
+        """
+        self._jsc.setLocalProperty(key, value)
+
+    def getLocalProperty(self, key):
+        """
+        Get a local property set in this thread, or None if it is missing. See
+        L{setLocalProperty}
+        """
+        return self._jsc.getLocalProperty(key)
+
+    def setJobDescription(self, value):
+        """
+        Set a human readable description of the current job.
+        """
+        self._jsc.setJobDescription(value)
+
+    def sparkUser(self):
+        """
+        Get SPARK_USER for the user who is running SparkContext.
+        """
+        return self._jsc.sc().sparkUser()
+
+    def cancelJobGroup(self, groupId):
+        """
+        Cancel active jobs for the specified group. See L{SparkContext.setJobGroup}
+        for more information.
+        """
+        self._jsc.sc().cancelJobGroup(groupId)
+
+    def cancelAllJobs(self):
+        """
+        Cancel all jobs that have been scheduled or are running.
+        """
+        self._jsc.sc().cancelAllJobs()
+
+    def statusTracker(self):
+        """
+        Return :class:`StatusTracker` object
+        """
+        return StatusTracker(self._jsc.statusTracker())
+
+    def runJob(self, rdd, partitionFunc, partitions=None, allowLocal=False):
+        """
+        Executes the given partitionFunc on the specified set of partitions,
+        returning the result as an array of elements.
+
+        If 'partitions' is not specified, this will run over all partitions.
+
+        >>> myRDD = sc.parallelize(range(6), 3)
+        >>> sc.runJob(myRDD, lambda part: [x * x for x in part])
+        [0, 1, 4, 9, 16, 25]
+
+        >>> myRDD = sc.parallelize(range(6), 3)
+        >>> sc.runJob(myRDD, lambda part: [x * x for x in part], [0, 2], True)
+        [0, 1, 16, 25]
+        """
+        if partitions is None:
+            partitions = range(rdd._jrdd.partitions().size())
+
+        # Implementation note: This is implemented as a mapPartitions followed
+        # by runJob() in order to avoid having to pass a Python lambda into
+        # SparkContext#runJob.
+        mappedRDD = rdd.mapPartitions(partitionFunc)
+        sock_info = self._jvm.PythonRDD.runJob(self._jsc.sc(), mappedRDD._jrdd, partitions)
+        return list(_load_from_socket(sock_info, mappedRDD._jrdd_deserializer))
+
+    def show_profiles(self):
+        """ Print the profile stats to stdout """
+        if self.profiler_collector is not None:
+            self.profiler_collector.show_profiles()
+        else:
+            raise RuntimeError("'spark.python.profile' configuration must be set "
+                               "to 'true' to enable Python profile.")
+
+    def dump_profiles(self, path):
+        """ Dump the profile stats into directory `path`
+        """
+        if self.profiler_collector is not None:
+            self.profiler_collector.dump_profiles(path)
+        else:
+            raise RuntimeError("'spark.python.profile' configuration must be set "
+                               "to 'true' to enable Python profile.")
+
+    def getConf(self):
+        conf = SparkConf()
+        conf.setAll(self._conf.getAll())
+        return conf
+
+
+def _test():
+    import atexit
+    import doctest
+    import tempfile
+    globs = globals().copy()
+    globs['sc'] = SparkContext('local[4]', 'PythonTest')
+    globs['tempdir'] = tempfile.mkdtemp()
+    atexit.register(lambda: shutil.rmtree(globs['tempdir']))
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/daemon.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/daemon.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebdd665e349c526c934bd9ec253a82675d0946cc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/daemon.py
@@ -0,0 +1,195 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numbers
+import os
+import signal
+import select
+import socket
+import sys
+import traceback
+import time
+import gc
+from errno import EINTR, EAGAIN
+from socket import AF_INET, SOCK_STREAM, SOMAXCONN
+from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT
+
+from pyspark.worker import main as worker_main
+from pyspark.serializers import read_int, write_int, write_with_length, UTF8Deserializer
+
+
+def compute_real_exit_code(exit_code):
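+    """
+    Map a worker's SystemExit code to an integer suitable for os._exit.
+    A quick sketch of the mapping:
+
+    >>> compute_real_exit_code(3)
+    3
+    >>> compute_real_exit_code("some error message")
+    1
+    """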
+    # SystemExit's code can be integer or string, but os._exit only accepts integers
+    if isinstance(exit_code, numbers.Integral):
+        return exit_code
+    else:
+        return 1
+
+
+def worker(sock, authenticated):
+    """
+    Called by a worker process after the fork().
+    """
+    signal.signal(SIGHUP, SIG_DFL)
+    signal.signal(SIGCHLD, SIG_DFL)
+    signal.signal(SIGTERM, SIG_DFL)
+    # restore the handler for SIGINT,
+    # it's useful for debugging (show the stacktrace before exit)
+    signal.signal(SIGINT, signal.default_int_handler)
+
+    # Read the socket using fdopen instead of socket.makefile() because the latter
+    # seems to be very slow; note that we need to dup() the file descriptor because
+    # otherwise writes also cause a seek that makes us miss data on the read side.
+    infile = os.fdopen(os.dup(sock.fileno()), "rb", 65536)
+    outfile = os.fdopen(os.dup(sock.fileno()), "wb", 65536)
+
+    if not authenticated:
+        client_secret = UTF8Deserializer().loads(infile)
+        if os.environ["PYTHON_WORKER_FACTORY_SECRET"] == client_secret:
+            write_with_length("ok".encode("utf-8"), outfile)
+            outfile.flush()
+        else:
+            write_with_length("err".encode("utf-8"), outfile)
+            outfile.flush()
+            sock.close()
+            return 1
+
+    exit_code = 0
+    try:
+        worker_main(infile, outfile)
+    except SystemExit as exc:
+        exit_code = compute_real_exit_code(exc.code)
+    finally:
+        try:
+            outfile.flush()
+        except Exception:
+            pass
+    return exit_code
+
+
+def manager():
+    # Create a new process group to corral our children
+    os.setpgid(0, 0)
+
+    # Create a listening socket on the AF_INET loopback interface
+    listen_sock = socket.socket(AF_INET, SOCK_STREAM)
+    listen_sock.bind(('127.0.0.1', 0))
+    listen_sock.listen(max(1024, SOMAXCONN))
+    listen_host, listen_port = listen_sock.getsockname()
+
+    # re-open stdin in 'rb' and stdout in 'wb' mode
+    stdin_bin = os.fdopen(sys.stdin.fileno(), 'rb', 4)
+    stdout_bin = os.fdopen(sys.stdout.fileno(), 'wb', 4)
+    write_int(listen_port, stdout_bin)
+    stdout_bin.flush()
+
+    def shutdown(code):
+        signal.signal(SIGTERM, SIG_DFL)
+        # Send SIGHUP to notify workers of shutdown
+        os.kill(0, SIGHUP)
+        sys.exit(code)
+
+    def handle_sigterm(*args):
+        shutdown(1)
+    signal.signal(SIGTERM, handle_sigterm)  # Gracefully exit on SIGTERM
+    signal.signal(SIGHUP, SIG_IGN)  # Don't die on SIGHUP
+    signal.signal(SIGCHLD, SIG_IGN)
+
+    reuse = os.environ.get("SPARK_REUSE_WORKER")
+
+    # Initialization complete
+    try:
+        while True:
+            try:
+                ready_fds = select.select([0, listen_sock], [], [], 1)[0]
+            except select.error as ex:
+                if ex[0] == EINTR:
+                    continue
+                else:
+                    raise
+
+            if 0 in ready_fds:
+                try:
+                    worker_pid = read_int(stdin_bin)
+                except EOFError:
+                    # Spark told us to exit by closing stdin
+                    shutdown(0)
+                try:
+                    os.kill(worker_pid, signal.SIGKILL)
+                except OSError:
+                    pass  # process already died
+
+            if listen_sock in ready_fds:
+                try:
+                    sock, _ = listen_sock.accept()
+                except OSError as e:
+                    if e.errno == EINTR:
+                        continue
+                    raise
+
+                # Launch a worker process
+                try:
+                    pid = os.fork()
+                except OSError as e:
+                    if e.errno in (EAGAIN, EINTR):
+                        time.sleep(1)
+                        pid = os.fork()  # an error here will shut down the daemon
+                    else:
+                        outfile = sock.makefile(mode='wb')
+                        write_int(e.errno, outfile)  # Signal that the fork failed
+                        outfile.flush()
+                        outfile.close()
+                        sock.close()
+                        continue
+
+                if pid == 0:
+                    # in child process
+                    listen_sock.close()
+                    try:
+                        # Acknowledge that the fork was successful
+                        outfile = sock.makefile(mode="wb")
+                        write_int(os.getpid(), outfile)
+                        outfile.flush()
+                        outfile.close()
+                        authenticated = False
+                        while True:
+                            code = worker(sock, authenticated)
+                            if code == 0:
+                                authenticated = True
+                            if not reuse or code:
+                                # wait for closing
+                                try:
+                                    while sock.recv(1024):
+                                        pass
+                                except Exception:
+                                    pass
+                                break
+                            gc.collect()
+                    except:
+                        traceback.print_exc()
+                        os._exit(1)
+                    else:
+                        os._exit(0)
+                else:
+                    sock.close()
+
+    finally:
+        shutdown(1)
+
+
+if __name__ == '__main__':
+    manager()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/files.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/files.py
new file mode 100644
index 0000000000000000000000000000000000000000..797573f49dac8a3421f427c75fd404d4c85b52cf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/files.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+
+__all__ = ['SparkFiles']
+
+
+class SparkFiles(object):
+
+    """
+    Resolves paths to files added through
+    L{SparkContext.addFile()<pyspark.context.SparkContext.addFile>}.
+
+    SparkFiles contains only classmethods; users should not create SparkFiles
+    instances.
+    """
+
+    _root_directory = None
+    _is_running_on_worker = False
+    _sc = None
+
+    def __init__(self):
+        raise NotImplementedError("Do not construct SparkFiles objects")
+
+    @classmethod
+    def get(cls, filename):
+        """
+        Get the absolute path of a file added through C{SparkContext.addFile()}.
+        """
+        path = os.path.join(SparkFiles.getRootDirectory(), filename)
+        return os.path.abspath(path)
+
+    @classmethod
+    def getRootDirectory(cls):
+        """
+        Get the root directory that contains files added through
+        C{SparkContext.addFile()}.
+        """
+        if cls._is_running_on_worker:
+            return cls._root_directory
+        else:
+            # This will have to change if we support multiple SparkContexts:
+            return cls._sc._jvm.org.apache.spark.SparkFiles.getRootDirectory()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/find_spark_home.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/find_spark_home.py
new file mode 100755
index 0000000000000000000000000000000000000000..9c4ed4659863271c1c22edc5dd60da95f1a288fe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/find_spark_home.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script attempts to determine the correct setting for SPARK_HOME given
+# that Spark may have been installed on the system with pip.
+
+from __future__ import print_function
+import os
+import sys
+
+
+def _find_spark_home():
+    """Find the SPARK_HOME."""
+    # If the environment has SPARK_HOME set, trust it.
+    if "SPARK_HOME" in os.environ:
+        return os.environ["SPARK_HOME"]
+
+    def is_spark_home(path):
+        """Takes a path and returns true if the provided path could be a reasonable SPARK_HOME"""
+        return (os.path.isfile(os.path.join(path, "bin/spark-submit")) and
+                (os.path.isdir(os.path.join(path, "jars")) or
+                 os.path.isdir(os.path.join(path, "assembly"))))
+
+    paths = ["../", os.path.dirname(os.path.realpath(__file__))]
+
+    # Add the path of the PySpark module if it exists
+    if sys.version < "3":
+        import imp
+        try:
+            module_home = imp.find_module("pyspark")[1]
+            paths.append(module_home)
+            # If we are installed in edit mode, also look two dirs up
+            paths.append(os.path.join(module_home, "../../"))
+        except ImportError:
+            # Not pip installed, no worries
+            pass
+    else:
+        from importlib.util import find_spec
+        try:
+            module_home = os.path.dirname(find_spec("pyspark").origin)
+            paths.append(module_home)
+            # If we are installed in edit mode, also look two dirs up
+            paths.append(os.path.join(module_home, "../../"))
+        except ImportError:
+            # Not pip installed, no worries
+            pass
+
+    # Normalize the paths
+    paths = [os.path.abspath(p) for p in paths]
+
+    try:
+        return next(path for path in paths if is_spark_home(path))
+    except StopIteration:
+        print("Could not find valid SPARK_HOME while searching {0}".format(paths), file=sys.stderr)
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    print(_find_spark_home())
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/heapq3.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/heapq3.py
new file mode 100644
index 0000000000000000000000000000000000000000..37a2914ebac0520e28753c7b1a84cbe9fc6551e0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/heapq3.py
@@ -0,0 +1,890 @@
+# -*- encoding: utf-8 -*-
+# Backported from CPython 3
+# A. HISTORY OF THE SOFTWARE
+# ==========================
+#
+# Python was created in the early 1990s by Guido van Rossum at Stichting
+# Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
+# as a successor of a language called ABC.  Guido remains Python's
+# principal author, although it includes many contributions from others.
+#
+# In 1995, Guido continued his work on Python at the Corporation for
+# National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
+# in Reston, Virginia where he released several versions of the
+# software.
+#
+# In May 2000, Guido and the Python core development team moved to
+# BeOpen.com to form the BeOpen PythonLabs team.  In October of the same
+# year, the PythonLabs team moved to Digital Creations (now Zope
+# Corporation, see http://www.zope.com).  In 2001, the Python Software
+# Foundation (PSF, see http://www.python.org/psf/) was formed, a
+# non-profit organization created specifically to own Python-related
+# Intellectual Property.  Zope Corporation is a sponsoring member of
+# the PSF.
+#
+# All Python releases are Open Source (see http://www.opensource.org for
+# the Open Source Definition).  Historically, most, but not all, Python
+# releases have also been GPL-compatible; the table below summarizes
+# the various releases.
+#
+# Release         Derived     Year        Owner       GPL-
+#                 from                                compatible? (1)
+#
+# 0.9.0 thru 1.2              1991-1995   CWI         yes
+# 1.3 thru 1.5.2  1.2         1995-1999   CNRI        yes
+# 1.6             1.5.2       2000        CNRI        no
+# 2.0             1.6         2000        BeOpen.com  no
+# 1.6.1           1.6         2001        CNRI        yes (2)
+# 2.1             2.0+1.6.1   2001        PSF         no
+# 2.0.1           2.0+1.6.1   2001        PSF         yes
+# 2.1.1           2.1+2.0.1   2001        PSF         yes
+# 2.2             2.1.1       2001        PSF         yes
+# 2.1.2           2.1.1       2002        PSF         yes
+# 2.1.3           2.1.2       2002        PSF         yes
+# 2.2.1           2.2         2002        PSF         yes
+# 2.2.2           2.2.1       2002        PSF         yes
+# 2.2.3           2.2.2       2003        PSF         yes
+# 2.3             2.2.2       2002-2003   PSF         yes
+# 2.3.1           2.3         2002-2003   PSF         yes
+# 2.3.2           2.3.1       2002-2003   PSF         yes
+# 2.3.3           2.3.2       2002-2003   PSF         yes
+# 2.3.4           2.3.3       2004        PSF         yes
+# 2.3.5           2.3.4       2005        PSF         yes
+# 2.4             2.3         2004        PSF         yes
+# 2.4.1           2.4         2005        PSF         yes
+# 2.4.2           2.4.1       2005        PSF         yes
+# 2.4.3           2.4.2       2006        PSF         yes
+# 2.4.4           2.4.3       2006        PSF         yes
+# 2.5             2.4         2006        PSF         yes
+# 2.5.1           2.5         2007        PSF         yes
+# 2.5.2           2.5.1       2008        PSF         yes
+# 2.5.3           2.5.2       2008        PSF         yes
+# 2.6             2.5         2008        PSF         yes
+# 2.6.1           2.6         2008        PSF         yes
+# 2.6.2           2.6.1       2009        PSF         yes
+# 2.6.3           2.6.2       2009        PSF         yes
+# 2.6.4           2.6.3       2009        PSF         yes
+# 2.6.5           2.6.4       2010        PSF         yes
+# 2.7             2.6         2010        PSF         yes
+#
+# Footnotes:
+#
+# (1) GPL-compatible doesn't mean that we're distributing Python under
+# the GPL.  All Python licenses, unlike the GPL, let you distribute
+# a modified version without making your changes open source.  The
+# GPL-compatible licenses make it possible to combine Python with
+# other software that is released under the GPL; the others don't.
+#
+# (2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
+# because its license has a choice of law clause.  According to
+# CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
+# is "not incompatible" with the GPL.
+#
+# Thanks to the many outside volunteers who have worked under Guido's
+# direction to make these releases possible.
+#
+#
+# B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
+# ===============================================================
+#
+# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+# --------------------------------------------
+#
+# 1. This LICENSE AGREEMENT is between the Python Software Foundation
+# ("PSF"), and the Individual or Organization ("Licensee") accessing and
+# otherwise using this software ("Python") in source or binary form and
+# its associated documentation.
+#
+# 2. Subject to the terms and conditions of this License Agreement, PSF hereby
+# grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
+# analyze, test, perform and/or display publicly, prepare derivative works,
+# distribute, and otherwise use Python alone or in any derivative version,
+# provided, however, that PSF's License Agreement and PSF's notice of copyright,
+# i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+# 2011, 2012, 2013 Python Software Foundation; All Rights Reserved" are retained
+# in Python alone or in any derivative version prepared by Licensee.
+#
+# 3. In the event Licensee prepares a derivative work that is based on
+# or incorporates Python or any part thereof, and wants to make
+# the derivative work available to others as provided herein, then
+# Licensee hereby agrees to include in any such work a brief summary of
+# the changes made to Python.
+#
+# 4. PSF is making Python available to Licensee on an "AS IS"
+# basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 6. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 7. Nothing in this License Agreement shall be deemed to create any
+# relationship of agency, partnership, or joint venture between PSF and
+# Licensee.  This License Agreement does not grant permission to use PSF
+# trademarks or trade name in a trademark sense to endorse or promote
+# products or services of Licensee, or any third party.
+#
+# 8. By copying, installing or otherwise using Python, Licensee
+# agrees to be bound by the terms and conditions of this License
+# Agreement.
+#
+#
+# BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
+# -------------------------------------------
+#
+# BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
+#
+# 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
+# office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
+# Individual or Organization ("Licensee") accessing and otherwise using
+# this software in source or binary form and its associated
+# documentation ("the Software").
+#
+# 2. Subject to the terms and conditions of this BeOpen Python License
+# Agreement, BeOpen hereby grants Licensee a non-exclusive,
+# royalty-free, world-wide license to reproduce, analyze, test, perform
+# and/or display publicly, prepare derivative works, distribute, and
+# otherwise use the Software alone or in any derivative version,
+# provided, however, that the BeOpen Python License is retained in the
+# Software, alone or in any derivative version prepared by Licensee.
+#
+# 3. BeOpen is making the Software available to Licensee on an "AS IS"
+# basis.  BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
+# SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
+# AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
+# DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 5. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 6. This License Agreement shall be governed by and interpreted in all
+# respects by the law of the State of California, excluding conflict of
+# law provisions.  Nothing in this License Agreement shall be deemed to
+# create any relationship of agency, partnership, or joint venture
+# between BeOpen and Licensee.  This License Agreement does not grant
+# permission to use BeOpen trademarks or trade names in a trademark
+# sense to endorse or promote products or services of Licensee, or any
+# third party.  As an exception, the "BeOpen Python" logos available at
+# http://www.pythonlabs.com/logos.html may be used according to the
+# permissions granted on that web page.
+#
+# 7. By copying, installing or otherwise using the software, Licensee
+# agrees to be bound by the terms and conditions of this License
+# Agreement.
+#
+#
+# CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
+# ---------------------------------------
+#
+# 1. This LICENSE AGREEMENT is between the Corporation for National
+# Research Initiatives, having an office at 1895 Preston White Drive,
+# Reston, VA 20191 ("CNRI"), and the Individual or Organization
+# ("Licensee") accessing and otherwise using Python 1.6.1 software in
+# source or binary form and its associated documentation.
+#
+# 2. Subject to the terms and conditions of this License Agreement, CNRI
+# hereby grants Licensee a nonexclusive, royalty-free, world-wide
+# license to reproduce, analyze, test, perform and/or display publicly,
+# prepare derivative works, distribute, and otherwise use Python 1.6.1
+# alone or in any derivative version, provided, however, that CNRI's
+# License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
+# 1995-2001 Corporation for National Research Initiatives; All Rights
+# Reserved" are retained in Python 1.6.1 alone or in any derivative
+# version prepared by Licensee.  Alternately, in lieu of CNRI's License
+# Agreement, Licensee may substitute the following text (omitting the
+# quotes): "Python 1.6.1 is made available subject to the terms and
+# conditions in CNRI's License Agreement.  This Agreement together with
+# Python 1.6.1 may be located on the Internet using the following
+# unique, persistent identifier (known as a handle): 1895.22/1013.  This
+# Agreement may also be obtained from a proxy server on the Internet
+# using the following URL: http://hdl.handle.net/1895.22/1013".
+#
+# 3. In the event Licensee prepares a derivative work that is based on
+# or incorporates Python 1.6.1 or any part thereof, and wants to make
+# the derivative work available to others as provided herein, then
+# Licensee hereby agrees to include in any such work a brief summary of
+# the changes made to Python 1.6.1.
+#
+# 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
+# basis.  CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
+# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
+# INFRINGE ANY THIRD PARTY RIGHTS.
+#
+# 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+# 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
+# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+#
+# 6. This License Agreement will automatically terminate upon a material
+# breach of its terms and conditions.
+#
+# 7. This License Agreement shall be governed by the federal
+# intellectual property law of the United States, including without
+# limitation the federal copyright law, and, to the extent such
+# U.S. federal law does not apply, by the law of the Commonwealth of
+# Virginia, excluding Virginia's conflict of law provisions.
+# Notwithstanding the foregoing, with regard to derivative works based
+# on Python 1.6.1 that incorporate non-separable material that was
+# previously distributed under the GNU General Public License (GPL), the
+# law of the Commonwealth of Virginia shall govern this License
+# Agreement only as to issues arising under or with respect to
+# Paragraphs 4, 5, and 7 of this License Agreement.  Nothing in this
+# License Agreement shall be deemed to create any relationship of
+# agency, partnership, or joint venture between CNRI and Licensee.  This
+# License Agreement does not grant permission to use CNRI trademarks or
+# trade name in a trademark sense to endorse or promote products or
+# services of Licensee, or any third party.
+#
+# 8. By clicking on the "ACCEPT" button where indicated, or by copying,
+# installing or otherwise using Python 1.6.1, Licensee agrees to be
+# bound by the terms and conditions of this License Agreement.
+#
+# ACCEPT
+#
+#
+# CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
+# --------------------------------------------------
+#
+# Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
+# The Netherlands.  All rights reserved.
+#
+# Permission to use, copy, modify, and distribute this software and its
+# documentation for any purpose and without fee is hereby granted,
+# provided that the above copyright notice appear in all copies and that
+# both that copyright notice and this permission notice appear in
+# supporting documentation, and that the name of Stichting Mathematisch
+# Centrum or CWI not be used in advertising or publicity pertaining to
+# distribution of the software without specific, written prior
+# permission.
+#
+# STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
+# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+# FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
+# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+"""Heap queue algorithm (a.k.a. priority queue).
+
+Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
+all k, counting elements from 0.  For the sake of comparison,
+non-existing elements are considered to be infinite.  The interesting
+property of a heap is that a[0] is always its smallest element.
+
+Usage:
+
+heap = []            # creates an empty heap
+heappush(heap, item) # pushes a new item on the heap
+item = heappop(heap) # pops the smallest item from the heap
+item = heap[0]       # smallest item on the heap without popping it
+heapify(x)           # transforms list into a heap, in-place, in linear time
+item = heapreplace(heap, item) # pops and returns smallest item, and adds
+                               # new item; the heap size is unchanged
+
+Our API differs from textbook heap algorithms as follows:
+
+- We use 0-based indexing.  This makes the relationship between the
+  index for a node and the indexes for its children slightly less
+  obvious, but is more suitable since Python uses 0-based indexing.
+
+- Our heappop() method returns the smallest item, not the largest.
+
+These two make it possible to view the heap as a regular Python list
+without surprises: heap[0] is the smallest item, and heap.sort()
+maintains the heap invariant!
+"""
+
+# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
+
+__about__ = """Heap queues
+
+[explanation by François Pinard]
+
+Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
+all k, counting elements from 0.  For the sake of comparison,
+non-existing elements are considered to be infinite.  The interesting
+property of a heap is that a[0] is always its smallest element.
+
+The strange invariant above is meant to be an efficient memory
+representation for a tournament.  The numbers below are `k', not a[k]:
+
+                                   0
+
+                  1                                 2
+
+          3               4                5               6
+
+      7       8       9       10      11      12      13      14
+
+    15 16   17 18   19 20   21 22   23 24   25 26   27 28   29 30
+
+
+In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'.  In
+the usual binary tournament we see in sports, each cell is the winner
+over the two cells it tops, and we can trace the winner down the tree
+to see all opponents s/he had.  However, in many computer applications
+of such tournaments, we do not need to trace the history of a winner.
+To be more memory efficient, when a winner is promoted, we try to
+replace it by something else at a lower level, and the rule becomes
+that a cell and the two cells it tops contain three different items,
+but the top cell "wins" over the two topped cells.
+
+If this heap invariant is protected at all times, index 0 is clearly
+the overall winner.  The simplest algorithmic way to remove it and
+find the "next" winner is to move some loser (let's say cell 30 in the
+diagram above) into the 0 position, and then percolate this new 0 down
+the tree, exchanging values, until the invariant is re-established.
+This is clearly logarithmic on the total number of items in the tree.
+By iterating over all items, you get an O(n ln n) sort.
+
+A nice feature of this sort is that you can efficiently insert new
+items while the sort is going on, provided that the inserted items are
+not "better" than the last 0'th element you extracted.  This is
+especially useful in simulation contexts, where the tree holds all
+incoming events, and the "win" condition means the smallest scheduled
+time.  When an event schedules other events for execution, they are
+scheduled into the future, so they can easily go into the heap.  So, a
+heap is a good structure for implementing schedulers (this is what I
+used for my MIDI sequencer :-).
+
+Various structures for implementing schedulers have been extensively
+studied, and heaps are good for this, as they are reasonably speedy,
+the speed is almost constant, and the worst case is not much different
+than the average case.  However, there are other representations which
+are more efficient overall, yet the worst cases might be terrible.
+
+Heaps are also very useful in big disk sorts.  You most probably all
+know that a big sort implies producing "runs" (which are pre-sorted
+sequences, whose size is usually related to the amount of CPU memory),
+followed by merging passes for these runs; the merging is often
+very cleverly organised[1].  It is very important that the initial
+sort produces the longest runs possible.  Tournaments are a good way
+to achieve that.  If, using all the memory available to hold a
+tournament, you replace and percolate items that happen to fit the
+current run, you'll produce runs which are twice the size of the
+memory for random input, and much better for input that is already
+
+Moreover, if you output the 0'th item on disk and get an input which
+may not fit in the current tournament (because the value "wins" over
+the last output value), it cannot fit in the heap, so the size of the
+heap decreases.  The freed memory could be cleverly reused immediately
+for progressively building a second heap, which grows at exactly the
+same rate the first heap is melting.  When the first heap completely
+vanishes, you switch heaps and start a new run.  Clever and quite
+effective!
+
+In a word, heaps are useful memory structures to know.  I use them in
+a few applications, and I think it is good to keep a `heap' module
+around. :-)
+
+--------------------
+[1] The disk balancing algorithms which are current, nowadays, are
+more annoying than clever, and this is a consequence of the seeking
+capabilities of the disks.  On devices which cannot seek, like big
+tape drives, the story was quite different, and one had to be very
+clever to ensure (far in advance) that each tape movement will be the
+most effective possible (that is, will best contribute to
+"progressing" the merge).  Some tapes were even able to read
+backwards, and this was also used to avoid the rewinding time.
+Believe me, real good tape sorts were quite spectacular to watch!
+From all times, sorting has always been a Great Art! :-)
+"""
+
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
+           'nlargest', 'nsmallest', 'heappushpop']
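+
+# Illustrative sketch (not part of the original module): a tiny, self-contained
+# walk-through of the public API described in the docstring above.  The helper
+# is only defined, never called, so importing this module is unaffected.
+def _usage_sketch():
+    """Return True if the basic heap operations behave as documented."""
+    heap = []
+    for value in [5, 1, 4, 1, 5, 9, 2, 6]:
+        heappush(heap, value)            # push items one at a time
+    assert heap[0] == min(heap)          # heap[0] is always the smallest item
+    drained = [heappop(heap) for _ in range(len(heap))]
+    assert drained == sorted(drained)    # popping everything yields a sorted list
+    data = [3, 2, 1]
+    heapify(data)                        # in-place, linear-time heapification
+    assert data[0] == 1
+    return True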
+
+def heappush(heap, item):
+    """Push item onto heap, maintaining the heap invariant."""
+    heap.append(item)
+    _siftdown(heap, 0, len(heap)-1)
+
+def heappop(heap):
+    """Pop the smallest item off the heap, maintaining the heap invariant."""
+    lastelt = heap.pop()    # raises appropriate IndexError if heap is empty
+    if heap:
+        returnitem = heap[0]
+        heap[0] = lastelt
+        _siftup(heap, 0)
+        return returnitem
+    return lastelt
+
+def heapreplace(heap, item):
+    """Pop and return the current smallest value, and add the new item.
+
+    This is more efficient than heappop() followed by heappush(), and can be
+    more appropriate when using a fixed-size heap.  Note that the value
+    returned may be larger than item!  That constrains reasonable uses of
+    this routine unless written as part of a conditional replacement:
+
+        if item > heap[0]:
+            item = heapreplace(heap, item)
+    """
+    returnitem = heap[0]    # raises appropriate IndexError if heap is empty
+    heap[0] = item
+    _siftup(heap, 0)
+    return returnitem
+
+def heappushpop(heap, item):
+    """Fast version of a heappush followed by a heappop."""
+    if heap and heap[0] < item:
+        item, heap[0] = heap[0], item
+        _siftup(heap, 0)
+    return item
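+
+# Illustrative sketch (not part of the original module): the conditional
+# replacement pattern from the heapreplace() docstring, expressed with
+# heappushpop().  A min-heap of size k, updated with every new item, ends up
+# holding the k largest items seen.  Defined only, never called; assumes the
+# iterable yields at least k items.
+def _k_largest_sketch(iterable, k):
+    """Return the k largest items of iterable, in arbitrary order."""
+    it = iter(iterable)
+    heap = [next(it) for _ in range(k)]
+    heapify(heap)
+    for item in it:
+        # heappushpop() discards `item` immediately unless it beats the current
+        # minimum, so the heap never grows beyond k elements.
+        heappushpop(heap, item)
+    return heap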
+
+def heapify(x):
+    """Transform list into a heap, in-place, in O(len(x)) time."""
+    n = len(x)
+    # Transform bottom-up.  The largest index there's any point to looking at
+    # is the largest with a child index in-range, so must have 2*i + 1 < n,
+    # or i < (n-1)/2.  If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so
+    # j-1 is the largest, which is n//2 - 1.  If n is odd = 2*j+1, this is
+    # (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.
+    for i in reversed(range(n//2)):
+        _siftup(x, i)
+
+def _heappop_max(heap):
+    """Maxheap version of a heappop."""
+    lastelt = heap.pop()    # raises appropriate IndexError if heap is empty
+    if heap:
+        returnitem = heap[0]
+        heap[0] = lastelt
+        _siftup_max(heap, 0)
+        return returnitem
+    return lastelt
+
+def _heapreplace_max(heap, item):
+    """Maxheap version of a heappop followed by a heappush."""
+    returnitem = heap[0]    # raises appropriate IndexError if heap is empty
+    heap[0] = item
+    _siftup_max(heap, 0)
+    return returnitem
+
+def _heapify_max(x):
+    """Transform list into a maxheap, in-place, in O(len(x)) time."""
+    n = len(x)
+    for i in reversed(range(n//2)):
+        _siftup_max(x, i)
+
+# 'heap' is a heap at all indices >= startpos, except possibly for pos.  pos
+# is the index of a leaf with a possibly out-of-order value.  Restore the
+# heap invariant.
+def _siftdown(heap, startpos, pos):
+    newitem = heap[pos]
+    # Follow the path to the root, moving parents down until finding a place
+    # newitem fits.
+    while pos > startpos:
+        parentpos = (pos - 1) >> 1
+        parent = heap[parentpos]
+        if newitem < parent:
+            heap[pos] = parent
+            pos = parentpos
+            continue
+        break
+    heap[pos] = newitem
+
+# The child indices of heap index pos are already heaps, and we want to make
+# a heap at index pos too.  We do this by bubbling the smaller child of
+# pos up (and so on with that child's children, etc) until hitting a leaf,
+# then using _siftdown to move the oddball originally at index pos into place.
+#
+# We *could* break out of the loop as soon as we find a pos where newitem <=
+# both its children, but it turns out that's not a good idea, even though
+# many books write the algorithm that way.  During a heap pop, the last array
+# element is sifted in, and that tends to be large, so that comparing it
+# against values starting from the root usually doesn't pay (= usually doesn't
+# get us out of the loop early).  See Knuth, Volume 3, where this is
+# explained and quantified in an exercise.
+#
+# Cutting the # of comparisons is important, since these routines have no
+# way to extract "the priority" from an array element, so that intelligence
+# is likely to be hiding in custom comparison methods, or in array elements
+# storing (priority, record) tuples.  Comparisons are thus potentially
+# expensive.
+#
+# On random arrays of length 1000, making this change cut the number of
+# comparisons made by heapify() a little, and those made by exhaustive
+# heappop() a lot, in accord with theory.  Here are typical results from 3
+# runs (3 just to demonstrate how small the variance is):
+#
+# Compares needed by heapify     Compares needed by 1000 heappops
+# --------------------------     --------------------------------
+# 1837 cut to 1663               14996 cut to 8680
+# 1855 cut to 1659               14966 cut to 8678
+# 1847 cut to 1660               15024 cut to 8703
+#
+# Building the heap by using heappush() 1000 times instead required
+# 2198, 2148, and 2219 compares:  heapify() is more efficient, when
+# you can use it.
+#
+# The total compares needed by list.sort() on the same lists were 8627,
+# 8627, and 8632 (this should be compared to the sum of heapify() and
+# heappop() compares):  list.sort() is (unsurprisingly!) more efficient
+# for sorting.
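+
+# Illustrative sketch (not part of the original module): one way to reproduce
+# comparison counts like the ones quoted above, using a wrapper type whose
+# __lt__ increments a shared counter.  Exact numbers vary with the random
+# draws.  Defined only, never called.
+def _count_compares_sketch(n=1000):
+    """Return (heapify_compares, heappop_compares) for a random list of n items."""
+    import random
+    counts = [0]
+
+    class _Counted(object):
+        def __init__(self, value):
+            self.value = value
+
+        def __lt__(self, other):
+            counts[0] += 1
+            return self.value < other.value
+
+    data = [_Counted(random.random()) for _ in range(n)]
+    heapify(data)
+    heapify_compares = counts[0]
+    while data:
+        heappop(data)
+    return heapify_compares, counts[0] - heapify_compares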
+
+def _siftup(heap, pos):
+    endpos = len(heap)
+    startpos = pos
+    newitem = heap[pos]
+    # Bubble up the smaller child until hitting a leaf.
+    childpos = 2*pos + 1    # leftmost child position
+    while childpos < endpos:
+        # Set childpos to index of smaller child.
+        rightpos = childpos + 1
+        if rightpos < endpos and not heap[childpos] < heap[rightpos]:
+            childpos = rightpos
+        # Move the smaller child up.
+        heap[pos] = heap[childpos]
+        pos = childpos
+        childpos = 2*pos + 1
+    # The leaf at pos is empty now.  Put newitem there, and bubble it up
+    # to its final resting place (by sifting its parents down).
+    heap[pos] = newitem
+    _siftdown(heap, startpos, pos)
+
+def _siftdown_max(heap, startpos, pos):
+    'Maxheap variant of _siftdown'
+    newitem = heap[pos]
+    # Follow the path to the root, moving parents down until finding a place
+    # newitem fits.
+    while pos > startpos:
+        parentpos = (pos - 1) >> 1
+        parent = heap[parentpos]
+        if parent < newitem:
+            heap[pos] = parent
+            pos = parentpos
+            continue
+        break
+    heap[pos] = newitem
+
+def _siftup_max(heap, pos):
+    'Maxheap variant of _siftup'
+    endpos = len(heap)
+    startpos = pos
+    newitem = heap[pos]
+    # Bubble up the larger child until hitting a leaf.
+    childpos = 2*pos + 1    # leftmost child position
+    while childpos < endpos:
+        # Set childpos to index of larger child.
+        rightpos = childpos + 1
+        if rightpos < endpos and not heap[rightpos] < heap[childpos]:
+            childpos = rightpos
+        # Move the larger child up.
+        heap[pos] = heap[childpos]
+        pos = childpos
+        childpos = 2*pos + 1
+    # The leaf at pos is empty now.  Put newitem there, and bubble it up
+    # to its final resting place (by sifting its parents down).
+    heap[pos] = newitem
+    _siftdown_max(heap, startpos, pos)
+
+def merge(iterables, key=None, reverse=False):
+    '''Merge multiple sorted inputs into a single sorted output.
+
+    Similar to sorted(itertools.chain(*iterables)) but returns a generator,
+    does not pull the data into memory all at once, and assumes that each of
+    the input streams is already sorted (smallest to largest).
+
+    >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
+    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
+
+    If *key* is not None, applies a key function to each element to determine
+    its sort order.
+
+    >>> list(merge(['dog', 'horse'], ['cat', 'fish', 'kangaroo'], key=len))
+    ['dog', 'cat', 'fish', 'horse', 'kangaroo']
+
+    '''
+
+    h = []
+    h_append = h.append
+
+    if reverse:
+        _heapify = _heapify_max
+        _heappop = _heappop_max
+        _heapreplace = _heapreplace_max
+        direction = -1
+    else:
+        _heapify = heapify
+        _heappop = heappop
+        _heapreplace = heapreplace
+        direction = 1
+
+    if key is None:
+        for order, it in enumerate(map(iter, iterables)):
+            try:
+                h_append([next(it), order * direction, it])
+            except StopIteration:
+                pass
+        _heapify(h)
+        while len(h) > 1:
+            try:
+                while True:
+                    value, order, it = s = h[0]
+                    yield value
+                    s[0] = next(it)           # raises StopIteration when exhausted
+                    _heapreplace(h, s)      # restore heap condition
+            except StopIteration:
+                _heappop(h)                 # remove empty iterator
+        if h:
+            # fast case when only a single iterator remains
+            value, order, it = h[0]
+            yield value
+            for value in it:
+                yield value
+        return
+
+    for order, it in enumerate(map(iter, iterables)):
+        try:
+            value = next(it)
+            h_append([key(value), order * direction, value, it])
+        except StopIteration:
+            pass
+    _heapify(h)
+    while len(h) > 1:
+        try:
+            while True:
+                key_value, order, value, it = s = h[0]
+                yield value
+                value = next(it)
+                s[0] = key(value)
+                s[2] = value
+                _heapreplace(h, s)
+        except StopIteration:
+            _heappop(h)
+    if h:
+        key_value, order, value, it = h[0]
+        yield value
+        for value in it:
+            yield value
+
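+# Illustrative sketch (not part of the original module): merge() with
+# reverse=True, which the doctests above do not show.  Each input must be
+# sorted from largest to smallest, and the merged output comes out the same
+# way.  Defined only, never called.
+def _merge_reverse_sketch():
+    """Return [9, 8, 6, 5, 2, 1], the descending merge of two descending inputs."""
+    return list(merge([9, 5, 1], [8, 6, 2], reverse=True))
+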
+
+# Algorithm notes for nlargest() and nsmallest()
+# ==============================================
+#
+# Make a single pass over the data while keeping the k most extreme values
+# in a heap.  Memory consumption is limited to keeping k values in a list.
+#
+# Measured performance for random inputs:
+#
+#                                   number of comparisons
+#    n inputs     k-extreme values  (average of 5 trials)   % more than min()
+# -------------   ----------------  ---------------------   -----------------
+#      1,000           100                  3,317               231.7%
+#     10,000           100                 14,046                40.5%
+#    100,000           100                105,749                 5.7%
+#  1,000,000           100              1,007,751                 0.8%
+# 10,000,000           100             10,009,401                 0.1%
+#
+# Theoretical number of comparisons for k smallest of n random inputs:
+#
+# Step   Comparisons                  Action
+# ----   --------------------------   ---------------------------
+#  1     1.66 * k                     heapify the first k-inputs
+#  2     n - k                        compare remaining elements to top of heap
+#  3     k * (1 + lg2(k)) * ln(n/k)   replace the topmost value on the heap
+#  4     k * lg2(k) - (k/2)           final sort of the k most extreme values
+#
+# Combining and simplifying for a rough estimate gives:
+#
+#        comparisons = n + k * (log(k, 2) * log(n/k) + log(k, 2) + log(n/k))
+#
+# Computing the number of comparisons for step 3:
+# -----------------------------------------------
+# * For the i-th new value from the iterable, the probability of being in the
+#   k most extreme values is k/i.  For example, the probability of the 101st
+#   value seen being in the 100 most extreme values is 100/101.
+# * If the value is a new extreme value, the cost of inserting it into the
+#   heap is 1 + log(k, 2).
+# * The probability times the cost gives:
+#            (k/i) * (1 + log(k, 2))
+# * Summing across the remaining n-k elements gives:
+#            sum((k/i) * (1 + log(k, 2)) for i in range(k+1, n+1))
+# * This reduces to:
+#            (H(n) - H(k)) * k * (1 + log(k, 2))
+# * Where H(n) is the n-th harmonic number estimated by:
+#            gamma = 0.5772156649
+#            H(n) = log(n, e) + gamma + 1 / (2 * n)
+#   http://en.wikipedia.org/wiki/Harmonic_series_(mathematics)#Rate_of_divergence
+# * Substituting the H(n) formula:
+#            comparisons = k * (1 + log(k, 2)) * (log(n/k, e) + (1/n - 1/k) / 2)
+#
+# Worst-case for step 3:
+# ----------------------
+# In the worst case, the input data is sorted in reverse order, so every new element
+# must be inserted in the heap:
+#
+#             comparisons = 1.66 * k + log(k, 2) * (n - k)
+#
+# Alternative Algorithms
+# ----------------------
+# Other algorithms were not used because they:
+# 1) Took much more auxiliary memory,
+# 2) Made multiple passes over the data, or
+# 3) Made more comparisons in common cases (small k, large n, semi-random input).
+# See the more detailed comparison of approaches at:
+# http://code.activestate.com/recipes/577573-compare-algorithms-for-heapqsmallest
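+
+# Illustrative sketch (not part of the original module): evaluating the rough
+# comparison estimate derived above.  For the table's (n, k) pairs the result
+# lands close to the measured averages (roughly 14,200 for n=10,000 and k=100,
+# versus 14,046 measured).  Defined only, never called.
+def _estimate_compares_sketch(n, k):
+    """Return the rough comparison estimate from the notes above."""
+    from math import log
+    ratio = float(n) / k
+    return n + k * (log(k, 2) * log(ratio) + log(k, 2) + log(ratio))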
+
+def nsmallest(n, iterable, key=None):
+    """Find the n smallest elements in a dataset.
+
+    Equivalent to:  sorted(iterable, key=key)[:n]
+    """
+
+    # Short-cut for n==1 is to use min()
+    if n == 1:
+        it = iter(iterable)
+        sentinel = object()
+        if key is None:
+            result = min(it, default=sentinel)
+        else:
+            result = min(it, default=sentinel, key=key)
+        return [] if result is sentinel else [result]
+
+    # When n>=size, it's faster to use sorted()
+    try:
+        size = len(iterable)
+    except (TypeError, AttributeError):
+        pass
+    else:
+        if n >= size:
+            return sorted(iterable, key=key)[:n]
+
+    # When key is None, use simpler decoration
+    if key is None:
+        it = iter(iterable)
+        # put the range(n) first so that zip() doesn't
+        # consume one too many elements from the iterator
+        result = [(elem, i) for i, elem in zip(range(n), it)]
+        if not result:
+            return result
+        _heapify_max(result)
+        top = result[0][0]
+        order = n
+        _heapreplace = _heapreplace_max
+        for elem in it:
+            if elem < top:
+                _heapreplace(result, (elem, order))
+                top = result[0][0]
+                order += 1
+        result.sort()
+        return [r[0] for r in result]
+
+    # General case, slowest method
+    it = iter(iterable)
+    result = [(key(elem), i, elem) for i, elem in zip(range(n), it)]
+    if not result:
+        return result
+    _heapify_max(result)
+    top = result[0][0]
+    order = n
+    _heapreplace = _heapreplace_max
+    for elem in it:
+        k = key(elem)
+        if k < top:
+            _heapreplace(result, (k, order, elem))
+            top = result[0][0]
+            order += 1
+    result.sort()
+    return [r[2] for r in result]
+
+def nlargest(n, iterable, key=None):
+    """Find the n largest elements in a dataset.
+
+    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
+    """
+
+    # Short-cut for n==1 is to use max()
+    if n == 1:
+        it = iter(iterable)
+        sentinel = object()
+        if key is None:
+            result = max(it, default=sentinel)
+        else:
+            result = max(it, default=sentinel, key=key)
+        return [] if result is sentinel else [result]
+
+    # When n>=size, it's faster to use sorted()
+    try:
+        size = len(iterable)
+    except (TypeError, AttributeError):
+        pass
+    else:
+        if n >= size:
+            return sorted(iterable, key=key, reverse=True)[:n]
+
+    # When key is None, use simpler decoration
+    if key is None:
+        it = iter(iterable)
+        result = [(elem, i) for i, elem in zip(range(0, -n, -1), it)]
+        if not result:
+            return result
+        heapify(result)
+        top = result[0][0]
+        order = -n
+        _heapreplace = heapreplace
+        for elem in it:
+            if top < elem:
+                _heapreplace(result, (elem, order))
+                top = result[0][0]
+                order -= 1
+        result.sort(reverse=True)
+        return [r[0] for r in result]
+
+    # General case, slowest method
+    it = iter(iterable)
+    result = [(key(elem), i, elem) for i, elem in zip(range(0, -n, -1), it)]
+    if not result:
+        return result
+    heapify(result)
+    top = result[0][0]
+    order = -n
+    _heapreplace = heapreplace
+    for elem in it:
+        k = key(elem)
+        if top < k:
+            _heapreplace(result, (k, order, elem))
+            top = result[0][0]
+            order -= 1
+    result.sort(reverse=True)
+    return [r[2] for r in result]
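+
+# Illustrative sketch (not part of the original module): nsmallest()/nlargest()
+# with a key function, mirroring the "Equivalent to sorted(...)" notes in the
+# docstrings above.  Defined only, never called.
+def _n_extreme_sketch():
+    """Return the two shortest and the two longest words from a small sample."""
+    words = ['kangaroo', 'cat', 'horse', 'ox', 'zebra']
+    shortest = nsmallest(2, words, key=len)     # ['ox', 'cat']
+    longest = nlargest(2, words, key=len)       # ['kangaroo', 'horse']
+    assert shortest == sorted(words, key=len)[:2]
+    assert longest == sorted(words, key=len, reverse=True)[:2]
+    return shortest, longest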
+
+# If available, use C implementation
+try:
+    from _heapq import *
+except ImportError:
+    pass
+try:
+    from _heapq import _heapreplace_max
+except ImportError:
+    pass
+try:
+    from _heapq import _heapify_max
+except ImportError:
+    pass
+try:
+    from _heapq import _heappop_max
+except ImportError:
+    pass
+
+
+if __name__ == "__main__":
+    import doctest
+    import sys
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/java_gateway.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/java_gateway.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8c5f801f89bb3fcf186bcf0ea407b85db1a30aa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/java_gateway.py
@@ -0,0 +1,201 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import atexit
+import os
+import sys
+import select
+import signal
+import shlex
+import shutil
+import socket
+import platform
+import tempfile
+import time
+from subprocess import Popen, PIPE
+
+if sys.version >= '3':
+    xrange = range
+
+from py4j.java_gateway import java_import, JavaGateway, JavaObject, GatewayParameters
+from pyspark.find_spark_home import _find_spark_home
+from pyspark.serializers import read_int, write_with_length, UTF8Deserializer
+from pyspark.util import _exception_message
+
+
+def launch_gateway(conf=None):
+    """
+    Launch a JVM gateway process (typically via spark-submit) and connect to it.
+
+    :param conf: Spark configuration passed to spark-submit
+    :return: a connected :class:`py4j.java_gateway.JavaGateway`
+    """
+    if "PYSPARK_GATEWAY_PORT" in os.environ:
+        gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
+        gateway_secret = os.environ["PYSPARK_GATEWAY_SECRET"]
+    else:
+        SPARK_HOME = _find_spark_home()
+        # Launch the Py4j gateway using Spark's run command so that we pick up the
+        # proper classpath and settings from spark-env.sh
+        on_windows = platform.system() == "Windows"
+        script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
+        command = [os.path.join(SPARK_HOME, script)]
+        if conf:
+            for k, v in conf.getAll():
+                command += ['--conf', '%s=%s' % (k, v)]
+        submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
+        if os.environ.get("SPARK_TESTING"):
+            submit_args = ' '.join([
+                "--conf spark.ui.enabled=false",
+                submit_args
+            ])
+        command = command + shlex.split(submit_args)
+
+        # Create a temporary directory where the gateway server should write the connection
+        # information.
+        conn_info_dir = tempfile.mkdtemp()
+        try:
+            fd, conn_info_file = tempfile.mkstemp(dir=conn_info_dir)
+            os.close(fd)
+            os.unlink(conn_info_file)
+
+            env = dict(os.environ)
+            env["_PYSPARK_DRIVER_CONN_INFO_PATH"] = conn_info_file
+
+            # Launch the Java gateway.
+            # We open a pipe to stdin so that the Java gateway can die when the pipe is broken
+            if not on_windows:
+                # Don't send ctrl-c / SIGINT to the Java gateway:
+                def preexec_func():
+                    signal.signal(signal.SIGINT, signal.SIG_IGN)
+                proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
+            else:
+                # preexec_fn not supported on Windows
+                proc = Popen(command, stdin=PIPE, env=env)
+
+            # Wait for the file to appear, or for the process to exit, whichever happens first.
+            while not proc.poll() and not os.path.isfile(conn_info_file):
+                time.sleep(0.1)
+
+            if not os.path.isfile(conn_info_file):
+                raise Exception("Java gateway process exited before sending its port number")
+
+            with open(conn_info_file, "rb") as info:
+                gateway_port = read_int(info)
+                gateway_secret = UTF8Deserializer().loads(info)
+        finally:
+            shutil.rmtree(conn_info_dir)
+
+        # In Windows, ensure the Java child processes do not linger after Python has exited.
+        # In UNIX-based systems, the child process can kill itself on broken pipe (i.e. when
+        # the parent process' stdin sends an EOF). In Windows, however, this is not possible
+        # because java.lang.Process reads directly from the parent process' stdin, contending
+        # with any opportunity to read an EOF from the parent. Note that this is only best
+        # effort and will not take effect if the python process is violently terminated.
+        if on_windows:
+            # In Windows, the child process here is "spark-submit.cmd", not the JVM itself
+            # (because the UNIX "exec" command is not available). This means we cannot simply
+            # call proc.kill(), which kills only the "spark-submit.cmd" process but not the
+            # JVMs. Instead, we use "taskkill" with the tree-kill option "/t" to terminate all
+            # child processes in the tree (http://technet.microsoft.com/en-us/library/bb491009.aspx)
+            def killChild():
+                Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid", str(proc.pid)])
+            atexit.register(killChild)
+
+    # Connect to the gateway
+    gateway = JavaGateway(
+        gateway_parameters=GatewayParameters(port=gateway_port, auth_token=gateway_secret,
+                                             auto_convert=True))
+
+    # Import the classes used by PySpark
+    java_import(gateway.jvm, "org.apache.spark.SparkConf")
+    java_import(gateway.jvm, "org.apache.spark.api.java.*")
+    java_import(gateway.jvm, "org.apache.spark.api.python.*")
+    java_import(gateway.jvm, "org.apache.spark.ml.python.*")
+    java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
+    # TODO(davies): move into sql
+    java_import(gateway.jvm, "org.apache.spark.sql.*")
+    java_import(gateway.jvm, "org.apache.spark.sql.api.python.*")
+    java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
+    java_import(gateway.jvm, "scala.Tuple2")
+
+    return gateway
+
+
+def _do_server_auth(conn, auth_secret):
+    """
+    Performs the authentication protocol defined by the SocketAuthHelper class on the given
+    file-like object 'conn'.
+    """
+    write_with_length(auth_secret.encode("utf-8"), conn)
+    conn.flush()
+    reply = UTF8Deserializer().loads(conn)
+    if reply != "ok":
+        conn.close()
+        raise Exception("Unexpected reply from iterator server.")
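+
+
+# Illustrative sketch (not part of this file originally): exercising the
+# handshake above against a toy in-process "server" built on
+# socket.socketpair() (assumed available, e.g. on POSIX systems).  The server
+# side mirrors what _do_server_auth() expects: read the length-prefixed secret
+# and answer with a length-prefixed "ok".  Defined only, never called.
+def _loopback_auth_sketch(secret="test-secret"):
+    """Run the client/server auth exchange over a local socket pair."""
+    client_sock, server_sock = socket.socketpair()
+    client = client_sock.makefile("rwb", 65536)
+    server = server_sock.makefile("rwb", 65536)
+    try:
+        write_with_length(secret.encode("utf-8"), client)   # client sends secret
+        client.flush()
+        assert UTF8Deserializer().loads(server) == secret    # server reads it
+        write_with_length("ok".encode("utf-8"), server)      # server approves
+        server.flush()
+        assert UTF8Deserializer().loads(client) == "ok"      # client reads reply
+    finally:
+        for f in (client, server, client_sock, server_sock):
+            f.close()
+    return True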
+
+
+def local_connect_and_auth(port, auth_secret):
+    """
+    Connect to the local host on the given port, authenticate with the server, and return a
+    (sockfile, sock) pair for that connection.  Handles both IPv4 and IPv6 and does some
+    error handling.
+
+    :param port: port to connect to on the local host
+    :param auth_secret: shared secret used to authenticate the connection
+    :return: a tuple of (sockfile, sock)
+    """
+    sock = None
+    errors = []
+    # Support for both IPv4 and IPv6.
+    # On most IPv6-ready systems, IPv6 will take precedence.
+    for res in socket.getaddrinfo("127.0.0.1", port, socket.AF_UNSPEC, socket.SOCK_STREAM):
+        af, socktype, proto, _, sa = res
+        try:
+            sock = socket.socket(af, socktype, proto)
+            sock.settimeout(15)
+            sock.connect(sa)
+            sockfile = sock.makefile("rwb", 65536)
+            _do_server_auth(sockfile, auth_secret)
+            return (sockfile, sock)
+        except socket.error as e:
+            emsg = _exception_message(e)
+            errors.append("tried to connect to %s, but an error occurred: %s" % (sa, emsg))
+            sock.close()
+            sock = None
+    else:
+        raise Exception("could not open socket: %s" % errors)
+
+
+def ensure_callback_server_started(gw):
+    """
+    Start callback server if not already started. The callback server is needed if the Java
+    driver process needs to callback into the Python driver process to execute Python code.
+    """
+
+    # getattr will fall back to the JVM, so we cannot test with hasattr()
+    if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
+        gw.callback_server_parameters.eager_load = True
+        gw.callback_server_parameters.daemonize = True
+        gw.callback_server_parameters.daemonize_connections = True
+        gw.callback_server_parameters.port = 0
+        gw.start_callback_server(gw.callback_server_parameters)
+        cbport = gw._callback_server.server_socket.getsockname()[1]
+        gw._callback_server.port = cbport
+        # gateway with real port
+        gw._python_proxy_port = gw._callback_server.port
+        # get the GatewayServer object in JVM by ID
+        jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
+        # update the port of CallbackClient with real port
+        jgws.resetCallbackClient(jgws.getCallbackClient().getAddress(), gw._python_proxy_port)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/join.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/join.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1f5362648f6e29d66faa3695ee3b1d3f6465bcb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/join.py
@@ -0,0 +1,113 @@
+"""
+Copyright (c) 2011, Douban Inc. <http://www.douban.com/>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+    * Neither the name of the Douban Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from pyspark.resultiterable import ResultIterable
+from functools import reduce
+
+
+def _do_python_join(rdd, other, numPartitions, dispatch):
+    # Tag values from the left RDD with 1 and values from the right RDD with 2,
+    # group both tagged streams by key, and let `dispatch` turn each group of
+    # tagged values into the joined records for that key.
+    vs = rdd.mapValues(lambda v: (1, v))
+    ws = other.mapValues(lambda v: (2, v))
+    return vs.union(ws).groupByKey(numPartitions).flatMapValues(lambda x: dispatch(x.__iter__()))
+
+
+def python_join(rdd, other, numPartitions):
+    def dispatch(seq):
+        vbuf, wbuf = [], []
+        for (n, v) in seq:
+            if n == 1:
+                vbuf.append(v)
+            elif n == 2:
+                wbuf.append(v)
+        return ((v, w) for v in vbuf for w in wbuf)
+    return _do_python_join(rdd, other, numPartitions, dispatch)
+
+
+def python_right_outer_join(rdd, other, numPartitions):
+    def dispatch(seq):
+        vbuf, wbuf = [], []
+        for (n, v) in seq:
+            if n == 1:
+                vbuf.append(v)
+            elif n == 2:
+                wbuf.append(v)
+        if not vbuf:
+            vbuf.append(None)
+        return ((v, w) for v in vbuf for w in wbuf)
+    return _do_python_join(rdd, other, numPartitions, dispatch)
+
+
+def python_left_outer_join(rdd, other, numPartitions):
+    def dispatch(seq):
+        vbuf, wbuf = [], []
+        for (n, v) in seq:
+            if n == 1:
+                vbuf.append(v)
+            elif n == 2:
+                wbuf.append(v)
+        if not wbuf:
+            wbuf.append(None)
+        return ((v, w) for v in vbuf for w in wbuf)
+    return _do_python_join(rdd, other, numPartitions, dispatch)
+
+
+def python_full_outer_join(rdd, other, numPartitions):
+    def dispatch(seq):
+        vbuf, wbuf = [], []
+        for (n, v) in seq:
+            if n == 1:
+                vbuf.append(v)
+            elif n == 2:
+                wbuf.append(v)
+        if not vbuf:
+            vbuf.append(None)
+        if not wbuf:
+            wbuf.append(None)
+        return ((v, w) for v in vbuf for w in wbuf)
+    return _do_python_join(rdd, other, numPartitions, dispatch)
+
+
+def python_cogroup(rdds, numPartitions):
+    def make_mapper(i):
+        return lambda v: (i, v)
+    vrdds = [rdd.mapValues(make_mapper(i)) for i, rdd in enumerate(rdds)]
+    union_vrdds = reduce(lambda acc, other: acc.union(other), vrdds)
+    rdd_len = len(vrdds)
+
+    def dispatch(seq):
+        bufs = [[] for _ in range(rdd_len)]
+        for n, v in seq:
+            bufs[n].append(v)
+        return tuple(ResultIterable(vs) for vs in bufs)
+
+    return union_vrdds.groupByKey(numPartitions).mapValues(dispatch)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d99a25390db1538b057a9b9710e7145055dc0981
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/__init__.py
@@ -0,0 +1,31 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+DataFrame-based machine learning APIs to let users quickly assemble and configure practical
+machine learning pipelines.
+"""
+from pyspark.ml.base import Estimator, Model, Transformer, UnaryTransformer
+from pyspark.ml.pipeline import Pipeline, PipelineModel
+from pyspark.ml import classification, clustering, evaluation, feature, fpm, \
+    image, pipeline, recommendation, regression, stat, tuning, util, linalg, param
+
+__all__ = [
+    "Transformer", "UnaryTransformer", "Estimator", "Model", "Pipeline", "PipelineModel",
+    "classification", "clustering", "evaluation", "feature", "fpm", "image",
+    "recommendation", "regression", "stat", "tuning", "util", "linalg", "param",
+]
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/base.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4470b5bf29006a7ceee57952bc6c81faf49d85e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/base.py
@@ -0,0 +1,237 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from abc import ABCMeta, abstractmethod
+
+import copy
+import threading
+
+from pyspark import since
+from pyspark.ml.param.shared import *
+from pyspark.ml.common import inherit_doc
+from pyspark.sql.functions import udf
+from pyspark.sql.types import StructField, StructType
+
+
+class _FitMultipleIterator(object):
+    """
+    Used by the default implementation of Estimator.fitMultiple to produce models in a
+    thread-safe iterator.  This class handles the simple case of fitMultiple where each
+    param map should be fit independently.
+
+    :param fitSingleModel: Function: (int => Model) which fits an estimator to a dataset.
+        `fitSingleModel` may be called up to `numModels` times, with a unique index each time.
+        Each call to `fitSingleModel` with an index should return the Model associated with
+        that index.
+    :param numModels: Number of models this iterator should produce.
+
+    See Estimator.fitMultiple for more info.
+    """
+    def __init__(self, fitSingleModel, numModels):
+        """Store the model-fitting callable and the number of models to produce."""
+        self.fitSingleModel = fitSingleModel
+        self.numModel = numModels
+        self.counter = 0
+        self.lock = threading.Lock()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        with self.lock:
+            index = self.counter
+            if index >= self.numModel:
+                raise StopIteration("No models remaining.")
+            self.counter += 1
+        return index, self.fitSingleModel(index)
+
+    def next(self):
+        """For python2 compatibility."""
+        return self.__next__()
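+
+
+# Illustrative sketch (not part of this file originally): draining a
+# _FitMultipleIterator from several threads.  The lambda standing in for
+# fitSingleModel is a toy stand-in, not a real model-fitting function.
+# Defined only, never called.
+def _fit_multiple_iterator_sketch():
+    """Return the (index, model) pairs produced by a toy iterator, sorted by index."""
+    from multiprocessing.pool import ThreadPool
+    modelIter = _FitMultipleIterator(lambda index: "model-%d" % index, 4)
+    pool = ThreadPool(2)
+    try:
+        # Each worker repeatedly calls next(); the internal lock guarantees
+        # every index is handed out exactly once.
+        return sorted(pool.imap_unordered(lambda _: next(modelIter), range(4)))
+    finally:
+        pool.close()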
+
+
+@inherit_doc
+class Estimator(Params):
+    """
+    Abstract class for estimators that fit models to data.
+
+    .. versionadded:: 1.3.0
+    """
+
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
+    def _fit(self, dataset):
+        """
+        Fits a model to the input dataset. This is called by the default implementation of fit.
+
+        :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame`
+        :returns: fitted model
+        """
+        raise NotImplementedError()
+
+    @since("2.3.0")
+    def fitMultiple(self, dataset, paramMaps):
+        """
+        Fits a model to the input dataset for each param map in `paramMaps`.
+
+        :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame`.
+        :param paramMaps: A Sequence of param maps.
+        :return: A thread safe iterable which contains one model for each param map. Each
+                 call to `next(modelIterator)` will return `(index, model)` where model was fit
+                 using `paramMaps[index]`. `index` values may not be sequential.
+
+        .. note:: DeveloperApi
+        .. note:: Experimental
+        """
+        estimator = self.copy()
+
+        def fitSingleModel(index):
+            return estimator.fit(dataset, paramMaps[index])
+
+        return _FitMultipleIterator(fitSingleModel, len(paramMaps))
+
+    @since("1.3.0")
+    def fit(self, dataset, params=None):
+        """
+        Fits a model to the input dataset with optional parameters.
+
+        :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame`
+        :param params: an optional param map that overrides embedded params. If a list/tuple of
+                       param maps is given, this calls fit on each param map and returns a list of
+                       models.
+        :returns: fitted model(s)
+        """
+        if params is None:
+            params = dict()
+        if isinstance(params, (list, tuple)):
+            models = [None] * len(params)
+            for index, model in self.fitMultiple(dataset, params):
+                models[index] = model
+            return models
+        elif isinstance(params, dict):
+            if params:
+                return self.copy(params)._fit(dataset)
+            else:
+                return self._fit(dataset)
+        else:
+            raise ValueError("Params must be either a param map or a list/tuple of param maps, "
+                             "but got %s." % type(params))
+
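+
+# Illustrative sketch (not part of this file originally): how the `params`
+# argument of fit() behaves with a list of param maps.  Assumes the caller
+# already has a training DataFrame with "label" and "features" columns;
+# LogisticRegression is just one possible Estimator.  Defined only, never
+# called.
+def _fit_param_maps_sketch(training):
+    """Fit one model per param map and return the list of fitted models."""
+    from pyspark.ml.classification import LogisticRegression
+    lr = LogisticRegression(maxIter=10)
+    param_maps = [{lr.regParam: 0.01}, {lr.regParam: 0.1}]
+    # A list/tuple of param maps is routed through fitMultiple() and yields
+    # one fitted model per map, in the same order as param_maps.
+    return lr.fit(training, param_maps)
+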
+
+@inherit_doc
+class Transformer(Params):
+    """
+    Abstract class for transformers that transform one dataset into another.
+
+    .. versionadded:: 1.3.0
+    """
+
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
+    def _transform(self, dataset):
+        """
+        Transforms the input dataset.
+
+        :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame`
+        :returns: transformed dataset
+        """
+        raise NotImplementedError()
+
+    @since("1.3.0")
+    def transform(self, dataset, params=None):
+        """
+        Transforms the input dataset with optional parameters.
+
+        :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame`
+        :param params: an optional param map that overrides embedded params.
+        :returns: transformed dataset
+        """
+        if params is None:
+            params = dict()
+        if isinstance(params, dict):
+            if params:
+                return self.copy(params)._transform(dataset)
+            else:
+                return self._transform(dataset)
+        else:
+            raise ValueError("Params must be a param map but got %s." % type(params))
+
+
+@inherit_doc
+class Model(Transformer):
+    """
+    Abstract class for models that are fitted by estimators.
+
+    .. versionadded:: 1.4.0
+    """
+
+    __metaclass__ = ABCMeta
+
+
+@inherit_doc
+class UnaryTransformer(HasInputCol, HasOutputCol, Transformer):
+    """
+    Abstract class for transformers that take one input column, apply a transformation,
+    and output the result as a new column.
+
+    .. versionadded:: 2.3.0
+    """
+
+    @abstractmethod
+    def createTransformFunc(self):
+        """
+        Creates the transform function using the given param map.  The input param map already
+        takes into account the embedded param map, so the param values should be determined
+        solely by the input param map.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def outputDataType(self):
+        """
+        Returns the data type of the output column.
+        """
+        raise NotImplementedError()
+
+    @abstractmethod
+    def validateInputType(self, inputType):
+        """
+        Validates the input type, throwing an exception if it is invalid.
+        """
+        raise NotImplementedError()
+
+    def transformSchema(self, schema):
+        inputType = schema[self.getInputCol()].dataType
+        self.validateInputType(inputType)
+        if self.getOutputCol() in schema.names:
+            raise ValueError("Output column %s already exists." % self.getOutputCol())
+        outputFields = copy.copy(schema.fields)
+        outputFields.append(StructField(self.getOutputCol(),
+                                        self.outputDataType(),
+                                        nullable=False))
+        return StructType(outputFields)
+
+    def _transform(self, dataset):
+        self.transformSchema(dataset.schema)
+        transformUDF = udf(self.createTransformFunc(), self.outputDataType())
+        transformedDataset = dataset.withColumn(self.getOutputCol(),
+                                                transformUDF(dataset[self.getInputCol()]))
+        return transformedDataset
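+
+
+# A minimal sketch of a concrete UnaryTransformer (illustrative only, not part
+# of the pyspark API): it squares a numeric input column. The class name and
+# behaviour are made up for illustration. Usage, assuming a DataFrame ``df``
+# with a numeric column "x":
+#
+#     _SquareTransformerSketch().setInputCol("x").setOutputCol("x2").transform(df)
+class _SquareTransformerSketch(UnaryTransformer):
+    """Squares the values of a numeric input column (illustrative sketch)."""
+
+    def createTransformFunc(self):
+        # The returned function is wrapped in a udf by UnaryTransformer._transform.
+        return lambda value: None if value is None else float(value) ** 2
+
+    def outputDataType(self):
+        # Imported locally to keep this sketch self-contained.
+        from pyspark.sql.types import DoubleType
+        return DoubleType()
+
+    def validateInputType(self, inputType):
+        from pyspark.sql.types import NumericType
+        if not isinstance(inputType, NumericType):
+            raise TypeError("Expected a numeric input column but got %s." % inputType)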
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/classification.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce028512357f2b651e6503e0a0ff602e3e5d3e4c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/classification.py
@@ -0,0 +1,2090 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import operator
+import sys
+from multiprocessing.pool import ThreadPool
+
+from pyspark import since, keyword_only
+from pyspark.ml import Estimator, Model
+from pyspark.ml.param.shared import *
+from pyspark.ml.regression import DecisionTreeModel, DecisionTreeRegressionModel, \
+    RandomForestParams, TreeEnsembleModel, TreeEnsembleParams
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams
+from pyspark.ml.wrapper import JavaWrapper
+from pyspark.ml.common import inherit_doc, _java2py, _py2java
+from pyspark.sql import DataFrame
+from pyspark.sql.functions import udf, when
+from pyspark.sql.types import ArrayType, DoubleType
+from pyspark.storagelevel import StorageLevel
+
+__all__ = ['LinearSVC', 'LinearSVCModel',
+           'LogisticRegression', 'LogisticRegressionModel',
+           'LogisticRegressionSummary', 'LogisticRegressionTrainingSummary',
+           'BinaryLogisticRegressionSummary', 'BinaryLogisticRegressionTrainingSummary',
+           'DecisionTreeClassifier', 'DecisionTreeClassificationModel',
+           'GBTClassifier', 'GBTClassificationModel',
+           'RandomForestClassifier', 'RandomForestClassificationModel',
+           'NaiveBayes', 'NaiveBayesModel',
+           'MultilayerPerceptronClassifier', 'MultilayerPerceptronClassificationModel',
+           'OneVsRest', 'OneVsRestModel']
+
+
+@inherit_doc
+class JavaClassificationModel(JavaPredictionModel):
+    """
+    (Private) Java Model produced by a ``Classifier``.
+    Classes are indexed {0, 1, ..., numClasses - 1}.
+    To be mixed in with :py:class:`pyspark.ml.JavaModel`.
+    """
+
+    @property
+    @since("2.1.0")
+    def numClasses(self):
+        """
+        Number of classes (values which the label can take).
+        """
+        return self._call_java("numClasses")
+
+
+@inherit_doc
+class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
+                HasRegParam, HasTol, HasRawPredictionCol, HasFitIntercept, HasStandardization,
+                HasWeightCol, HasAggregationDepth, HasThreshold, JavaMLWritable, JavaMLReadable):
+    """
+    .. note:: Experimental
+
+    `Linear SVM Classifier <https://en.wikipedia.org/wiki/Support_vector_machine#Linear_SVM>`_
+
+    This binary classifier optimizes the Hinge Loss using the OWLQN optimizer.
+    Only supports L2 regularization currently.
+
+    >>> from pyspark.sql import Row
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = sc.parallelize([
+    ...     Row(label=1.0, features=Vectors.dense(1.0, 1.0, 1.0)),
+    ...     Row(label=0.0, features=Vectors.dense(1.0, 2.0, 3.0))]).toDF()
+    >>> svm = LinearSVC(maxIter=5, regParam=0.01)
+    >>> model = svm.fit(df)
+    >>> model.coefficients
+    DenseVector([0.0, -0.2792, -0.1833])
+    >>> model.intercept
+    1.0206118982229047
+    >>> model.numClasses
+    2
+    >>> model.numFeatures
+    3
+    >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, -1.0, -1.0))]).toDF()
+    >>> result = model.transform(test0).head()
+    >>> result.prediction
+    1.0
+    >>> result.rawPrediction
+    DenseVector([-1.4831, 1.4831])
+    >>> svm_path = temp_path + "/svm"
+    >>> svm.save(svm_path)
+    >>> svm2 = LinearSVC.load(svm_path)
+    >>> svm2.getMaxIter()
+    5
+    >>> model_path = temp_path + "/svm_model"
+    >>> model.save(model_path)
+    >>> model2 = LinearSVCModel.load(model_path)
+    >>> model.coefficients[0] == model2.coefficients[0]
+    True
+    >>> model.intercept == model2.intercept
+    True
+
+    .. versionadded:: 2.2.0
+    """
+
+    threshold = Param(Params._dummy(), "threshold",
+                      "The threshold in binary classification applied to the linear model"
+                      " prediction.  This threshold can be any real number, where Inf will make"
+                      " all predictions 0.0 and -Inf will make all predictions 1.0.",
+                      typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction",
+                 fitIntercept=True, standardization=True, threshold=0.0, weightCol=None,
+                 aggregationDepth=2):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \
+                 fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \
+                 aggregationDepth=2)
+        """
+        super(LinearSVC, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.classification.LinearSVC", self.uid)
+        self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True,
+                         standardization=True, threshold=0.0, aggregationDepth=2)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.2.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction",
+                  fitIntercept=True, standardization=True, threshold=0.0, weightCol=None,
+                  aggregationDepth=2):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \
+                  fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \
+                  aggregationDepth=2)
+        Sets params for Linear SVM Classifier.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return LinearSVCModel(java_model)
+
+
+class LinearSVCModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable):
+    """
+    .. note:: Experimental
+
+    Model fitted by LinearSVC.
+
+    .. versionadded:: 2.2.0
+    """
+
+    @property
+    @since("2.2.0")
+    def coefficients(self):
+        """
+        Model coefficients of Linear SVM Classifier.
+        """
+        return self._call_java("coefficients")
+
+    @property
+    @since("2.2.0")
+    def intercept(self):
+        """
+        Model intercept of Linear SVM Classifier.
+        """
+        return self._call_java("intercept")
+
+
+@inherit_doc
+class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
+                         HasRegParam, HasTol, HasProbabilityCol, HasRawPredictionCol,
+                         HasElasticNetParam, HasFitIntercept, HasStandardization, HasThresholds,
+                         HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable):
+    """
+    Logistic regression.
+    This class supports multinomial logistic (softmax) and binomial logistic regression.
+
+    >>> from pyspark.sql import Row
+    >>> from pyspark.ml.linalg import Vectors
+    >>> bdf = sc.parallelize([
+    ...     Row(label=1.0, weight=1.0, features=Vectors.dense(0.0, 5.0)),
+    ...     Row(label=0.0, weight=2.0, features=Vectors.dense(1.0, 2.0)),
+    ...     Row(label=1.0, weight=3.0, features=Vectors.dense(2.0, 1.0)),
+    ...     Row(label=0.0, weight=4.0, features=Vectors.dense(3.0, 3.0))]).toDF()
+    >>> blor = LogisticRegression(regParam=0.01, weightCol="weight")
+    >>> blorModel = blor.fit(bdf)
+    >>> blorModel.coefficients
+    DenseVector([-1.080..., -0.646...])
+    >>> blorModel.intercept
+    3.112...
+    >>> data_path = "data/mllib/sample_multiclass_classification_data.txt"
+    >>> mdf = spark.read.format("libsvm").load(data_path)
+    >>> mlor = LogisticRegression(regParam=0.1, elasticNetParam=1.0, family="multinomial")
+    >>> mlorModel = mlor.fit(mdf)
+    >>> mlorModel.coefficientMatrix
+    SparseMatrix(3, 4, [0, 1, 2, 3], [3, 2, 1], [1.87..., -2.75..., -0.50...], 1)
+    >>> mlorModel.interceptVector
+    DenseVector([0.04..., -0.42..., 0.37...])
+    >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 1.0))]).toDF()
+    >>> result = blorModel.transform(test0).head()
+    >>> result.prediction
+    1.0
+    >>> result.probability
+    DenseVector([0.02..., 0.97...])
+    >>> result.rawPrediction
+    DenseVector([-3.54..., 3.54...])
+    >>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
+    >>> blorModel.transform(test1).head().prediction
+    1.0
+    >>> blor.setParams("vector")
+    Traceback (most recent call last):
+        ...
+    TypeError: Method setParams forces keyword arguments.
+    >>> lr_path = temp_path + "/lr"
+    >>> blor.save(lr_path)
+    >>> lr2 = LogisticRegression.load(lr_path)
+    >>> lr2.getRegParam()
+    0.01
+    >>> model_path = temp_path + "/lr_model"
+    >>> blorModel.save(model_path)
+    >>> model2 = LogisticRegressionModel.load(model_path)
+    >>> blorModel.coefficients[0] == model2.coefficients[0]
+    True
+    >>> blorModel.intercept == model2.intercept
+    True
+    >>> model2
+    LogisticRegressionModel: uid = ..., numClasses = 2, numFeatures = 2
+
+    .. versionadded:: 1.3.0
+    """
+
+    threshold = Param(Params._dummy(), "threshold",
+                      "Threshold in binary classification prediction, in range [0, 1]." +
+                      " If threshold and thresholds are both set, they must match." +
+                      " E.g. if threshold is p, then thresholds must be equal to [1-p, p].",
+                      typeConverter=TypeConverters.toFloat)
+
+    family = Param(Params._dummy(), "family",
+                   "The name of family which is a description of the label distribution to " +
+                   "be used in the model. Supported options: auto, binomial, multinomial",
+                   typeConverter=TypeConverters.toString)
+
+    lowerBoundsOnCoefficients = Param(Params._dummy(), "lowerBoundsOnCoefficients",
+                                      "The lower bounds on coefficients if fitting under bound "
+                                      "constrained optimization. The bound matrix must be "
+                                      "compatible with the shape "
+                                      "(1, number of features) for binomial regression, or "
+                                      "(number of classes, number of features) "
+                                      "for multinomial regression.",
+                                      typeConverter=TypeConverters.toMatrix)
+
+    upperBoundsOnCoefficients = Param(Params._dummy(), "upperBoundsOnCoefficients",
+                                      "The upper bounds on coefficients if fitting under bound "
+                                      "constrained optimization. The bound matrix must be "
+                                      "compatible with the shape "
+                                      "(1, number of features) for binomial regression, or "
+                                      "(number of classes, number of features) "
+                                      "for multinomial regression.",
+                                      typeConverter=TypeConverters.toMatrix)
+
+    lowerBoundsOnIntercepts = Param(Params._dummy(), "lowerBoundsOnIntercepts",
+                                    "The lower bounds on intercepts if fitting under bound "
+                                    "constrained optimization. The bounds vector size must be "
+                                    "equal with 1 for binomial regression, or the number of "
+                                    "classes for multinomial regression.",
+                                    typeConverter=TypeConverters.toVector)
+
+    upperBoundsOnIntercepts = Param(Params._dummy(), "upperBoundsOnIntercepts",
+                                    "The upper bounds on intercepts if fitting under bound "
+                                    "constrained optimization. The bound vector size must be "
+                                    "equal with 1 for binomial regression, or the number of "
+                                    "classes for multinomial regression.",
+                                    typeConverter=TypeConverters.toVector)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+                 threshold=0.5, thresholds=None, probabilityCol="probability",
+                 rawPredictionCol="rawPrediction", standardization=True, weightCol=None,
+                 aggregationDepth=2, family="auto",
+                 lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None,
+                 lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
+                 threshold=0.5, thresholds=None, probabilityCol="probability", \
+                 rawPredictionCol="rawPrediction", standardization=True, weightCol=None, \
+                 aggregationDepth=2, family="auto", \
+                 lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, \
+                 lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None)
+        If the threshold and thresholds Params are both set, they must be equivalent.
+        """
+        super(LogisticRegression, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.classification.LogisticRegression", self.uid)
+        self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5, family="auto")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+        self._checkThresholdConsistency()
+
+    @keyword_only
+    @since("1.3.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+                  threshold=0.5, thresholds=None, probabilityCol="probability",
+                  rawPredictionCol="rawPrediction", standardization=True, weightCol=None,
+                  aggregationDepth=2, family="auto",
+                  lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None,
+                  lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
+                  threshold=0.5, thresholds=None, probabilityCol="probability", \
+                  rawPredictionCol="rawPrediction", standardization=True, weightCol=None, \
+                  aggregationDepth=2, family="auto", \
+                  lowerBoundsOnCoefficients=None, upperBoundsOnCoefficients=None, \
+                  lowerBoundsOnIntercepts=None, upperBoundsOnIntercepts=None)
+        Sets params for logistic regression.
+        If the threshold and thresholds Params are both set, they must be equivalent.
+        """
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+        self._checkThresholdConsistency()
+        return self
+
+    def _create_model(self, java_model):
+        return LogisticRegressionModel(java_model)
+
+    @since("1.4.0")
+    def setThreshold(self, value):
+        """
+        Sets the value of :py:attr:`threshold`.
+        Clears value of :py:attr:`thresholds` if it has been set.
+        """
+        self._set(threshold=value)
+        self._clear(self.thresholds)
+        return self
+
+    @since("1.4.0")
+    def getThreshold(self):
+        """
+        Get threshold for binary classification.
+
+        If :py:attr:`thresholds` is set with length 2 (i.e., binary classification),
+        this returns the equivalent threshold:
+        :math:`\\frac{1}{1 + \\frac{thresholds(0)}{thresholds(1)}}`.
+        Otherwise, returns :py:attr:`threshold` if set or its default value if unset.
+        """
+        self._checkThresholdConsistency()
+        if self.isSet(self.thresholds):
+            ts = self.getOrDefault(self.thresholds)
+            if len(ts) != 2:
+                raise ValueError("Logistic Regression getThreshold only applies to" +
+                                 " binary classification, but thresholds has length != 2." +
+                                 "  thresholds: " + ", ".join(map(str, ts)))
+            return 1.0/(1.0 + ts[0]/ts[1])
+        else:
+            return self.getOrDefault(self.threshold)
+
+    @since("1.5.0")
+    def setThresholds(self, value):
+        """
+        Sets the value of :py:attr:`thresholds`.
+        Clears value of :py:attr:`threshold` if it has been set.
+        """
+        self._set(thresholds=value)
+        self._clear(self.threshold)
+        return self
+
+    @since("1.5.0")
+    def getThresholds(self):
+        """
+        If :py:attr:`thresholds` is set, return its value.
+        Otherwise, if :py:attr:`threshold` is set, return the equivalent thresholds for binary
+        classification: (1-threshold, threshold).
+        If neither are set, throw an error.
+        """
+        self._checkThresholdConsistency()
+        if not self.isSet(self.thresholds) and self.isSet(self.threshold):
+            t = self.getOrDefault(self.threshold)
+            return [1.0-t, t]
+        else:
+            return self.getOrDefault(self.thresholds)
+
+    def _checkThresholdConsistency(self):
+        if self.isSet(self.threshold) and self.isSet(self.thresholds):
+            ts = self.getOrDefault(self.thresholds)
+            if len(ts) != 2:
+                raise ValueError("Logistic Regression getThreshold only applies to" +
+                                 " binary classification, but thresholds has length != 2." +
+                                 " thresholds: {0}".format(str(ts)))
+            t = 1.0/(1.0 + ts[0]/ts[1])
+            t2 = self.getOrDefault(self.threshold)
+            if abs(t2 - t) >= 1E-5:
+                raise ValueError("Logistic Regression getThreshold found inconsistent values for" +
+                                 " threshold (%g) and thresholds (equivalent to %g)" % (t2, t))
+
+    @since("2.1.0")
+    def setFamily(self, value):
+        """
+        Sets the value of :py:attr:`family`.
+        """
+        return self._set(family=value)
+
+    @since("2.1.0")
+    def getFamily(self):
+        """
+        Gets the value of :py:attr:`family` or its default value.
+        """
+        return self.getOrDefault(self.family)
+
+    @since("2.3.0")
+    def setLowerBoundsOnCoefficients(self, value):
+        """
+        Sets the value of :py:attr:`lowerBoundsOnCoefficients`
+        """
+        return self._set(lowerBoundsOnCoefficients=value)
+
+    @since("2.3.0")
+    def getLowerBoundsOnCoefficients(self):
+        """
+        Gets the value of :py:attr:`lowerBoundsOnCoefficients`
+        """
+        return self.getOrDefault(self.lowerBoundsOnCoefficients)
+
+    @since("2.3.0")
+    def setUpperBoundsOnCoefficients(self, value):
+        """
+        Sets the value of :py:attr:`upperBoundsOnCoefficients`
+        """
+        return self._set(upperBoundsOnCoefficients=value)
+
+    @since("2.3.0")
+    def getUpperBoundsOnCoefficients(self):
+        """
+        Gets the value of :py:attr:`upperBoundsOnCoefficients`
+        """
+        return self.getOrDefault(self.upperBoundsOnCoefficients)
+
+    @since("2.3.0")
+    def setLowerBoundsOnIntercepts(self, value):
+        """
+        Sets the value of :py:attr:`lowerBoundsOnIntercepts`
+        """
+        return self._set(lowerBoundsOnIntercepts=value)
+
+    @since("2.3.0")
+    def getLowerBoundsOnIntercepts(self):
+        """
+        Gets the value of :py:attr:`lowerBoundsOnIntercepts`
+        """
+        return self.getOrDefault(self.lowerBoundsOnIntercepts)
+
+    @since("2.3.0")
+    def setUpperBoundsOnIntercepts(self, value):
+        """
+        Sets the value of :py:attr:`upperBoundsOnIntercepts`
+        """
+        return self._set(upperBoundsOnIntercepts=value)
+
+    @since("2.3.0")
+    def getUpperBoundsOnIntercepts(self):
+        """
+        Gets the value of :py:attr:`upperBoundsOnIntercepts`
+        """
+        return self.getOrDefault(self.upperBoundsOnIntercepts)
+
+
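+# A minimal sketch (illustrative only, not part of the API) of the
+# threshold/thresholds equivalence enforced by ``_checkThresholdConsistency``:
+# a single ``threshold`` p corresponds to ``thresholds`` [1 - p, p], and a pair
+# [t0, t1] corresponds to the single threshold 1 / (1 + t0 / t1). An active
+# SparkSession is assumed, since the constructor creates a JVM peer.
+def _threshold_equivalence_sketch():
+    lr = LogisticRegression(threshold=0.25)
+    assert lr.getThresholds() == [0.75, 0.25]
+    lr.setThresholds([0.6, 0.4])
+    # 1.0 / (1.0 + 0.6 / 0.4) == 0.4
+    assert abs(lr.getThreshold() - 0.4) < 1e-9
+    return lr
+
+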
+class LogisticRegressionModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by LogisticRegression.
+
+    .. versionadded:: 1.3.0
+    """
+
+    @property
+    @since("2.0.0")
+    def coefficients(self):
+        """
+        Model coefficients of binomial logistic regression.
+        An exception is thrown in the case of multinomial logistic regression.
+        """
+        return self._call_java("coefficients")
+
+    @property
+    @since("1.4.0")
+    def intercept(self):
+        """
+        Model intercept of binomial logistic regression.
+        An exception is thrown in the case of multinomial logistic regression.
+        """
+        return self._call_java("intercept")
+
+    @property
+    @since("2.1.0")
+    def coefficientMatrix(self):
+        """
+        Model coefficients.
+        """
+        return self._call_java("coefficientMatrix")
+
+    @property
+    @since("2.1.0")
+    def interceptVector(self):
+        """
+        Model intercept.
+        """
+        return self._call_java("interceptVector")
+
+    @property
+    @since("2.0.0")
+    def summary(self):
+        """
+        Gets summary (e.g. accuracy/precision/recall, objective history, total iterations) of model
+        trained on the training set. An exception is thrown if no training summary exists.
+        """
+        if self.hasSummary:
+            java_lrt_summary = self._call_java("summary")
+            if self.numClasses <= 2:
+                return BinaryLogisticRegressionTrainingSummary(java_lrt_summary)
+            else:
+                return LogisticRegressionTrainingSummary(java_lrt_summary)
+        else:
+            raise RuntimeError("No training summary available for this %s" %
+                               self.__class__.__name__)
+
+    @property
+    @since("2.0.0")
+    def hasSummary(self):
+        """
+        Indicates whether a training summary exists for this model
+        instance.
+        """
+        return self._call_java("hasSummary")
+
+    @since("2.0.0")
+    def evaluate(self, dataset):
+        """
+        Evaluates the model on a test dataset.
+
+        :param dataset:
+          Test dataset to evaluate model on, where dataset is an
+          instance of :py:class:`pyspark.sql.DataFrame`
+        """
+        if not isinstance(dataset, DataFrame):
+            raise ValueError("dataset must be a DataFrame but got %s." % type(dataset))
+        java_blr_summary = self._call_java("evaluate", dataset)
+        return BinaryLogisticRegressionSummary(java_blr_summary)
+
+    def __repr__(self):
+        return self._call_java("toString")
+
+
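+# A minimal sketch (illustrative only, not part of the API) of working with the
+# training summary and ``evaluate``: ``train_df`` and ``test_df`` are assumed to
+# be DataFrames with binary "label" and "features" columns, and an active
+# SparkSession is assumed.
+def _logistic_regression_summary_sketch(train_df, test_df):
+    model = LogisticRegression(maxIter=10).fit(train_df)
+    # The training summary is only available on a freshly fitted model;
+    # ``hasSummary`` is False for a model loaded from disk.
+    objective_history = model.summary.objectiveHistory if model.hasSummary else None
+    # ``evaluate`` computes the same kind of summary on held-out data.
+    test_summary = model.evaluate(test_df)
+    return objective_history, test_summary.areaUnderROC
+
+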
+class LogisticRegressionSummary(JavaWrapper):
+    """
+    .. note:: Experimental
+
+    Abstraction for Logistic Regression Results for a given model.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def predictions(self):
+        """
+        Dataframe outputted by the model's `transform` method.
+        """
+        return self._call_java("predictions")
+
+    @property
+    @since("2.0.0")
+    def probabilityCol(self):
+        """
+        Field in "predictions" which gives the probability
+        of each class as a vector.
+        """
+        return self._call_java("probabilityCol")
+
+    @property
+    @since("2.3.0")
+    def predictionCol(self):
+        """
+        Field in "predictions" which gives the prediction of each class.
+        """
+        return self._call_java("predictionCol")
+
+    @property
+    @since("2.0.0")
+    def labelCol(self):
+        """
+        Field in "predictions" which gives the true label of each
+        instance.
+        """
+        return self._call_java("labelCol")
+
+    @property
+    @since("2.0.0")
+    def featuresCol(self):
+        """
+        Field in "predictions" which gives the features of each instance
+        as a vector.
+        """
+        return self._call_java("featuresCol")
+
+    @property
+    @since("2.3.0")
+    def labels(self):
+        """
+        Returns the sequence of labels in ascending order. This order matches the order used
+        in metrics which are specified as arrays over labels, e.g., truePositiveRateByLabel.
+
+        Note: In most cases, it will be values {0.0, 1.0, ..., numClasses-1}. However, if the
+        training set is missing a label, then all of the arrays over labels
+        (e.g., from truePositiveRateByLabel) will be of length numClasses-1 instead of the
+        expected numClasses.
+        """
+        return self._call_java("labels")
+
+    @property
+    @since("2.3.0")
+    def truePositiveRateByLabel(self):
+        """
+        Returns true positive rate for each label (category).
+        """
+        return self._call_java("truePositiveRateByLabel")
+
+    @property
+    @since("2.3.0")
+    def falsePositiveRateByLabel(self):
+        """
+        Returns false positive rate for each label (category).
+        """
+        return self._call_java("falsePositiveRateByLabel")
+
+    @property
+    @since("2.3.0")
+    def precisionByLabel(self):
+        """
+        Returns precision for each label (category).
+        """
+        return self._call_java("precisionByLabel")
+
+    @property
+    @since("2.3.0")
+    def recallByLabel(self):
+        """
+        Returns recall for each label (category).
+        """
+        return self._call_java("recallByLabel")
+
+    @since("2.3.0")
+    def fMeasureByLabel(self, beta=1.0):
+        """
+        Returns f-measure for each label (category).
+        """
+        return self._call_java("fMeasureByLabel", beta)
+
+    @property
+    @since("2.3.0")
+    def accuracy(self):
+        """
+        Returns accuracy.
+        (the fraction of correctly classified instances out of the
+        total number of instances).
+        """
+        return self._call_java("accuracy")
+
+    @property
+    @since("2.3.0")
+    def weightedTruePositiveRate(self):
+        """
+        Returns weighted true positive rate.
+        (equivalent to the weighted recall).
+        """
+        return self._call_java("weightedTruePositiveRate")
+
+    @property
+    @since("2.3.0")
+    def weightedFalsePositiveRate(self):
+        """
+        Returns weighted false positive rate.
+        """
+        return self._call_java("weightedFalsePositiveRate")
+
+    @property
+    @since("2.3.0")
+    def weightedRecall(self):
+        """
+        Returns weighted averaged recall, i.e. the recall for each label
+        weighted by the frequency of that label.
+        """
+        return self._call_java("weightedRecall")
+
+    @property
+    @since("2.3.0")
+    def weightedPrecision(self):
+        """
+        Returns weighted averaged precision.
+        """
+        return self._call_java("weightedPrecision")
+
+    @since("2.3.0")
+    def weightedFMeasure(self, beta=1.0):
+        """
+        Returns weighted averaged f-measure.
+        """
+        return self._call_java("weightedFMeasure", beta)
+
+
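+# A minimal sketch (illustrative only, not part of the API) of reading the
+# per-label metrics above: the arrays returned by e.g. ``truePositiveRateByLabel``
+# are ordered to match ``labels``, so they can simply be zipped together.
+# ``summary`` is assumed to be a LogisticRegressionSummary instance.
+def _per_label_metrics_sketch(summary):
+    return {label: {"tpr": tpr, "precision": precision, "recall": recall}
+            for label, tpr, precision, recall in zip(summary.labels,
+                                                     summary.truePositiveRateByLabel,
+                                                     summary.precisionByLabel,
+                                                     summary.recallByLabel)}
+
+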
+@inherit_doc
+class LogisticRegressionTrainingSummary(LogisticRegressionSummary):
+    """
+    .. note:: Experimental
+
+    Abstraction for multinomial Logistic Regression Training results.
+    Currently, the training summary ignores the training weights except
+    for the objective trace.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def objectiveHistory(self):
+        """
+        Objective function (scaled loss + regularization) at each
+        iteration.
+        """
+        return self._call_java("objectiveHistory")
+
+    @property
+    @since("2.0.0")
+    def totalIterations(self):
+        """
+        Number of training iterations until termination.
+        """
+        return self._call_java("totalIterations")
+
+
+@inherit_doc
+class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
+    """
+    .. note:: Experimental
+
+    Binary Logistic regression results for a given model.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def roc(self):
+        """
+        Returns the receiver operating characteristic (ROC) curve,
+        which is a Dataframe having two fields (FPR, TPR) with
+        (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
+
+        .. seealso:: `Wikipedia reference
+            <http://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LogisticRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("roc")
+
+    @property
+    @since("2.0.0")
+    def areaUnderROC(self):
+        """
+        Computes the area under the receiver operating characteristic
+        (ROC) curve.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LogisticRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("areaUnderROC")
+
+    @property
+    @since("2.0.0")
+    def pr(self):
+        """
+        Returns the precision-recall curve, which is a Dataframe
+        containing two fields (recall, precision), with the point
+        (0.0, 1.0) prepended to it.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LogisticRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("pr")
+
+    @property
+    @since("2.0.0")
+    def fMeasureByThreshold(self):
+        """
+        Returns the F-Measure-by-threshold curve, which is a dataframe
+        with two fields (threshold, F-Measure) computed with beta = 1.0.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LogisticRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("fMeasureByThreshold")
+
+    @property
+    @since("2.0.0")
+    def precisionByThreshold(self):
+        """
+        Returns the precision-by-threshold curve, which is a dataframe with
+        two fields (threshold, precision). Every possible probability obtained
+        in transforming the dataset is used as a threshold when calculating
+        the precision.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LogisticRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("precisionByThreshold")
+
+    @property
+    @since("2.0.0")
+    def recallByThreshold(self):
+        """
+        Returns the recall-by-threshold curve, which is a dataframe with
+        two fields (threshold, recall). Every possible probability obtained
+        in transforming the dataset is used as a threshold when calculating
+        the recall.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LogisticRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("recallByThreshold")
+
+
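+# A minimal sketch (illustrative only, not part of the API) of picking a decision
+# threshold from ``fMeasureByThreshold``, whose two fields are documented above
+# as (threshold, F-Measure). ``binary_summary`` is assumed to be a
+# BinaryLogisticRegressionSummary instance.
+def _best_threshold_sketch(binary_summary):
+    f_by_threshold = binary_summary.fMeasureByThreshold
+    best = f_by_threshold.orderBy(f_by_threshold["F-Measure"].desc()).first()
+    return best["threshold"], best["F-Measure"]
+
+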
+@inherit_doc
+class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary,
+                                              LogisticRegressionTrainingSummary):
+    """
+    .. note:: Experimental
+
+    Binary Logistic regression training results for a given model.
+
+    .. versionadded:: 2.0.0
+    """
+    pass
+
+
+class TreeClassifierParams(object):
+    """
+    Private class to track supported impurity measures.
+
+    .. versionadded:: 1.4.0
+    """
+    supportedImpurities = ["entropy", "gini"]
+
+    impurity = Param(Params._dummy(), "impurity",
+                     "Criterion used for information gain calculation (case-insensitive). " +
+                     "Supported options: " +
+                     ", ".join(supportedImpurities), typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(TreeClassifierParams, self).__init__()
+
+    @since("1.6.0")
+    def setImpurity(self, value):
+        """
+        Sets the value of :py:attr:`impurity`.
+        """
+        return self._set(impurity=value)
+
+    @since("1.6.0")
+    def getImpurity(self):
+        """
+        Gets the value of impurity or its default value.
+        """
+        return self.getOrDefault(self.impurity)
+
+
+class GBTParams(TreeEnsembleParams):
+    """
+    Private class to track supported GBT params.
+
+    .. versionadded:: 1.4.0
+    """
+    supportedLossTypes = ["logistic"]
+
+
+@inherit_doc
+class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
+                             HasProbabilityCol, HasRawPredictionCol, DecisionTreeParams,
+                             TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable,
+                             JavaMLReadable):
+    """
+    `Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
+    learning algorithm for classification.
+    It supports both binary and multiclass labels, as well as both continuous and categorical
+    features.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> from pyspark.ml.feature import StringIndexer
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed")
+    >>> si_model = stringIndexer.fit(df)
+    >>> td = si_model.transform(df)
+    >>> dt = DecisionTreeClassifier(maxDepth=2, labelCol="indexed")
+    >>> model = dt.fit(td)
+    >>> model.numNodes
+    3
+    >>> model.depth
+    1
+    >>> model.featureImportances
+    SparseVector(1, {0: 1.0})
+    >>> model.numFeatures
+    1
+    >>> model.numClasses
+    2
+    >>> print(model.toDebugString)
+    DecisionTreeClassificationModel (uid=...) of depth 1 with 3 nodes...
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> result = model.transform(test0).head()
+    >>> result.prediction
+    0.0
+    >>> result.probability
+    DenseVector([1.0, 0.0])
+    >>> result.rawPrediction
+    DenseVector([1.0, 0.0])
+    >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
+    >>> model.transform(test1).head().prediction
+    1.0
+
+    >>> dtc_path = temp_path + "/dtc"
+    >>> dt.save(dtc_path)
+    >>> dt2 = DecisionTreeClassifier.load(dtc_path)
+    >>> dt2.getMaxDepth()
+    2
+    >>> model_path = temp_path + "/dtc_model"
+    >>> model.save(model_path)
+    >>> model2 = DecisionTreeClassificationModel.load(model_path)
+    >>> model.featureImportances == model2.featureImportances
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 probabilityCol="probability", rawPredictionCol="rawPrediction",
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
+                 seed=None):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 probabilityCol="probability", rawPredictionCol="rawPrediction", \
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
+                 seed=None)
+        """
+        super(DecisionTreeClassifier, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.classification.DecisionTreeClassifier", self.uid)
+        self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                         maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                         impurity="gini")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  probabilityCol="probability", rawPredictionCol="rawPrediction",
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                  impurity="gini", seed=None):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  probabilityCol="probability", rawPredictionCol="rawPrediction", \
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
+                  seed=None)
+        Sets params for the DecisionTreeClassifier.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return DecisionTreeClassificationModel(java_model)
+
+
+@inherit_doc
+class DecisionTreeClassificationModel(DecisionTreeModel, JavaClassificationModel, JavaMLWritable,
+                                      JavaMLReadable):
+    """
+    Model fitted by DecisionTreeClassifier.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def featureImportances(self):
+        """
+        Estimate of the importance of each feature.
+
+        This generalizes the idea of "Gini" importance to other losses,
+        following the explanation of Gini importance from "Random Forests" documentation
+        by Leo Breiman and Adele Cutler, and following the implementation from scikit-learn.
+
+        This feature importance is calculated as follows:
+          - importance(feature j) = sum (over nodes which split on feature j) of the gain,
+            where gain is scaled by the number of instances passing through node
+          - Normalize importances for tree to sum to 1.
+
+        .. note:: Feature importance for single decision trees can have high variance due to
+            correlated predictor variables. Consider using a :py:class:`RandomForestClassifier`
+            to determine feature importance instead.
+        """
+        return self._call_java("featureImportances")
+
+
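+# A minimal, pure-Python sketch (illustrative only, not how Spark computes it
+# internally) of the gain-based importance formula described in
+# ``featureImportances`` above: sum, over the nodes that split on a feature, the
+# information gain scaled by the number of instances passing through the node,
+# then normalize so the importances sum to 1. ``splits`` is assumed to be an
+# iterable of (feature_index, gain, num_instances) tuples.
+def _gain_based_importances_sketch(splits):
+    totals = {}
+    for feature, gain, num_instances in splits:
+        totals[feature] = totals.get(feature, 0.0) + gain * num_instances
+    norm = sum(totals.values())
+    if norm == 0:
+        return totals
+    return {feature: value / norm for feature, value in totals.items()}
+
+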
+@inherit_doc
+class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed,
+                             HasRawPredictionCol, HasProbabilityCol,
+                             RandomForestParams, TreeClassifierParams, HasCheckpointInterval,
+                             JavaMLWritable, JavaMLReadable):
+    """
+    `Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
+    learning algorithm for classification.
+    It supports both binary and multiclass labels, as well as both continuous and categorical
+    features.
+
+    >>> import numpy
+    >>> from numpy import allclose
+    >>> from pyspark.ml.linalg import Vectors
+    >>> from pyspark.ml.feature import StringIndexer
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed")
+    >>> si_model = stringIndexer.fit(df)
+    >>> td = si_model.transform(df)
+    >>> rf = RandomForestClassifier(numTrees=3, maxDepth=2, labelCol="indexed", seed=42)
+    >>> model = rf.fit(td)
+    >>> model.featureImportances
+    SparseVector(1, {0: 1.0})
+    >>> allclose(model.treeWeights, [1.0, 1.0, 1.0])
+    True
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> result = model.transform(test0).head()
+    >>> result.prediction
+    0.0
+    >>> numpy.argmax(result.probability)
+    0
+    >>> numpy.argmax(result.rawPrediction)
+    0
+    >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
+    >>> model.transform(test1).head().prediction
+    1.0
+    >>> model.trees
+    [DecisionTreeClassificationModel (uid=...) of depth..., DecisionTreeClassificationModel...]
+    >>> rfc_path = temp_path + "/rfc"
+    >>> rf.save(rfc_path)
+    >>> rf2 = RandomForestClassifier.load(rfc_path)
+    >>> rf2.getNumTrees()
+    3
+    >>> model_path = temp_path + "/rfc_model"
+    >>> model.save(model_path)
+    >>> model2 = RandomForestClassificationModel.load(model_path)
+    >>> model.featureImportances == model2.featureImportances
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 probabilityCol="probability", rawPredictionCol="rawPrediction",
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini",
+                 numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 probabilityCol="probability", rawPredictionCol="rawPrediction", \
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \
+                 numTrees=20, featureSubsetStrategy="auto", seed=None, subsamplingRate=1.0)
+        """
+        super(RandomForestClassifier, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.classification.RandomForestClassifier", self.uid)
+        self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                         maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                         impurity="gini", numTrees=20, featureSubsetStrategy="auto",
+                         subsamplingRate=1.0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  probabilityCol="probability", rawPredictionCol="rawPrediction",
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None,
+                  impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  probabilityCol="probability", rawPredictionCol="rawPrediction", \
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \
+                  impurity="gini", numTrees=20, featureSubsetStrategy="auto", subsamplingRate=1.0)
+        Sets params for the RandomForestClassifier.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return RandomForestClassificationModel(java_model)
+
+    @since("2.4.0")
+    def setFeatureSubsetStrategy(self, value):
+        """
+        Sets the value of :py:attr:`featureSubsetStrategy`.
+        """
+        return self._set(featureSubsetStrategy=value)
+
+
+class RandomForestClassificationModel(TreeEnsembleModel, JavaClassificationModel, JavaMLWritable,
+                                      JavaMLReadable):
+    """
+    Model fitted by RandomForestClassifier.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def featureImportances(self):
+        """
+        Estimate of the importance of each feature.
+
+        Each feature's importance is the average of its importance across all trees in the ensemble.
+        The importance vector is normalized to sum to 1. This method is suggested by Hastie et al.
+        (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)
+        and follows the implementation from scikit-learn.
+
+        .. seealso:: :py:attr:`DecisionTreeClassificationModel.featureImportances`
+        """
+        return self._call_java("featureImportances")
+
+    @property
+    @since("2.0.0")
+    def trees(self):
+        """Trees in this ensemble. Warning: These have null parent Estimators."""
+        return [DecisionTreeClassificationModel(m) for m in list(self._call_java("trees"))]
+
+
+@inherit_doc
+class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
+                    GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
+                    JavaMLReadable):
+    """
+    `Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
+    learning algorithm for classification.
+    It supports binary labels, as well as both continuous and categorical features.
+
+    The implementation is based upon: J.H. Friedman. "Stochastic Gradient Boosting." 1999.
+
+    Notes on Gradient Boosting vs. TreeBoost:
+    - This implementation is for Stochastic Gradient Boosting, not for TreeBoost.
+    - Both algorithms learn tree ensembles by minimizing loss functions.
+    - TreeBoost (Friedman, 1999) additionally modifies the outputs at tree leaf nodes
+    based on the loss function, whereas the original gradient boosting method does not.
+    - We expect to implement TreeBoost in the future:
+    `SPARK-4240 <https://issues.apache.org/jira/browse/SPARK-4240>`_
+
+    .. note:: Multiclass labels are not currently supported.
+
+    >>> from numpy import allclose
+    >>> from pyspark.ml.linalg import Vectors
+    >>> from pyspark.ml.feature import StringIndexer
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed")
+    >>> si_model = stringIndexer.fit(df)
+    >>> td = si_model.transform(df)
+    >>> gbt = GBTClassifier(maxIter=5, maxDepth=2, labelCol="indexed", seed=42)
+    >>> gbt.getFeatureSubsetStrategy()
+    'all'
+    >>> model = gbt.fit(td)
+    >>> model.featureImportances
+    SparseVector(1, {0: 1.0})
+    >>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
+    True
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> model.transform(test0).head().prediction
+    0.0
+    >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
+    >>> model.transform(test1).head().prediction
+    1.0
+    >>> model.totalNumNodes
+    15
+    >>> print(model.toDebugString)
+    GBTClassificationModel (uid=...)...with 5 trees...
+    >>> gbtc_path = temp_path + "gbtc"
+    >>> gbt.save(gbtc_path)
+    >>> gbt2 = GBTClassifier.load(gbtc_path)
+    >>> gbt2.getMaxDepth()
+    2
+    >>> model_path = temp_path + "gbtc_model"
+    >>> model.save(model_path)
+    >>> model2 = GBTClassificationModel.load(model_path)
+    >>> model.featureImportances == model2.featureImportances
+    True
+    >>> model.treeWeights == model2.treeWeights
+    True
+    >>> model.trees
+    [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...]
+    >>> validation = spark.createDataFrame([(0.0, Vectors.dense(-1.0),)],
+    ...              ["indexed", "features"])
+    >>> model.evaluateEachIteration(validation)
+    [0.25..., 0.23..., 0.21..., 0.19..., 0.18...]
+    >>> model.numClasses
+    2
+
+    .. versionadded:: 1.4.0
+    """
+
+    lossType = Param(Params._dummy(), "lossType",
+                     "Loss function which GBT tries to minimize (case-insensitive). " +
+                     "Supported options: " + ", ".join(GBTParams.supportedLossTypes),
+                     typeConverter=TypeConverters.toString)
+
+    stepSize = Param(Params._dummy(), "stepSize",
+                     "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " +
+                     "the contribution of each estimator.",
+                     typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic",
+                 maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0,
+                 featureSubsetStrategy="all"):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+                 lossType="logistic", maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0, \
+                 featureSubsetStrategy="all")
+        """
+        super(GBTClassifier, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.classification.GBTClassifier", self.uid)
+        self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                         maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                         lossType="logistic", maxIter=20, stepSize=0.1, subsamplingRate=1.0,
+                         featureSubsetStrategy="all")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                  lossType="logistic", maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0,
+                  featureSubsetStrategy="all"):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+                  lossType="logistic", maxIter=20, stepSize=0.1, seed=None, subsamplingRate=1.0, \
+                  featureSubsetStrategy="all")
+        Sets params for Gradient Boosted Tree Classification.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return GBTClassificationModel(java_model)
+
+    @since("1.4.0")
+    def setLossType(self, value):
+        """
+        Sets the value of :py:attr:`lossType`.
+        """
+        return self._set(lossType=value)
+
+    @since("1.4.0")
+    def getLossType(self):
+        """
+        Gets the value of lossType or its default value.
+        """
+        return self.getOrDefault(self.lossType)
+
+    @since("2.4.0")
+    def setFeatureSubsetStrategy(self, value):
+        """
+        Sets the value of :py:attr:`featureSubsetStrategy`.
+        """
+        return self._set(featureSubsetStrategy=value)
+
+
+class GBTClassificationModel(TreeEnsembleModel, JavaClassificationModel, JavaMLWritable,
+                             JavaMLReadable):
+    """
+    Model fitted by GBTClassifier.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def featureImportances(self):
+        """
+        Estimate of the importance of each feature.
+
+        Each feature's importance is the average of its importance across all trees in the ensemble.
+        The importance vector is normalized to sum to 1. This method is suggested by Hastie et al.
+        (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)
+        and follows the implementation from scikit-learn.
+
+        .. seealso:: :py:attr:`DecisionTreeClassificationModel.featureImportances`
+        """
+        return self._call_java("featureImportances")
+
+    @property
+    @since("2.0.0")
+    def trees(self):
+        """Trees in this ensemble. Warning: These have null parent Estimators."""
+        return [DecisionTreeRegressionModel(m) for m in list(self._call_java("trees"))]
+
+    @since("2.4.0")
+    def evaluateEachIteration(self, dataset):
+        """
+        Method to compute error or loss for every iteration of gradient boosting.
+
+        :param dataset:
+            Test dataset to evaluate model on, where dataset is an
+            instance of :py:class:`pyspark.sql.DataFrame`
+        """
+        return self._call_java("evaluateEachIteration", dataset)
+
+
+@inherit_doc
+class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol,
+                 HasRawPredictionCol, HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable):
+    """
+    Naive Bayes Classifiers.
+    It supports both Multinomial and Bernoulli NB. `Multinomial NB
+    <http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html>`_
+    can handle finitely supported discrete data. For example, by converting documents into
+    TF-IDF vectors, it can be used for document classification. By making every vector
+    binary (0/1) data, it can also be used as `Bernoulli NB
+    <http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html>`_.
+    The input feature values must be nonnegative.
+
+    >>> from pyspark.sql import Row
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     Row(label=0.0, weight=0.1, features=Vectors.dense([0.0, 0.0])),
+    ...     Row(label=0.0, weight=0.5, features=Vectors.dense([0.0, 1.0])),
+    ...     Row(label=1.0, weight=1.0, features=Vectors.dense([1.0, 0.0]))])
+    >>> nb = NaiveBayes(smoothing=1.0, modelType="multinomial", weightCol="weight")
+    >>> model = nb.fit(df)
+    >>> model.pi
+    DenseVector([-0.81..., -0.58...])
+    >>> model.theta
+    DenseMatrix(2, 2, [-0.91..., -0.51..., -0.40..., -1.09...], 1)
+    >>> test0 = sc.parallelize([Row(features=Vectors.dense([1.0, 0.0]))]).toDF()
+    >>> result = model.transform(test0).head()
+    >>> result.prediction
+    1.0
+    >>> result.probability
+    DenseVector([0.32..., 0.67...])
+    >>> result.rawPrediction
+    DenseVector([-1.72..., -0.99...])
+    >>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
+    >>> model.transform(test1).head().prediction
+    1.0
+    >>> nb_path = temp_path + "/nb"
+    >>> nb.save(nb_path)
+    >>> nb2 = NaiveBayes.load(nb_path)
+    >>> nb2.getSmoothing()
+    1.0
+    >>> model_path = temp_path + "/nb_model"
+    >>> model.save(model_path)
+    >>> model2 = NaiveBayesModel.load(model_path)
+    >>> model.pi == model2.pi
+    True
+    >>> model.theta == model2.theta
+    True
+    >>> nb = nb.setThresholds([0.01, 10.00])
+    >>> model3 = nb.fit(df)
+    >>> result = model3.transform(test0).head()
+    >>> result.prediction
+    0.0
+
+    .. versionadded:: 1.5.0
+    """
+
+    smoothing = Param(Params._dummy(), "smoothing", "The smoothing parameter, should be >= 0, " +
+                      "default is 1.0", typeConverter=TypeConverters.toFloat)
+    modelType = Param(Params._dummy(), "modelType", "The model type which is a string " +
+                      "(case-sensitive). Supported options: multinomial (default) and bernoulli.",
+                      typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0,
+                 modelType="multinomial", thresholds=None, weightCol=None):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0, \
+                 modelType="multinomial", thresholds=None, weightCol=None)
+        """
+        super(NaiveBayes, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.classification.NaiveBayes", self.uid)
+        self._setDefault(smoothing=1.0, modelType="multinomial")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.5.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0,
+                  modelType="multinomial", thresholds=None, weightCol=None):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0, \
+                  modelType="multinomial", thresholds=None, weightCol=None)
+        Sets params for Naive Bayes.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return NaiveBayesModel(java_model)
+
+    @since("1.5.0")
+    def setSmoothing(self, value):
+        """
+        Sets the value of :py:attr:`smoothing`.
+        """
+        return self._set(smoothing=value)
+
+    @since("1.5.0")
+    def getSmoothing(self):
+        """
+        Gets the value of smoothing or its default value.
+        """
+        return self.getOrDefault(self.smoothing)
+
+    @since("1.5.0")
+    def setModelType(self, value):
+        """
+        Sets the value of :py:attr:`modelType`.
+        """
+        return self._set(modelType=value)
+
+    @since("1.5.0")
+    def getModelType(self):
+        """
+        Gets the value of modelType or its default value.
+        """
+        return self.getOrDefault(self.modelType)
+
+
+class NaiveBayesModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by NaiveBayes.
+
+    .. versionadded:: 1.5.0
+    """
+
+    @property
+    @since("2.0.0")
+    def pi(self):
+        """
+        Log of class priors.
+        """
+        return self._call_java("pi")
+
+    @property
+    @since("2.0.0")
+    def theta(self):
+        """
+        Log of class conditional probabilities.
+        """
+        return self._call_java("theta")
+
+
+@inherit_doc
+class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
+                                     HasMaxIter, HasTol, HasSeed, HasStepSize, HasSolver,
+                                     JavaMLWritable, JavaMLReadable, HasProbabilityCol,
+                                     HasRawPredictionCol):
+    """
+    Classifier trainer based on the Multilayer Perceptron.
+    Each layer has a sigmoid activation function, and the output layer has softmax.
+    The number of inputs has to be equal to the size of the feature vectors.
+    The number of outputs has to be equal to the total number of labels.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (0.0, Vectors.dense([0.0, 0.0])),
+    ...     (1.0, Vectors.dense([0.0, 1.0])),
+    ...     (1.0, Vectors.dense([1.0, 0.0])),
+    ...     (0.0, Vectors.dense([1.0, 1.0]))], ["label", "features"])
+    >>> mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[2, 2, 2], blockSize=1, seed=123)
+    >>> model = mlp.fit(df)
+    >>> model.layers
+    [2, 2, 2]
+    >>> model.weights.size
+    12
+    >>> testDF = spark.createDataFrame([
+    ...     (Vectors.dense([1.0, 0.0]),),
+    ...     (Vectors.dense([0.0, 0.0]),)], ["features"])
+    >>> model.transform(testDF).select("features", "prediction").show()
+    +---------+----------+
+    | features|prediction|
+    +---------+----------+
+    |[1.0,0.0]|       1.0|
+    |[0.0,0.0]|       0.0|
+    +---------+----------+
+    ...
+    >>> mlp_path = temp_path + "/mlp"
+    >>> mlp.save(mlp_path)
+    >>> mlp2 = MultilayerPerceptronClassifier.load(mlp_path)
+    >>> mlp2.getBlockSize()
+    1
+    >>> model_path = temp_path + "/mlp_model"
+    >>> model.save(model_path)
+    >>> model2 = MultilayerPerceptronClassificationModel.load(model_path)
+    >>> model.layers == model2.layers
+    True
+    >>> model.weights == model2.weights
+    True
+    >>> mlp2 = mlp2.setInitialWeights(list(range(0, 12)))
+    >>> model3 = mlp2.fit(df)
+    >>> model3.weights != model2.weights
+    True
+    >>> model3.layers == model.layers
+    True
+
+    .. versionadded:: 1.6.0
+    """
+
+    layers = Param(Params._dummy(), "layers", "Sizes of layers from input layer to output layer " +
+                   "E.g., Array(780, 100, 10) means 780 inputs, one hidden layer with 100 " +
+                   "neurons and output layer of 10 neurons.",
+                   typeConverter=TypeConverters.toListInt)
+    blockSize = Param(Params._dummy(), "blockSize", "Block size for stacking input data in " +
+                      "matrices. Data is stacked within partitions. If block size is more than " +
+                      "remaining data in a partition then it is adjusted to the size of this " +
+                      "data. Recommended size is between 10 and 1000, default is 128.",
+                      typeConverter=TypeConverters.toInt)
+    solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " +
+                   "options: l-bfgs, gd.", typeConverter=TypeConverters.toString)
+    initialWeights = Param(Params._dummy(), "initialWeights", "The initial weights of the model.",
+                           typeConverter=TypeConverters.toVector)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                 solver="l-bfgs", initialWeights=None, probabilityCol="probability",
+                 rawPredictionCol="rawPrediction"):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, \
+                 solver="l-bfgs", initialWeights=None, probabilityCol="probability", \
+                 rawPredictionCol="rawPrediction")
+        """
+        super(MultilayerPerceptronClassifier, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.classification.MultilayerPerceptronClassifier", self.uid)
+        self._setDefault(maxIter=100, tol=1E-6, blockSize=128, stepSize=0.03, solver="l-bfgs")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03,
+                  solver="l-bfgs", initialWeights=None, probabilityCol="probability",
+                  rawPredictionCol="rawPrediction"):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, \
+                  solver="l-bfgs", initialWeights=None, probabilityCol="probability", \
+                  rawPredictionCol="rawPrediction")
+        Sets params for MultilayerPerceptronClassifier.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return MultilayerPerceptronClassificationModel(java_model)
+
+    @since("1.6.0")
+    def setLayers(self, value):
+        """
+        Sets the value of :py:attr:`layers`.
+        """
+        return self._set(layers=value)
+
+    @since("1.6.0")
+    def getLayers(self):
+        """
+        Gets the value of layers or its default value.
+        """
+        return self.getOrDefault(self.layers)
+
+    @since("1.6.0")
+    def setBlockSize(self, value):
+        """
+        Sets the value of :py:attr:`blockSize`.
+        """
+        return self._set(blockSize=value)
+
+    @since("1.6.0")
+    def getBlockSize(self):
+        """
+        Gets the value of blockSize or its default value.
+        """
+        return self.getOrDefault(self.blockSize)
+
+    @since("2.0.0")
+    def setStepSize(self, value):
+        """
+        Sets the value of :py:attr:`stepSize`.
+        """
+        return self._set(stepSize=value)
+
+    @since("2.0.0")
+    def getStepSize(self):
+        """
+        Gets the value of stepSize or its default value.
+        """
+        return self.getOrDefault(self.stepSize)
+
+    @since("2.0.0")
+    def setInitialWeights(self, value):
+        """
+        Sets the value of :py:attr:`initialWeights`.
+        """
+        return self._set(initialWeights=value)
+
+    @since("2.0.0")
+    def getInitialWeights(self):
+        """
+        Gets the value of initialWeights or its default value.
+        """
+        return self.getOrDefault(self.initialWeights)
+
+
+class MultilayerPerceptronClassificationModel(JavaModel, JavaClassificationModel, JavaMLWritable,
+                                              JavaMLReadable):
+    """
+    Model fitted by MultilayerPerceptronClassifier.
+
+    .. versionadded:: 1.6.0
+    """
+
+    @property
+    @since("1.6.0")
+    def layers(self):
+        """
+        Array of layer sizes, including the input and output layers.
+        """
+        return self._call_java("javaLayers")
+
+    @property
+    @since("2.0.0")
+    def weights(self):
+        """
+        The weights of the layers.
+        """
+        return self._call_java("weights")
+
+
+class OneVsRestParams(HasFeaturesCol, HasLabelCol, HasWeightCol, HasPredictionCol):
+    """
+    Parameters for OneVsRest and OneVsRestModel.
+    """
+
+    classifier = Param(Params._dummy(), "classifier", "base binary classifier")
+
+    @since("2.0.0")
+    def setClassifier(self, value):
+        """
+        Sets the value of :py:attr:`classifier`.
+
+        .. note:: Only LogisticRegression and NaiveBayes are supported now.
+        """
+        return self._set(classifier=value)
+
+    @since("2.0.0")
+    def getClassifier(self):
+        """
+        Gets the value of classifier or its default value.
+        """
+        return self.getOrDefault(self.classifier)
+
+
+@inherit_doc
+class OneVsRest(Estimator, OneVsRestParams, HasParallelism, JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Reduction of Multiclass Classification to Binary Classification.
+    Performs reduction using one against all strategy.
+    For a multiclass classification with k classes, train k models (one per class).
+    Each example is scored against all k models, and the model with the highest score
+    is picked to label the example.
+
+    >>> from pyspark.sql import Row
+    >>> from pyspark.ml.linalg import Vectors
+    >>> data_path = "data/mllib/sample_multiclass_classification_data.txt"
+    >>> df = spark.read.format("libsvm").load(data_path)
+    >>> lr = LogisticRegression(regParam=0.01)
+    >>> ovr = OneVsRest(classifier=lr)
+    >>> model = ovr.fit(df)
+    >>> model.models[0].coefficients
+    DenseVector([0.5..., -1.0..., 3.4..., 4.2...])
+    >>> model.models[1].coefficients
+    DenseVector([-2.1..., 3.1..., -2.6..., -2.3...])
+    >>> model.models[2].coefficients
+    DenseVector([0.3..., -3.4..., 1.0..., -1.1...])
+    >>> [x.intercept for x in model.models]
+    [-2.7..., -2.5..., -1.3...]
+    >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0, 1.0, 1.0))]).toDF()
+    >>> model.transform(test0).head().prediction
+    0.0
+    >>> test1 = sc.parallelize([Row(features=Vectors.sparse(4, [0], [1.0]))]).toDF()
+    >>> model.transform(test1).head().prediction
+    2.0
+    >>> test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4, 0.3, 0.2))]).toDF()
+    >>> model.transform(test2).head().prediction
+    0.0
+    >>> model_path = temp_path + "/ovr_model"
+    >>> model.save(model_path)
+    >>> model2 = OneVsRestModel.load(model_path)
+    >>> model2.transform(test0).head().prediction
+    0.0
+
+    .. versionadded:: 2.0.0
+    """
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 classifier=None, weightCol=None, parallelism=1):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 classifier=None, weightCol=None, parallelism=1)
+        """
+        super(OneVsRest, self).__init__()
+        self._setDefault(parallelism=1)
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  classifier=None, weightCol=None, parallelism=1):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  classifier=None, weightCol=None, parallelism=1)
+        Sets params for OneVsRest.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _fit(self, dataset):
+        labelCol = self.getLabelCol()
+        featuresCol = self.getFeaturesCol()
+        predictionCol = self.getPredictionCol()
+        classifier = self.getClassifier()
+        assert isinstance(classifier, HasRawPredictionCol),\
+            "Classifier %s doesn't extend from HasRawPredictionCol." % type(classifier)
+
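+        # Infer the number of classes from the largest label value; labels are assumed
+        # to be consecutive integers starting at 0.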
+        numClasses = int(dataset.agg({labelCol: "max"}).head()["max("+labelCol+")"]) + 1
+
+        weightCol = None
+        if (self.isDefined(self.weightCol) and self.getWeightCol()):
+            if isinstance(classifier, HasWeightCol):
+                weightCol = self.getWeightCol()
+            else:
+                warnings.warn("weightCol is ignored, "
+                              "as it is not supported by {} now.".format(classifier))
+
+        if weightCol:
+            multiclassLabeled = dataset.select(labelCol, featuresCol, weightCol)
+        else:
+            multiclassLabeled = dataset.select(labelCol, featuresCol)
+
+        # persist if underlying dataset is not persistent.
+        handlePersistence = dataset.storageLevel == StorageLevel(False, False, False, False)
+        if handlePersistence:
+            multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK)
+
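+        # Train one binary classifier per class: examples of class `index` are relabeled
+        # as 1.0 and all other examples as 0.0.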
+        def trainSingleClass(index):
+            binaryLabelCol = "mc2b$" + str(index)
+            trainingDataset = multiclassLabeled.withColumn(
+                binaryLabelCol,
+                when(multiclassLabeled[labelCol] == float(index), 1.0).otherwise(0.0))
+            paramMap = dict([(classifier.labelCol, binaryLabelCol),
+                            (classifier.featuresCol, featuresCol),
+                            (classifier.predictionCol, predictionCol)])
+            if weightCol:
+                paramMap[classifier.weightCol] = weightCol
+            return classifier.fit(trainingDataset, paramMap)
+
+        pool = ThreadPool(processes=min(self.getParallelism(), numClasses))
+
+        models = pool.map(trainSingleClass, range(numClasses))
+
+        if handlePersistence:
+            multiclassLabeled.unpersist()
+
+        return self._copyValues(OneVsRestModel(models=models))
+
+    @since("2.0.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with a randomly generated uid
+        and some extra params. This creates a deep copy of the embedded paramMap,
+        and copies the embedded and extra parameters over.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        newOvr = Params.copy(self, extra)
+        if self.isSet(self.classifier):
+            newOvr.setClassifier(self.getClassifier().copy(extra))
+        return newOvr
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java OneVsRest, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+        featuresCol = java_stage.getFeaturesCol()
+        labelCol = java_stage.getLabelCol()
+        predictionCol = java_stage.getPredictionCol()
+        classifier = JavaParams._from_java(java_stage.getClassifier())
+        parallelism = java_stage.getParallelism()
+        py_stage = cls(featuresCol=featuresCol, labelCol=labelCol, predictionCol=predictionCol,
+                       classifier=classifier, parallelism=parallelism)
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java OneVsRest. Used for ML persistence.
+
+        :return: Java object equivalent to this instance.
+        """
+        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.classification.OneVsRest",
+                                             self.uid)
+        _java_obj.setClassifier(self.getClassifier()._to_java())
+        _java_obj.setParallelism(self.getParallelism())
+        _java_obj.setFeaturesCol(self.getFeaturesCol())
+        _java_obj.setLabelCol(self.getLabelCol())
+        _java_obj.setPredictionCol(self.getPredictionCol())
+        return _java_obj
+
+    def _make_java_param_pair(self, param, value):
+        """
+        Makes a Java param pair.
+        """
+        sc = SparkContext._active_spark_context
+        param = self._resolveParam(param)
+        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.classification.OneVsRest",
+                                             self.uid)
+        java_param = _java_obj.getParam(param.name)
+        if isinstance(value, JavaParams):
+            # used in the case of an estimator having another estimator as a parameter
+            # the reason why this is not in _py2java in common.py is that importing
+            # Estimator and Model in common.py results in a circular import with inherit_doc
+            java_value = value._to_java()
+        else:
+            java_value = _py2java(sc, value)
+        return java_param.w(java_value)
+
+    def _transfer_param_map_to_java(self, pyParamMap):
+        """
+        Transforms a Python ParamMap into a Java ParamMap.
+        """
+        paramMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap")
+        for param in self.params:
+            if param in pyParamMap:
+                pair = self._make_java_param_pair(param, pyParamMap[param])
+                paramMap.put([pair])
+        return paramMap
+
+    def _transfer_param_map_from_java(self, javaParamMap):
+        """
+        Transforms a Java ParamMap into a Python ParamMap.
+        """
+        sc = SparkContext._active_spark_context
+        paramMap = dict()
+        for pair in javaParamMap.toList():
+            param = pair.param()
+            if self.hasParam(str(param.name())):
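+                # the classifier param holds an Estimator/Model, so it is wrapped via
+                # _from_java rather than converted with the plain _java2py helper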
+                if param.name() == "classifier":
+                    paramMap[self.getParam(param.name())] = JavaParams._from_java(pair.value())
+                else:
+                    paramMap[self.getParam(param.name())] = _java2py(sc, pair.value())
+        return paramMap
+
+
+class OneVsRestModel(Model, OneVsRestParams, JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Model fitted by OneVsRest.
+    This stores the models resulting from training k binary classifiers: one for each class.
+    Each example is scored against all k models, and the model with the highest score
+    is picked to label the example.
+
+    .. versionadded:: 2.0.0
+    """
+
+    def __init__(self, models):
+        super(OneVsRestModel, self).__init__()
+        self.models = models
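+        # Build the Java peer: each Python model wraps a Java ClassificationModel, and
+        # these are collected into a Java array for the OneVsRestModel constructor.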
+        java_models = [model._to_java() for model in self.models]
+        sc = SparkContext._active_spark_context
+        java_models_array = JavaWrapper._new_java_array(java_models,
+                                                        sc._gateway.jvm.org.apache.spark.ml
+                                                        .classification.ClassificationModel)
+        # TODO: need to set metadata
+        metadata = JavaParams._new_java_obj("org.apache.spark.sql.types.Metadata")
+        self._java_obj = \
+            JavaParams._new_java_obj("org.apache.spark.ml.classification.OneVsRestModel",
+                                     self.uid, metadata.empty(), java_models_array)
+
+    def _transform(self, dataset):
+        # determine the input columns: these need to be passed through
+        origCols = dataset.columns
+
+        # add an accumulator column to store predictions of all the models
+        accColName = "mbc$acc" + str(uuid.uuid4())
+        initUDF = udf(lambda _: [], ArrayType(DoubleType()))
+        newDataset = dataset.withColumn(accColName, initUDF(dataset[origCols[0]]))
+
+        # persist if underlying dataset is not persistent.
+        handlePersistence = dataset.storageLevel == StorageLevel(False, False, False, False)
+        if handlePersistence:
+            newDataset.persist(StorageLevel.MEMORY_AND_DISK)
+
+        # update the accumulator column with the result of prediction of models
+        aggregatedDataset = newDataset
+        for index, model in enumerate(self.models):
+            rawPredictionCol = model._call_java("getRawPredictionCol")
+            columns = origCols + [rawPredictionCol, accColName]
+
+            # add temporary column to store intermediate scores and update
+            tmpColName = "mbc$tmp" + str(uuid.uuid4())
+            updateUDF = udf(
+                lambda predictions, prediction: predictions + [prediction.tolist()[1]],
+                ArrayType(DoubleType()))
+            transformedDataset = model.transform(aggregatedDataset).select(*columns)
+            updatedDataset = transformedDataset.withColumn(
+                tmpColName,
+                updateUDF(transformedDataset[accColName], transformedDataset[rawPredictionCol]))
+            newColumns = origCols + [tmpColName]
+
+            # switch out the intermediate column with the accumulator column
+            aggregatedDataset = updatedDataset\
+                .select(*newColumns).withColumnRenamed(tmpColName, accColName)
+
+        if handlePersistence:
+            newDataset.unpersist()
+
+        # output the index of the classifier with highest confidence as prediction
+        labelUDF = udf(
+            lambda predictions: float(max(enumerate(predictions), key=operator.itemgetter(1))[0]),
+            DoubleType())
+
+        # output label and label metadata as prediction
+        return aggregatedDataset.withColumn(
+            self.getPredictionCol(), labelUDF(aggregatedDataset[accColName])).drop(accColName)
+
+    @since("2.0.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with a randomly generated uid
+        and some extra params. This creates a deep copy of the embedded paramMap,
+        and copies the embedded and extra parameters over.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        newModel = Params.copy(self, extra)
+        newModel.models = [model.copy(extra) for model in self.models]
+        return newModel
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java OneVsRestModel, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+        featuresCol = java_stage.getFeaturesCol()
+        labelCol = java_stage.getLabelCol()
+        predictionCol = java_stage.getPredictionCol()
+        classifier = JavaParams._from_java(java_stage.getClassifier())
+        models = [JavaParams._from_java(model) for model in java_stage.models()]
+        py_stage = cls(models=models).setPredictionCol(predictionCol).setLabelCol(labelCol)\
+            .setFeaturesCol(featuresCol).setClassifier(classifier)
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java OneVsRestModel. Used for ML persistence.
+
+        :return: Java object equivalent to this instance.
+        """
+        sc = SparkContext._active_spark_context
+        java_models = [model._to_java() for model in self.models]
+        java_models_array = JavaWrapper._new_java_array(
+            java_models, sc._gateway.jvm.org.apache.spark.ml.classification.ClassificationModel)
+        metadata = JavaParams._new_java_obj("org.apache.spark.sql.types.Metadata")
+        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.classification.OneVsRestModel",
+                                             self.uid, metadata.empty(), java_models_array)
+        _java_obj.set("classifier", self.getClassifier()._to_java())
+        _java_obj.set("featuresCol", self.getFeaturesCol())
+        _java_obj.set("labelCol", self.getLabelCol())
+        _java_obj.set("predictionCol", self.getPredictionCol())
+        return _java_obj
+
+
+if __name__ == "__main__":
+    import doctest
+    import pyspark.ml.classification
+    from pyspark.sql import SparkSession
+    globs = pyspark.ml.classification.__dict__.copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.classification tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    import tempfile
+    temp_path = tempfile.mkdtemp()
+    globs['temp_path'] = temp_path
+    try:
+        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        spark.stop()
+    finally:
+        from shutil import rmtree
+        try:
+            rmtree(temp_path)
+        except OSError:
+            pass
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/clustering.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/clustering.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ef4e765ea4e109f943ab3f4c82392300a5d0e07
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/clustering.py
@@ -0,0 +1,1404 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import warnings
+
+from pyspark import since, keyword_only
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaWrapper
+from pyspark.ml.param.shared import *
+from pyspark.ml.common import inherit_doc
+from pyspark.sql import DataFrame
+
+__all__ = ['BisectingKMeans', 'BisectingKMeansModel', 'BisectingKMeansSummary',
+           'KMeans', 'KMeansModel',
+           'GaussianMixture', 'GaussianMixtureModel', 'GaussianMixtureSummary',
+           'LDA', 'LDAModel', 'LocalLDAModel', 'DistributedLDAModel', 'PowerIterationClustering']
+
+
+class ClusteringSummary(JavaWrapper):
+    """
+    .. note:: Experimental
+
+    Clustering results for a given model.
+
+    .. versionadded:: 2.1.0
+    """
+
+    @property
+    @since("2.1.0")
+    def predictionCol(self):
+        """
+        Name for column of predicted clusters in `predictions`.
+        """
+        return self._call_java("predictionCol")
+
+    @property
+    @since("2.1.0")
+    def predictions(self):
+        """
+        DataFrame produced by the model's `transform` method.
+        """
+        return self._call_java("predictions")
+
+    @property
+    @since("2.1.0")
+    def featuresCol(self):
+        """
+        Name for column of features in `predictions`.
+        """
+        return self._call_java("featuresCol")
+
+    @property
+    @since("2.1.0")
+    def k(self):
+        """
+        The number of clusters the model was trained with.
+        """
+        return self._call_java("k")
+
+    @property
+    @since("2.1.0")
+    def cluster(self):
+        """
+        DataFrame of predicted clusters for each training data point.
+        """
+        return self._call_java("cluster")
+
+    @property
+    @since("2.1.0")
+    def clusterSizes(self):
+        """
+        Size of (number of data points in) each cluster.
+        """
+        return self._call_java("clusterSizes")
+
+    @property
+    @since("2.4.0")
+    def numIter(self):
+        """
+        Number of iterations.
+        """
+        return self._call_java("numIter")
+
+
+class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by GaussianMixture.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def weights(self):
+        """
+        Weight for each Gaussian distribution in the mixture.
+        This is a multinomial probability distribution over the k Gaussians,
+        where weights[i] is the weight for Gaussian i, and weights sum to 1.
+        """
+        return self._call_java("weights")
+
+    @property
+    @since("2.0.0")
+    def gaussiansDF(self):
+        """
+        Retrieve Gaussian distributions as a DataFrame.
+        Each row represents a Gaussian Distribution.
+        The DataFrame has two columns: mean (Vector) and cov (Matrix).
+        """
+        return self._call_java("gaussiansDF")
+
+    @property
+    @since("2.1.0")
+    def hasSummary(self):
+        """
+        Indicates whether a training summary exists for this model
+        instance.
+        """
+        return self._call_java("hasSummary")
+
+    @property
+    @since("2.1.0")
+    def summary(self):
+        """
+        Gets summary (e.g. cluster assignments, cluster sizes) of the model trained on the
+        training set. An exception is thrown if no summary exists.
+        """
+        if self.hasSummary:
+            return GaussianMixtureSummary(self._call_java("summary"))
+        else:
+            raise RuntimeError("No training summary available for this %s" %
+                               self.__class__.__name__)
+
+
+@inherit_doc
+class GaussianMixture(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol, HasSeed,
+                      HasProbabilityCol, JavaMLWritable, JavaMLReadable):
+    """
+    GaussianMixture clustering.
+    This class performs expectation maximization for multivariate Gaussian
+    Mixture Models (GMMs).  A GMM represents a composite distribution of
+    independent Gaussian distributions with associated "mixing" weights
+    specifying each distribution's contribution to the composite.
+
+    Given a set of sample points, this class will maximize the log-likelihood
+    for a mixture of k Gaussians, iterating until the log-likelihood changes by
+    less than convergenceTol, or until it has reached the max number of iterations.
+    While this process is generally guaranteed to converge, it is not guaranteed
+    to find a global optimum.
+
+    .. note:: For high-dimensional data (with many features), this algorithm may perform poorly.
+          This is due to high-dimensional data (a) making it difficult to cluster at all
+          (based on statistical/theoretical arguments) and (b) numerical issues with
+          Gaussian distributions.
+
+    >>> from pyspark.ml.linalg import Vectors
+
+    >>> data = [(Vectors.dense([-0.1, -0.05 ]),),
+    ...         (Vectors.dense([-0.01, -0.1]),),
+    ...         (Vectors.dense([0.9, 0.8]),),
+    ...         (Vectors.dense([0.75, 0.935]),),
+    ...         (Vectors.dense([-0.83, -0.68]),),
+    ...         (Vectors.dense([-0.91, -0.76]),)]
+    >>> df = spark.createDataFrame(data, ["features"])
+    >>> gm = GaussianMixture(k=3, tol=0.0001,
+    ...                      maxIter=10, seed=10)
+    >>> model = gm.fit(df)
+    >>> model.hasSummary
+    True
+    >>> summary = model.summary
+    >>> summary.k
+    3
+    >>> summary.clusterSizes
+    [2, 2, 2]
+    >>> summary.logLikelihood
+    8.14636...
+    >>> weights = model.weights
+    >>> len(weights)
+    3
+    >>> model.gaussiansDF.select("mean").head()
+    Row(mean=DenseVector([0.825, 0.8675]))
+    >>> model.gaussiansDF.select("cov").head()
+    Row(cov=DenseMatrix(2, 2, [0.0056, -0.0051, -0.0051, 0.0046], False))
+    >>> transformed = model.transform(df).select("features", "prediction")
+    >>> rows = transformed.collect()
+    >>> rows[4].prediction == rows[5].prediction
+    True
+    >>> rows[2].prediction == rows[3].prediction
+    True
+    >>> gmm_path = temp_path + "/gmm"
+    >>> gm.save(gmm_path)
+    >>> gm2 = GaussianMixture.load(gmm_path)
+    >>> gm2.getK()
+    3
+    >>> model_path = temp_path + "/gmm_model"
+    >>> model.save(model_path)
+    >>> model2 = GaussianMixtureModel.load(model_path)
+    >>> model2.hasSummary
+    False
+    >>> model2.weights == model.weights
+    True
+    >>> model2.gaussiansDF.select("mean").head()
+    Row(mean=DenseVector([0.825, 0.8675]))
+    >>> model2.gaussiansDF.select("cov").head()
+    Row(cov=DenseMatrix(2, 2, [0.0056, -0.0051, -0.0051, 0.0046], False))
+
+    .. versionadded:: 2.0.0
+    """
+
+    k = Param(Params._dummy(), "k", "Number of independent Gaussians in the mixture model. " +
+              "Must be > 1.", typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", predictionCol="prediction", k=2,
+                 probabilityCol="probability", tol=0.01, maxIter=100, seed=None):
+        """
+        __init__(self, featuresCol="features", predictionCol="prediction", k=2, \
+                 probabilityCol="probability", tol=0.01, maxIter=100, seed=None)
+        """
+        super(GaussianMixture, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.GaussianMixture",
+                                            self.uid)
+        self._setDefault(k=2, tol=0.01, maxIter=100)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    def _create_model(self, java_model):
+        return GaussianMixtureModel(java_model)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, featuresCol="features", predictionCol="prediction", k=2,
+                  probabilityCol="probability", tol=0.01, maxIter=100, seed=None):
+        """
+        setParams(self, featuresCol="features", predictionCol="prediction", k=2, \
+                  probabilityCol="probability", tol=0.01, maxIter=100, seed=None)
+
+        Sets params for GaussianMixture.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.0.0")
+    def setK(self, value):
+        """
+        Sets the value of :py:attr:`k`.
+        """
+        return self._set(k=value)
+
+    @since("2.0.0")
+    def getK(self):
+        """
+        Gets the value of `k` or its default value.
+        """
+        return self.getOrDefault(self.k)
+
+
+class GaussianMixtureSummary(ClusteringSummary):
+    """
+    .. note:: Experimental
+
+    Gaussian mixture clustering results for a given model.
+
+    .. versionadded:: 2.1.0
+    """
+
+    @property
+    @since("2.1.0")
+    def probabilityCol(self):
+        """
+        Name for column of predicted probability of each cluster in `predictions`.
+        """
+        return self._call_java("probabilityCol")
+
+    @property
+    @since("2.1.0")
+    def probability(self):
+        """
+        DataFrame of probabilities of each cluster for each training data point.
+        """
+        return self._call_java("probability")
+
+    @property
+    @since("2.2.0")
+    def logLikelihood(self):
+        """
+        Total log-likelihood for this model on the given data.
+        """
+        return self._call_java("logLikelihood")
+
+
+class KMeansSummary(ClusteringSummary):
+    """
+    .. note:: Experimental
+
+    Summary of KMeans.
+
+    .. versionadded:: 2.1.0
+    """
+
+    @property
+    @since("2.4.0")
+    def trainingCost(self):
+        """
+        K-means cost (sum of squared distances to the nearest centroid for all points in the
+        training dataset). This is equivalent to sklearn's inertia.
+        """
+        return self._call_java("trainingCost")
+
+
+class KMeansModel(JavaModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by KMeans.
+
+    .. versionadded:: 1.5.0
+    """
+
+    @since("1.5.0")
+    def clusterCenters(self):
+        """Get the cluster centers, represented as a list of NumPy arrays."""
+        return [c.toArray() for c in self._call_java("clusterCenters")]
+
+    @since("2.0.0")
+    def computeCost(self, dataset):
+        """
+        Return the K-means cost (sum of squared distances of points to their nearest center)
+        for this model on the given data.
+
+        .. note:: Deprecated in 2.4.0. It will be removed in 3.0.0. Use ClusteringEvaluator
+           instead. You can also get the cost on the training dataset in the summary.
+        """
+        warnings.warn("Deprecated in 2.4.0. It will be removed in 3.0.0. Use ClusteringEvaluator "
+                      "instead. You can also get the cost on the training dataset in the summary.",
+                      DeprecationWarning)
+        return self._call_java("computeCost", dataset)
+
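+    # A sketch of the suggested alternative (illustrative only; note that
+    # ClusteringEvaluator computes the silhouette score, a different metric from the
+    # k-means cost, which remains available as summary.trainingCost):
+    #
+    #   from pyspark.ml.evaluation import ClusteringEvaluator
+    #   silhouette = ClusteringEvaluator().evaluate(model.transform(dataset))
+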
+    @property
+    @since("2.1.0")
+    def hasSummary(self):
+        """
+        Indicates whether a training summary exists for this model instance.
+        """
+        return self._call_java("hasSummary")
+
+    @property
+    @since("2.1.0")
+    def summary(self):
+        """
+        Gets summary (e.g. cluster assignments, cluster sizes) of the model trained on the
+        training set. An exception is thrown if no summary exists.
+        """
+        if self.hasSummary:
+            return KMeansSummary(self._call_java("summary"))
+        else:
+            raise RuntimeError("No training summary available for this %s" %
+                               self.__class__.__name__)
+
+
+@inherit_doc
+class KMeans(JavaEstimator, HasDistanceMeasure, HasFeaturesCol, HasPredictionCol, HasMaxIter,
+             HasTol, HasSeed, JavaMLWritable, JavaMLReadable):
+    """
+    K-means clustering with a k-means++ like initialization mode
+    (the k-means|| algorithm by Bahmani et al).
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
+    ...         (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)]
+    >>> df = spark.createDataFrame(data, ["features"])
+    >>> kmeans = KMeans(k=2, seed=1)
+    >>> model = kmeans.fit(df)
+    >>> centers = model.clusterCenters()
+    >>> len(centers)
+    2
+    >>> model.computeCost(df)
+    2.000...
+    >>> transformed = model.transform(df).select("features", "prediction")
+    >>> rows = transformed.collect()
+    >>> rows[0].prediction == rows[1].prediction
+    True
+    >>> rows[2].prediction == rows[3].prediction
+    True
+    >>> model.hasSummary
+    True
+    >>> summary = model.summary
+    >>> summary.k
+    2
+    >>> summary.clusterSizes
+    [2, 2]
+    >>> summary.trainingCost
+    2.000...
+    >>> kmeans_path = temp_path + "/kmeans"
+    >>> kmeans.save(kmeans_path)
+    >>> kmeans2 = KMeans.load(kmeans_path)
+    >>> kmeans2.getK()
+    2
+    >>> model_path = temp_path + "/kmeans_model"
+    >>> model.save(model_path)
+    >>> model2 = KMeansModel.load(model_path)
+    >>> model2.hasSummary
+    False
+    >>> model.clusterCenters()[0] == model2.clusterCenters()[0]
+    array([ True,  True], dtype=bool)
+    >>> model.clusterCenters()[1] == model2.clusterCenters()[1]
+    array([ True,  True], dtype=bool)
+
+    .. versionadded:: 1.5.0
+    """
+
+    k = Param(Params._dummy(), "k", "The number of clusters to create. Must be > 1.",
+              typeConverter=TypeConverters.toInt)
+    initMode = Param(Params._dummy(), "initMode",
+                     "The initialization algorithm. This can be either \"random\" to " +
+                     "choose random points as initial cluster centers, or \"k-means||\" " +
+                     "to use a parallel variant of k-means++",
+                     typeConverter=TypeConverters.toString)
+    initSteps = Param(Params._dummy(), "initSteps", "The number of steps for k-means|| " +
+                      "initialization mode. Must be > 0.", typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", predictionCol="prediction", k=2,
+                 initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, seed=None,
+                 distanceMeasure="euclidean"):
+        """
+        __init__(self, featuresCol="features", predictionCol="prediction", k=2, \
+                 initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, seed=None, \
+                 distanceMeasure="euclidean")
+        """
+        super(KMeans, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.KMeans", self.uid)
+        self._setDefault(k=2, initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20,
+                         distanceMeasure="euclidean")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    def _create_model(self, java_model):
+        return KMeansModel(java_model)
+
+    @keyword_only
+    @since("1.5.0")
+    def setParams(self, featuresCol="features", predictionCol="prediction", k=2,
+                  initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, seed=None,
+                  distanceMeasure="euclidean"):
+        """
+        setParams(self, featuresCol="features", predictionCol="prediction", k=2, \
+                  initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, seed=None, \
+                  distanceMeasure="euclidean")
+
+        Sets params for KMeans.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.5.0")
+    def setK(self, value):
+        """
+        Sets the value of :py:attr:`k`.
+        """
+        return self._set(k=value)
+
+    @since("1.5.0")
+    def getK(self):
+        """
+        Gets the value of `k` or its default value.
+        """
+        return self.getOrDefault(self.k)
+
+    @since("1.5.0")
+    def setInitMode(self, value):
+        """
+        Sets the value of :py:attr:`initMode`.
+        """
+        return self._set(initMode=value)
+
+    @since("1.5.0")
+    def getInitMode(self):
+        """
+        Gets the value of `initMode` or its default value.
+        """
+        return self.getOrDefault(self.initMode)
+
+    @since("1.5.0")
+    def setInitSteps(self, value):
+        """
+        Sets the value of :py:attr:`initSteps`.
+        """
+        return self._set(initSteps=value)
+
+    @since("1.5.0")
+    def getInitSteps(self):
+        """
+        Gets the value of `initSteps` or its default value.
+        """
+        return self.getOrDefault(self.initSteps)
+
+    @since("2.4.0")
+    def setDistanceMeasure(self, value):
+        """
+        Sets the value of :py:attr:`distanceMeasure`.
+        """
+        return self._set(distanceMeasure=value)
+
+    @since("2.4.0")
+    def getDistanceMeasure(self):
+        """
+        Gets the value of `distanceMeasure` or its default value.
+        """
+        return self.getOrDefault(self.distanceMeasure)
+
+
+class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by BisectingKMeans.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @since("2.0.0")
+    def clusterCenters(self):
+        """Get the cluster centers, represented as a list of NumPy arrays."""
+        return [c.toArray() for c in self._call_java("clusterCenters")]
+
+    @since("2.0.0")
+    def computeCost(self, dataset):
+        """
+        Computes the sum of squared distances between the input points
+        and their corresponding cluster centers.
+        """
+        return self._call_java("computeCost", dataset)
+
+    @property
+    @since("2.1.0")
+    def hasSummary(self):
+        """
+        Indicates whether a training summary exists for this model instance.
+        """
+        return self._call_java("hasSummary")
+
+    @property
+    @since("2.1.0")
+    def summary(self):
+        """
+        Gets summary (e.g. cluster assignments, cluster sizes) of the model trained on the
+        training set. An exception is thrown if no summary exists.
+        """
+        if self.hasSummary:
+            return BisectingKMeansSummary(self._call_java("summary"))
+        else:
+            raise RuntimeError("No training summary available for this %s" %
+                               self.__class__.__name__)
+
+
+@inherit_doc
+class BisectingKMeans(JavaEstimator, HasDistanceMeasure, HasFeaturesCol, HasPredictionCol,
+                      HasMaxIter, HasSeed, JavaMLWritable, JavaMLReadable):
+    """
+    A bisecting k-means algorithm based on the paper "A comparison of document clustering
+    techniques" by Steinbach, Karypis, and Kumar, with modification to fit Spark.
+    The algorithm starts from a single cluster that contains all points.
+    Iteratively it finds divisible clusters on the bottom level and bisects each of them using
+    k-means, until there are `k` leaf clusters in total or no leaf clusters are divisible.
+    The bisecting steps of clusters on the same level are grouped together to increase parallelism.
+    If bisecting all divisible clusters on the bottom level would result in more than `k` leaf
+    clusters, larger clusters get higher priority.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
+    ...         (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)]
+    >>> df = spark.createDataFrame(data, ["features"])
+    >>> bkm = BisectingKMeans(k=2, minDivisibleClusterSize=1.0)
+    >>> model = bkm.fit(df)
+    >>> centers = model.clusterCenters()
+    >>> len(centers)
+    2
+    >>> model.computeCost(df)
+    2.000...
+    >>> model.hasSummary
+    True
+    >>> summary = model.summary
+    >>> summary.k
+    2
+    >>> summary.clusterSizes
+    [2, 2]
+    >>> transformed = model.transform(df).select("features", "prediction")
+    >>> rows = transformed.collect()
+    >>> rows[0].prediction == rows[1].prediction
+    True
+    >>> rows[2].prediction == rows[3].prediction
+    True
+    >>> bkm_path = temp_path + "/bkm"
+    >>> bkm.save(bkm_path)
+    >>> bkm2 = BisectingKMeans.load(bkm_path)
+    >>> bkm2.getK()
+    2
+    >>> bkm2.getDistanceMeasure()
+    'euclidean'
+    >>> model_path = temp_path + "/bkm_model"
+    >>> model.save(model_path)
+    >>> model2 = BisectingKMeansModel.load(model_path)
+    >>> model2.hasSummary
+    False
+    >>> model.clusterCenters()[0] == model2.clusterCenters()[0]
+    array([ True,  True], dtype=bool)
+    >>> model.clusterCenters()[1] == model2.clusterCenters()[1]
+    array([ True,  True], dtype=bool)
+
+    .. versionadded:: 2.0.0
+    """
+
+    k = Param(Params._dummy(), "k", "The desired number of leaf clusters. Must be > 1.",
+              typeConverter=TypeConverters.toInt)
+    minDivisibleClusterSize = Param(Params._dummy(), "minDivisibleClusterSize",
+                                    "The minimum number of points (if >= 1.0) or the minimum " +
+                                    "proportion of points (if < 1.0) of a divisible cluster.",
+                                    typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", predictionCol="prediction", maxIter=20,
+                 seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean"):
+        """
+        __init__(self, featuresCol="features", predictionCol="prediction", maxIter=20, \
+                 seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean")
+        """
+        super(BisectingKMeans, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.BisectingKMeans",
+                                            self.uid)
+        self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, featuresCol="features", predictionCol="prediction", maxIter=20,
+                  seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean"):
+        """
+        setParams(self, featuresCol="features", predictionCol="prediction", maxIter=20, \
+                  seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean")
+        Sets params for BisectingKMeans.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.0.0")
+    def setK(self, value):
+        """
+        Sets the value of :py:attr:`k`.
+        """
+        return self._set(k=value)
+
+    @since("2.0.0")
+    def getK(self):
+        """
+        Gets the value of `k` or its default value.
+        """
+        return self.getOrDefault(self.k)
+
+    @since("2.0.0")
+    def setMinDivisibleClusterSize(self, value):
+        """
+        Sets the value of :py:attr:`minDivisibleClusterSize`.
+        """
+        return self._set(minDivisibleClusterSize=value)
+
+    @since("2.0.0")
+    def getMinDivisibleClusterSize(self):
+        """
+        Gets the value of `minDivisibleClusterSize` or its default value.
+        """
+        return self.getOrDefault(self.minDivisibleClusterSize)
+
+    @since("2.4.0")
+    def setDistanceMeasure(self, value):
+        """
+        Sets the value of :py:attr:`distanceMeasure`.
+        """
+        return self._set(distanceMeasure=value)
+
+    @since("2.4.0")
+    def getDistanceMeasure(self):
+        """
+        Gets the value of `distanceMeasure` or its default value.
+        """
+        return self.getOrDefault(self.distanceMeasure)
+
+    def _create_model(self, java_model):
+        return BisectingKMeansModel(java_model)
+
+
+class BisectingKMeansSummary(ClusteringSummary):
+    """
+    .. note:: Experimental
+
+    Bisecting KMeans clustering results for a given model.
+
+    .. versionadded:: 2.1.0
+    """
+    pass
+
+
+@inherit_doc
+class LDAModel(JavaModel):
+    """
+    Latent Dirichlet Allocation (LDA) model.
+    This abstraction permits different underlying representations,
+    including local and distributed data structures.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @since("2.0.0")
+    def isDistributed(self):
+        """
+        Indicates whether this instance is of type DistributedLDAModel
+        """
+        return self._call_java("isDistributed")
+
+    @since("2.0.0")
+    def vocabSize(self):
+        """Vocabulary size (number of terms or words in the vocabulary)"""
+        return self._call_java("vocabSize")
+
+    @since("2.0.0")
+    def topicsMatrix(self):
+        """
+        Inferred topics, where each topic is represented by a distribution over terms.
+        This is a matrix of size vocabSize x k, where each column is a topic.
+        No guarantees are given about the ordering of the topics.
+
+        WARNING: If this model is actually a :py:class:`DistributedLDAModel` instance produced by
+        the Expectation-Maximization ("em") `optimizer`, then this method could involve
+        collecting a large amount of data to the driver (on the order of vocabSize x k).
+        """
+        return self._call_java("topicsMatrix")
+
+    @since("2.0.0")
+    def logLikelihood(self, dataset):
+        """
+        Calculates a lower bound on the log likelihood of the entire corpus.
+        See Equation (16) in the Online LDA paper (Hoffman et al., 2010).
+
+        WARNING: If this model is an instance of :py:class:`DistributedLDAModel` (produced when
+        :py:attr:`optimizer` is set to "em"), this involves collecting a large
+        :py:func:`topicsMatrix` to the driver. This implementation may be changed in the future.
+        """
+        return self._call_java("logLikelihood", dataset)
+
+    @since("2.0.0")
+    def logPerplexity(self, dataset):
+        """
+        Calculate an upper bound on perplexity.  (Lower is better.)
+        See Equation (16) in the Online LDA paper (Hoffman et al., 2010).
+
+        WARNING: If this model is an instance of :py:class:`DistributedLDAModel` (produced when
+        :py:attr:`optimizer` is set to "em"), this involves collecting a large
+        :py:func:`topicsMatrix` to the driver. This implementation may be changed in the future.
+        """
+        return self._call_java("logPerplexity", dataset)
+
+    @since("2.0.0")
+    def describeTopics(self, maxTermsPerTopic=10):
+        """
+        Return the topics described by their top-weighted terms.
+        """
+        return self._call_java("describeTopics", maxTermsPerTopic)
+
+    @since("2.0.0")
+    def estimatedDocConcentration(self):
+        """
+        Value for :py:attr:`LDA.docConcentration` estimated from data.
+        If Online LDA was used and :py:attr:`LDA.optimizeDocConcentration` was set to false,
+        then this returns the fixed (given) value for the :py:attr:`LDA.docConcentration` parameter.
+        """
+        return self._call_java("estimatedDocConcentration")
+
+
+@inherit_doc
+class DistributedLDAModel(LDAModel, JavaMLReadable, JavaMLWritable):
+    """
+    Distributed model fitted by :py:class:`LDA`.
+    This type of model is currently only produced by Expectation-Maximization (EM).
+
+    This model stores the inferred topics, the full training dataset, and the topic distribution
+    for each training document.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @since("2.0.0")
+    def toLocal(self):
+        """
+        Convert this distributed model to a local representation.  This discards info about the
+        training dataset.
+
+        WARNING: This involves collecting a large :py:func:`topicsMatrix` to the driver.
+        """
+        model = LocalLDAModel(self._call_java("toLocal"))
+
+        # SPARK-10931: Temporary fix to be removed once LDAModel defines Params
+        model._create_params_from_java()
+        model._transfer_params_from_java()
+
+        return model
+
+    @since("2.0.0")
+    def trainingLogLikelihood(self):
+        """
+        Log likelihood of the observed tokens in the training set,
+        given the current parameter estimates:
+        log P(docs | topics, topic distributions for docs, Dirichlet hyperparameters)
+
+        Notes:
+          - This excludes the prior; for that, use :py:func:`logPrior`.
+          - Even with :py:func:`logPrior`, this is NOT the same as the data log likelihood given
+            the hyperparameters.
+          - This is computed from the topic distributions computed during training. If you call
+            :py:func:`logLikelihood` on the same training dataset, the topic distributions
+            will be computed again, possibly giving different results.
+        """
+        return self._call_java("trainingLogLikelihood")
+
+    @since("2.0.0")
+    def logPrior(self):
+        """
+        Log probability of the current parameter estimate:
+        log P(topics, topic distributions for docs | alpha, eta)
+        """
+        return self._call_java("logPrior")
+
+    @since("2.0.0")
+    def getCheckpointFiles(self):
+        """
+        If using checkpointing and :py:attr:`LDA.keepLastCheckpoint` is set to true, then there may
+        be saved checkpoint files.  This method is provided so that users can manage those files.
+
+        .. note:: Removing the checkpoints can cause failures if a partition is lost and is needed
+            by certain :py:class:`DistributedLDAModel` methods.  Reference counting will clean up
+            the checkpoints when this model and derivative data go out of scope.
+
+        :return: List of checkpoint files from training
+        """
+        return self._call_java("getCheckpointFiles")
+
+
+@inherit_doc
+class LocalLDAModel(LDAModel, JavaMLReadable, JavaMLWritable):
+    """
+    Local (non-distributed) model fitted by :py:class:`LDA`.
+    This model stores the inferred topics only; it does not store info about the training dataset.
+
+    .. versionadded:: 2.0.0
+    """
+    pass
+
+
+@inherit_doc
+class LDA(JavaEstimator, HasFeaturesCol, HasMaxIter, HasSeed, HasCheckpointInterval,
+          JavaMLReadable, JavaMLWritable):
+    """
+    Latent Dirichlet Allocation (LDA), a topic model designed for text documents.
+
+    Terminology:
+
+     - "term" = "word": an element of the vocabulary
+     - "token": instance of a term appearing in a document
+     - "topic": multinomial distribution over terms representing some concept
+     - "document": one piece of text, corresponding to one row in the input data
+
+    Original LDA paper (journal version):
+      Blei, Ng, and Jordan.  "Latent Dirichlet Allocation."  JMLR, 2003.
+
+    Input data (featuresCol):
+    LDA is given a collection of documents as input data, via the featuresCol parameter.
+    Each document is specified as a :py:class:`Vector` of length vocabSize, where each entry is the
+    count for the corresponding term (word) in the document.  Feature transformers such as
+    :py:class:`pyspark.ml.feature.Tokenizer` and :py:class:`pyspark.ml.feature.CountVectorizer`
+    can be useful for converting text to word count vectors.
+
+    >>> from pyspark.ml.linalg import Vectors, SparseVector
+    >>> from pyspark.ml.clustering import LDA
+    >>> df = spark.createDataFrame([[1, Vectors.dense([0.0, 1.0])],
+    ...      [2, SparseVector(2, {0: 1.0})],], ["id", "features"])
+    >>> lda = LDA(k=2, seed=1, optimizer="em")
+    >>> model = lda.fit(df)
+    >>> model.isDistributed()
+    True
+    >>> localModel = model.toLocal()
+    >>> localModel.isDistributed()
+    False
+    >>> model.vocabSize()
+    2
+    >>> model.describeTopics().show()
+    +-----+-----------+--------------------+
+    |topic|termIndices|         termWeights|
+    +-----+-----------+--------------------+
+    |    0|     [1, 0]|[0.50401530077160...|
+    |    1|     [0, 1]|[0.50401530077160...|
+    +-----+-----------+--------------------+
+    ...
+    >>> model.topicsMatrix()
+    DenseMatrix(2, 2, [0.496, 0.504, 0.504, 0.496], 0)
+    >>> lda_path = temp_path + "/lda"
+    >>> lda.save(lda_path)
+    >>> sameLDA = LDA.load(lda_path)
+    >>> distributed_model_path = temp_path + "/lda_distributed_model"
+    >>> model.save(distributed_model_path)
+    >>> sameModel = DistributedLDAModel.load(distributed_model_path)
+    >>> local_model_path = temp_path + "/lda_local_model"
+    >>> localModel.save(local_model_path)
+    >>> sameLocalModel = LocalLDAModel.load(local_model_path)
+
+    .. versionadded:: 2.0.0
+    """
+
+    k = Param(Params._dummy(), "k", "The number of topics (clusters) to infer. Must be > 1.",
+              typeConverter=TypeConverters.toInt)
+    optimizer = Param(Params._dummy(), "optimizer",
+                      "Optimizer or inference algorithm used to estimate the LDA model.  "
+                      "Supported: online, em", typeConverter=TypeConverters.toString)
+    learningOffset = Param(Params._dummy(), "learningOffset",
+                           "A (positive) learning parameter that downweights early iterations."
+                           " Larger values make early iterations count less",
+                           typeConverter=TypeConverters.toFloat)
+    learningDecay = Param(Params._dummy(), "learningDecay", "Learning rate, set as an "
+                          "exponential decay rate. This should be between (0.5, 1.0] to "
+                          "guarantee asymptotic convergence.", typeConverter=TypeConverters.toFloat)
+    subsamplingRate = Param(Params._dummy(), "subsamplingRate",
+                            "Fraction of the corpus to be sampled and used in each iteration "
+                            "of mini-batch gradient descent, in range (0, 1].",
+                            typeConverter=TypeConverters.toFloat)
+    optimizeDocConcentration = Param(Params._dummy(), "optimizeDocConcentration",
+                                     "Indicates whether the docConcentration (Dirichlet parameter "
+                                     "for document-topic distribution) will be optimized during "
+                                     "training.", typeConverter=TypeConverters.toBoolean)
+    docConcentration = Param(Params._dummy(), "docConcentration",
+                             "Concentration parameter (commonly named \"alpha\") for the "
+                             "prior placed on documents' distributions over topics (\"theta\").",
+                             typeConverter=TypeConverters.toListFloat)
+    topicConcentration = Param(Params._dummy(), "topicConcentration",
+                               "Concentration parameter (commonly named \"beta\" or \"eta\") for "
+                               "the prior placed on topic' distributions over terms.",
+                               typeConverter=TypeConverters.toFloat)
+    topicDistributionCol = Param(Params._dummy(), "topicDistributionCol",
+                                 "Output column with estimates of the topic mixture distribution "
+                                 "for each document (often called \"theta\" in the literature). "
+                                 "Returns a vector of zeros for an empty document.",
+                                 typeConverter=TypeConverters.toString)
+    keepLastCheckpoint = Param(Params._dummy(), "keepLastCheckpoint",
+                               "(For EM optimizer) If using checkpointing, this indicates whether"
+                               " to keep the last checkpoint. If false, then the checkpoint will be"
+                               " deleted. Deleting the checkpoint can cause failures if a data"
+                               " partition is lost, so set this bit with care.",
+                               typeConverter=TypeConverters.toBoolean)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,
+                 k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,
+                 subsamplingRate=0.05, optimizeDocConcentration=True,
+                 docConcentration=None, topicConcentration=None,
+                 topicDistributionCol="topicDistribution", keepLastCheckpoint=True):
+        """
+        __init__(self, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,\
+                  k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,\
+                  subsamplingRate=0.05, optimizeDocConcentration=True,\
+                  docConcentration=None, topicConcentration=None,\
+                  topicDistributionCol="topicDistribution", keepLastCheckpoint=True)
+        """
+        super(LDA, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.clustering.LDA", self.uid)
+        self._setDefault(maxIter=20, checkpointInterval=10,
+                         k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,
+                         subsamplingRate=0.05, optimizeDocConcentration=True,
+                         topicDistributionCol="topicDistribution", keepLastCheckpoint=True)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    def _create_model(self, java_model):
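+        # The EM optimizer keeps the training data and per-document topic
+        # distributions on the cluster, so it produces a DistributedLDAModel;
+        # the online optimizer produces a LocalLDAModel with only the topics.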
+        if self.getOptimizer() == "em":
+            return DistributedLDAModel(java_model)
+        else:
+            return LocalLDAModel(java_model)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,
+                  k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,
+                  subsamplingRate=0.05, optimizeDocConcentration=True,
+                  docConcentration=None, topicConcentration=None,
+                  topicDistributionCol="topicDistribution", keepLastCheckpoint=True):
+        """
+        setParams(self, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,\
+                  k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,\
+                  subsamplingRate=0.05, optimizeDocConcentration=True,\
+                  docConcentration=None, topicConcentration=None,\
+                  topicDistributionCol="topicDistribution", keepLastCheckpoint=True)
+
+        Sets params for LDA.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.0.0")
+    def setK(self, value):
+        """
+        Sets the value of :py:attr:`k`.
+
+        >>> algo = LDA().setK(10)
+        >>> algo.getK()
+        10
+        """
+        return self._set(k=value)
+
+    @since("2.0.0")
+    def getK(self):
+        """
+        Gets the value of :py:attr:`k` or its default value.
+        """
+        return self.getOrDefault(self.k)
+
+    @since("2.0.0")
+    def setOptimizer(self, value):
+        """
+        Sets the value of :py:attr:`optimizer`.
+        Currently only 'em' and 'online' are supported.
+
+        >>> algo = LDA().setOptimizer("em")
+        >>> algo.getOptimizer()
+        'em'
+        """
+        return self._set(optimizer=value)
+
+    @since("2.0.0")
+    def getOptimizer(self):
+        """
+        Gets the value of :py:attr:`optimizer` or its default value.
+        """
+        return self.getOrDefault(self.optimizer)
+
+    @since("2.0.0")
+    def setLearningOffset(self, value):
+        """
+        Sets the value of :py:attr:`learningOffset`.
+
+        >>> algo = LDA().setLearningOffset(100)
+        >>> algo.getLearningOffset()
+        100.0
+        """
+        return self._set(learningOffset=value)
+
+    @since("2.0.0")
+    def getLearningOffset(self):
+        """
+        Gets the value of :py:attr:`learningOffset` or its default value.
+        """
+        return self.getOrDefault(self.learningOffset)
+
+    @since("2.0.0")
+    def setLearningDecay(self, value):
+        """
+        Sets the value of :py:attr:`learningDecay`.
+
+        >>> algo = LDA().setLearningDecay(0.1)
+        >>> algo.getLearningDecay()
+        0.1...
+        """
+        return self._set(learningDecay=value)
+
+    @since("2.0.0")
+    def getLearningDecay(self):
+        """
+        Gets the value of :py:attr:`learningDecay` or its default value.
+        """
+        return self.getOrDefault(self.learningDecay)
+
+    @since("2.0.0")
+    def setSubsamplingRate(self, value):
+        """
+        Sets the value of :py:attr:`subsamplingRate`.
+
+        >>> algo = LDA().setSubsamplingRate(0.1)
+        >>> algo.getSubsamplingRate()
+        0.1...
+        """
+        return self._set(subsamplingRate=value)
+
+    @since("2.0.0")
+    def getSubsamplingRate(self):
+        """
+        Gets the value of :py:attr:`subsamplingRate` or its default value.
+        """
+        return self.getOrDefault(self.subsamplingRate)
+
+    @since("2.0.0")
+    def setOptimizeDocConcentration(self, value):
+        """
+        Sets the value of :py:attr:`optimizeDocConcentration`.
+
+        >>> algo = LDA().setOptimizeDocConcentration(True)
+        >>> algo.getOptimizeDocConcentration()
+        True
+        """
+        return self._set(optimizeDocConcentration=value)
+
+    @since("2.0.0")
+    def getOptimizeDocConcentration(self):
+        """
+        Gets the value of :py:attr:`optimizeDocConcentration` or its default value.
+        """
+        return self.getOrDefault(self.optimizeDocConcentration)
+
+    @since("2.0.0")
+    def setDocConcentration(self, value):
+        """
+        Sets the value of :py:attr:`docConcentration`.
+
+        >>> algo = LDA().setDocConcentration([0.1, 0.2])
+        >>> algo.getDocConcentration()
+        [0.1..., 0.2...]
+        """
+        return self._set(docConcentration=value)
+
+    @since("2.0.0")
+    def getDocConcentration(self):
+        """
+        Gets the value of :py:attr:`docConcentration` or its default value.
+        """
+        return self.getOrDefault(self.docConcentration)
+
+    @since("2.0.0")
+    def setTopicConcentration(self, value):
+        """
+        Sets the value of :py:attr:`topicConcentration`.
+
+        >>> algo = LDA().setTopicConcentration(0.5)
+        >>> algo.getTopicConcentration()
+        0.5...
+        """
+        return self._set(topicConcentration=value)
+
+    @since("2.0.0")
+    def getTopicConcentration(self):
+        """
+        Gets the value of :py:attr:`topicConcentration` or its default value.
+        """
+        return self.getOrDefault(self.topicConcentration)
+
+    @since("2.0.0")
+    def setTopicDistributionCol(self, value):
+        """
+        Sets the value of :py:attr:`topicDistributionCol`.
+
+        >>> algo = LDA().setTopicDistributionCol("topicDistributionCol")
+        >>> algo.getTopicDistributionCol()
+        'topicDistributionCol'
+        """
+        return self._set(topicDistributionCol=value)
+
+    @since("2.0.0")
+    def getTopicDistributionCol(self):
+        """
+        Gets the value of :py:attr:`topicDistributionCol` or its default value.
+        """
+        return self.getOrDefault(self.topicDistributionCol)
+
+    @since("2.0.0")
+    def setKeepLastCheckpoint(self, value):
+        """
+        Sets the value of :py:attr:`keepLastCheckpoint`.
+
+        >>> algo = LDA().setKeepLastCheckpoint(False)
+        >>> algo.getKeepLastCheckpoint()
+        False
+        """
+        return self._set(keepLastCheckpoint=value)
+
+    @since("2.0.0")
+    def getKeepLastCheckpoint(self):
+        """
+        Gets the value of :py:attr:`keepLastCheckpoint` or its default value.
+        """
+        return self.getOrDefault(self.keepLastCheckpoint)
+
+
+@inherit_doc
+class PowerIterationClustering(HasMaxIter, HasWeightCol, JavaParams, JavaMLReadable,
+                               JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by
+    `Lin and Cohen <http://www.icml2010.org/papers/387.pdf>`_. From the abstract:
+    PIC finds a very low-dimensional embedding of a dataset using truncated power
+    iteration on a normalized pair-wise similarity matrix of the data.
+
+    This class is not yet an Estimator/Transformer; use the :py:func:`assignClusters` method
+    to run the PowerIterationClustering algorithm.
+
+    .. seealso:: `Wikipedia on Spectral clustering
+        <http://en.wikipedia.org/wiki/Spectral_clustering>`_
+
+    >>> data = [(1, 0, 0.5),
+    ...         (2, 0, 0.5), (2, 1, 0.7),
+    ...         (3, 0, 0.5), (3, 1, 0.7), (3, 2, 0.9),
+    ...         (4, 0, 0.5), (4, 1, 0.7), (4, 2, 0.9), (4, 3, 1.1),
+    ...         (5, 0, 0.5), (5, 1, 0.7), (5, 2, 0.9), (5, 3, 1.1), (5, 4, 1.3)]
+    >>> df = spark.createDataFrame(data).toDF("src", "dst", "weight")
+    >>> pic = PowerIterationClustering(k=2, maxIter=40, weightCol="weight")
+    >>> assignments = pic.assignClusters(df)
+    >>> assignments.sort(assignments.id).show(truncate=False)
+    +---+-------+
+    |id |cluster|
+    +---+-------+
+    |0  |1      |
+    |1  |1      |
+    |2  |1      |
+    |3  |1      |
+    |4  |1      |
+    |5  |0      |
+    +---+-------+
+    ...
+    >>> pic_path = temp_path + "/pic"
+    >>> pic.save(pic_path)
+    >>> pic2 = PowerIterationClustering.load(pic_path)
+    >>> pic2.getK()
+    2
+    >>> pic2.getMaxIter()
+    40
+
+    .. versionadded:: 2.4.0
+    """
+
+    k = Param(Params._dummy(), "k",
+              "The number of clusters to create. Must be > 1.",
+              typeConverter=TypeConverters.toInt)
+    initMode = Param(Params._dummy(), "initMode",
+                     "The initialization algorithm. This can be either " +
+                     "'random' to use a random vector as vertex properties, or 'degree' to use " +
+                     "a normalized sum of similarities with other vertices.  Supported options: " +
+                     "'random' and 'degree'.",
+                     typeConverter=TypeConverters.toString)
+    srcCol = Param(Params._dummy(), "srcCol",
+                   "Name of the input column for source vertex IDs.",
+                   typeConverter=TypeConverters.toString)
+    dstCol = Param(Params._dummy(), "dstCol",
+                   "Name of the input column for destination vertex IDs.",
+                   typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",
+                 weightCol=None):
+        """
+        __init__(self, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",\
+                 weightCol=None)
+        """
+        super(PowerIterationClustering, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.clustering.PowerIterationClustering", self.uid)
+        self._setDefault(k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.4.0")
+    def setParams(self, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",
+                  weightCol=None):
+        """
+        setParams(self, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",\
+                  weightCol=None)
+        Sets params for PowerIterationClustering.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.4.0")
+    def setK(self, value):
+        """
+        Sets the value of :py:attr:`k`.
+        """
+        return self._set(k=value)
+
+    @since("2.4.0")
+    def getK(self):
+        """
+        Gets the value of :py:attr:`k` or its default value.
+        """
+        return self.getOrDefault(self.k)
+
+    @since("2.4.0")
+    def setInitMode(self, value):
+        """
+        Sets the value of :py:attr:`initMode`.
+        """
+        return self._set(initMode=value)
+
+    @since("2.4.0")
+    def getInitMode(self):
+        """
+        Gets the value of :py:attr:`initMode` or its default value.
+        """
+        return self.getOrDefault(self.initMode)
+
+    @since("2.4.0")
+    def setSrcCol(self, value):
+        """
+        Sets the value of :py:attr:`srcCol`.
+        """
+        return self._set(srcCol=value)
+
+    @since("2.4.0")
+    def getSrcCol(self):
+        """
+        Gets the value of :py:attr:`srcCol` or its default value.
+        """
+        return self.getOrDefault(self.srcCol)
+
+    @since("2.4.0")
+    def setDstCol(self, value):
+        """
+        Sets the value of :py:attr:`dstCol`.
+        """
+        return self._set(dstCol=value)
+
+    @since("2.4.0")
+    def getDstCol(self):
+        """
+        Gets the value of :py:attr:`dstCol` or its default value.
+        """
+        return self.getOrDefault(self.dstCol)
+
+    @since("2.4.0")
+    def assignClusters(self, dataset):
+        """
+        Runs the PIC algorithm and returns a cluster assignment for each input vertex.
+
+        :param dataset:
+          A dataset with columns src, dst, weight representing the affinity matrix,
+          which is the matrix A in the PIC paper. Suppose the src column value is i,
+          the dst column value is j, and the weight column value is the similarity
+          s_ij, which must be nonnegative. This is a symmetric matrix and hence
+          s_ij = s_ji. For any (i, j) with nonzero similarity, there should be
+          either (i, j, s_ij) or (j, i, s_ji) in the input. Rows with i = j are
+          ignored, because we assume s_ij = 0.0.
+
+        :return:
+          A dataset that contains columns of vertex id and the corresponding cluster for
+          the id. Its schema will be:
+          - id: Long
+          - cluster: Int
+
+        .. versionadded:: 2.4.0
+        """
+        self._transfer_params_to_java()
+        jdf = self._java_obj.assignClusters(dataset._jdf)
+        return DataFrame(jdf, dataset.sql_ctx)
+
+
+if __name__ == "__main__":
+    import doctest
+    import numpy
+    import pyspark.ml.clustering
+    from pyspark.sql import SparkSession
+    try:
+        # NumPy 1.14+ changed its string format.
+        numpy.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+    globs = pyspark.ml.clustering.__dict__.copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.clustering tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    import tempfile
+    temp_path = tempfile.mkdtemp()
+    globs['temp_path'] = temp_path
+    try:
+        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        spark.stop()
+    finally:
+        from shutil import rmtree
+        try:
+            rmtree(temp_path)
+        except OSError:
+            pass
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/common.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..387c5d7309deadbf9ea2da1e3539cb63a07bfd12
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/common.py
@@ -0,0 +1,138 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+if sys.version >= '3':
+    long = int
+    unicode = str
+
+import py4j.protocol
+from py4j.protocol import Py4JJavaError
+from py4j.java_gateway import JavaObject
+from py4j.java_collections import JavaArray, JavaList
+
+from pyspark import RDD, SparkContext
+from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
+from pyspark.sql import DataFrame, SQLContext
+
+# Hack to support float('inf') in Py4J
+_old_smart_decode = py4j.protocol.smart_decode
+
+_float_str_mapping = {
+    'nan': 'NaN',
+    'inf': 'Infinity',
+    '-inf': '-Infinity',
+}
+
+
+def _new_smart_decode(obj):
+    if isinstance(obj, float):
+        s = str(obj)
+        return _float_str_mapping.get(s, s)
+    return _old_smart_decode(obj)
+
+py4j.protocol.smart_decode = _new_smart_decode
+
+
+_picklable_classes = [
+    'SparseVector',
+    'DenseVector',
+    'SparseMatrix',
+    'DenseMatrix',
+]
+
+
+# this will call the ML version of pythonToJava()
+def _to_java_object_rdd(rdd):
+    """ Return an JavaRDD of Object by unpickling
+
+    It will convert each Python object into Java object by Pyrolite, whenever the
+    RDD is serialized in batch or not.
+    """
+    rdd = rdd._reserialize(AutoBatchedSerializer(PickleSerializer()))
+    return rdd.ctx._jvm.org.apache.spark.ml.python.MLSerDe.pythonToJava(rdd._jrdd, True)
+
+
+def _py2java(sc, obj):
+    """ Convert Python object into Java """
+    if isinstance(obj, RDD):
+        obj = _to_java_object_rdd(obj)
+    elif isinstance(obj, DataFrame):
+        obj = obj._jdf
+    elif isinstance(obj, SparkContext):
+        obj = obj._jsc
+    elif isinstance(obj, list):
+        obj = [_py2java(sc, x) for x in obj]
+    elif isinstance(obj, JavaObject):
+        pass
+    elif isinstance(obj, (int, long, float, bool, bytes, unicode)):
+        pass
+    else:
+        data = bytearray(PickleSerializer().dumps(obj))
+        obj = sc._jvm.org.apache.spark.ml.python.MLSerDe.loads(data)
+    return obj
+
+
+def _java2py(sc, r, encoding="bytes"):
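+    # Inverse of _py2java: map known Java result types (RDDs, Datasets, ML
+    # linalg types) back to their Python equivalents, falling back to
+    # unpickling any byte payload produced by MLSerDe.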
+    if isinstance(r, JavaObject):
+        clsName = r.getClass().getSimpleName()
+        # convert RDD into JavaRDD
+        if clsName != 'JavaRDD' and clsName.endswith("RDD"):
+            r = r.toJavaRDD()
+            clsName = 'JavaRDD'
+
+        if clsName == 'JavaRDD':
+            jrdd = sc._jvm.org.apache.spark.ml.python.MLSerDe.javaToPython(r)
+            return RDD(jrdd, sc)
+
+        if clsName == 'Dataset':
+            return DataFrame(r, SQLContext.getOrCreate(sc))
+
+        if clsName in _picklable_classes:
+            r = sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(r)
+        elif isinstance(r, (JavaArray, JavaList)):
+            try:
+                r = sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(r)
+            except Py4JJavaError:
+                pass  # not picklable
+
+    if isinstance(r, (bytearray, bytes)):
+        r = PickleSerializer().loads(bytes(r), encoding=encoding)
+    return r
+
+
+def callJavaFunc(sc, func, *args):
+    """ Call Java Function """
+    args = [_py2java(sc, a) for a in args]
+    return _java2py(sc, func(*args))
+
+
+def inherit_doc(cls):
+    """
+    A decorator that makes a class inherit documentation from its parents.
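+
+    For example (with hypothetical classes ``Parent`` and ``Child``)::
+
+        @inherit_doc
+        class Child(Parent):
+            def transform(self, dataset):
+                ...  # no docstring here; Parent.transform.__doc__ is copied over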
+    """
+    for name, func in vars(cls).items():
+        # only inherit docstring for public functions
+        if name.startswith("_"):
+            continue
+        if not func.__doc__:
+            for parent in cls.__bases__:
+                parent_func = getattr(parent, name, None)
+                if parent_func and getattr(parent_func, "__doc__", None):
+                    func.__doc__ = parent_func.__doc__
+                    break
+    return cls
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/evaluation.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..8eaf07645a37f820478e13f25de6f355ba08b53d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/evaluation.py
@@ -0,0 +1,450 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+from abc import abstractmethod, ABCMeta
+
+from pyspark import since, keyword_only
+from pyspark.ml.wrapper import JavaParams
+from pyspark.ml.param import Param, Params, TypeConverters
+from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol, \
+    HasFeaturesCol
+from pyspark.ml.common import inherit_doc
+from pyspark.ml.util import JavaMLReadable, JavaMLWritable
+
+__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator',
+           'MulticlassClassificationEvaluator', 'ClusteringEvaluator']
+
+
+@inherit_doc
+class Evaluator(Params):
+    """
+    Base class for evaluators that compute metrics from predictions.
+
+    .. versionadded:: 1.4.0
+    """
+
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
+    def _evaluate(self, dataset):
+        """
+        Evaluates the output.
+
+        :param dataset: a dataset that contains labels/observations and
+               predictions
+        :return: metric
+        """
+        raise NotImplementedError()
+
+    @since("1.4.0")
+    def evaluate(self, dataset, params=None):
+        """
+        Evaluates the output with optional parameters.
+
+        :param dataset: a dataset that contains labels/observations and
+                        predictions
+        :param params: an optional param map that overrides embedded
+                       params
+        :return: metric
+        """
+        if params is None:
+            params = dict()
+        if isinstance(params, dict):
+            if params:
+                return self.copy(params)._evaluate(dataset)
+            else:
+                return self._evaluate(dataset)
+        else:
+            raise ValueError("Params must be a param map but got %s." % type(params))
+
+    @since("1.5.0")
+    def isLargerBetter(self):
+        """
+        Indicates whether the metric returned by :py:meth:`evaluate` should be maximized
+        (True, default) or minimized (False).
+        A given evaluator may support multiple metrics which may be maximized or minimized.
+        """
+        return True
+
+
+@inherit_doc
+class JavaEvaluator(JavaParams, Evaluator):
+    """
+    Base class for :py:class:`Evaluator`s that wrap Java/Scala
+    implementations.
+    """
+
+    __metaclass__ = ABCMeta
+
+    def _evaluate(self, dataset):
+        """
+        Evaluates the output.
+        :param dataset: a dataset that contains labels/observations and predictions.
+        :return: evaluation metric
+        """
+        self._transfer_params_to_java()
+        return self._java_obj.evaluate(dataset._jdf)
+
+    def isLargerBetter(self):
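+        # Delegate to the Java evaluator, which knows whether the currently
+        # selected metric should be maximized or minimized.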
+        self._transfer_params_to_java()
+        return self._java_obj.isLargerBetter()
+
+
+@inherit_doc
+class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPredictionCol,
+                                    JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Evaluator for binary classification, which expects two input columns: rawPrediction and label.
+    The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label
+    1) or of type vector (length-2 vector of raw predictions, scores, or label probabilities).
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> scoreAndLabels = map(lambda x: (Vectors.dense([1.0 - x[0], x[0]]), x[1]),
+    ...    [(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)])
+    >>> dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])
+    ...
+    >>> evaluator = BinaryClassificationEvaluator(rawPredictionCol="raw")
+    >>> evaluator.evaluate(dataset)
+    0.70...
+    >>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"})
+    0.83...
+    >>> bce_path = temp_path + "/bce"
+    >>> evaluator.save(bce_path)
+    >>> evaluator2 = BinaryClassificationEvaluator.load(bce_path)
+    >>> str(evaluator2.getRawPredictionCol())
+    'raw'
+
+    .. versionadded:: 1.4.0
+    """
+
+    metricName = Param(Params._dummy(), "metricName",
+                       "metric name in evaluation (areaUnderROC|areaUnderPR)",
+                       typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, rawPredictionCol="rawPrediction", labelCol="label",
+                 metricName="areaUnderROC"):
+        """
+        __init__(self, rawPredictionCol="rawPrediction", labelCol="label", \
+                 metricName="areaUnderROC")
+        """
+        super(BinaryClassificationEvaluator, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.evaluation.BinaryClassificationEvaluator", self.uid)
+        self._setDefault(metricName="areaUnderROC")
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+
+    @since("1.4.0")
+    def setMetricName(self, value):
+        """
+        Sets the value of :py:attr:`metricName`.
+        """
+        return self._set(metricName=value)
+
+    @since("1.4.0")
+    def getMetricName(self):
+        """
+        Gets the value of metricName or its default value.
+        """
+        return self.getOrDefault(self.metricName)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, rawPredictionCol="rawPrediction", labelCol="label",
+                  metricName="areaUnderROC"):
+        """
+        setParams(self, rawPredictionCol="rawPrediction", labelCol="label", \
+                  metricName="areaUnderROC")
+        Sets params for binary classification evaluator.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+
+@inherit_doc
+class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol,
+                          JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Evaluator for Regression, which expects two input
+    columns: prediction and label.
+
+    >>> scoreAndLabels = [(-28.98343821, -27.0), (20.21491975, 21.5),
+    ...   (-25.98418959, -22.0), (30.69731842, 33.0), (74.69283752, 71.0)]
+    >>> dataset = spark.createDataFrame(scoreAndLabels, ["raw", "label"])
+    ...
+    >>> evaluator = RegressionEvaluator(predictionCol="raw")
+    >>> evaluator.evaluate(dataset)
+    2.842...
+    >>> evaluator.evaluate(dataset, {evaluator.metricName: "r2"})
+    0.993...
+    >>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"})
+    2.649...
+    >>> re_path = temp_path + "/re"
+    >>> evaluator.save(re_path)
+    >>> evaluator2 = RegressionEvaluator.load(re_path)
+    >>> str(evaluator2.getPredictionCol())
+    'raw'
+
+    .. versionadded:: 1.4.0
+    """
+    metricName = Param(Params._dummy(), "metricName",
+                       """metric name in evaluation - one of:
+                       rmse - root mean squared error (default)
+                       mse - mean squared error
+                       r2 - r^2 metric
+                       mae - mean absolute error.""",
+                       typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, predictionCol="prediction", labelCol="label",
+                 metricName="rmse"):
+        """
+        __init__(self, predictionCol="prediction", labelCol="label", \
+                 metricName="rmse")
+        """
+        super(RegressionEvaluator, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.evaluation.RegressionEvaluator", self.uid)
+        self._setDefault(metricName="rmse")
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+
+    @since("1.4.0")
+    def setMetricName(self, value):
+        """
+        Sets the value of :py:attr:`metricName`.
+        """
+        return self._set(metricName=value)
+
+    @since("1.4.0")
+    def getMetricName(self):
+        """
+        Gets the value of metricName or its default value.
+        """
+        return self.getOrDefault(self.metricName)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, predictionCol="prediction", labelCol="label",
+                  metricName="rmse"):
+        """
+        setParams(self, predictionCol="prediction", labelCol="label", \
+                  metricName="rmse")
+        Sets params for regression evaluator.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+
+@inherit_doc
+class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol,
+                                        JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Evaluator for Multiclass Classification, which expects two input
+    columns: prediction and label.
+
+    >>> scoreAndLabels = [(0.0, 0.0), (0.0, 1.0), (0.0, 0.0),
+    ...     (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)]
+    >>> dataset = spark.createDataFrame(scoreAndLabels, ["prediction", "label"])
+    ...
+    >>> evaluator = MulticlassClassificationEvaluator(predictionCol="prediction")
+    >>> evaluator.evaluate(dataset)
+    0.66...
+    >>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
+    0.66...
+    >>> mce_path = temp_path + "/mce"
+    >>> evaluator.save(mce_path)
+    >>> evaluator2 = MulticlassClassificationEvaluator.load(mce_path)
+    >>> str(evaluator2.getPredictionCol())
+    'prediction'
+
+    .. versionadded:: 1.5.0
+    """
+    metricName = Param(Params._dummy(), "metricName",
+                       "metric name in evaluation "
+                       "(f1|weightedPrecision|weightedRecall|accuracy)",
+                       typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, predictionCol="prediction", labelCol="label",
+                 metricName="f1"):
+        """
+        __init__(self, predictionCol="prediction", labelCol="label", \
+                 metricName="f1")
+        """
+        super(MulticlassClassificationEvaluator, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator", self.uid)
+        self._setDefault(metricName="f1")
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+
+    @since("1.5.0")
+    def setMetricName(self, value):
+        """
+        Sets the value of :py:attr:`metricName`.
+        """
+        return self._set(metricName=value)
+
+    @since("1.5.0")
+    def getMetricName(self):
+        """
+        Gets the value of metricName or its default value.
+        """
+        return self.getOrDefault(self.metricName)
+
+    @keyword_only
+    @since("1.5.0")
+    def setParams(self, predictionCol="prediction", labelCol="label",
+                  metricName="f1"):
+        """
+        setParams(self, predictionCol="prediction", labelCol="label", \
+                  metricName="f1")
+        Sets params for multiclass classification evaluator.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+
+@inherit_doc
+class ClusteringEvaluator(JavaEvaluator, HasPredictionCol, HasFeaturesCol,
+                          JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Evaluator for Clustering results, which expects two input
+    columns: prediction and features. The metric computes the Silhouette
+    measure using the squared Euclidean distance.
+
+    The Silhouette is a measure for the validation of the consistency
+    within clusters. It ranges between 1 and -1, where a value close to
+    1 means that the points in a cluster are close to the other points
+    in the same cluster and far from the points of the other clusters.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]),
+    ...     [([0.0, 0.5], 0.0), ([0.5, 0.0], 0.0), ([10.0, 11.0], 1.0),
+    ...     ([10.5, 11.5], 1.0), ([1.0, 1.0], 0.0), ([8.0, 6.0], 1.0)])
+    >>> dataset = spark.createDataFrame(featureAndPredictions, ["features", "prediction"])
+    ...
+    >>> evaluator = ClusteringEvaluator(predictionCol="prediction")
+    >>> evaluator.evaluate(dataset)
+    0.9079...
+    >>> ce_path = temp_path + "/ce"
+    >>> evaluator.save(ce_path)
+    >>> evaluator2 = ClusteringEvaluator.load(ce_path)
+    >>> str(evaluator2.getPredictionCol())
+    'prediction'
+
+    .. versionadded:: 2.3.0
+    """
+    metricName = Param(Params._dummy(), "metricName",
+                       "metric name in evaluation (silhouette)",
+                       typeConverter=TypeConverters.toString)
+    distanceMeasure = Param(Params._dummy(), "distanceMeasure", "The distance measure. " +
+                            "Supported options: 'squaredEuclidean' and 'cosine'.",
+                            typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, predictionCol="prediction", featuresCol="features",
+                 metricName="silhouette", distanceMeasure="squaredEuclidean"):
+        """
+        __init__(self, predictionCol="prediction", featuresCol="features", \
+                 metricName="silhouette", distanceMeasure="squaredEuclidean")
+        """
+        super(ClusteringEvaluator, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.evaluation.ClusteringEvaluator", self.uid)
+        self._setDefault(metricName="silhouette", distanceMeasure="squaredEuclidean")
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+
+    @since("2.3.0")
+    def setMetricName(self, value):
+        """
+        Sets the value of :py:attr:`metricName`.
+        """
+        return self._set(metricName=value)
+
+    @since("2.3.0")
+    def getMetricName(self):
+        """
+        Gets the value of metricName or its default value.
+        """
+        return self.getOrDefault(self.metricName)
+
+    @keyword_only
+    @since("2.3.0")
+    def setParams(self, predictionCol="prediction", featuresCol="features",
+                  metricName="silhouette", distanceMeasure="squaredEuclidean"):
+        """
+        setParams(self, predictionCol="prediction", featuresCol="features", \
+                  metricName="silhouette", distanceMeasure="squaredEuclidean")
+        Sets params for clustering evaluator.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.4.0")
+    def setDistanceMeasure(self, value):
+        """
+        Sets the value of :py:attr:`distanceMeasure`.
+        """
+        return self._set(distanceMeasure=value)
+
+    @since("2.4.0")
+    def getDistanceMeasure(self):
+        """
+        Gets the value of :py:attr:`distanceMeasure` or its default value.
+        """
+        return self.getOrDefault(self.distanceMeasure)
+
+
+if __name__ == "__main__":
+    import doctest
+    import tempfile
+    import pyspark.ml.evaluation
+    from pyspark.sql import SparkSession
+    globs = pyspark.ml.evaluation.__dict__.copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.evaluation tests")\
+        .getOrCreate()
+    globs['spark'] = spark
+    temp_path = tempfile.mkdtemp()
+    globs['temp_path'] = temp_path
+    try:
+        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        spark.stop()
+    finally:
+        from shutil import rmtree
+        try:
+            rmtree(temp_path)
+        except OSError:
+            pass
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/feature.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/feature.py
new file mode 100755
index 0000000000000000000000000000000000000000..eccb7acae5b982e8262ceaa1bf713ddc2cba3760
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/feature.py
@@ -0,0 +1,3890 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+if sys.version > '3':
+    basestring = str
+
+from pyspark import since, keyword_only, SparkContext
+from pyspark.rdd import ignore_unicode_prefix
+from pyspark.ml.linalg import _convert_to_vector
+from pyspark.ml.param.shared import *
+from pyspark.ml.util import JavaMLReadable, JavaMLWritable
+from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer, _jvm
+from pyspark.ml.common import inherit_doc
+
+__all__ = ['Binarizer',
+           'BucketedRandomProjectionLSH', 'BucketedRandomProjectionLSHModel',
+           'Bucketizer',
+           'ChiSqSelector', 'ChiSqSelectorModel',
+           'CountVectorizer', 'CountVectorizerModel',
+           'DCT',
+           'ElementwiseProduct',
+           'FeatureHasher',
+           'HashingTF',
+           'IDF', 'IDFModel',
+           'Imputer', 'ImputerModel',
+           'IndexToString',
+           'MaxAbsScaler', 'MaxAbsScalerModel',
+           'MinHashLSH', 'MinHashLSHModel',
+           'MinMaxScaler', 'MinMaxScalerModel',
+           'NGram',
+           'Normalizer',
+           'OneHotEncoder',
+           'OneHotEncoderEstimator', 'OneHotEncoderModel',
+           'PCA', 'PCAModel',
+           'PolynomialExpansion',
+           'QuantileDiscretizer',
+           'RegexTokenizer',
+           'RFormula', 'RFormulaModel',
+           'SQLTransformer',
+           'StandardScaler', 'StandardScalerModel',
+           'StopWordsRemover',
+           'StringIndexer', 'StringIndexerModel',
+           'Tokenizer',
+           'VectorAssembler',
+           'VectorIndexer', 'VectorIndexerModel',
+           'VectorSizeHint',
+           'VectorSlicer',
+           'Word2Vec', 'Word2VecModel']
+
+
+@inherit_doc
+class Binarizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    Binarize a column of continuous features given a threshold.
+
+    >>> df = spark.createDataFrame([(0.5,)], ["values"])
+    >>> binarizer = Binarizer(threshold=1.0, inputCol="values", outputCol="features")
+    >>> binarizer.transform(df).head().features
+    0.0
+    >>> binarizer.setParams(outputCol="freqs").transform(df).head().freqs
+    0.0
+    >>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"}
+    >>> binarizer.transform(df, params).head().vector
+    1.0
+    >>> binarizerPath = temp_path + "/binarizer"
+    >>> binarizer.save(binarizerPath)
+    >>> loadedBinarizer = Binarizer.load(binarizerPath)
+    >>> loadedBinarizer.getThreshold() == binarizer.getThreshold()
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    threshold = Param(Params._dummy(), "threshold",
+                      "threshold used to binarize continuous features; values greater than "
+                      "the threshold map to 1.0, values less than or equal to it map to 0.0",
+                      typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, threshold=0.0, inputCol=None, outputCol=None):
+        """
+        __init__(self, threshold=0.0, inputCol=None, outputCol=None)
+        """
+        super(Binarizer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Binarizer", self.uid)
+        self._setDefault(threshold=0.0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, threshold=0.0, inputCol=None, outputCol=None):
+        """
+        setParams(self, threshold=0.0, inputCol=None, outputCol=None)
+        Sets params for this Binarizer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setThreshold(self, value):
+        """
+        Sets the value of :py:attr:`threshold`.
+        """
+        return self._set(threshold=value)
+
+    @since("1.4.0")
+    def getThreshold(self):
+        """
+        Gets the value of threshold or its default value.
+        """
+        return self.getOrDefault(self.threshold)
+
+
+class LSHParams(Params):
+    """
+    Mixin for Locality Sensitive Hashing (LSH) algorithm parameters.
+    """
+
+    numHashTables = Param(Params._dummy(), "numHashTables", "number of hash tables, where " +
+                          "increasing number of hash tables lowers the false negative rate, " +
+                          "and decreasing it improves the running performance.",
+                          typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(LSHParams, self).__init__()
+
+    def setNumHashTables(self, value):
+        """
+        Sets the value of :py:attr:`numHashTables`.
+        """
+        return self._set(numHashTables=value)
+
+    def getNumHashTables(self):
+        """
+        Gets the value of numHashTables or its default value.
+        """
+        return self.getOrDefault(self.numHashTables)
+
+
+class LSHModel(JavaModel):
+    """
+    Mixin for Locality Sensitive Hashing (LSH) models.
+    """
+
+    def approxNearestNeighbors(self, dataset, key, numNearestNeighbors, distCol="distCol"):
+        """
+        Given a large dataset and an item, approximately find at most k items which have the
+        closest distance to the item. If the :py:attr:`outputCol` is missing, the method will
+        transform the data; if the :py:attr:`outputCol` exists, it will use that. This allows
+        caching of the transformed data when necessary.
+
+        .. note:: This method is experimental and will likely change behavior in the next release.
+
+        :param dataset: The dataset to search for nearest neighbors of the key.
+        :param key: Feature vector representing the item to search for.
+        :param numNearestNeighbors: The maximum number of nearest neighbors.
+        :param distCol: Output column for storing the distance between each result row and the key.
+                        Use "distCol" as default value if it's not specified.
+        :return: A dataset containing at most k items closest to the key. A column "distCol" is
+                 added to show the distance between each row and the key.
+        """
+        return self._call_java("approxNearestNeighbors", dataset, key, numNearestNeighbors,
+                               distCol)
+
+    def approxSimilarityJoin(self, datasetA, datasetB, threshold, distCol="distCol"):
+        """
+        Join two datasets to approximately find all pairs of rows whose distance are smaller than
+        the threshold. If the :py:attr:`outputCol` is missing, the method will transform the data;
+        if the :py:attr:`outputCol` exists, it will use that. This allows caching of the
+        transformed data when necessary.
+
+        :param datasetA: One of the datasets to join.
+        :param datasetB: Another dataset to join.
+        :param threshold: The threshold for the distance of row pairs.
+        :param distCol: Output column for storing the distance between each pair of rows. Use
+                        "distCol" as default value if it's not specified.
+        :return: A joined dataset containing pairs of rows. The original rows are in columns
+                 "datasetA" and "datasetB", and a column "distCol" is added to show the distance
+                 between each pair.
+        """
+        return self._call_java("approxSimilarityJoin", datasetA, datasetB, threshold, distCol)
+
+
+@inherit_doc
+class BucketedRandomProjectionLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed,
+                                  JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    LSH class for Euclidean distance metrics.
+    The input is dense or sparse vectors, each of which represents a point in the Euclidean
+    distance space. The output will be vectors of configurable dimension. Hash values in the same
+    dimension are calculated by the same hash function.
+
+    .. seealso:: `Stable Distributions
+        <https://en.wikipedia.org/wiki/Locality-sensitive_hashing#Stable_distributions>`_
+    .. seealso:: `Hashing for Similarity Search: A Survey <https://arxiv.org/abs/1408.2927>`_
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> from pyspark.sql.functions import col
+    >>> data = [(0, Vectors.dense([-1.0, -1.0 ]),),
+    ...         (1, Vectors.dense([-1.0, 1.0 ]),),
+    ...         (2, Vectors.dense([1.0, -1.0 ]),),
+    ...         (3, Vectors.dense([1.0, 1.0]),)]
+    >>> df = spark.createDataFrame(data, ["id", "features"])
+    >>> brp = BucketedRandomProjectionLSH(inputCol="features", outputCol="hashes",
+    ...                                   seed=12345, bucketLength=1.0)
+    >>> model = brp.fit(df)
+    >>> model.transform(df).head()
+    Row(id=0, features=DenseVector([-1.0, -1.0]), hashes=[DenseVector([-1.0])])
+    >>> data2 = [(4, Vectors.dense([2.0, 2.0 ]),),
+    ...          (5, Vectors.dense([2.0, 3.0 ]),),
+    ...          (6, Vectors.dense([3.0, 2.0 ]),),
+    ...          (7, Vectors.dense([3.0, 3.0]),)]
+    >>> df2 = spark.createDataFrame(data2, ["id", "features"])
+    >>> model.approxNearestNeighbors(df2, Vectors.dense([1.0, 2.0]), 1).collect()
+    [Row(id=4, features=DenseVector([2.0, 2.0]), hashes=[DenseVector([1.0])], distCol=1.0)]
+    >>> model.approxSimilarityJoin(df, df2, 3.0, distCol="EuclideanDistance").select(
+    ...     col("datasetA.id").alias("idA"),
+    ...     col("datasetB.id").alias("idB"),
+    ...     col("EuclideanDistance")).show()
+    +---+---+-----------------+
+    |idA|idB|EuclideanDistance|
+    +---+---+-----------------+
+    |  3|  6| 2.23606797749979|
+    +---+---+-----------------+
+    ...
+    >>> brpPath = temp_path + "/brp"
+    >>> brp.save(brpPath)
+    >>> brp2 = BucketedRandomProjectionLSH.load(brpPath)
+    >>> brp2.getBucketLength() == brp.getBucketLength()
+    True
+    >>> modelPath = temp_path + "/brp-model"
+    >>> model.save(modelPath)
+    >>> model2 = BucketedRandomProjectionLSHModel.load(modelPath)
+    >>> model.transform(df).head().hashes == model2.transform(df).head().hashes
+    True
+
+    .. versionadded:: 2.2.0
+    """
+
+    bucketLength = Param(Params._dummy(), "bucketLength", "the length of each hash bucket; " +
+                         "a larger bucket lowers the false negative rate.",
+                         typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None, seed=None, numHashTables=1,
+                 bucketLength=None):
+        """
+        __init__(self, inputCol=None, outputCol=None, seed=None, numHashTables=1, \
+                 bucketLength=None)
+        """
+        super(BucketedRandomProjectionLSH, self).__init__()
+        self._java_obj = \
+            self._new_java_obj("org.apache.spark.ml.feature.BucketedRandomProjectionLSH", self.uid)
+        self._setDefault(numHashTables=1)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.2.0")
+    def setParams(self, inputCol=None, outputCol=None, seed=None, numHashTables=1,
+                  bucketLength=None):
+        """
+        setParams(self, inputCol=None, outputCol=None, seed=None, numHashTables=1, \
+                  bucketLength=None)
+        Sets params for this BucketedRandomProjectionLSH.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.2.0")
+    def setBucketLength(self, value):
+        """
+        Sets the value of :py:attr:`bucketLength`.
+        """
+        return self._set(bucketLength=value)
+
+    @since("2.2.0")
+    def getBucketLength(self):
+        """
+        Gets the value of bucketLength or its default value.
+        """
+        return self.getOrDefault(self.bucketLength)
+
+    def _create_model(self, java_model):
+        return BucketedRandomProjectionLSHModel(java_model)
+
+
+class BucketedRandomProjectionLSHModel(LSHModel, JavaMLReadable, JavaMLWritable):
+    r"""
+    .. note:: Experimental
+
+    Model fitted by :py:class:`BucketedRandomProjectionLSH`, where multiple random vectors are
+    stored. The vectors are normalized to be unit vectors and each vector is used in a hash
+    function: :math:`h_i(x) = floor(r_i \cdot x / bucketLength)` where :math:`r_i` is the
+    i-th random unit vector. The number of buckets will be `(max L2 norm of input vectors) /
+    bucketLength`.
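+
+    As a worked illustration (with made-up numbers, not values actually drawn by the model):
+    for a unit vector :math:`r_i = (0.6, 0.8)`, an input :math:`x = (3.0, 4.0)` and
+    `bucketLength` = 2.0, the dot product is :math:`0.6 \cdot 3.0 + 0.8 \cdot 4.0 = 5.0`,
+    so :math:`h_i(x) = floor(5.0 / 2.0) = 2`.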
+
+    .. versionadded:: 2.2.0
+    """
+
+
+@inherit_doc
+class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol, HasHandleInvalid,
+                 JavaMLReadable, JavaMLWritable):
+    """
+    Maps a column of continuous features to a column of feature buckets.
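+
+    With the splits used in the example below, `[-inf, 0.5, 1.4, inf]`, three buckets are
+    defined: values in `[-inf, 0.5)` map to `0.0`, values in `[0.5, 1.4)` map to `1.0`, and
+    values in `[1.4, inf]` map to `2.0`; NaN values are handled according to
+    :py:attr:`handleInvalid`.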
+
+    >>> values = [(0.1,), (0.4,), (1.2,), (1.5,), (float("nan"),), (float("nan"),)]
+    >>> df = spark.createDataFrame(values, ["values"])
+    >>> bucketizer = Bucketizer(splits=[-float("inf"), 0.5, 1.4, float("inf")],
+    ...     inputCol="values", outputCol="buckets")
+    >>> bucketed = bucketizer.setHandleInvalid("keep").transform(df).collect()
+    >>> len(bucketed)
+    6
+    >>> bucketed[0].buckets
+    0.0
+    >>> bucketed[1].buckets
+    0.0
+    >>> bucketed[2].buckets
+    1.0
+    >>> bucketed[3].buckets
+    2.0
+    >>> bucketizer.setParams(outputCol="b").transform(df).head().b
+    0.0
+    >>> bucketizerPath = temp_path + "/bucketizer"
+    >>> bucketizer.save(bucketizerPath)
+    >>> loadedBucketizer = Bucketizer.load(bucketizerPath)
+    >>> loadedBucketizer.getSplits() == bucketizer.getSplits()
+    True
+    >>> bucketed = bucketizer.setHandleInvalid("skip").transform(df).collect()
+    >>> len(bucketed)
+    4
+
+    .. versionadded:: 1.4.0
+    """
+
+    splits = \
+        Param(Params._dummy(), "splits",
+              "Split points for mapping continuous features into buckets. With n+1 splits, " +
+              "there are n buckets. A bucket defined by splits x,y holds values in the " +
+              "range [x,y) except the last bucket, which also includes y. The splits " +
+              "should be of length >= 3 and strictly increasing. Values at -inf, inf must be " +
+              "explicitly provided to cover all Double values; otherwise, values outside the " +
+              "splits specified will be treated as errors.",
+              typeConverter=TypeConverters.toListFloat)
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. " +
+                          "Options are 'skip' (filter out rows with invalid values), " +
+                          "'error' (throw an error), or 'keep' (keep invalid values in a special " +
+                          "additional bucket).",
+                          typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, splits=None, inputCol=None, outputCol=None, handleInvalid="error"):
+        """
+        __init__(self, splits=None, inputCol=None, outputCol=None, handleInvalid="error")
+        """
+        super(Bucketizer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Bucketizer", self.uid)
+        self._setDefault(handleInvalid="error")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, splits=None, inputCol=None, outputCol=None, handleInvalid="error"):
+        """
+        setParams(self, splits=None, inputCol=None, outputCol=None, handleInvalid="error")
+        Sets params for this Bucketizer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setSplits(self, value):
+        """
+        Sets the value of :py:attr:`splits`.
+        """
+        return self._set(splits=value)
+
+    @since("1.4.0")
+    def getSplits(self):
+        """
+        Gets the value of splits or its default value.
+        """
+        return self.getOrDefault(self.splits)
+
+
+class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol):
+    """
+    Params for :py:class:`CountVectorizer` and :py:class:`CountVectorizerModel`.
+    """
+
+    minTF = Param(
+        Params._dummy(), "minTF", "Filter to ignore rare words in" +
+        " a document. For each document, terms with frequency/count less than the given" +
+        " threshold are ignored. If this is an integer >= 1, then this specifies a count (of" +
+        " times the term must appear in the document); if this is a double in [0,1), then this " +
+        "specifies a fraction (out of the document's token count). Note that the parameter is " +
+        "only used in transform of CountVectorizerModel and does not affect fitting. Default 1.0",
+        typeConverter=TypeConverters.toFloat)
+    minDF = Param(
+        Params._dummy(), "minDF", "Specifies the minimum number of" +
+        " different documents a term must appear in to be included in the vocabulary." +
+        " If this is an integer >= 1, this specifies the number of documents the term must" +
+        " appear in; if this is a double in [0,1), then this specifies the fraction of documents." +
+        " Default 1.0", typeConverter=TypeConverters.toFloat)
+    maxDF = Param(
+        Params._dummy(), "maxDF", "Specifies the maximum number of" +
+        " different documents a term could appear in to be included in the vocabulary." +
+        " A term that appears more than the threshold will be ignored. If this is an" +
+        " integer >= 1, this specifies the maximum number of documents the term could appear in;" +
+        " if this is a double in [0,1), then this specifies the maximum" +
+        " fraction of documents the term could appear in." +
+        " Default (2^63) - 1", typeConverter=TypeConverters.toFloat)
+    vocabSize = Param(
+        Params._dummy(), "vocabSize", "max size of the vocabulary. Default 1 << 18.",
+        typeConverter=TypeConverters.toInt)
+    binary = Param(
+        Params._dummy(), "binary", "Binary toggle to control the output vector values." +
+        " If True, all nonzero counts (after minTF filter applied) are set to 1. This is useful" +
+        " for discrete probabilistic models that model binary events rather than integer counts." +
+        " Default False", typeConverter=TypeConverters.toBoolean)
+
+    def __init__(self, *args):
+        super(_CountVectorizerParams, self).__init__(*args)
+        self._setDefault(minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18, binary=False)
+
+    @since("1.6.0")
+    def getMinTF(self):
+        """
+        Gets the value of minTF or its default value.
+        """
+        return self.getOrDefault(self.minTF)
+
+    @since("1.6.0")
+    def getMinDF(self):
+        """
+        Gets the value of minDF or its default value.
+        """
+        return self.getOrDefault(self.minDF)
+
+    @since("2.4.0")
+    def getMaxDF(self):
+        """
+        Gets the value of maxDF or its default value.
+        """
+        return self.getOrDefault(self.maxDF)
+
+    @since("1.6.0")
+    def getVocabSize(self):
+        """
+        Gets the value of vocabSize or its default value.
+        """
+        return self.getOrDefault(self.vocabSize)
+
+    @since("2.0.0")
+    def getBinary(self):
+        """
+        Gets the value of binary or its default value.
+        """
+        return self.getOrDefault(self.binary)
+
+
+@inherit_doc
+class CountVectorizer(JavaEstimator, _CountVectorizerParams, JavaMLReadable, JavaMLWritable):
+    """
+    Extracts a vocabulary from document collections and generates a :py:class:`CountVectorizerModel`.
+
+    >>> df = spark.createDataFrame(
+    ...    [(0, ["a", "b", "c"]), (1, ["a", "b", "b", "c", "a"])],
+    ...    ["label", "raw"])
+    >>> cv = CountVectorizer(inputCol="raw", outputCol="vectors")
+    >>> model = cv.fit(df)
+    >>> model.transform(df).show(truncate=False)
+    +-----+---------------+-------------------------+
+    |label|raw            |vectors                  |
+    +-----+---------------+-------------------------+
+    |0    |[a, b, c]      |(3,[0,1,2],[1.0,1.0,1.0])|
+    |1    |[a, b, b, c, a]|(3,[0,1,2],[2.0,2.0,1.0])|
+    +-----+---------------+-------------------------+
+    ...
+    >>> sorted(model.vocabulary) == ['a', 'b', 'c']
+    True
+    >>> countVectorizerPath = temp_path + "/count-vectorizer"
+    >>> cv.save(countVectorizerPath)
+    >>> loadedCv = CountVectorizer.load(countVectorizerPath)
+    >>> loadedCv.getMinDF() == cv.getMinDF()
+    True
+    >>> loadedCv.getMinTF() == cv.getMinTF()
+    True
+    >>> loadedCv.getVocabSize() == cv.getVocabSize()
+    True
+    >>> modelPath = temp_path + "/count-vectorizer-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = CountVectorizerModel.load(modelPath)
+    >>> loadedModel.vocabulary == model.vocabulary
+    True
+    >>> fromVocabModel = CountVectorizerModel.from_vocabulary(["a", "b", "c"],
+    ...     inputCol="raw", outputCol="vectors")
+    >>> fromVocabModel.transform(df).show(truncate=False)
+    +-----+---------------+-------------------------+
+    |label|raw            |vectors                  |
+    +-----+---------------+-------------------------+
+    |0    |[a, b, c]      |(3,[0,1,2],[1.0,1.0,1.0])|
+    |1    |[a, b, b, c, a]|(3,[0,1,2],[2.0,2.0,1.0])|
+    +-----+---------------+-------------------------+
+    ...
+
+    .. versionadded:: 1.6.0
+    """
+
+    @keyword_only
+    def __init__(self, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18, binary=False,
+                 inputCol=None, outputCol=None):
+        """
+        __init__(self, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18, binary=False,\
+                 inputCol=None, outputCol=None)
+        """
+        super(CountVectorizer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.CountVectorizer",
+                                            self.uid)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18, binary=False,
+                  inputCol=None, outputCol=None):
+        """
+        setParams(self, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18, binary=False,\
+                  inputCol=None, outputCol=None)
+        Set the params for the CountVectorizer
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.6.0")
+    def setMinTF(self, value):
+        """
+        Sets the value of :py:attr:`minTF`.
+        """
+        return self._set(minTF=value)
+
+    @since("1.6.0")
+    def setMinDF(self, value):
+        """
+        Sets the value of :py:attr:`minDF`.
+        """
+        return self._set(minDF=value)
+
+    @since("2.4.0")
+    def setMaxDF(self, value):
+        """
+        Sets the value of :py:attr:`maxDF`.
+        """
+        return self._set(maxDF=value)
+
+    @since("1.6.0")
+    def setVocabSize(self, value):
+        """
+        Sets the value of :py:attr:`vocabSize`.
+        """
+        return self._set(vocabSize=value)
+
+    @since("2.0.0")
+    def setBinary(self, value):
+        """
+        Sets the value of :py:attr:`binary`.
+        """
+        return self._set(binary=value)
+
+    def _create_model(self, java_model):
+        return CountVectorizerModel(java_model)
+
+
+@inherit_doc
+class CountVectorizerModel(JavaModel, _CountVectorizerParams, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`CountVectorizer`.
+
+    .. versionadded:: 1.6.0
+    """
+
+    @classmethod
+    @since("2.4.0")
+    def from_vocabulary(cls, vocabulary, inputCol, outputCol=None, minTF=None, binary=None):
+        """
+        Construct the model directly from a vocabulary list of strings;
+        requires an active SparkContext.
+        """
+        sc = SparkContext._active_spark_context
+        java_class = sc._gateway.jvm.java.lang.String
+        jvocab = CountVectorizerModel._new_java_array(vocabulary, java_class)
+        model = CountVectorizerModel._create_from_java_class(
+            "org.apache.spark.ml.feature.CountVectorizerModel", jvocab)
+        model.setInputCol(inputCol)
+        if outputCol is not None:
+            model.setOutputCol(outputCol)
+        if minTF is not None:
+            model.setMinTF(minTF)
+        if binary is not None:
+            model.setBinary(binary)
+        model._set(vocabSize=len(vocabulary))
+        return model
+
+    @property
+    @since("1.6.0")
+    def vocabulary(self):
+        """
+        An array of terms in the vocabulary.
+        """
+        return self._call_java("vocabulary")
+
+    @since("2.4.0")
+    def setMinTF(self, value):
+        """
+        Sets the value of :py:attr:`minTF`.
+        """
+        return self._set(minTF=value)
+
+    @since("2.4.0")
+    def setBinary(self, value):
+        """
+        Sets the value of :py:attr:`binary`.
+        """
+        return self._set(binary=value)
+
+
+@inherit_doc
+class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    A feature transformer that takes the 1D discrete cosine transform
+    of a real vector. No zero padding is performed on the input vector.
+    It returns a real vector of the same length representing the DCT.
+    The return vector is scaled such that the transform matrix is
+    unitary (aka scaled DCT-II).
+
+    .. seealso:: `More information on Wikipedia
+        <https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II>`_.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df1 = spark.createDataFrame([(Vectors.dense([5.0, 8.0, 6.0]),)], ["vec"])
+    >>> dct = DCT(inverse=False, inputCol="vec", outputCol="resultVec")
+    >>> df2 = dct.transform(df1)
+    >>> df2.head().resultVec
+    DenseVector([10.969..., -0.707..., -2.041...])
+    >>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2)
+    >>> df3.head().origVec
+    DenseVector([5.0, 8.0, 6.0])
+    >>> dctPath = temp_path + "/dct"
+    >>> dct.save(dctPath)
+    >>> loadedDtc = DCT.load(dctPath)
+    >>> loadedDtc.getInverse()
+    False
+
+    .. versionadded:: 1.6.0
+    """
+
+    inverse = Param(Params._dummy(), "inverse", "Set transformer to perform inverse DCT, " +
+                    "default False.", typeConverter=TypeConverters.toBoolean)
+
+    @keyword_only
+    def __init__(self, inverse=False, inputCol=None, outputCol=None):
+        """
+        __init__(self, inverse=False, inputCol=None, outputCol=None)
+        """
+        super(DCT, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.DCT", self.uid)
+        self._setDefault(inverse=False)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, inverse=False, inputCol=None, outputCol=None):
+        """
+        setParams(self, inverse=False, inputCol=None, outputCol=None)
+        Sets params for this DCT.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.6.0")
+    def setInverse(self, value):
+        """
+        Sets the value of :py:attr:`inverse`.
+        """
+        return self._set(inverse=value)
+
+    @since("1.6.0")
+    def getInverse(self):
+        """
+        Gets the value of inverse or its default value.
+        """
+        return self.getOrDefault(self.inverse)
+
+
+@inherit_doc
+class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
+                         JavaMLWritable):
+    """
+    Outputs the Hadamard product (i.e., the element-wise product) of each input vector
+    with a provided "weight" vector. In other words, it scales each column of the dataset
+    by a scalar multiplier.
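+
+    For example, as in the doctest below, scaling `[2.0, 1.0, 3.0]` by the weight vector
+    `[1.0, 2.0, 3.0]` gives `[2.0 * 1.0, 1.0 * 2.0, 3.0 * 3.0] = [2.0, 2.0, 9.0]`.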
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([(Vectors.dense([2.0, 1.0, 3.0]),)], ["values"])
+    >>> ep = ElementwiseProduct(scalingVec=Vectors.dense([1.0, 2.0, 3.0]),
+    ...     inputCol="values", outputCol="eprod")
+    >>> ep.transform(df).head().eprod
+    DenseVector([2.0, 2.0, 9.0])
+    >>> ep.setParams(scalingVec=Vectors.dense([2.0, 3.0, 5.0])).transform(df).head().eprod
+    DenseVector([4.0, 3.0, 15.0])
+    >>> elementwiseProductPath = temp_path + "/elementwise-product"
+    >>> ep.save(elementwiseProductPath)
+    >>> loadedEp = ElementwiseProduct.load(elementwiseProductPath)
+    >>> loadedEp.getScalingVec() == ep.getScalingVec()
+    True
+
+    .. versionadded:: 1.5.0
+    """
+
+    scalingVec = Param(Params._dummy(), "scalingVec", "Vector for hadamard product.",
+                       typeConverter=TypeConverters.toVector)
+
+    @keyword_only
+    def __init__(self, scalingVec=None, inputCol=None, outputCol=None):
+        """
+        __init__(self, scalingVec=None, inputCol=None, outputCol=None)
+        """
+        super(ElementwiseProduct, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ElementwiseProduct",
+                                            self.uid)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.5.0")
+    def setParams(self, scalingVec=None, inputCol=None, outputCol=None):
+        """
+        setParams(self, scalingVec=None, inputCol=None, outputCol=None)
+        Sets params for this ElementwiseProduct.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.0.0")
+    def setScalingVec(self, value):
+        """
+        Sets the value of :py:attr:`scalingVec`.
+        """
+        return self._set(scalingVec=value)
+
+    @since("2.0.0")
+    def getScalingVec(self):
+        """
+        Gets the value of scalingVec or its default value.
+        """
+        return self.getOrDefault(self.scalingVec)
+
+
+@inherit_doc
+class FeatureHasher(JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures, JavaMLReadable,
+                    JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Feature hashing projects a set of categorical or numerical features into a feature vector of
+    specified dimension (typically substantially smaller than that of the original feature
+    space). This is done using the hashing trick (https://en.wikipedia.org/wiki/Feature_hashing)
+    to map features to indices in the feature vector.
+
+    The FeatureHasher transformer operates on multiple columns. Each column may contain either
+    numeric or categorical features. Behavior and handling of column data types are as follows:
+
+    * Numeric columns:
+        For numeric features, the hash value of the column name is used to map the
+        feature value to its index in the feature vector. By default, numeric features
+        are not treated as categorical (even when they are integers). To treat them
+        as categorical, specify the relevant columns in `categoricalCols`.
+
+    * String columns:
+        For categorical features, the hash value of the string "column_name=value"
+        is used to map to the vector index, with an indicator value of `1.0`.
+        Thus, categorical features are "one-hot" encoded
+        (similarly to using :py:class:`OneHotEncoder` with `dropLast=false`).
+
+    * Boolean columns:
+        Boolean values are treated in the same way as string columns. That is,
+        boolean features are represented as "column_name=true" or "column_name=false",
+        with an indicator value of `1.0`.
+
+    Null (missing) values are ignored (implicitly zero in the resulting feature vector).
+
+    Since a simple modulo is used to transform the hash function to a vector index,
+    it is advisable to use a power of two as the `numFeatures` parameter;
+    otherwise the features will not be mapped evenly to the vector indices.
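+
+    Concretely, a feature's vector index is its hash value modulo `numFeatures`. With the
+    default `numFeatures` of 262144 (2^18), a feature whose hash value were, say, 300000
+    (an illustrative number, not an actual MurmurHash output) would land at index
+    300000 mod 262144 = 37856.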
+
+    >>> data = [(2.0, True, "1", "foo"), (3.0, False, "2", "bar")]
+    >>> cols = ["real", "bool", "stringNum", "string"]
+    >>> df = spark.createDataFrame(data, cols)
+    >>> hasher = FeatureHasher(inputCols=cols, outputCol="features")
+    >>> hasher.transform(df).head().features
+    SparseVector(262144, {174475: 2.0, 247670: 1.0, 257907: 1.0, 262126: 1.0})
+    >>> hasher.setCategoricalCols(["real"]).transform(df).head().features
+    SparseVector(262144, {171257: 1.0, 247670: 1.0, 257907: 1.0, 262126: 1.0})
+    >>> hasherPath = temp_path + "/hasher"
+    >>> hasher.save(hasherPath)
+    >>> loadedHasher = FeatureHasher.load(hasherPath)
+    >>> loadedHasher.getNumFeatures() == hasher.getNumFeatures()
+    True
+    >>> loadedHasher.transform(df).head().features == hasher.transform(df).head().features
+    True
+
+    .. versionadded:: 2.3.0
+    """
+
+    categoricalCols = Param(Params._dummy(), "categoricalCols",
+                            "numeric columns to treat as categorical",
+                            typeConverter=TypeConverters.toListString)
+
+    @keyword_only
+    def __init__(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None):
+        """
+        __init__(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None)
+        """
+        super(FeatureHasher, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.FeatureHasher", self.uid)
+        self._setDefault(numFeatures=1 << 18)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.3.0")
+    def setParams(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None):
+        """
+        setParams(self, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None)
+        Sets params for this FeatureHasher.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.3.0")
+    def setCategoricalCols(self, value):
+        """
+        Sets the value of :py:attr:`categoricalCols`.
+        """
+        return self._set(categoricalCols=value)
+
+    @since("2.3.0")
+    def getCategoricalCols(self):
+        """
+        Gets the value of categoricalCols or its default value.
+        """
+        return self.getOrDefault(self.categoricalCols)
+
+
+@inherit_doc
+class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, JavaMLReadable,
+                JavaMLWritable):
+    """
+    Maps a sequence of terms to their term frequencies using the hashing trick.
+    Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32)
+    to calculate the hash code value for the term object.
+    Since a simple modulo is used to transform the hash function to a column index,
+    it is advisable to use a power of two as the numFeatures parameter;
+    otherwise the features will not be mapped evenly to the columns.
+
+    >>> df = spark.createDataFrame([(["a", "b", "c"],)], ["words"])
+    >>> hashingTF = HashingTF(numFeatures=10, inputCol="words", outputCol="features")
+    >>> hashingTF.transform(df).head().features
+    SparseVector(10, {0: 1.0, 1: 1.0, 2: 1.0})
+    >>> hashingTF.setParams(outputCol="freqs").transform(df).head().freqs
+    SparseVector(10, {0: 1.0, 1: 1.0, 2: 1.0})
+    >>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"}
+    >>> hashingTF.transform(df, params).head().vector
+    SparseVector(5, {0: 1.0, 1: 1.0, 2: 1.0})
+    >>> hashingTFPath = temp_path + "/hashing-tf"
+    >>> hashingTF.save(hashingTFPath)
+    >>> loadedHashingTF = HashingTF.load(hashingTFPath)
+    >>> loadedHashingTF.getNumFeatures() == hashingTF.getNumFeatures()
+    True
+
+    .. versionadded:: 1.3.0
+    """
+
+    binary = Param(Params._dummy(), "binary", "If True, all non zero counts are set to 1. " +
+                   "This is useful for discrete probabilistic models that model binary events " +
+                   "rather than integer counts. Default False.",
+                   typeConverter=TypeConverters.toBoolean)
+
+    @keyword_only
+    def __init__(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None):
+        """
+        __init__(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None)
+        """
+        super(HashingTF, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.HashingTF", self.uid)
+        self._setDefault(numFeatures=1 << 18, binary=False)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.3.0")
+    def setParams(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None):
+        """
+        setParams(self, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None)
+        Sets params for this HashingTF.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.0.0")
+    def setBinary(self, value):
+        """
+        Sets the value of :py:attr:`binary`.
+        """
+        return self._set(binary=value)
+
+    @since("2.0.0")
+    def getBinary(self):
+        """
+        Gets the value of binary or its default value.
+        """
+        return self.getOrDefault(self.binary)
+
+
+@inherit_doc
+class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    Compute the Inverse Document Frequency (IDF) given a collection of documents.
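+
+    The IDF of a term t over a corpus of m documents is the smoothed value
+    log((m + 1) / (d(t) + 1)), where d(t) is the number of documents containing t; terms
+    appearing in fewer than `minDocFreq` documents have their IDF set to 0. In the
+    three-document example below, a term present in two documents gets log(4 / 3),
+    roughly 0.2877.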
+
+    >>> from pyspark.ml.linalg import DenseVector
+    >>> df = spark.createDataFrame([(DenseVector([1.0, 2.0]),),
+    ...     (DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"])
+    >>> idf = IDF(minDocFreq=3, inputCol="tf", outputCol="idf")
+    >>> model = idf.fit(df)
+    >>> model.idf
+    DenseVector([0.0, 0.0])
+    >>> model.transform(df).head().idf
+    DenseVector([0.0, 0.0])
+    >>> idf.setParams(outputCol="freqs").fit(df).transform(df).collect()[1].freqs
+    DenseVector([0.0, 0.0])
+    >>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"}
+    >>> idf.fit(df, params).transform(df).head().vector
+    DenseVector([0.2877, 0.0])
+    >>> idfPath = temp_path + "/idf"
+    >>> idf.save(idfPath)
+    >>> loadedIdf = IDF.load(idfPath)
+    >>> loadedIdf.getMinDocFreq() == idf.getMinDocFreq()
+    True
+    >>> modelPath = temp_path + "/idf-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = IDFModel.load(modelPath)
+    >>> loadedModel.transform(df).head().idf == model.transform(df).head().idf
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    minDocFreq = Param(Params._dummy(), "minDocFreq",
+                       "minimum number of documents in which a term should appear for filtering",
+                       typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, minDocFreq=0, inputCol=None, outputCol=None):
+        """
+        __init__(self, minDocFreq=0, inputCol=None, outputCol=None)
+        """
+        super(IDF, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IDF", self.uid)
+        self._setDefault(minDocFreq=0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, minDocFreq=0, inputCol=None, outputCol=None):
+        """
+        setParams(self, minDocFreq=0, inputCol=None, outputCol=None)
+        Sets params for this IDF.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setMinDocFreq(self, value):
+        """
+        Sets the value of :py:attr:`minDocFreq`.
+        """
+        return self._set(minDocFreq=value)
+
+    @since("1.4.0")
+    def getMinDocFreq(self):
+        """
+        Gets the value of minDocFreq or its default value.
+        """
+        return self.getOrDefault(self.minDocFreq)
+
+    def _create_model(self, java_model):
+        return IDFModel(java_model)
+
+
+class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`IDF`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def idf(self):
+        """
+        Returns the IDF vector.
+        """
+        return self._call_java("idf")
+
+
+@inherit_doc
+class Imputer(JavaEstimator, HasInputCols, JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Imputation estimator for completing missing values, either using the mean or the median
+    of the columns in which the missing values are located. The input columns should be of
+    DoubleType or FloatType. Currently Imputer does not support categorical features and
+    may produce incorrect values for categorical features.
+
+    Note that the mean/median value is computed after filtering out missing values.
+    All Null values in the input columns are treated as missing, and so are also imputed. For
+    computing median, :py:meth:`pyspark.sql.DataFrame.approxQuantile` is used with a
+    relative error of `0.001`.
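+
+    In the example below, the surrogate for column `a` is the mean of its non-missing values,
+    (1.0 + 2.0 + 4.0 + 5.0) / 4 = 3.0, and for column `b` it is (3.0 + 4.0 + 5.0) / 3 = 4.0,
+    matching `model.surrogateDF`.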
+
+    >>> df = spark.createDataFrame([(1.0, float("nan")), (2.0, float("nan")), (float("nan"), 3.0),
+    ...                             (4.0, 4.0), (5.0, 5.0)], ["a", "b"])
+    >>> imputer = Imputer(inputCols=["a", "b"], outputCols=["out_a", "out_b"])
+    >>> model = imputer.fit(df)
+    >>> model.surrogateDF.show()
+    +---+---+
+    |  a|  b|
+    +---+---+
+    |3.0|4.0|
+    +---+---+
+    ...
+    >>> model.transform(df).show()
+    +---+---+-----+-----+
+    |  a|  b|out_a|out_b|
+    +---+---+-----+-----+
+    |1.0|NaN|  1.0|  4.0|
+    |2.0|NaN|  2.0|  4.0|
+    |NaN|3.0|  3.0|  3.0|
+    ...
+    >>> imputer.setStrategy("median").setMissingValue(1.0).fit(df).transform(df).show()
+    +---+---+-----+-----+
+    |  a|  b|out_a|out_b|
+    +---+---+-----+-----+
+    |1.0|NaN|  4.0|  NaN|
+    ...
+    >>> imputerPath = temp_path + "/imputer"
+    >>> imputer.save(imputerPath)
+    >>> loadedImputer = Imputer.load(imputerPath)
+    >>> loadedImputer.getStrategy() == imputer.getStrategy()
+    True
+    >>> loadedImputer.getMissingValue()
+    1.0
+    >>> modelPath = temp_path + "/imputer-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = ImputerModel.load(modelPath)
+    >>> loadedModel.transform(df).head().out_a == model.transform(df).head().out_a
+    True
+
+    .. versionadded:: 2.2.0
+    """
+
+    outputCols = Param(Params._dummy(), "outputCols",
+                       "output column names.", typeConverter=TypeConverters.toListString)
+
+    strategy = Param(Params._dummy(), "strategy",
+                     "strategy for imputation. If mean, then replace missing values using the mean "
+                     "value of the feature. If median, then replace missing values using the "
+                     "median value of the feature.",
+                     typeConverter=TypeConverters.toString)
+
+    missingValue = Param(Params._dummy(), "missingValue",
+                         "The placeholder for the missing values. All occurrences of missingValue "
+                         "will be imputed.", typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, strategy="mean", missingValue=float("nan"), inputCols=None,
+                 outputCols=None):
+        """
+        __init__(self, strategy="mean", missingValue=float("nan"), inputCols=None, \
+                 outputCols=None)
+        """
+        super(Imputer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Imputer", self.uid)
+        self._setDefault(strategy="mean", missingValue=float("nan"))
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.2.0")
+    def setParams(self, strategy="mean", missingValue=float("nan"), inputCols=None,
+                  outputCols=None):
+        """
+        setParams(self, strategy="mean", missingValue=float("nan"), inputCols=None, \
+                  outputCols=None)
+        Sets params for this Imputer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.2.0")
+    def setOutputCols(self, value):
+        """
+        Sets the value of :py:attr:`outputCols`.
+        """
+        return self._set(outputCols=value)
+
+    @since("2.2.0")
+    def getOutputCols(self):
+        """
+        Gets the value of :py:attr:`outputCols` or its default value.
+        """
+        return self.getOrDefault(self.outputCols)
+
+    @since("2.2.0")
+    def setStrategy(self, value):
+        """
+        Sets the value of :py:attr:`strategy`.
+        """
+        return self._set(strategy=value)
+
+    @since("2.2.0")
+    def getStrategy(self):
+        """
+        Gets the value of :py:attr:`strategy` or its default value.
+        """
+        return self.getOrDefault(self.strategy)
+
+    @since("2.2.0")
+    def setMissingValue(self, value):
+        """
+        Sets the value of :py:attr:`missingValue`.
+        """
+        return self._set(missingValue=value)
+
+    @since("2.2.0")
+    def getMissingValue(self):
+        """
+        Gets the value of :py:attr:`missingValue` or its default value.
+        """
+        return self.getOrDefault(self.missingValue)
+
+    def _create_model(self, java_model):
+        return ImputerModel(java_model)
+
+
+class ImputerModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Model fitted by :py:class:`Imputer`.
+
+    .. versionadded:: 2.2.0
+    """
+
+    @property
+    @since("2.2.0")
+    def surrogateDF(self):
+        """
+        Returns a DataFrame containing inputCols and their corresponding surrogates,
+        which are used to replace the missing values in the input DataFrame.
+        """
+        return self._call_java("surrogateDF")
+
+
+@inherit_doc
+class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    Rescale each feature individually to the range [-1, 1] by dividing by the maximum absolute
+    value of each feature. It does not shift/center the data, and thus does not destroy
+    any sparsity.
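+
+    In the example below, the fitted maximum absolute value of the single feature is 2.0, so
+    1.0 is scaled to 1.0 / 2.0 = 0.5 and 2.0 to 2.0 / 2.0 = 1.0.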
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([(Vectors.dense([1.0]),), (Vectors.dense([2.0]),)], ["a"])
+    >>> maScaler = MaxAbsScaler(inputCol="a", outputCol="scaled")
+    >>> model = maScaler.fit(df)
+    >>> model.transform(df).show()
+    +-----+------+
+    |    a|scaled|
+    +-----+------+
+    |[1.0]| [0.5]|
+    |[2.0]| [1.0]|
+    +-----+------+
+    ...
+    >>> scalerPath = temp_path + "/max-abs-scaler"
+    >>> maScaler.save(scalerPath)
+    >>> loadedMAScaler = MaxAbsScaler.load(scalerPath)
+    >>> loadedMAScaler.getInputCol() == maScaler.getInputCol()
+    True
+    >>> loadedMAScaler.getOutputCol() == maScaler.getOutputCol()
+    True
+    >>> modelPath = temp_path + "/max-abs-scaler-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = MaxAbsScalerModel.load(modelPath)
+    >>> loadedModel.maxAbs == model.maxAbs
+    True
+
+    .. versionadded:: 2.0.0
+    """
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None):
+        """
+        __init__(self, inputCol=None, outputCol=None)
+        """
+        super(MaxAbsScaler, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MaxAbsScaler", self.uid)
+        self._setDefault()
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, inputCol=None, outputCol=None):
+        """
+        setParams(self, inputCol=None, outputCol=None)
+        Sets params for this MaxAbsScaler.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return MaxAbsScalerModel(java_model)
+
+
+class MaxAbsScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`MaxAbsScaler`.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def maxAbs(self):
+        """
+        Max Abs vector.
+        """
+        return self._call_java("maxAbs")
+
+
+@inherit_doc
+class MinHashLSH(JavaEstimator, LSHParams, HasInputCol, HasOutputCol, HasSeed,
+                 JavaMLReadable, JavaMLWritable):
+
+    """
+    .. note:: Experimental
+
+    LSH class for Jaccard distance.
+    The input can be dense or sparse vectors, but it is more efficient if it is sparse.
+    For example, `Vectors.sparse(10, [(2, 1.0), (3, 1.0), (5, 1.0)])` means there are 10 elements
+    in the space. This set contains elements 2, 3, and 5. Also, any input vector must have at
+    least 1 non-zero index, and all non-zero values are treated as binary "1" values.
+
+    .. seealso:: `Wikipedia on MinHash <https://en.wikipedia.org/wiki/MinHash>`_
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> from pyspark.sql.functions import col
+    >>> data = [(0, Vectors.sparse(6, [0, 1, 2], [1.0, 1.0, 1.0]),),
+    ...         (1, Vectors.sparse(6, [2, 3, 4], [1.0, 1.0, 1.0]),),
+    ...         (2, Vectors.sparse(6, [0, 2, 4], [1.0, 1.0, 1.0]),)]
+    >>> df = spark.createDataFrame(data, ["id", "features"])
+    >>> mh = MinHashLSH(inputCol="features", outputCol="hashes", seed=12345)
+    >>> model = mh.fit(df)
+    >>> model.transform(df).head()
+    Row(id=0, features=SparseVector(6, {0: 1.0, 1: 1.0, 2: 1.0}), hashes=[DenseVector([6179668...
+    >>> data2 = [(3, Vectors.sparse(6, [1, 3, 5], [1.0, 1.0, 1.0]),),
+    ...          (4, Vectors.sparse(6, [2, 3, 5], [1.0, 1.0, 1.0]),),
+    ...          (5, Vectors.sparse(6, [1, 2, 4], [1.0, 1.0, 1.0]),)]
+    >>> df2 = spark.createDataFrame(data2, ["id", "features"])
+    >>> key = Vectors.sparse(6, [1, 2], [1.0, 1.0])
+    >>> model.approxNearestNeighbors(df2, key, 1).collect()
+    [Row(id=5, features=SparseVector(6, {1: 1.0, 2: 1.0, 4: 1.0}), hashes=[DenseVector([6179668...
+    >>> model.approxSimilarityJoin(df, df2, 0.6, distCol="JaccardDistance").select(
+    ...     col("datasetA.id").alias("idA"),
+    ...     col("datasetB.id").alias("idB"),
+    ...     col("JaccardDistance")).show()
+    +---+---+---------------+
+    |idA|idB|JaccardDistance|
+    +---+---+---------------+
+    |  0|  5|            0.5|
+    |  1|  4|            0.5|
+    +---+---+---------------+
+    ...
+    >>> mhPath = temp_path + "/mh"
+    >>> mh.save(mhPath)
+    >>> mh2 = MinHashLSH.load(mhPath)
+    >>> mh2.getOutputCol() == mh.getOutputCol()
+    True
+    >>> modelPath = temp_path + "/mh-model"
+    >>> model.save(modelPath)
+    >>> model2 = MinHashLSHModel.load(modelPath)
+    >>> model.transform(df).head().hashes == model2.transform(df).head().hashes
+    True
+
+    .. versionadded:: 2.2.0
+    """
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None, seed=None, numHashTables=1):
+        """
+        __init__(self, inputCol=None, outputCol=None, seed=None, numHashTables=1)
+        """
+        super(MinHashLSH, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MinHashLSH", self.uid)
+        self._setDefault(numHashTables=1)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.2.0")
+    def setParams(self, inputCol=None, outputCol=None, seed=None, numHashTables=1):
+        """
+        setParams(self, inputCol=None, outputCol=None, seed=None, numHashTables=1)
+        Sets params for this MinHashLSH.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return MinHashLSHModel(java_model)
+
+
+class MinHashLSHModel(LSHModel, JavaMLReadable, JavaMLWritable):
+    r"""
+    .. note:: Experimental
+
+    Model produced by :py:class:`MinHashLSH`, where multiple hash functions are stored. Each
+    hash function is picked from the following family of hash functions, where :math:`a_i` and
+    :math:`b_i` are randomly chosen integers less than prime:
+    :math:`h_i(x) = ((x \cdot a_i + b_i) \mod prime)`. This hash family is approximately min-wise
+    independent according to the reference.
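+
+    As a worked illustration (with made-up coefficients, not values actually drawn by the
+    model): for :math:`a_i = 3`, :math:`b_i = 5` and :math:`prime = 13`, an input
+    :math:`x = 4` hashes to :math:`h_i(4) = ((4 \cdot 3 + 5) \mod 13) = 4`.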
+
+    .. seealso:: Tom Bohman, Colin Cooper, and Alan Frieze. "Min-wise independent linear
+        permutations." Electronic Journal of Combinatorics 7 (2000): R26.
+
+    .. versionadded:: 2.2.0
+    """
+
+
+@inherit_doc
+class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    Rescale each feature individually to a common range [min, max] linearly using column summary
+    statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
+    feature E is calculated as,
+
+    Rescaled(e_i) = (e_i - E_min) / (E_max - E_min) * (max - min) + min
+
+    For the case E_max == E_min, Rescaled(e_i) = 0.5 * (max + min)
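+
+    For instance, fitting on the values 0.0 and 2.0 with the default output range [0.0, 1.0],
+    as in the example below, gives
+    Rescaled(2.0) = (2.0 - 0.0) / (2.0 - 0.0) * (1.0 - 0.0) + 0.0 = 1.0.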
+
+    .. note:: Since zero values will probably be transformed to non-zero values, output of the
+        transformer will be DenseVector even for sparse input.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"])
+    >>> mmScaler = MinMaxScaler(inputCol="a", outputCol="scaled")
+    >>> model = mmScaler.fit(df)
+    >>> model.originalMin
+    DenseVector([0.0])
+    >>> model.originalMax
+    DenseVector([2.0])
+    >>> model.transform(df).show()
+    +-----+------+
+    |    a|scaled|
+    +-----+------+
+    |[0.0]| [0.0]|
+    |[2.0]| [1.0]|
+    +-----+------+
+    ...
+    >>> minMaxScalerPath = temp_path + "/min-max-scaler"
+    >>> mmScaler.save(minMaxScalerPath)
+    >>> loadedMMScaler = MinMaxScaler.load(minMaxScalerPath)
+    >>> loadedMMScaler.getMin() == mmScaler.getMin()
+    True
+    >>> loadedMMScaler.getMax() == mmScaler.getMax()
+    True
+    >>> modelPath = temp_path + "/min-max-scaler-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = MinMaxScalerModel.load(modelPath)
+    >>> loadedModel.originalMin == model.originalMin
+    True
+    >>> loadedModel.originalMax == model.originalMax
+    True
+
+    .. versionadded:: 1.6.0
+    """
+
+    min = Param(Params._dummy(), "min", "Lower bound of the output feature range",
+                typeConverter=TypeConverters.toFloat)
+    max = Param(Params._dummy(), "max", "Upper bound of the output feature range",
+                typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, min=0.0, max=1.0, inputCol=None, outputCol=None):
+        """
+        __init__(self, min=0.0, max=1.0, inputCol=None, outputCol=None)
+        """
+        super(MinMaxScaler, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.MinMaxScaler", self.uid)
+        self._setDefault(min=0.0, max=1.0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None):
+        """
+        setParams(self, min=0.0, max=1.0, inputCol=None, outputCol=None)
+        Sets params for this MinMaxScaler.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.6.0")
+    def setMin(self, value):
+        """
+        Sets the value of :py:attr:`min`.
+        """
+        return self._set(min=value)
+
+    @since("1.6.0")
+    def getMin(self):
+        """
+        Gets the value of min or its default value.
+        """
+        return self.getOrDefault(self.min)
+
+    @since("1.6.0")
+    def setMax(self, value):
+        """
+        Sets the value of :py:attr:`max`.
+        """
+        return self._set(max=value)
+
+    @since("1.6.0")
+    def getMax(self):
+        """
+        Gets the value of max or its default value.
+        """
+        return self.getOrDefault(self.max)
+
+    def _create_model(self, java_model):
+        return MinMaxScalerModel(java_model)
+
+
+class MinMaxScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`MinMaxScaler`.
+
+    .. versionadded:: 1.6.0
+    """
+
+    @property
+    @since("2.0.0")
+    def originalMin(self):
+        """
+        Min value for each original column during fitting.
+        """
+        return self._call_java("originalMin")
+
+    @property
+    @since("2.0.0")
+    def originalMax(self):
+        """
+        Max value for each original column during fitting.
+        """
+        return self._call_java("originalMax")
+
+
+@inherit_doc
+@ignore_unicode_prefix
+class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    A feature transformer that converts the input array of strings into an array of n-grams. Null
+    values in the input array are ignored.
+    It returns an array of n-grams where each n-gram is represented by a space-separated string of
+    words.
+    When the input is empty, an empty array is returned.
+    When the input array length is less than n (number of elements per n-gram), no n-grams are
+    returned.
+
+    >>> df = spark.createDataFrame([Row(inputTokens=["a", "b", "c", "d", "e"])])
+    >>> ngram = NGram(n=2, inputCol="inputTokens", outputCol="nGrams")
+    >>> ngram.transform(df).head()
+    Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], nGrams=[u'a b', u'b c', u'c d', u'd e'])
+    >>> # Change n-gram length
+    >>> ngram.setParams(n=4).transform(df).head()
+    Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], nGrams=[u'a b c d', u'b c d e'])
+    >>> # Temporarily modify output column.
+    >>> ngram.transform(df, {ngram.outputCol: "output"}).head()
+    Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], output=[u'a b c d', u'b c d e'])
+    >>> ngram.transform(df).head()
+    Row(inputTokens=[u'a', u'b', u'c', u'd', u'e'], nGrams=[u'a b c d', u'b c d e'])
+    >>> # Must use keyword arguments to specify params.
+    >>> ngram.setParams("text")
+    Traceback (most recent call last):
+        ...
+    TypeError: Method setParams forces keyword arguments.
+    >>> ngramPath = temp_path + "/ngram"
+    >>> ngram.save(ngramPath)
+    >>> loadedNGram = NGram.load(ngramPath)
+    >>> loadedNGram.getN() == ngram.getN()
+    True
+
+    .. versionadded:: 1.5.0
+    """
+
+    n = Param(Params._dummy(), "n", "number of elements per n-gram (>=1)",
+              typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, n=2, inputCol=None, outputCol=None):
+        """
+        __init__(self, n=2, inputCol=None, outputCol=None)
+        """
+        super(NGram, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.NGram", self.uid)
+        self._setDefault(n=2)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.5.0")
+    def setParams(self, n=2, inputCol=None, outputCol=None):
+        """
+        setParams(self, n=2, inputCol=None, outputCol=None)
+        Sets params for this NGram.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.5.0")
+    def setN(self, value):
+        """
+        Sets the value of :py:attr:`n`.
+        """
+        return self._set(n=value)
+
+    @since("1.5.0")
+    def getN(self):
+        """
+        Gets the value of n or its default value.
+        """
+        return self.getOrDefault(self.n)
+
+
+@inherit_doc
+class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    Normalize a vector to have unit norm using the given p-norm.
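+
+    For example, with the default p = 2.0 the dense vector [3.0, -4.0] in the doctest below
+    has norm (3.0^2 + 4.0^2)^(1/2) = 5.0 and is rescaled to [0.6, -0.8]; with p = 1.0 the
+    norm is |3.0| + |-4.0| = 7.0, giving approximately [0.4286, -0.5714].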
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> svec = Vectors.sparse(4, {1: 4.0, 3: 3.0})
+    >>> df = spark.createDataFrame([(Vectors.dense([3.0, -4.0]), svec)], ["dense", "sparse"])
+    >>> normalizer = Normalizer(p=2.0, inputCol="dense", outputCol="features")
+    >>> normalizer.transform(df).head().features
+    DenseVector([0.6, -0.8])
+    >>> normalizer.setParams(inputCol="sparse", outputCol="freqs").transform(df).head().freqs
+    SparseVector(4, {1: 0.8, 3: 0.6})
+    >>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"}
+    >>> normalizer.transform(df, params).head().vector
+    DenseVector([0.4286, -0.5714])
+    >>> normalizerPath = temp_path + "/normalizer"
+    >>> normalizer.save(normalizerPath)
+    >>> loadedNormalizer = Normalizer.load(normalizerPath)
+    >>> loadedNormalizer.getP() == normalizer.getP()
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    p = Param(Params._dummy(), "p", "the p norm value.",
+              typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, p=2.0, inputCol=None, outputCol=None):
+        """
+        __init__(self, p=2.0, inputCol=None, outputCol=None)
+        """
+        super(Normalizer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Normalizer", self.uid)
+        self._setDefault(p=2.0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, p=2.0, inputCol=None, outputCol=None):
+        """
+        setParams(self, p=2.0, inputCol=None, outputCol=None)
+        Sets params for this Normalizer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setP(self, value):
+        """
+        Sets the value of :py:attr:`p`.
+        """
+        return self._set(p=value)
+
+    @since("1.4.0")
+    def getP(self):
+        """
+        Gets the value of p or its default value.
+        """
+        return self.getOrDefault(self.p)
+
+
+@inherit_doc
+class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    A one-hot encoder that maps a column of category indices to a
+    column of binary vectors, with at most a single one-value per row
+    that indicates the input category index.
+    For example with 5 categories, an input value of 2.0 would map to
+    an output vector of `[0.0, 0.0, 1.0, 0.0]`.
+    The last category is not included by default (configurable via
+    :py:attr:`dropLast`) because it makes the vector entries sum up to
+    one, and hence linearly dependent.
+    So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`.
+
+    .. note:: This is different from scikit-learn's OneHotEncoder,
+        which keeps all categories. The output vectors are sparse.
+
+    .. note:: Deprecated in 2.3.0. :py:class:`OneHotEncoderEstimator` will be renamed to
+        :py:class:`OneHotEncoder` and this :py:class:`OneHotEncoder` will be removed in 3.0.0.
+
+    .. seealso::
+
+       :py:class:`StringIndexer` for converting categorical values into
+       category indices
+
+    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed")
+    >>> model = stringIndexer.fit(stringIndDf)
+    >>> td = model.transform(stringIndDf)
+    >>> encoder = OneHotEncoder(inputCol="indexed", outputCol="features")
+    >>> encoder.transform(td).head().features
+    SparseVector(2, {0: 1.0})
+    >>> encoder.setParams(outputCol="freqs").transform(td).head().freqs
+    SparseVector(2, {0: 1.0})
+    >>> params = {encoder.dropLast: False, encoder.outputCol: "test"}
+    >>> encoder.transform(td, params).head().test
+    SparseVector(3, {0: 1.0})
+    >>> onehotEncoderPath = temp_path + "/onehot-encoder"
+    >>> encoder.save(onehotEncoderPath)
+    >>> loadedEncoder = OneHotEncoder.load(onehotEncoderPath)
+    >>> loadedEncoder.getDropLast() == encoder.getDropLast()
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    dropLast = Param(Params._dummy(), "dropLast", "whether to drop the last category",
+                     typeConverter=TypeConverters.toBoolean)
+
+    @keyword_only
+    def __init__(self, dropLast=True, inputCol=None, outputCol=None):
+        """
+        __init__(self, dropLast=True, inputCol=None, outputCol=None)
+        """
+        super(OneHotEncoder, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.OneHotEncoder", self.uid)
+        self._setDefault(dropLast=True)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, dropLast=True, inputCol=None, outputCol=None):
+        """
+        setParams(self, dropLast=True, inputCol=None, outputCol=None)
+        Sets params for this OneHotEncoder.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setDropLast(self, value):
+        """
+        Sets the value of :py:attr:`dropLast`.
+        """
+        return self._set(dropLast=value)
+
+    @since("1.4.0")
+    def getDropLast(self):
+        """
+        Gets the value of dropLast or its default value.
+        """
+        return self.getOrDefault(self.dropLast)
+
+
+@inherit_doc
+class OneHotEncoderEstimator(JavaEstimator, HasInputCols, HasOutputCols, HasHandleInvalid,
+                             JavaMLReadable, JavaMLWritable):
+    """
+    A one-hot encoder that maps a column of category indices to a column of binary vectors, with
+    at most a single one-value per row that indicates the input category index.
+    For example with 5 categories, an input value of 2.0 would map to an output vector of
+    `[0.0, 0.0, 1.0, 0.0]`.
+    The last category is not included by default (configurable via `dropLast`),
+    because it makes the vector entries sum up to one, and hence linearly dependent.
+    So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`.
+
+    Note: This is different from scikit-learn's OneHotEncoder, which keeps all categories.
+    The output vectors are sparse.
+
+    When `handleInvalid` is configured to 'keep', an extra "category" indicating invalid values is
+    added as the last category. So when `dropLast` is true, invalid values are encoded as an
+    all-zeros vector.
+
+    Note: When encoding multi-column by using `inputCols` and `outputCols` params, input/output
+    cols come in pairs, specified by the order in the arrays, and each pair is treated
+    independently.
+
+    See `StringIndexer` for converting categorical values into category indices
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([(0.0,), (1.0,), (2.0,)], ["input"])
+    >>> ohe = OneHotEncoderEstimator(inputCols=["input"], outputCols=["output"])
+    >>> model = ohe.fit(df)
+    >>> model.transform(df).head().output
+    SparseVector(2, {0: 1.0})
+    >>> ohePath = temp_path + "/oheEstimator"
+    >>> ohe.save(ohePath)
+    >>> loadedOHE = OneHotEncoderEstimator.load(ohePath)
+    >>> loadedOHE.getInputCols() == ohe.getInputCols()
+    True
+    >>> modelPath = temp_path + "/ohe-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = OneHotEncoderModel.load(modelPath)
+    >>> loadedModel.categorySizes == model.categorySizes
+    True
+
+    .. versionadded:: 2.3.0
+    """
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "How to handle invalid data during " +
+                          "transform(). Options are 'keep' (invalid data presented as an extra " +
+                          "categorical feature) or error (throw an error). Note that this Param " +
+                          "is only used during transform; during fitting, invalid data will " +
+                          "result in an error.",
+                          typeConverter=TypeConverters.toString)
+
+    dropLast = Param(Params._dummy(), "dropLast", "whether to drop the last category",
+                     typeConverter=TypeConverters.toBoolean)
+
+    @keyword_only
+    def __init__(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True):
+        """
+        __init__(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True)
+        """
+        super(OneHotEncoderEstimator, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.feature.OneHotEncoderEstimator", self.uid)
+        self._setDefault(handleInvalid="error", dropLast=True)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.3.0")
+    def setParams(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True):
+        """
+        setParams(self, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True)
+        Sets params for this OneHotEncoderEstimator.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.3.0")
+    def setDropLast(self, value):
+        """
+        Sets the value of :py:attr:`dropLast`.
+        """
+        return self._set(dropLast=value)
+
+    @since("2.3.0")
+    def getDropLast(self):
+        """
+        Gets the value of dropLast or its default value.
+        """
+        return self.getOrDefault(self.dropLast)
+
+    def _create_model(self, java_model):
+        return OneHotEncoderModel(java_model)
+
+
+class OneHotEncoderModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`OneHotEncoderEstimator`.
+
+    .. versionadded:: 2.3.0
+    """
+
+    @property
+    @since("2.3.0")
+    def categorySizes(self):
+        """
+        Original number of categories for each feature being encoded.
+        The array contains one value for each input column, in order.
+        """
+        return self._call_java("categorySizes")
+
+
+@inherit_doc
+class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable,
+                          JavaMLWritable):
+    """
+    Perform feature expansion in a polynomial space. As described in the `Wikipedia article on
+    Polynomial expansion <http://en.wikipedia.org/wiki/Polynomial_expansion>`_, "In mathematics,
+    an expansion of a product of sums expresses it as a sum of products by using the fact that
+    multiplication distributes over addition". For example, expanding the 2-variable feature
+    vector `(x, y)` with degree 2 yields `(x, x * x, y, x * y, y * y)`.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"])
+    >>> px = PolynomialExpansion(degree=2, inputCol="dense", outputCol="expanded")
+    >>> px.transform(df).head().expanded
+    DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
+    >>> px.setParams(outputCol="test").transform(df).head().test
+    DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])
+    >>> polyExpansionPath = temp_path + "/poly-expansion"
+    >>> px.save(polyExpansionPath)
+    >>> loadedPx = PolynomialExpansion.load(polyExpansionPath)
+    >>> loadedPx.getDegree() == px.getDegree()
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    degree = Param(Params._dummy(), "degree", "the polynomial degree to expand (>= 1)",
+                   typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, degree=2, inputCol=None, outputCol=None):
+        """
+        __init__(self, degree=2, inputCol=None, outputCol=None)
+        """
+        super(PolynomialExpansion, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.feature.PolynomialExpansion", self.uid)
+        self._setDefault(degree=2)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, degree=2, inputCol=None, outputCol=None):
+        """
+        setParams(self, degree=2, inputCol=None, outputCol=None)
+        Sets params for this PolynomialExpansion.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setDegree(self, value):
+        """
+        Sets the value of :py:attr:`degree`.
+        """
+        return self._set(degree=value)
+
+    @since("1.4.0")
+    def getDegree(self):
+        """
+        Gets the value of degree or its default value.
+        """
+        return self.getOrDefault(self.degree)
+
+
+@inherit_doc
+class QuantileDiscretizer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid,
+                          JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    `QuantileDiscretizer` takes a column with continuous features and outputs a column with binned
+    categorical features. The number of bins can be set using the :py:attr:`numBuckets` parameter.
+    It is possible that the number of buckets used will be less than this value, for example, if
+    there are too few distinct values of the input to create enough distinct quantiles.
+
+    NaN handling: QuantileDiscretizer will raise an error when it finds NaN values in the
+    dataset, but the user can choose to either keep or remove NaN values within the dataset
+    by setting the :py:attr:`handleInvalid` parameter. If the user chooses to keep NaN values,
+    they will be handled specially and placed into their own bucket; for example, if 4 buckets
+    are used, then non-NaN data will be put into buckets[0-3], while NaNs will be counted in a
+    special bucket[4].
+
+    Algorithm: The bin ranges are chosen using an approximate algorithm (see the documentation for
+    :py:meth:`~.DataFrameStatFunctions.approxQuantile` for a detailed description).
+    The precision of the approximation can be controlled with the
+    :py:attr:`relativeError` parameter.
+    The lower and upper bin bounds will be `-Infinity` and `+Infinity`, covering all real values.
+
+    >>> values = [(0.1,), (0.4,), (1.2,), (1.5,), (float("nan"),), (float("nan"),)]
+    >>> df = spark.createDataFrame(values, ["values"])
+    >>> qds = QuantileDiscretizer(numBuckets=2,
+    ...     inputCol="values", outputCol="buckets", relativeError=0.01, handleInvalid="error")
+    >>> qds.getRelativeError()
+    0.01
+    >>> bucketizer = qds.fit(df)
+    >>> qds.setHandleInvalid("keep").fit(df).transform(df).count()
+    6
+    >>> qds.setHandleInvalid("skip").fit(df).transform(df).count()
+    4
+    >>> splits = bucketizer.getSplits()
+    >>> splits[0]
+    -inf
+    >>> print("%2.1f" % round(splits[1], 1))
+    0.4
+    >>> bucketed = bucketizer.transform(df).head()
+    >>> bucketed.buckets
+    0.0
+    >>> quantileDiscretizerPath = temp_path + "/quantile-discretizer"
+    >>> qds.save(quantileDiscretizerPath)
+    >>> loadedQds = QuantileDiscretizer.load(quantileDiscretizerPath)
+    >>> loadedQds.getNumBuckets() == qds.getNumBuckets()
+    True
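+
+    A minimal sketch of where NaN rows land under 'keep' (not run as a doctest; reuses the
+    ``df`` with NaN values from the example above)::
+
+        qds2 = QuantileDiscretizer(numBuckets=2, inputCol="values", outputCol="buckets",
+                                   handleInvalid="keep")
+        # With 2 regular buckets, NaN rows are placed in the extra bucket with index 2.0.
+        qds2.fit(df).transform(df).where("isnan(values)").show()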
+
+    .. versionadded:: 2.0.0
+    """
+
+    numBuckets = Param(Params._dummy(), "numBuckets",
+                       "Maximum number of buckets (quantiles, or " +
+                       "categories) into which data points are grouped. Must be >= 2.",
+                       typeConverter=TypeConverters.toInt)
+
+    relativeError = Param(Params._dummy(), "relativeError", "The relative target precision for " +
+                          "the approximate quantile algorithm used to generate buckets. " +
+                          "Must be in the range [0, 1].",
+                          typeConverter=TypeConverters.toFloat)
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. " +
+                          "Options are skip (filter out rows with invalid values), " +
+                          "error (throw an error), or keep (keep invalid values in a special " +
+                          "additional bucket).",
+                          typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001,
+                 handleInvalid="error"):
+        """
+        __init__(self, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001, \
+                 handleInvalid="error")
+        """
+        super(QuantileDiscretizer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.QuantileDiscretizer",
+                                            self.uid)
+        self._setDefault(numBuckets=2, relativeError=0.001, handleInvalid="error")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001,
+                  handleInvalid="error"):
+        """
+        setParams(self, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001, \
+                  handleInvalid="error")
+        Set the params for the QuantileDiscretizer
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.0.0")
+    def setNumBuckets(self, value):
+        """
+        Sets the value of :py:attr:`numBuckets`.
+        """
+        return self._set(numBuckets=value)
+
+    @since("2.0.0")
+    def getNumBuckets(self):
+        """
+        Gets the value of numBuckets or its default value.
+        """
+        return self.getOrDefault(self.numBuckets)
+
+    @since("2.0.0")
+    def setRelativeError(self, value):
+        """
+        Sets the value of :py:attr:`relativeError`.
+        """
+        return self._set(relativeError=value)
+
+    @since("2.0.0")
+    def getRelativeError(self):
+        """
+        Gets the value of relativeError or its default value.
+        """
+        return self.getOrDefault(self.relativeError)
+
+    def _create_model(self, java_model):
+        """
+        Private method to convert the java_model to a Python model.
+        """
+        return Bucketizer(splits=list(java_model.getSplits()),
+                          inputCol=self.getInputCol(),
+                          outputCol=self.getOutputCol(),
+                          handleInvalid=self.getHandleInvalid())
+
+
+@inherit_doc
+@ignore_unicode_prefix
+class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    A regex-based tokenizer that extracts tokens either by using the
+    provided regex pattern (in Java dialect) to split the text
+    (the default) or by repeatedly matching the regex (if `gaps` is false).
+    An optional parameter also allows filtering out tokens shorter than a
+    minimum length.
+    It returns an array of strings that can be empty.
+
+    >>> df = spark.createDataFrame([("A B  c",)], ["text"])
+    >>> reTokenizer = RegexTokenizer(inputCol="text", outputCol="words")
+    >>> reTokenizer.transform(df).head()
+    Row(text=u'A B  c', words=[u'a', u'b', u'c'])
+    >>> # Change a parameter.
+    >>> reTokenizer.setParams(outputCol="tokens").transform(df).head()
+    Row(text=u'A B  c', tokens=[u'a', u'b', u'c'])
+    >>> # Temporarily modify a parameter.
+    >>> reTokenizer.transform(df, {reTokenizer.outputCol: "words"}).head()
+    Row(text=u'A B  c', words=[u'a', u'b', u'c'])
+    >>> reTokenizer.transform(df).head()
+    Row(text=u'A B  c', tokens=[u'a', u'b', u'c'])
+    >>> # Must use keyword arguments to specify params.
+    >>> reTokenizer.setParams("text")
+    Traceback (most recent call last):
+        ...
+    TypeError: Method setParams forces keyword arguments.
+    >>> regexTokenizerPath = temp_path + "/regex-tokenizer"
+    >>> reTokenizer.save(regexTokenizerPath)
+    >>> loadedReTokenizer = RegexTokenizer.load(regexTokenizerPath)
+    >>> loadedReTokenizer.getMinTokenLength() == reTokenizer.getMinTokenLength()
+    True
+    >>> loadedReTokenizer.getGaps() == reTokenizer.getGaps()
+    True
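+
+    A minimal sketch of the token-matching mode (not run as a doctest; assumes a
+    SparkSession ``spark``)::
+
+        df2 = spark.createDataFrame([("a,b, c",)], ["text"])
+        matcher = RegexTokenizer(inputCol="text", outputCol="words",
+                                 gaps=False, pattern="[a-z]+")
+        matcher.transform(df2).head().words  # ['a', 'b', 'c']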
+
+    .. versionadded:: 1.4.0
+    """
+
+    minTokenLength = Param(Params._dummy(), "minTokenLength", "minimum token length (>= 0)",
+                           typeConverter=TypeConverters.toInt)
+    gaps = Param(Params._dummy(), "gaps", "whether regex splits on gaps (True) or matches " +
+                 "tokens (False)", typeConverter=TypeConverters.toBoolean)
+    pattern = Param(Params._dummy(), "pattern", "regex pattern (Java dialect) used for tokenizing",
+                    typeConverter=TypeConverters.toString)
+    toLowercase = Param(Params._dummy(), "toLowercase", "whether to convert all characters to " +
+                        "lowercase before tokenizing", typeConverter=TypeConverters.toBoolean)
+
+    @keyword_only
+    def __init__(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None,
+                 outputCol=None, toLowercase=True):
+        """
+        __init__(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, \
+                 outputCol=None, toLowercase=True)
+        """
+        super(RegexTokenizer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RegexTokenizer", self.uid)
+        self._setDefault(minTokenLength=1, gaps=True, pattern="\\s+", toLowercase=True)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None,
+                  outputCol=None, toLowercase=True):
+        """
+        setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, \
+                  outputCol=None, toLowercase=True)
+        Sets params for this RegexTokenizer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setMinTokenLength(self, value):
+        """
+        Sets the value of :py:attr:`minTokenLength`.
+        """
+        return self._set(minTokenLength=value)
+
+    @since("1.4.0")
+    def getMinTokenLength(self):
+        """
+        Gets the value of minTokenLength or its default value.
+        """
+        return self.getOrDefault(self.minTokenLength)
+
+    @since("1.4.0")
+    def setGaps(self, value):
+        """
+        Sets the value of :py:attr:`gaps`.
+        """
+        return self._set(gaps=value)
+
+    @since("1.4.0")
+    def getGaps(self):
+        """
+        Gets the value of gaps or its default value.
+        """
+        return self.getOrDefault(self.gaps)
+
+    @since("1.4.0")
+    def setPattern(self, value):
+        """
+        Sets the value of :py:attr:`pattern`.
+        """
+        return self._set(pattern=value)
+
+    @since("1.4.0")
+    def getPattern(self):
+        """
+        Gets the value of pattern or its default value.
+        """
+        return self.getOrDefault(self.pattern)
+
+    @since("2.0.0")
+    def setToLowercase(self, value):
+        """
+        Sets the value of :py:attr:`toLowercase`.
+        """
+        return self._set(toLowercase=value)
+
+    @since("2.0.0")
+    def getToLowercase(self):
+        """
+        Gets the value of toLowercase or its default value.
+        """
+        return self.getOrDefault(self.toLowercase)
+
+
+@inherit_doc
+class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable):
+    """
+    Implements the transformations which are defined by a SQL statement.
+    Currently, only SQL syntax like 'SELECT ... FROM __THIS__' is supported,
+    where '__THIS__' represents the underlying table of the input dataset.
+
+    >>> df = spark.createDataFrame([(0, 1.0, 3.0), (2, 2.0, 5.0)], ["id", "v1", "v2"])
+    >>> sqlTrans = SQLTransformer(
+    ...     statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
+    >>> sqlTrans.transform(df).head()
+    Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0)
+    >>> sqlTransformerPath = temp_path + "/sql-transformer"
+    >>> sqlTrans.save(sqlTransformerPath)
+    >>> loadedSqlTrans = SQLTransformer.load(sqlTransformerPath)
+    >>> loadedSqlTrans.getStatement() == sqlTrans.getStatement()
+    True
+
+    .. versionadded:: 1.6.0
+    """
+
+    statement = Param(Params._dummy(), "statement", "SQL statement",
+                      typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, statement=None):
+        """
+        __init__(self, statement=None)
+        """
+        super(SQLTransformer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.SQLTransformer", self.uid)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, statement=None):
+        """
+        setParams(self, statement=None)
+        Sets params for this SQLTransformer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.6.0")
+    def setStatement(self, value):
+        """
+        Sets the value of :py:attr:`statement`.
+        """
+        return self._set(statement=value)
+
+    @since("1.6.0")
+    def getStatement(self):
+        """
+        Gets the value of statement or its default value.
+        """
+        return self.getOrDefault(self.statement)
+
+
+@inherit_doc
+class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    Standardizes features by removing the mean and scaling to unit variance using column summary
+    statistics on the samples in the training set.
+
+    The "unit std" is computed using the `corrected sample standard deviation \
+    <https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation>`_,
+    which is computed as the square root of the unbiased sample variance.
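+
+    For example, for the two samples 0.0 and 2.0 in the doctest below, the mean is 1.0 and the
+    corrected sample variance is ((0.0 - 1.0)^2 + (2.0 - 1.0)^2) / (2 - 1) = 2.0, so the unit
+    std is sqrt(2.0), which is approximately 1.4142.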
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"])
+    >>> standardScaler = StandardScaler(inputCol="a", outputCol="scaled")
+    >>> model = standardScaler.fit(df)
+    >>> model.mean
+    DenseVector([1.0])
+    >>> model.std
+    DenseVector([1.4142])
+    >>> model.transform(df).collect()[1].scaled
+    DenseVector([1.4142])
+    >>> standardScalerPath = temp_path + "/standard-scaler"
+    >>> standardScaler.save(standardScalerPath)
+    >>> loadedStandardScaler = StandardScaler.load(standardScalerPath)
+    >>> loadedStandardScaler.getWithMean() == standardScaler.getWithMean()
+    True
+    >>> loadedStandardScaler.getWithStd() == standardScaler.getWithStd()
+    True
+    >>> modelPath = temp_path + "/standard-scaler-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = StandardScalerModel.load(modelPath)
+    >>> loadedModel.std == model.std
+    True
+    >>> loadedModel.mean == model.mean
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    withMean = Param(Params._dummy(), "withMean", "Center data with mean",
+                     typeConverter=TypeConverters.toBoolean)
+    withStd = Param(Params._dummy(), "withStd", "Scale to unit standard deviation",
+                    typeConverter=TypeConverters.toBoolean)
+
+    @keyword_only
+    def __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None):
+        """
+        __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None)
+        """
+        super(StandardScaler, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StandardScaler", self.uid)
+        self._setDefault(withMean=False, withStd=True)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None):
+        """
+        setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None)
+        Sets params for this StandardScaler.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setWithMean(self, value):
+        """
+        Sets the value of :py:attr:`withMean`.
+        """
+        return self._set(withMean=value)
+
+    @since("1.4.0")
+    def getWithMean(self):
+        """
+        Gets the value of withMean or its default value.
+        """
+        return self.getOrDefault(self.withMean)
+
+    @since("1.4.0")
+    def setWithStd(self, value):
+        """
+        Sets the value of :py:attr:`withStd`.
+        """
+        return self._set(withStd=value)
+
+    @since("1.4.0")
+    def getWithStd(self):
+        """
+        Gets the value of withStd or its default value.
+        """
+        return self.getOrDefault(self.withStd)
+
+    def _create_model(self, java_model):
+        return StandardScalerModel(java_model)
+
+
+class StandardScalerModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`StandardScaler`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def std(self):
+        """
+        Standard deviation of the StandardScalerModel.
+        """
+        return self._call_java("std")
+
+    @property
+    @since("2.0.0")
+    def mean(self):
+        """
+        Mean of the StandardScalerModel.
+        """
+        return self._call_java("mean")
+
+
+class _StringIndexerParams(JavaParams, HasHandleInvalid, HasInputCol, HasOutputCol):
+    """
+    Params for :py:class:`StringIndexer` and :py:class:`StringIndexerModel`.
+    """
+
+    stringOrderType = Param(Params._dummy(), "stringOrderType",
+                            "How to order labels of string column. The first label after " +
+                            "ordering is assigned an index of 0. Supported options: " +
+                            "frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc.",
+                            typeConverter=TypeConverters.toString)
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid data (unseen " +
+                          "or NULL values) in features and label column of string type. " +
+                          "Options are 'skip' (filter out rows with invalid data), " +
+                          "'error' (throw an error), or 'keep' (put invalid data " +
+                          "in a special additional bucket, at index numLabels).",
+                          typeConverter=TypeConverters.toString)
+
+    def __init__(self, *args):
+        super(_StringIndexerParams, self).__init__(*args)
+        self._setDefault(handleInvalid="error", stringOrderType="frequencyDesc")
+
+    @since("2.3.0")
+    def getStringOrderType(self):
+        """
+        Gets the value of :py:attr:`stringOrderType` or its default value 'frequencyDesc'.
+        """
+        return self.getOrDefault(self.stringOrderType)
+
+
+@inherit_doc
+class StringIndexer(JavaEstimator, _StringIndexerParams, JavaMLReadable, JavaMLWritable):
+    """
+    A label indexer that maps a string column of labels to an ML column of label indices.
+    If the input column is numeric, we cast it to string and index the string values.
+    The indices are in [0, numLabels). By default, this is ordered by label frequencies
+    so the most frequent label gets index 0. The ordering behavior is controlled by
+    setting :py:attr:`stringOrderType`. Its default value is 'frequencyDesc'.
+
+    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed", handleInvalid="error",
+    ...     stringOrderType="frequencyDesc")
+    >>> model = stringIndexer.fit(stringIndDf)
+    >>> td = model.transform(stringIndDf)
+    >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),
+    ...     key=lambda x: x[0])
+    [(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)]
+    >>> inverter = IndexToString(inputCol="indexed", outputCol="label2", labels=model.labels)
+    >>> itd = inverter.transform(td)
+    >>> sorted(set([(i[0], str(i[1])) for i in itd.select(itd.id, itd.label2).collect()]),
+    ...     key=lambda x: x[0])
+    [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]
+    >>> stringIndexerPath = temp_path + "/string-indexer"
+    >>> stringIndexer.save(stringIndexerPath)
+    >>> loadedIndexer = StringIndexer.load(stringIndexerPath)
+    >>> loadedIndexer.getHandleInvalid() == stringIndexer.getHandleInvalid()
+    True
+    >>> modelPath = temp_path + "/string-indexer-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = StringIndexerModel.load(modelPath)
+    >>> loadedModel.labels == model.labels
+    True
+    >>> indexToStringPath = temp_path + "/index-to-string"
+    >>> inverter.save(indexToStringPath)
+    >>> loadedInverter = IndexToString.load(indexToStringPath)
+    >>> loadedInverter.getLabels() == inverter.getLabels()
+    True
+    >>> stringIndexer.getStringOrderType()
+    'frequencyDesc'
+    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed", handleInvalid="error",
+    ...     stringOrderType="alphabetDesc")
+    >>> model = stringIndexer.fit(stringIndDf)
+    >>> td = model.transform(stringIndDf)
+    >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),
+    ...     key=lambda x: x[0])
+    [(0, 2.0), (1, 1.0), (2, 0.0), (3, 2.0), (4, 2.0), (5, 0.0)]
+    >>> fromlabelsModel = StringIndexerModel.from_labels(["a", "b", "c"],
+    ...     inputCol="label", outputCol="indexed", handleInvalid="error")
+    >>> result = fromlabelsModel.transform(stringIndDf)
+    >>> sorted(set([(i[0], i[1]) for i in result.select(result.id, result.indexed).collect()]),
+    ...     key=lambda x: x[0])
+    [(0, 0.0), (1, 1.0), (2, 2.0), (3, 0.0), (4, 0.0), (5, 2.0)]
+
+    .. versionadded:: 1.4.0
+    """
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None, handleInvalid="error",
+                 stringOrderType="frequencyDesc"):
+        """
+        __init__(self, inputCol=None, outputCol=None, handleInvalid="error", \
+                 stringOrderType="frequencyDesc")
+        """
+        super(StringIndexer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StringIndexer", self.uid)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, inputCol=None, outputCol=None, handleInvalid="error",
+                  stringOrderType="frequencyDesc"):
+        """
+        setParams(self, inputCol=None, outputCol=None, handleInvalid="error", \
+                  stringOrderType="frequencyDesc")
+        Sets params for this StringIndexer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return StringIndexerModel(java_model)
+
+    @since("2.3.0")
+    def setStringOrderType(self, value):
+        """
+        Sets the value of :py:attr:`stringOrderType`.
+        """
+        return self._set(stringOrderType=value)
+
+
+class StringIndexerModel(JavaModel, _StringIndexerParams, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`StringIndexer`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @classmethod
+    @since("2.4.0")
+    def from_labels(cls, labels, inputCol, outputCol=None, handleInvalid=None):
+        """
+        Construct the model directly from an array of label strings,
+        requires an active SparkContext.
+        """
+        sc = SparkContext._active_spark_context
+        java_class = sc._gateway.jvm.java.lang.String
+        jlabels = StringIndexerModel._new_java_array(labels, java_class)
+        model = StringIndexerModel._create_from_java_class(
+            "org.apache.spark.ml.feature.StringIndexerModel", jlabels)
+        model.setInputCol(inputCol)
+        if outputCol is not None:
+            model.setOutputCol(outputCol)
+        if handleInvalid is not None:
+            model.setHandleInvalid(handleInvalid)
+        return model
+
+    @property
+    @since("1.5.0")
+    def labels(self):
+        """
+        Ordered list of labels, corresponding to indices to be assigned.
+        """
+        return self._call_java("labels")
+
+    @since("2.4.0")
+    def setHandleInvalid(self, value):
+        """
+        Sets the value of :py:attr:`handleInvalid`.
+        """
+        return self._set(handleInvalid=value)
+
+
+@inherit_doc
+class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    A :py:class:`Transformer` that maps a column of indices back to a new column of
+    corresponding string values.
+    The index-string mapping is either from the ML attributes of the input column,
+    or from user-supplied labels (which take precedence over ML attributes).
+    See :py:class:`StringIndexer` for converting strings into indices.
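+
+    A minimal usage sketch (not run as a doctest; assumes a SparkSession ``spark``)::
+
+        df = spark.createDataFrame([(0.0,), (1.0,), (0.0,)], ["indexed"])
+        inv = IndexToString(inputCol="indexed", outputCol="label", labels=["a", "b"])
+        inv.transform(df).show()  # 0.0 maps back to 'a' and 1.0 to 'b'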
+
+    .. versionadded:: 1.6.0
+    """
+
+    labels = Param(Params._dummy(), "labels",
+                   "Optional array of labels specifying index-string mapping." +
+                   " If not provided or if empty, then metadata from inputCol is used instead.",
+                   typeConverter=TypeConverters.toListString)
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None, labels=None):
+        """
+        __init__(self, inputCol=None, outputCol=None, labels=None)
+        """
+        super(IndexToString, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IndexToString",
+                                            self.uid)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, inputCol=None, outputCol=None, labels=None):
+        """
+        setParams(self, inputCol=None, outputCol=None, labels=None)
+        Sets params for this IndexToString.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.6.0")
+    def setLabels(self, value):
+        """
+        Sets the value of :py:attr:`labels`.
+        """
+        return self._set(labels=value)
+
+    @since("1.6.0")
+    def getLabels(self):
+        """
+        Gets the value of :py:attr:`labels` or its default value.
+        """
+        return self.getOrDefault(self.labels)
+
+
+class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    A feature transformer that filters out stop words from input.
+
+    .. note:: null values from the input array are preserved unless null is explicitly
+        added to stopWords.
+
+    >>> df = spark.createDataFrame([(["a", "b", "c"],)], ["text"])
+    >>> remover = StopWordsRemover(inputCol="text", outputCol="words", stopWords=["b"])
+    >>> remover.transform(df).head().words == ['a', 'c']
+    True
+    >>> stopWordsRemoverPath = temp_path + "/stopwords-remover"
+    >>> remover.save(stopWordsRemoverPath)
+    >>> loadedRemover = StopWordsRemover.load(stopWordsRemoverPath)
+    >>> loadedRemover.getStopWords() == remover.getStopWords()
+    True
+    >>> loadedRemover.getCaseSensitive() == remover.getCaseSensitive()
+    True
+
+    .. versionadded:: 1.6.0
+    """
+
+    stopWords = Param(Params._dummy(), "stopWords", "The words to be filtered out",
+                      typeConverter=TypeConverters.toListString)
+    caseSensitive = Param(Params._dummy(), "caseSensitive", "whether to do a case sensitive " +
+                          "comparison over the stop words", typeConverter=TypeConverters.toBoolean)
+    locale = Param(Params._dummy(), "locale", "Locale of the input. Ignored when caseSensitive " +
+                   "is true.", typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False,
+                 locale=None):
+        """
+        __init__(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False, \
+        locale=None)
+        """
+        super(StopWordsRemover, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StopWordsRemover",
+                                            self.uid)
+        self._setDefault(stopWords=StopWordsRemover.loadDefaultStopWords("english"),
+                         caseSensitive=False, locale=self._java_obj.getLocale())
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False,
+                  locale=None):
+        """
+        setParams(self, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False, \
+        locale=None)
+        Sets params for this StopWordsRemover.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.6.0")
+    def setStopWords(self, value):
+        """
+        Sets the value of :py:attr:`stopWords`.
+        """
+        return self._set(stopWords=value)
+
+    @since("1.6.0")
+    def getStopWords(self):
+        """
+        Gets the value of :py:attr:`stopWords` or its default value.
+        """
+        return self.getOrDefault(self.stopWords)
+
+    @since("1.6.0")
+    def setCaseSensitive(self, value):
+        """
+        Sets the value of :py:attr:`caseSensitive`.
+        """
+        return self._set(caseSensitive=value)
+
+    @since("1.6.0")
+    def getCaseSensitive(self):
+        """
+        Gets the value of :py:attr:`caseSensitive` or its default value.
+        """
+        return self.getOrDefault(self.caseSensitive)
+
+    @since("2.4.0")
+    def setLocale(self, value):
+        """
+        Sets the value of :py:attr:`locale`.
+        """
+        return self._set(locale=value)
+
+    @since("2.4.0")
+    def getLocale(self):
+        """
+        Gets the value of :py:attr:`locale`.
+        """
+        return self.getOrDefault(self.locale)
+
+    @staticmethod
+    @since("2.0.0")
+    def loadDefaultStopWords(language):
+        """
+        Loads the default stop words for the given language.
+        Supported languages: danish, dutch, english, finnish, french, german, hungarian,
+        italian, norwegian, portuguese, russian, spanish, swedish, turkish
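+
+        A minimal sketch (assumes an active SparkContext)::
+
+            french_stop_words = StopWordsRemover.loadDefaultStopWords("french")
+            remover = StopWordsRemover(inputCol="text", outputCol="filtered",
+                                       stopWords=french_stop_words)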
+        """
+        stopWordsObj = _jvm().org.apache.spark.ml.feature.StopWordsRemover
+        return list(stopWordsObj.loadDefaultStopWords(language))
+
+
+@inherit_doc
+@ignore_unicode_prefix
+class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    A tokenizer that converts the input string to lowercase and then
+    splits it by white spaces.
+
+    >>> df = spark.createDataFrame([("a b c",)], ["text"])
+    >>> tokenizer = Tokenizer(inputCol="text", outputCol="words")
+    >>> tokenizer.transform(df).head()
+    Row(text=u'a b c', words=[u'a', u'b', u'c'])
+    >>> # Change a parameter.
+    >>> tokenizer.setParams(outputCol="tokens").transform(df).head()
+    Row(text=u'a b c', tokens=[u'a', u'b', u'c'])
+    >>> # Temporarily modify a parameter.
+    >>> tokenizer.transform(df, {tokenizer.outputCol: "words"}).head()
+    Row(text=u'a b c', words=[u'a', u'b', u'c'])
+    >>> tokenizer.transform(df).head()
+    Row(text=u'a b c', tokens=[u'a', u'b', u'c'])
+    >>> # Must use keyword arguments to specify params.
+    >>> tokenizer.setParams("text")
+    Traceback (most recent call last):
+        ...
+    TypeError: Method setParams forces keyword arguments.
+    >>> tokenizerPath = temp_path + "/tokenizer"
+    >>> tokenizer.save(tokenizerPath)
+    >>> loadedTokenizer = Tokenizer.load(tokenizerPath)
+    >>> loadedTokenizer.transform(df).head().tokens == tokenizer.transform(df).head().tokens
+    True
+
+    .. versionadded:: 1.3.0
+    """
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None):
+        """
+        __init__(self, inputCol=None, outputCol=None)
+        """
+        super(Tokenizer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Tokenizer", self.uid)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.3.0")
+    def setParams(self, inputCol=None, outputCol=None):
+        """
+        setParams(self, inputCol=None, outputCol=None)
+        Sets params for this Tokenizer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+
+@inherit_doc
+class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol, HasHandleInvalid, JavaMLReadable,
+                      JavaMLWritable):
+    """
+    A feature transformer that merges multiple columns into a vector column.
+
+    >>> df = spark.createDataFrame([(1, 0, 3)], ["a", "b", "c"])
+    >>> vecAssembler = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features")
+    >>> vecAssembler.transform(df).head().features
+    DenseVector([1.0, 0.0, 3.0])
+    >>> vecAssembler.setParams(outputCol="freqs").transform(df).head().freqs
+    DenseVector([1.0, 0.0, 3.0])
+    >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"}
+    >>> vecAssembler.transform(df, params).head().vector
+    DenseVector([0.0, 1.0])
+    >>> vectorAssemblerPath = temp_path + "/vector-assembler"
+    >>> vecAssembler.save(vectorAssemblerPath)
+    >>> loadedAssembler = VectorAssembler.load(vectorAssemblerPath)
+    >>> loadedAssembler.transform(df).head().freqs == vecAssembler.transform(df).head().freqs
+    True
+    >>> dfWithNullsAndNaNs = spark.createDataFrame(
+    ...    [(1.0, 2.0, None), (3.0, float("nan"), 4.0), (5.0, 6.0, 7.0)], ["a", "b", "c"])
+    >>> vecAssembler2 = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features",
+    ...    handleInvalid="keep")
+    >>> vecAssembler2.transform(dfWithNullsAndNaNs).show()
+    +---+---+----+-------------+
+    |  a|  b|   c|     features|
+    +---+---+----+-------------+
+    |1.0|2.0|null|[1.0,2.0,NaN]|
+    |3.0|NaN| 4.0|[3.0,NaN,4.0]|
+    |5.0|6.0| 7.0|[5.0,6.0,7.0]|
+    +---+---+----+-------------+
+    ...
+    >>> vecAssembler2.setParams(handleInvalid="skip").transform(dfWithNullsAndNaNs).show()
+    +---+---+---+-------------+
+    |  a|  b|  c|     features|
+    +---+---+---+-------------+
+    |5.0|6.0|7.0|[5.0,6.0,7.0]|
+    +---+---+---+-------------+
+    ...
+
+    .. versionadded:: 1.4.0
+    """
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "How to handle invalid data (NULL " +
+                          "and NaN values). Options are 'skip' (filter out rows with invalid " +
+                          "data), 'error' (throw an error), or 'keep' (return relevant number " +
+                          "of NaN in the output). Column lengths are taken from the size of ML " +
+                          "Attribute Group, which can be set using `VectorSizeHint` in a " +
+                          "pipeline before `VectorAssembler`. Column lengths can also be " +
+                          "inferred from the first rows of the data, since it is safe to do " +
+                          "so, but only in the case of 'error' or 'skip'.",
+                          typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, inputCols=None, outputCol=None, handleInvalid="error"):
+        """
+        __init__(self, inputCols=None, outputCol=None, handleInvalid="error")
+        """
+        super(VectorAssembler, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorAssembler", self.uid)
+        self._setDefault(handleInvalid="error")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, inputCols=None, outputCol=None, handleInvalid="error"):
+        """
+        setParams(self, inputCols=None, outputCol=None, handleInvalid="error")
+        Sets params for this VectorAssembler.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+
+@inherit_doc
+class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid, JavaMLReadable,
+                    JavaMLWritable):
+    """
+    Class for indexing categorical feature columns in a dataset of `Vector`.
+
+    This has 2 usage modes:
+      - Automatically identify categorical features (default behavior)
+         - This helps process a dataset of unknown vectors into a dataset with some continuous
+           features and some categorical features. The choice between continuous and categorical
+           is based upon a maxCategories parameter.
+         - Set maxCategories to the maximum number of categories any categorical feature should
+           have.
+         - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}.
+           If maxCategories = 2, then feature 0 will be declared categorical and use indices {0, 1},
+           and feature 1 will be declared continuous.
+      - Index all features, if all features are categorical
+         - If maxCategories is set to be very large, then this will build an index of unique
+           values for all features.
+         - Warning: This can cause problems if features are continuous since this will collect ALL
+           unique values to the driver.
+         - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}.
+           If maxCategories >= 3, then both features will be declared categorical.
+
+    This returns a model which can transform categorical features to use 0-based indices.
+
+    Index stability:
+      - This is not guaranteed to choose the same category index across multiple runs.
+      - If a categorical feature includes value 0, then this is guaranteed to map value 0 to
+        index 0. This maintains vector sparsity.
+      - More stability may be added in the future.
+
+    TODO: The following extensions are planned for the future:
+      - Preserve metadata in transform; if a feature's metadata is already present,
+        do not recompute.
+      - Specify certain features to not index, either via a parameter or via existing metadata.
+      - Add warning if a categorical feature has only 1 category.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([(Vectors.dense([-1.0, 0.0]),),
+    ...     (Vectors.dense([0.0, 1.0]),), (Vectors.dense([0.0, 2.0]),)], ["a"])
+    >>> indexer = VectorIndexer(maxCategories=2, inputCol="a", outputCol="indexed")
+    >>> model = indexer.fit(df)
+    >>> model.transform(df).head().indexed
+    DenseVector([1.0, 0.0])
+    >>> model.numFeatures
+    2
+    >>> model.categoryMaps
+    {0: {0.0: 0, -1.0: 1}}
+    >>> indexer.setParams(outputCol="test").fit(df).transform(df).collect()[1].test
+    DenseVector([0.0, 1.0])
+    >>> params = {indexer.maxCategories: 3, indexer.outputCol: "vector"}
+    >>> model2 = indexer.fit(df, params)
+    >>> model2.transform(df).head().vector
+    DenseVector([1.0, 0.0])
+    >>> vectorIndexerPath = temp_path + "/vector-indexer"
+    >>> indexer.save(vectorIndexerPath)
+    >>> loadedIndexer = VectorIndexer.load(vectorIndexerPath)
+    >>> loadedIndexer.getMaxCategories() == indexer.getMaxCategories()
+    True
+    >>> modelPath = temp_path + "/vector-indexer-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = VectorIndexerModel.load(modelPath)
+    >>> loadedModel.numFeatures == model.numFeatures
+    True
+    >>> loadedModel.categoryMaps == model.categoryMaps
+    True
+    >>> dfWithInvalid = spark.createDataFrame([(Vectors.dense([3.0, 1.0]),)], ["a"])
+    >>> indexer.getHandleInvalid()
+    'error'
+    >>> model3 = indexer.setHandleInvalid("skip").fit(df)
+    >>> model3.transform(dfWithInvalid).count()
+    0
+    >>> model4 = indexer.setParams(handleInvalid="keep", outputCol="indexed").fit(df)
+    >>> model4.transform(dfWithInvalid).head().indexed
+    DenseVector([2.0, 1.0])
+
+    .. versionadded:: 1.4.0
+    """
+
+    maxCategories = Param(Params._dummy(), "maxCategories",
+                          "Threshold for the number of values a categorical feature can take " +
+                          "(>= 2). If a feature is found to have > maxCategories values, then " +
+                          "it is declared continuous.", typeConverter=TypeConverters.toInt)
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "How to handle invalid data " +
+                          "(unseen labels or NULL values). Options are 'skip' (filter out " +
+                          "rows with invalid data), 'error' (throw an error), or 'keep' (put " +
+                          "invalid data in a special additional bucket, at index of the number " +
+                          "of categories of the feature).",
+                          typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error"):
+        """
+        __init__(self, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error")
+        """
+        super(VectorIndexer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorIndexer", self.uid)
+        self._setDefault(maxCategories=20, handleInvalid="error")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error"):
+        """
+        setParams(self, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error")
+        Sets params for this VectorIndexer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setMaxCategories(self, value):
+        """
+        Sets the value of :py:attr:`maxCategories`.
+        """
+        return self._set(maxCategories=value)
+
+    @since("1.4.0")
+    def getMaxCategories(self):
+        """
+        Gets the value of maxCategories or its default value.
+        """
+        return self.getOrDefault(self.maxCategories)
+
+    def _create_model(self, java_model):
+        return VectorIndexerModel(java_model)
+
+
+class VectorIndexerModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`VectorIndexer`.
+
+    Transform categorical features to use 0-based indices instead of their original values.
+      - Categorical features are mapped to indices.
+      - Continuous features (columns) are left unchanged.
+
+    This also appends metadata to the output column, marking features as Numeric (continuous),
+    Nominal (categorical), or Binary (either continuous or categorical).
+    Non-ML metadata is not carried over from the input to the output column.
+
+    This maintains vector sparsity.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("1.4.0")
+    def numFeatures(self):
+        """
+        Number of features, i.e., length of Vectors which this transforms.
+        """
+        return self._call_java("numFeatures")
+
+    @property
+    @since("1.4.0")
+    def categoryMaps(self):
+        """
+        Feature value index.  Keys are categorical feature indices (column indices).
+        Values are maps from original features values to 0-based category indices.
+        If a feature is not in this map, it is treated as continuous.
+        """
+        return self._call_java("javaCategoryMaps")
+
+
+@inherit_doc
+class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    This class takes a feature vector and outputs a new feature vector with a subarray
+    of the original features.
+
+    The subset of features can be specified with either indices (`setIndices()`)
+    or names (`setNames()`).  At least one feature must be selected. Duplicate features
+    are not allowed, so there can be no overlap between selected indices and names.
+
+    The output vector will order features with the selected indices first (in the order given),
+    followed by the selected names (in the order given).
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (Vectors.dense([-2.0, 2.3, 0.0, 0.0, 1.0]),),
+    ...     (Vectors.dense([0.0, 0.0, 0.0, 0.0, 0.0]),),
+    ...     (Vectors.dense([0.6, -1.1, -3.0, 4.5, 3.3]),)], ["features"])
+    >>> vs = VectorSlicer(inputCol="features", outputCol="sliced", indices=[1, 4])
+    >>> vs.transform(df).head().sliced
+    DenseVector([2.3, 1.0])
+    >>> vectorSlicerPath = temp_path + "/vector-slicer"
+    >>> vs.save(vectorSlicerPath)
+    >>> loadedVs = VectorSlicer.load(vectorSlicerPath)
+    >>> loadedVs.getIndices() == vs.getIndices()
+    True
+    >>> loadedVs.getNames() == vs.getNames()
+    True
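+
+    A minimal sketch of selecting by name alongside an index (not run as a doctest; assumes
+    a SparkSession ``spark`` and that, as is typical, `VectorAssembler` propagates its input
+    column names as ML attribute names)::
+
+        df2 = spark.createDataFrame([(1.0, 2.0, 3.0)], ["a", "b", "c"])
+        assembled = VectorAssembler(inputCols=["a", "b", "c"],
+                                    outputCol="features").transform(df2)
+        vs2 = VectorSlicer(inputCol="features", outputCol="sliced", indices=[2], names=["a"])
+        vs2.transform(assembled).head().sliced  # indices first, then names: [3.0, 1.0]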
+
+    .. versionadded:: 1.6.0
+    """
+
+    indices = Param(Params._dummy(), "indices", "An array of indices to select features from " +
+                    "a vector column. There can be no overlap with names.",
+                    typeConverter=TypeConverters.toListInt)
+    names = Param(Params._dummy(), "names", "An array of feature names to select features from " +
+                  "a vector column. These names must be specified by ML " +
+                  "org.apache.spark.ml.attribute.Attribute. There can be no overlap with " +
+                  "indices.", typeConverter=TypeConverters.toListString)
+
+    @keyword_only
+    def __init__(self, inputCol=None, outputCol=None, indices=None, names=None):
+        """
+        __init__(self, inputCol=None, outputCol=None, indices=None, names=None)
+        """
+        super(VectorSlicer, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorSlicer", self.uid)
+        self._setDefault(indices=[], names=[])
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, inputCol=None, outputCol=None, indices=None, names=None):
+        """
+        setParams(self, inputCol=None, outputCol=None, indices=None, names=None)
+        Sets params for this VectorSlicer.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.6.0")
+    def setIndices(self, value):
+        """
+        Sets the value of :py:attr:`indices`.
+        """
+        return self._set(indices=value)
+
+    @since("1.6.0")
+    def getIndices(self):
+        """
+        Gets the value of indices or its default value.
+        """
+        return self.getOrDefault(self.indices)
+
+    @since("1.6.0")
+    def setNames(self, value):
+        """
+        Sets the value of :py:attr:`names`.
+        """
+        return self._set(names=value)
+
+    @since("1.6.0")
+    def getNames(self):
+        """
+        Gets the value of names or its default value.
+        """
+        return self.getOrDefault(self.names)
+
+
+@inherit_doc
+@ignore_unicode_prefix
+class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol,
+               JavaMLReadable, JavaMLWritable):
+    """
+    Word2Vec trains a model of `Map(String, Vector)`, i.e. it transforms a word into a code
+    for use in further natural language processing or machine learning tasks.
+
+    >>> sent = ("a b " * 100 + "a c " * 10).split(" ")
+    >>> doc = spark.createDataFrame([(sent,), (sent,)], ["sentence"])
+    >>> word2Vec = Word2Vec(vectorSize=5, seed=42, inputCol="sentence", outputCol="model")
+    >>> model = word2Vec.fit(doc)
+    >>> model.getVectors().show()
+    +----+--------------------+
+    |word|              vector|
+    +----+--------------------+
+    |   a|[0.09461779892444...|
+    |   b|[1.15474212169647...|
+    |   c|[-0.3794820010662...|
+    +----+--------------------+
+    ...
+    >>> model.findSynonymsArray("a", 2)
+    [(u'b', 0.25053444504737854), (u'c', -0.6980510950088501)]
+    >>> from pyspark.sql.functions import format_number as fmt
+    >>> model.findSynonyms("a", 2).select("word", fmt("similarity", 5).alias("similarity")).show()
+    +----+----------+
+    |word|similarity|
+    +----+----------+
+    |   b|   0.25053|
+    |   c|  -0.69805|
+    +----+----------+
+    ...
+    >>> model.transform(doc).head().model
+    DenseVector([0.5524, -0.4995, -0.3599, 0.0241, 0.3461])
+    >>> word2vecPath = temp_path + "/word2vec"
+    >>> word2Vec.save(word2vecPath)
+    >>> loadedWord2Vec = Word2Vec.load(word2vecPath)
+    >>> loadedWord2Vec.getVectorSize() == word2Vec.getVectorSize()
+    True
+    >>> loadedWord2Vec.getNumPartitions() == word2Vec.getNumPartitions()
+    True
+    >>> loadedWord2Vec.getMinCount() == word2Vec.getMinCount()
+    True
+    >>> modelPath = temp_path + "/word2vec-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = Word2VecModel.load(modelPath)
+    >>> loadedModel.getVectors().first().word == model.getVectors().first().word
+    True
+    >>> loadedModel.getVectors().first().vector == model.getVectors().first().vector
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    vectorSize = Param(Params._dummy(), "vectorSize",
+                       "the dimension of codes after transforming from words",
+                       typeConverter=TypeConverters.toInt)
+    numPartitions = Param(Params._dummy(), "numPartitions",
+                          "number of partitions for sentences of words",
+                          typeConverter=TypeConverters.toInt)
+    minCount = Param(Params._dummy(), "minCount",
+                     "the minimum number of times a token must appear to be included in the " +
+                     "word2vec model's vocabulary", typeConverter=TypeConverters.toInt)
+    windowSize = Param(Params._dummy(), "windowSize",
+                       "the window size (context words from [-window, window]). Default value is 5",
+                       typeConverter=TypeConverters.toInt)
+    maxSentenceLength = Param(Params._dummy(), "maxSentenceLength",
+                              "Maximum length (in words) of each sentence in the input data. " +
+                              "Any sentence longer than this threshold will " +
+                              "be divided into chunks up to the size.",
+                              typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
+                 seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000):
+        """
+        __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
+                 seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000)
+        """
+        super(Word2Vec, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid)
+        self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
+                         windowSize=5, maxSentenceLength=1000)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1,
+                  seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000):
+        """
+        setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \
+                  seed=None, inputCol=None, outputCol=None, windowSize=5, maxSentenceLength=1000)
+        Sets params for this Word2Vec.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setVectorSize(self, value):
+        """
+        Sets the value of :py:attr:`vectorSize`.
+        """
+        return self._set(vectorSize=value)
+
+    @since("1.4.0")
+    def getVectorSize(self):
+        """
+        Gets the value of vectorSize or its default value.
+        """
+        return self.getOrDefault(self.vectorSize)
+
+    @since("1.4.0")
+    def setNumPartitions(self, value):
+        """
+        Sets the value of :py:attr:`numPartitions`.
+        """
+        return self._set(numPartitions=value)
+
+    @since("1.4.0")
+    def getNumPartitions(self):
+        """
+        Gets the value of numPartitions or its default value.
+        """
+        return self.getOrDefault(self.numPartitions)
+
+    @since("1.4.0")
+    def setMinCount(self, value):
+        """
+        Sets the value of :py:attr:`minCount`.
+        """
+        return self._set(minCount=value)
+
+    @since("1.4.0")
+    def getMinCount(self):
+        """
+        Gets the value of minCount or its default value.
+        """
+        return self.getOrDefault(self.minCount)
+
+    @since("2.0.0")
+    def setWindowSize(self, value):
+        """
+        Sets the value of :py:attr:`windowSize`.
+        """
+        return self._set(windowSize=value)
+
+    @since("2.0.0")
+    def getWindowSize(self):
+        """
+        Gets the value of windowSize or its default value.
+        """
+        return self.getOrDefault(self.windowSize)
+
+    @since("2.0.0")
+    def setMaxSentenceLength(self, value):
+        """
+        Sets the value of :py:attr:`maxSentenceLength`.
+        """
+        return self._set(maxSentenceLength=value)
+
+    @since("2.0.0")
+    def getMaxSentenceLength(self):
+        """
+        Gets the value of maxSentenceLength or its default value.
+        """
+        return self.getOrDefault(self.maxSentenceLength)
+
+    def _create_model(self, java_model):
+        return Word2VecModel(java_model)
+
+
+class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`Word2Vec`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @since("1.5.0")
+    def getVectors(self):
+        """
+        Returns the vector representation of the words as a dataframe
+        with two fields, word and vector.
+        """
+        return self._call_java("getVectors")
+
+    @since("1.5.0")
+    def findSynonyms(self, word, num):
+        """
+        Find "num" number of words closest in similarity to "word".
+        word can be a string or vector representation.
+        Returns a dataframe with two fields, word and similarity, where
+        similarity is the cosine similarity to the given word.
+        """
+        if not isinstance(word, basestring):
+            word = _convert_to_vector(word)
+        return self._call_java("findSynonyms", word, num)
+
+    @since("2.3.0")
+    def findSynonymsArray(self, word, num):
+        """
+        Find "num" number of words closest in similarity to "word".
+        word can be a string or vector representation.
+        Returns a list of (word, similarity) tuples, where similarity is
+        the cosine similarity to the given word.
+        """
+        if not isinstance(word, basestring):
+            word = _convert_to_vector(word)
+        tuples = self._java_obj.findSynonymsArray(word, num)
+        return list(map(lambda st: (st._1(), st._2()), list(tuples)))
+
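+
+# A hedged usage sketch (not part of the upstream source): given an active
+# SparkSession ``spark``, a fitted Word2VecModel exposes synonyms both as a DataFrame
+# (findSynonyms) and as plain (word, cosine-similarity) tuples (findSynonymsArray).
+# The column and parameter names below are illustrative only.
+def _word2vec_synonyms_sketch(spark):
+    sent = ("a b " * 100 + "a c " * 10).split(" ")
+    doc = spark.createDataFrame([(sent,), (sent,)], ["sentence"])
+    model = Word2Vec(vectorSize=5, seed=42, inputCol="sentence", outputCol="vec").fit(doc)
+    model.findSynonyms("a", 2).show()        # DataFrame with columns word, similarity
+    return model.findSynonymsArray("a", 2)   # e.g. [(u'b', ...), (u'c', ...)]
+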
+
+@inherit_doc
+class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):
+    """
+    PCA trains a model to project vectors to a lower dimensional space of the
+    top :py:attr:`k` principal components.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),
+    ...     (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),
+    ...     (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]
+    >>> df = spark.createDataFrame(data,["features"])
+    >>> pca = PCA(k=2, inputCol="features", outputCol="pca_features")
+    >>> model = pca.fit(df)
+    >>> model.transform(df).collect()[0].pca_features
+    DenseVector([1.648..., -4.013...])
+    >>> model.explainedVariance
+    DenseVector([0.794..., 0.205...])
+    >>> pcaPath = temp_path + "/pca"
+    >>> pca.save(pcaPath)
+    >>> loadedPca = PCA.load(pcaPath)
+    >>> loadedPca.getK() == pca.getK()
+    True
+    >>> modelPath = temp_path + "/pca-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = PCAModel.load(modelPath)
+    >>> loadedModel.pc == model.pc
+    True
+    >>> loadedModel.explainedVariance == model.explainedVariance
+    True
+
+    .. versionadded:: 1.5.0
+    """
+
+    k = Param(Params._dummy(), "k", "the number of principal components",
+              typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, k=None, inputCol=None, outputCol=None):
+        """
+        __init__(self, k=None, inputCol=None, outputCol=None)
+        """
+        super(PCA, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.PCA", self.uid)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.5.0")
+    def setParams(self, k=None, inputCol=None, outputCol=None):
+        """
+        setParams(self, k=None, inputCol=None, outputCol=None)
+        Set params for this PCA.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.5.0")
+    def setK(self, value):
+        """
+        Sets the value of :py:attr:`k`.
+        """
+        return self._set(k=value)
+
+    @since("1.5.0")
+    def getK(self):
+        """
+        Gets the value of k or its default value.
+        """
+        return self.getOrDefault(self.k)
+
+    def _create_model(self, java_model):
+        return PCAModel(java_model)
+
+
+class PCAModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space.
+
+    .. versionadded:: 1.5.0
+    """
+
+    @property
+    @since("2.0.0")
+    def pc(self):
+        """
+        Returns a principal components Matrix.
+        Each column is one principal component.
+        """
+        return self._call_java("pc")
+
+    @property
+    @since("2.0.0")
+    def explainedVariance(self):
+        """
+        Returns a vector of proportions of variance
+        explained by each principal component.
+        """
+        return self._call_java("explainedVariance")
+
+
+@inherit_doc
+class RFormula(JavaEstimator, HasFeaturesCol, HasLabelCol, HasHandleInvalid,
+               JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Implements the transforms required for fitting a dataset against an
+    R model formula. Currently we support a limited subset of the R
+    operators, including '~', '.', ':', '+', and '-'. Also see the `R formula docs
+    <http://stat.ethz.ch/R-manual/R-patched/library/stats/html/formula.html>`_.
+
+    >>> df = spark.createDataFrame([
+    ...     (1.0, 1.0, "a"),
+    ...     (0.0, 2.0, "b"),
+    ...     (0.0, 0.0, "a")
+    ... ], ["y", "x", "s"])
+    >>> rf = RFormula(formula="y ~ x + s")
+    >>> model = rf.fit(df)
+    >>> model.transform(df).show()
+    +---+---+---+---------+-----+
+    |  y|  x|  s| features|label|
+    +---+---+---+---------+-----+
+    |1.0|1.0|  a|[1.0,1.0]|  1.0|
+    |0.0|2.0|  b|[2.0,0.0]|  0.0|
+    |0.0|0.0|  a|[0.0,1.0]|  0.0|
+    +---+---+---+---------+-----+
+    ...
+    >>> rf.fit(df, {rf.formula: "y ~ . - s"}).transform(df).show()
+    +---+---+---+--------+-----+
+    |  y|  x|  s|features|label|
+    +---+---+---+--------+-----+
+    |1.0|1.0|  a|   [1.0]|  1.0|
+    |0.0|2.0|  b|   [2.0]|  0.0|
+    |0.0|0.0|  a|   [0.0]|  0.0|
+    +---+---+---+--------+-----+
+    ...
+    >>> rFormulaPath = temp_path + "/rFormula"
+    >>> rf.save(rFormulaPath)
+    >>> loadedRF = RFormula.load(rFormulaPath)
+    >>> loadedRF.getFormula() == rf.getFormula()
+    True
+    >>> loadedRF.getFeaturesCol() == rf.getFeaturesCol()
+    True
+    >>> loadedRF.getLabelCol() == rf.getLabelCol()
+    True
+    >>> loadedRF.getHandleInvalid() == rf.getHandleInvalid()
+    True
+    >>> str(loadedRF)
+    'RFormula(y ~ x + s) (uid=...)'
+    >>> modelPath = temp_path + "/rFormulaModel"
+    >>> model.save(modelPath)
+    >>> loadedModel = RFormulaModel.load(modelPath)
+    >>> loadedModel.uid == model.uid
+    True
+    >>> loadedModel.transform(df).show()
+    +---+---+---+---------+-----+
+    |  y|  x|  s| features|label|
+    +---+---+---+---------+-----+
+    |1.0|1.0|  a|[1.0,1.0]|  1.0|
+    |0.0|2.0|  b|[2.0,0.0]|  0.0|
+    |0.0|0.0|  a|[0.0,1.0]|  0.0|
+    +---+---+---+---------+-----+
+    ...
+    >>> str(loadedModel)
+    'RFormulaModel(ResolvedRFormula(label=y, terms=[x,s], hasIntercept=true)) (uid=...)'
+
+    .. versionadded:: 1.5.0
+    """
+
+    formula = Param(Params._dummy(), "formula", "R model formula",
+                    typeConverter=TypeConverters.toString)
+
+    forceIndexLabel = Param(Params._dummy(), "forceIndexLabel",
+                            "Force to index label whether it is numeric or string",
+                            typeConverter=TypeConverters.toBoolean)
+
+    stringIndexerOrderType = Param(Params._dummy(), "stringIndexerOrderType",
+                                   "How to order categories of a string feature column used by " +
+                                   "StringIndexer. The last category after ordering is dropped " +
+                                   "when encoding strings. Supported options: frequencyDesc, " +
+                                   "frequencyAsc, alphabetDesc, alphabetAsc. The default value " +
+                                   "is frequencyDesc. When the ordering is set to alphabetDesc, " +
+                                   "RFormula drops the same category as R when encoding strings.",
+                                   typeConverter=TypeConverters.toString)
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. " +
+                          "Options are 'skip' (filter out rows with invalid values), " +
+                          "'error' (throw an error), or 'keep' (put invalid data in a special " +
+                          "additional bucket, at index numLabels).",
+                          typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, formula=None, featuresCol="features", labelCol="label",
+                 forceIndexLabel=False, stringIndexerOrderType="frequencyDesc",
+                 handleInvalid="error"):
+        """
+        __init__(self, formula=None, featuresCol="features", labelCol="label", \
+                 forceIndexLabel=False, stringIndexerOrderType="frequencyDesc", \
+                 handleInvalid="error")
+        """
+        super(RFormula, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RFormula", self.uid)
+        self._setDefault(forceIndexLabel=False, stringIndexerOrderType="frequencyDesc",
+                         handleInvalid="error")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.5.0")
+    def setParams(self, formula=None, featuresCol="features", labelCol="label",
+                  forceIndexLabel=False, stringIndexerOrderType="frequencyDesc",
+                  handleInvalid="error"):
+        """
+        setParams(self, formula=None, featuresCol="features", labelCol="label", \
+                  forceIndexLabel=False, stringIndexerOrderType="frequencyDesc", \
+                  handleInvalid="error")
+        Sets params for RFormula.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.5.0")
+    def setFormula(self, value):
+        """
+        Sets the value of :py:attr:`formula`.
+        """
+        return self._set(formula=value)
+
+    @since("1.5.0")
+    def getFormula(self):
+        """
+        Gets the value of :py:attr:`formula`.
+        """
+        return self.getOrDefault(self.formula)
+
+    @since("2.1.0")
+    def setForceIndexLabel(self, value):
+        """
+        Sets the value of :py:attr:`forceIndexLabel`.
+        """
+        return self._set(forceIndexLabel=value)
+
+    @since("2.1.0")
+    def getForceIndexLabel(self):
+        """
+        Gets the value of :py:attr:`forceIndexLabel`.
+        """
+        return self.getOrDefault(self.forceIndexLabel)
+
+    @since("2.3.0")
+    def setStringIndexerOrderType(self, value):
+        """
+        Sets the value of :py:attr:`stringIndexerOrderType`.
+        """
+        return self._set(stringIndexerOrderType=value)
+
+    @since("2.3.0")
+    def getStringIndexerOrderType(self):
+        """
+        Gets the value of :py:attr:`stringIndexerOrderType` or its default value 'frequencyDesc'.
+        """
+        return self.getOrDefault(self.stringIndexerOrderType)
+
+    def _create_model(self, java_model):
+        return RFormulaModel(java_model)
+
+    def __str__(self):
+        formulaStr = self.getFormula() if self.isDefined(self.formula) else ""
+        return "RFormula(%s) (uid=%s)" % (formulaStr, self.uid)
+
+
+class RFormulaModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Model fitted by :py:class:`RFormula`. Fitting is required to determine the
+    factor levels of formula terms.
+
+    .. versionadded:: 1.5.0
+    """
+
+    def __str__(self):
+        resolvedFormula = self._call_java("resolvedFormula")
+        return "RFormulaModel(%s) (uid=%s)" % (resolvedFormula, self.uid)
+
+
+@inherit_doc
+class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, JavaMLReadable,
+                    JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Chi-Squared feature selection, which selects categorical features to use for predicting a
+    categorical label.
+    The selector supports different selection methods: `numTopFeatures`, `percentile`, `fpr`,
+    `fdr`, `fwe`.
+
+     * `numTopFeatures` chooses a fixed number of top features according to a chi-squared test.
+
+     * `percentile` is similar but chooses a fraction of all features
+       instead of a fixed number.
+
+     * `fpr` chooses all features whose p-values are below a threshold,
+       thus controlling the false positive rate of selection.
+
+     * `fdr` uses the `Benjamini-Hochberg procedure <https://en.wikipedia.org/wiki/
+       False_discovery_rate#Benjamini.E2.80.93Hochberg_procedure>`_
+       to choose all features whose false discovery rate is below a threshold.
+
+     * `fwe` chooses all features whose p-values are below a threshold. The threshold is scaled by
+       1/numFeatures, thus controlling the family-wise error rate of selection.
+
+    By default, the selection method is `numTopFeatures`, with the default number of top features
+    set to 50.
+
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame(
+    ...    [(Vectors.dense([0.0, 0.0, 18.0, 1.0]), 1.0),
+    ...     (Vectors.dense([0.0, 1.0, 12.0, 0.0]), 0.0),
+    ...     (Vectors.dense([1.0, 0.0, 15.0, 0.1]), 0.0)],
+    ...    ["features", "label"])
+    >>> selector = ChiSqSelector(numTopFeatures=1, outputCol="selectedFeatures")
+    >>> model = selector.fit(df)
+    >>> model.transform(df).head().selectedFeatures
+    DenseVector([18.0])
+    >>> model.selectedFeatures
+    [2]
+    >>> chiSqSelectorPath = temp_path + "/chi-sq-selector"
+    >>> selector.save(chiSqSelectorPath)
+    >>> loadedSelector = ChiSqSelector.load(chiSqSelectorPath)
+    >>> loadedSelector.getNumTopFeatures() == selector.getNumTopFeatures()
+    True
+    >>> modelPath = temp_path + "/chi-sq-selector-model"
+    >>> model.save(modelPath)
+    >>> loadedModel = ChiSqSelectorModel.load(modelPath)
+    >>> loadedModel.selectedFeatures == model.selectedFeatures
+    True
+
+    .. versionadded:: 2.0.0
+    """
+
+    selectorType = Param(Params._dummy(), "selectorType",
+                         "The selector type of the ChiSqSelector. " +
+                         "Supported options: numTopFeatures (default), percentile, fpr, fdr, fwe.",
+                         typeConverter=TypeConverters.toString)
+
+    numTopFeatures = \
+        Param(Params._dummy(), "numTopFeatures",
+              "Number of features that selector will select, ordered by ascending p-value. " +
+              "If the number of features is < numTopFeatures, then this will select " +
+              "all features.", typeConverter=TypeConverters.toInt)
+
+    percentile = Param(Params._dummy(), "percentile", "Percentile of features that selector " +
+                       "will select, ordered by ascending p-value.",
+                       typeConverter=TypeConverters.toFloat)
+
+    fpr = Param(Params._dummy(), "fpr", "The highest p-value for features to be kept.",
+                typeConverter=TypeConverters.toFloat)
+
+    fdr = Param(Params._dummy(), "fdr", "The upper bound of the expected false discovery rate.",
+                typeConverter=TypeConverters.toFloat)
+
+    fwe = Param(Params._dummy(), "fwe", "The upper bound of the expected family-wise error rate.",
+                typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, numTopFeatures=50, featuresCol="features", outputCol=None,
+                 labelCol="label", selectorType="numTopFeatures", percentile=0.1, fpr=0.05,
+                 fdr=0.05, fwe=0.05):
+        """
+        __init__(self, numTopFeatures=50, featuresCol="features", outputCol=None, \
+                 labelCol="label", selectorType="numTopFeatures", percentile=0.1, fpr=0.05, \
+                 fdr=0.05, fwe=0.05)
+        """
+        super(ChiSqSelector, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.ChiSqSelector", self.uid)
+        self._setDefault(numTopFeatures=50, selectorType="numTopFeatures", percentile=0.1,
+                         fpr=0.05, fdr=0.05, fwe=0.05)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, numTopFeatures=50, featuresCol="features", outputCol=None,
+                  labelCol="label", selectorType="numTopFeatures", percentile=0.1, fpr=0.05,
+                  fdr=0.05, fwe=0.05):
+        """
+        setParams(self, numTopFeatures=50, featuresCol="features", outputCol=None, \
+                  labelCol="label", selectorType="numTopFeatures", percentile=0.1, fpr=0.05, \
+                  fdr=0.05, fwe=0.05)
+        Sets params for this ChiSqSelector.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.1.0")
+    def setSelectorType(self, value):
+        """
+        Sets the value of :py:attr:`selectorType`.
+        """
+        return self._set(selectorType=value)
+
+    @since("2.1.0")
+    def getSelectorType(self):
+        """
+        Gets the value of selectorType or its default value.
+        """
+        return self.getOrDefault(self.selectorType)
+
+    @since("2.0.0")
+    def setNumTopFeatures(self, value):
+        """
+        Sets the value of :py:attr:`numTopFeatures`.
+        Only applicable when selectorType = "numTopFeatures".
+        """
+        return self._set(numTopFeatures=value)
+
+    @since("2.0.0")
+    def getNumTopFeatures(self):
+        """
+        Gets the value of numTopFeatures or its default value.
+        """
+        return self.getOrDefault(self.numTopFeatures)
+
+    @since("2.1.0")
+    def setPercentile(self, value):
+        """
+        Sets the value of :py:attr:`percentile`.
+        Only applicable when selectorType = "percentile".
+        """
+        return self._set(percentile=value)
+
+    @since("2.1.0")
+    def getPercentile(self):
+        """
+        Gets the value of percentile or its default value.
+        """
+        return self.getOrDefault(self.percentile)
+
+    @since("2.1.0")
+    def setFpr(self, value):
+        """
+        Sets the value of :py:attr:`fpr`.
+        Only applicable when selectorType = "fpr".
+        """
+        return self._set(fpr=value)
+
+    @since("2.1.0")
+    def getFpr(self):
+        """
+        Gets the value of fpr or its default value.
+        """
+        return self.getOrDefault(self.fpr)
+
+    @since("2.2.0")
+    def setFdr(self, value):
+        """
+        Sets the value of :py:attr:`fdr`.
+        Only applicable when selectorType = "fdr".
+        """
+        return self._set(fdr=value)
+
+    @since("2.2.0")
+    def getFdr(self):
+        """
+        Gets the value of fdr or its default value.
+        """
+        return self.getOrDefault(self.fdr)
+
+    @since("2.2.0")
+    def setFwe(self, value):
+        """
+        Sets the value of :py:attr:`fwe`.
+        Only applicable when selectorType = "fwe".
+        """
+        return self._set(fwe=value)
+
+    @since("2.2.0")
+    def getFwe(self):
+        """
+        Gets the value of fwe or its default value.
+        """
+        return self.getOrDefault(self.fwe)
+
+    def _create_model(self, java_model):
+        return ChiSqSelectorModel(java_model)
+
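+
+# A hedged sketch (not part of the upstream source): the selection methods described
+# in the ChiSqSelector docstring above are chosen via the ``selectorType`` param, so
+# the same estimator can be re-fit with, e.g., a false-positive-rate threshold instead
+# of a fixed top-k. ``df`` is assumed to have "features" and "label" columns.
+def _chisq_selector_fpr_sketch(df):
+    selector = ChiSqSelector(selectorType="fpr", fpr=0.05, outputCol="selectedFeatures")
+    return selector.fit(df).transform(df)
+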
+
+class ChiSqSelectorModel(JavaModel, JavaMLReadable, JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    Model fitted by :py:class:`ChiSqSelector`.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def selectedFeatures(self):
+        """
+        List of indices to select (filter).
+        """
+        return self._call_java("selectedFeatures")
+
+
+@inherit_doc
+class VectorSizeHint(JavaTransformer, HasInputCol, HasHandleInvalid, JavaMLReadable,
+                     JavaMLWritable):
+    """
+    .. note:: Experimental
+
+    A feature transformer that adds size information to the metadata of a vector column.
+    VectorAssembler needs size information for its input columns and cannot be used on streaming
+    dataframes without this metadata.
+
+    .. note:: VectorSizeHint modifies `inputCol` to include size metadata and does not have an
+        outputCol.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> from pyspark.ml import Pipeline, PipelineModel
+    >>> data = [(Vectors.dense([1., 2., 3.]), 4.)]
+    >>> df = spark.createDataFrame(data, ["vector", "float"])
+    >>>
+    >>> sizeHint = VectorSizeHint(inputCol="vector", size=3, handleInvalid="skip")
+    >>> vecAssembler = VectorAssembler(inputCols=["vector", "float"], outputCol="assembled")
+    >>> pipeline = Pipeline(stages=[sizeHint, vecAssembler])
+    >>>
+    >>> pipelineModel = pipeline.fit(df)
+    >>> pipelineModel.transform(df).head().assembled
+    DenseVector([1.0, 2.0, 3.0, 4.0])
+    >>> vectorSizeHintPath = temp_path + "/vector-size-hint-pipeline"
+    >>> pipelineModel.save(vectorSizeHintPath)
+    >>> loadedPipeline = PipelineModel.load(vectorSizeHintPath)
+    >>> loaded = loadedPipeline.transform(df).head().assembled
+    >>> expected = pipelineModel.transform(df).head().assembled
+    >>> loaded == expected
+    True
+
+    .. versionadded:: 2.3.0
+    """
+
+    size = Param(Params._dummy(), "size", "Size of vectors in column.",
+                 typeConverter=TypeConverters.toInt)
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid",
+                          "How to handle invalid vectors in inputCol. Invalid vectors include "
+                          "nulls and vectors with the wrong size. The options are `skip` (filter "
+                          "out rows with invalid vectors), `error` (throw an error) and "
+                          "`optimistic` (do not check the vector size, and keep all rows). "
+                          "`error` by default.",
+                          typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, inputCol=None, size=None, handleInvalid="error"):
+        """
+        __init__(self, inputCol=None, size=None, handleInvalid="error")
+        """
+        super(VectorSizeHint, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorSizeHint", self.uid)
+        self._setDefault(handleInvalid="error")
+        self.setParams(**self._input_kwargs)
+
+    @keyword_only
+    @since("2.3.0")
+    def setParams(self, inputCol=None, size=None, handleInvalid="error"):
+        """
+        setParams(self, inputCol=None, size=None, handleInvalid="error")
+        Sets params for this VectorSizeHint.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.3.0")
+    def getSize(self):
+        """ Gets size param, the size of vectors in `inputCol`."""
+        return self.getOrDefault(self.size)
+
+    @since("2.3.0")
+    def setSize(self, value):
+        """ Sets size param, the size of vectors in `inputCol`."""
+        return self._set(size=value)
+
+
+if __name__ == "__main__":
+    import doctest
+    import tempfile
+
+    import pyspark.ml.feature
+    from pyspark.sql import Row, SparkSession
+
+    globs = globals().copy()
+    features = pyspark.ml.feature.__dict__.copy()
+    globs.update(features)
+
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.feature tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    testData = sc.parallelize([Row(id=0, label="a"), Row(id=1, label="b"),
+                               Row(id=2, label="c"), Row(id=3, label="a"),
+                               Row(id=4, label="a"), Row(id=5, label="c")], 2)
+    globs['stringIndDf'] = spark.createDataFrame(testData)
+    temp_path = tempfile.mkdtemp()
+    globs['temp_path'] = temp_path
+    try:
+        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        spark.stop()
+    finally:
+        from shutil import rmtree
+        try:
+            rmtree(temp_path)
+        except OSError:
+            pass
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/fpm.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/fpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..886ad8409ca66fd1002cf905635306cf70a0e23b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/fpm.py
@@ -0,0 +1,349 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pyspark import keyword_only, since
+from pyspark.sql import DataFrame
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, _jvm
+from pyspark.ml.param.shared import *
+
+__all__ = ["FPGrowth", "FPGrowthModel", "PrefixSpan"]
+
+
+class HasMinSupport(Params):
+    """
+    Mixin for param minSupport.
+    """
+
+    minSupport = Param(
+        Params._dummy(),
+        "minSupport",
+        "Minimal support level of the frequent pattern. [0.0, 1.0]. " +
+        "Any pattern that appears more than (minSupport * size-of-the-dataset) " +
+        "times will be output in the frequent itemsets.",
+        typeConverter=TypeConverters.toFloat)
+
+    def setMinSupport(self, value):
+        """
+        Sets the value of :py:attr:`minSupport`.
+        """
+        return self._set(minSupport=value)
+
+    def getMinSupport(self):
+        """
+        Gets the value of minSupport or its default value.
+        """
+        return self.getOrDefault(self.minSupport)
+
+
+class HasNumPartitions(Params):
+    """
+    Mixin for param numPartitions: Number of partitions (at least 1) used by parallel FP-growth.
+    """
+
+    numPartitions = Param(
+        Params._dummy(),
+        "numPartitions",
+        "Number of partitions (at least 1) used by parallel FP-growth. " +
+        "By default the param is not set, " +
+        "and partition number of the input dataset is used.",
+        typeConverter=TypeConverters.toInt)
+
+    def setNumPartitions(self, value):
+        """
+        Sets the value of :py:attr:`numPartitions`.
+        """
+        return self._set(numPartitions=value)
+
+    def getNumPartitions(self):
+        """
+        Gets the value of :py:attr:`numPartitions` or its default value.
+        """
+        return self.getOrDefault(self.numPartitions)
+
+
+class HasMinConfidence(Params):
+    """
+    Mixin for param minConfidence.
+    """
+
+    minConfidence = Param(
+        Params._dummy(),
+        "minConfidence",
+        "Minimal confidence for generating Association Rule. [0.0, 1.0]. " +
+        "minConfidence will not affect the mining for frequent itemsets, " +
+        "but will affect the association rules generation.",
+        typeConverter=TypeConverters.toFloat)
+
+    def setMinConfidence(self, value):
+        """
+        Sets the value of :py:attr:`minConfidence`.
+        """
+        return self._set(minConfidence=value)
+
+    def getMinConfidence(self):
+        """
+        Gets the value of minConfidence or its default value.
+        """
+        return self.getOrDefault(self.minConfidence)
+
+
+class HasItemsCol(Params):
+    """
+    Mixin for param itemsCol: items column name.
+    """
+
+    itemsCol = Param(Params._dummy(), "itemsCol",
+                     "items column name", typeConverter=TypeConverters.toString)
+
+    def setItemsCol(self, value):
+        """
+        Sets the value of :py:attr:`itemsCol`.
+        """
+        return self._set(itemsCol=value)
+
+    def getItemsCol(self):
+        """
+        Gets the value of itemsCol or its default value.
+        """
+        return self.getOrDefault(self.itemsCol)
+
+
+class FPGrowthModel(JavaModel, JavaMLWritable, JavaMLReadable):
+    """
+    .. note:: Experimental
+
+    Model fitted by FPGrowth.
+
+    .. versionadded:: 2.2.0
+    """
+    @property
+    @since("2.2.0")
+    def freqItemsets(self):
+        """
+        DataFrame with two columns:
+        * `items` - Itemset of the same type as the input column.
+        * `freq`  - Frequency of the itemset (`LongType`).
+        """
+        return self._call_java("freqItemsets")
+
+    @property
+    @since("2.2.0")
+    def associationRules(self):
+        """
+        DataFrame with four columns:
+        * `antecedent`  - Array of the same type as the input column.
+        * `consequent`  - Array of the same type as the input column.
+        * `confidence`  - Confidence for the rule (`DoubleType`).
+        * `lift`        - Lift for the rule (`DoubleType`).
+        """
+        return self._call_java("associationRules")
+
+
+class FPGrowth(JavaEstimator, HasItemsCol, HasPredictionCol,
+               HasMinSupport, HasNumPartitions, HasMinConfidence,
+               JavaMLWritable, JavaMLReadable):
+
+    r"""
+    .. note:: Experimental
+
+    A parallel FP-growth algorithm to mine frequent itemsets. The algorithm is described in
+    Li et al., PFP: Parallel FP-Growth for Query Recommendation [LI2008]_.
+    PFP distributes computation in such a way that each worker executes an
+    independent group of mining tasks. The FP-Growth algorithm is described in
+    Han et al., Mining frequent patterns without candidate generation [HAN2000]_
+
+    .. [LI2008] http://dx.doi.org/10.1145/1454008.1454027
+    .. [HAN2000] http://dx.doi.org/10.1145/335191.335372
+
+    .. note:: null values in the feature column are ignored during fit().
+    .. note:: Internally, `transform` collects and broadcasts the association rules.
+
+    >>> from pyspark.sql.functions import split
+    >>> data = (spark.read
+    ...     .text("data/mllib/sample_fpgrowth.txt")
+    ...     .select(split("value", "\s+").alias("items")))
+    >>> data.show(truncate=False)
+    +------------------------+
+    |items                   |
+    +------------------------+
+    |[r, z, h, k, p]         |
+    |[z, y, x, w, v, u, t, s]|
+    |[s, x, o, n, r]         |
+    |[x, z, y, m, t, s, q, e]|
+    |[z]                     |
+    |[x, z, y, r, q, t, p]   |
+    +------------------------+
+    >>> fp = FPGrowth(minSupport=0.2, minConfidence=0.7)
+    >>> fpm = fp.fit(data)
+    >>> fpm.freqItemsets.show(5)
+    +---------+----+
+    |    items|freq|
+    +---------+----+
+    |      [s]|   3|
+    |   [s, x]|   3|
+    |[s, x, z]|   2|
+    |   [s, z]|   2|
+    |      [r]|   3|
+    +---------+----+
+    only showing top 5 rows
+    >>> fpm.associationRules.show(5)
+    +----------+----------+----------+
+    |antecedent|consequent|confidence|
+    +----------+----------+----------+
+    |    [t, s]|       [y]|       1.0|
+    |    [t, s]|       [x]|       1.0|
+    |    [t, s]|       [z]|       1.0|
+    |       [p]|       [r]|       1.0|
+    |       [p]|       [z]|       1.0|
+    +----------+----------+----------+
+    only showing top 5 rows
+    >>> new_data = spark.createDataFrame([(["t", "s"], )], ["items"])
+    >>> sorted(fpm.transform(new_data).first().prediction)
+    ['x', 'y', 'z']
+
+    .. versionadded:: 2.2.0
+    """
+    @keyword_only
+    def __init__(self, minSupport=0.3, minConfidence=0.8, itemsCol="items",
+                 predictionCol="prediction", numPartitions=None):
+        """
+        __init__(self, minSupport=0.3, minConfidence=0.8, itemsCol="items", \
+                 predictionCol="prediction", numPartitions=None)
+        """
+        super(FPGrowth, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.fpm.FPGrowth", self.uid)
+        self._setDefault(minSupport=0.3, minConfidence=0.8,
+                         itemsCol="items", predictionCol="prediction")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.2.0")
+    def setParams(self, minSupport=0.3, minConfidence=0.8, itemsCol="items",
+                  predictionCol="prediction", numPartitions=None):
+        """
+        setParams(self, minSupport=0.3, minConfidence=0.8, itemsCol="items", \
+                  predictionCol="prediction", numPartitions=None)
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return FPGrowthModel(java_model)
+
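+
+# A hedged sketch (not part of the upstream source): the Has* mixins above give
+# FPGrowth chainable setters, so params can also be set after construction.
+# ``items_df`` is assumed to be a DataFrame with an array-typed "items" column.
+def _fpgrowth_setters_sketch(items_df):
+    fp = FPGrowth().setMinSupport(0.2).setMinConfidence(0.7).setItemsCol("items")
+    model = fp.fit(items_df)
+    return model.freqItemsets, model.associationRules
+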
+
+class PrefixSpan(JavaParams):
+    """
+    .. note:: Experimental
+
+    A parallel PrefixSpan algorithm to mine frequent sequential patterns.
+    The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: Mining Sequential Patterns
+    Efficiently by Prefix-Projected Pattern Growth
+    (see `Pei et al., 2001 <http://doi.org/10.1109/ICDE.2001.914830>`_).
+    This class is not yet an Estimator/Transformer; use the
+    :py:func:`findFrequentSequentialPatterns` method to run the PrefixSpan algorithm.
+
+    .. seealso:: `Sequential Pattern Mining (Wikipedia)
+        <https://en.wikipedia.org/wiki/Sequential_Pattern_Mining>`_
+
+    .. versionadded:: 2.4.0
+    """
+
+    minSupport = Param(Params._dummy(), "minSupport", "The minimal support level of the " +
+                       "sequential pattern. Sequential pattern that appears more than " +
+                       "(minSupport * size-of-the-dataset) times will be output. Must be >= 0.",
+                       typeConverter=TypeConverters.toFloat)
+
+    maxPatternLength = Param(Params._dummy(), "maxPatternLength",
+                             "The maximal length of the sequential pattern. Must be > 0.",
+                             typeConverter=TypeConverters.toInt)
+
+    maxLocalProjDBSize = Param(Params._dummy(), "maxLocalProjDBSize",
+                               "The maximum number of items (including delimiters used in the " +
+                               "internal storage format) allowed in a projected database before " +
+                               "local processing. If a projected database exceeds this size, " +
+                               "another iteration of distributed prefix growth is run. " +
+                               "Must be > 0.",
+                               typeConverter=TypeConverters.toInt)
+
+    sequenceCol = Param(Params._dummy(), "sequenceCol", "The name of the sequence column in " +
+                        "dataset, rows with nulls in this column are ignored.",
+                        typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000,
+                 sequenceCol="sequence"):
+        """
+        __init__(self, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000, \
+                 sequenceCol="sequence")
+        """
+        super(PrefixSpan, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.fpm.PrefixSpan", self.uid)
+        self._setDefault(minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000,
+                         sequenceCol="sequence")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.4.0")
+    def setParams(self, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000,
+                  sequenceCol="sequence"):
+        """
+        setParams(self, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000, \
+                  sequenceCol="sequence")
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.4.0")
+    def findFrequentSequentialPatterns(self, dataset):
+        """
+        .. note:: Experimental
+
+        Finds the complete set of frequent sequential patterns in the input sequences of itemsets.
+
+        :param dataset: A dataframe containing a sequence column which is of
+                        `ArrayType(ArrayType(T))` type, where T is the item type of the
+                        input dataset.
+        :return: A `DataFrame` that contains columns of sequence and corresponding frequency.
+                 Its schema will be:
+                 - `sequence: ArrayType(ArrayType(T))` (T is the item type)
+                 - `freq: Long`
+
+        >>> from pyspark.ml.fpm import PrefixSpan
+        >>> from pyspark.sql import Row
+        >>> df = sc.parallelize([Row(sequence=[[1, 2], [3]]),
+        ...                      Row(sequence=[[1], [3, 2], [1, 2]]),
+        ...                      Row(sequence=[[1, 2], [5]]),
+        ...                      Row(sequence=[[6]])]).toDF()
+        >>> prefixSpan = PrefixSpan(minSupport=0.5, maxPatternLength=5)
+        >>> prefixSpan.findFrequentSequentialPatterns(df).sort("sequence").show(truncate=False)
+        +----------+----+
+        |sequence  |freq|
+        +----------+----+
+        |[[1]]     |3   |
+        |[[1], [3]]|2   |
+        |[[1, 2]]  |3   |
+        |[[2]]     |3   |
+        |[[3]]     |2   |
+        +----------+----+
+
+        .. versionadded:: 2.4.0
+        """
+        self._transfer_params_to_java()
+        jdf = self._java_obj.findFrequentSequentialPatterns(dataset._jdf)
+        return DataFrame(jdf, dataset.sql_ctx)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/image.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/image.py
new file mode 100644
index 0000000000000000000000000000000000000000..edb90a3578546015fc183a0e994f48de48c08308
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/image.py
@@ -0,0 +1,268 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+.. attribute:: ImageSchema
+
+    An attribute of this module that contains the instance of :class:`_ImageSchema`.
+
+.. autoclass:: _ImageSchema
+   :members:
+"""
+
+import sys
+import warnings
+
+import numpy as np
+
+from pyspark import SparkContext
+from pyspark.sql.types import Row, _create_row, _parse_datatype_json_string
+from pyspark.sql import DataFrame, SparkSession
+
+__all__ = ["ImageSchema"]
+
+
+class _ImageSchema(object):
+    """
+    Internal class for `pyspark.ml.image.ImageSchema` attribute. Meant to be private and
+    not to be instantiated. Use the `pyspark.ml.image.ImageSchema` attribute to access the
+    APIs of this class.
+    """
+
+    def __init__(self):
+        self._imageSchema = None
+        self._ocvTypes = None
+        self._columnSchema = None
+        self._imageFields = None
+        self._undefinedImageType = None
+
+    @property
+    def imageSchema(self):
+        """
+        Returns the image schema.
+
+        :return: a :class:`StructType` with a single column of images
+               named "image" (nullable) and having the same type returned by :meth:`columnSchema`.
+
+        .. versionadded:: 2.3.0
+        """
+
+        if self._imageSchema is None:
+            ctx = SparkContext._active_spark_context
+            jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageSchema()
+            self._imageSchema = _parse_datatype_json_string(jschema.json())
+        return self._imageSchema
+
+    @property
+    def ocvTypes(self):
+        """
+        Returns the OpenCV type mapping supported.
+
+        :return: a dictionary containing the OpenCV type mapping supported.
+
+        .. versionadded:: 2.3.0
+        """
+
+        if self._ocvTypes is None:
+            ctx = SparkContext._active_spark_context
+            self._ocvTypes = dict(ctx._jvm.org.apache.spark.ml.image.ImageSchema.javaOcvTypes())
+        return self._ocvTypes
+
+    @property
+    def columnSchema(self):
+        """
+        Returns the schema for the image column.
+
+        :return: a :class:`StructType` for image column,
+            ``struct<origin:string, height:int, width:int, nChannels:int, mode:int, data:binary>``.
+
+        .. versionadded:: 2.4.0
+        """
+
+        if self._columnSchema is None:
+            ctx = SparkContext._active_spark_context
+            jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.columnSchema()
+            self._columnSchema = _parse_datatype_json_string(jschema.json())
+        return self._columnSchema
+
+    @property
+    def imageFields(self):
+        """
+        Returns field names of image columns.
+
+        :return: a list of field names.
+
+        .. versionadded:: 2.3.0
+        """
+
+        if self._imageFields is None:
+            ctx = SparkContext._active_spark_context
+            self._imageFields = list(ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageFields())
+        return self._imageFields
+
+    @property
+    def undefinedImageType(self):
+        """
+        Returns the name of undefined image type for the invalid image.
+
+        .. versionadded:: 2.3.0
+        """
+
+        if self._undefinedImageType is None:
+            ctx = SparkContext._active_spark_context
+            self._undefinedImageType = \
+                ctx._jvm.org.apache.spark.ml.image.ImageSchema.undefinedImageType()
+        return self._undefinedImageType
+
+    def toNDArray(self, image):
+        """
+        Converts an image to an array with metadata.
+
+        :param `Row` image: A row that contains the image to be converted. It should
+            have the attributes specified in `ImageSchema.imageSchema`.
+        :return: a `numpy.ndarray` that is an image.
+
+        .. versionadded:: 2.3.0
+        """
+
+        if not isinstance(image, Row):
+            raise TypeError(
+                "image argument should be pyspark.sql.types.Row; however, "
+                "it got [%s]." % type(image))
+
+        if any(not hasattr(image, f) for f in self.imageFields):
+            raise ValueError(
+                "image argument should have attributes specified in "
+                "ImageSchema.imageSchema [%s]." % ", ".join(self.imageFields))
+
+        height = image.height
+        width = image.width
+        nChannels = image.nChannels
+        return np.ndarray(
+            shape=(height, width, nChannels),
+            dtype=np.uint8,
+            buffer=image.data,
+            strides=(width * nChannels, nChannels, 1))
+
+    def toImage(self, array, origin=""):
+        """
+        Converts an array with metadata to a two-dimensional image.
+
+        :param `numpy.ndarray` array: The array to convert to image.
+        :param str origin: Path to the image, optional.
+        :return: a :class:`Row` that is a two dimensional image.
+
+        .. versionadded:: 2.3.0
+        """
+
+        if not isinstance(array, np.ndarray):
+            raise TypeError(
+                "array argument should be numpy.ndarray; however, it got [%s]." % type(array))
+
+        if array.ndim != 3:
+            raise ValueError("Invalid array shape")
+
+        height, width, nChannels = array.shape
+        ocvTypes = ImageSchema.ocvTypes
+        if nChannels == 1:
+            mode = ocvTypes["CV_8UC1"]
+        elif nChannels == 3:
+            mode = ocvTypes["CV_8UC3"]
+        elif nChannels == 4:
+            mode = ocvTypes["CV_8UC4"]
+        else:
+            raise ValueError("Invalid number of channels")
+
+        # Running `bytearray(numpy.array([1]))` fails in specific Python versions
+        # with a specific NumPy version, for example in Python 3.6.0 and NumPy 1.13.3.
+        # We avoid this by converting the array to bytes first.
+        data = bytearray(array.astype(dtype=np.uint8).ravel().tobytes())
+
+        # Creating new Row with _create_row(), because Row(name = value, ... )
+        # orders fields by name, which conflicts with expected schema order
+        # when the new DataFrame is created by UDF
+        return _create_row(self.imageFields,
+                           [origin, height, width, nChannels, mode, data])
+
+    def readImages(self, path, recursive=False, numPartitions=-1,
+                   dropImageFailures=False, sampleRatio=1.0, seed=0):
+        """
+        Reads the directory of images from the local or remote source.
+
+        .. note:: If multiple jobs are run in parallel with different sampleRatio or recursive flag,
+            there may be a race condition where one job overwrites the hadoop configs of another.
+
+        .. note:: If sample ratio is less than 1, sampling uses a PathFilter that is efficient but
+            potentially non-deterministic.
+
+        .. note:: Deprecated in 2.4.0. Use `spark.read.format("image").load(path)` instead;
+            this `readImages` method will be removed in 3.0.0.
+
+        :param str path: Path to the image directory.
+        :param bool recursive: Recursive search flag.
+        :param int numPartitions: Number of DataFrame partitions.
+        :param bool dropImageFailures: Drop the files that are not valid images.
+        :param float sampleRatio: Fraction of the images loaded.
+        :param int seed: Random number seed.
+        :return: a :class:`DataFrame` with a single column of "images",
+               see ImageSchema for details.
+
+        >>> df = ImageSchema.readImages('data/mllib/images/origin/kittens', recursive=True)
+        >>> df.count()
+        5
+
+        .. versionadded:: 2.3.0
+        """
+        warnings.warn("`ImageSchema.readImage` is deprecated. " +
+                      "Use `spark.read.format(\"image\").load(path)` instead.", DeprecationWarning)
+        spark = SparkSession.builder.getOrCreate()
+        image_schema = spark._jvm.org.apache.spark.ml.image.ImageSchema
+        jsession = spark._jsparkSession
+        jresult = image_schema.readImages(path, jsession, recursive, numPartitions,
+                                          dropImageFailures, float(sampleRatio), seed)
+        return DataFrame(jresult, spark._wrapped)
+
+
+ImageSchema = _ImageSchema()
+
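+
+# A hedged sketch (not part of the upstream source): round-trip a NumPy image through
+# the Row representation used by the image schema. toImage/toNDArray need an active
+# SparkContext because field names and OpenCV type codes are looked up in the JVM.
+def _image_roundtrip_sketch():
+    arr = np.zeros((4, 6, 3), dtype=np.uint8)        # height x width x nChannels
+    row = ImageSchema.toImage(arr, origin="memory")  # Row(origin, height, width, ...)
+    back = ImageSchema.toNDArray(row)                # numpy.ndarray again
+    return np.array_equal(arr, back)                 # expected: True
+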
+
+# Monkey patch to disallow instantiation of this class.
+def _disallow_instance(_):
+    raise RuntimeError("Creating instance of _ImageSchema class is disallowed.")
+_ImageSchema.__init__ = _disallow_instance
+
+
+def _test():
+    import doctest
+    import pyspark.ml.image
+    globs = pyspark.ml.image.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.image tests")\
+        .getOrCreate()
+    globs['spark'] = spark
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.ml.image, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/linalg/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/linalg/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2548fd0f50b332cb094f6858b81edb01b380c130
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/linalg/__init__.py
@@ -0,0 +1,1169 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+MLlib utilities for linear algebra. For dense vectors, MLlib
+uses the NumPy ``array`` type, so you can simply pass NumPy arrays
+around. For sparse vectors, users can construct a :py:class:`SparseVector`
+object from MLlib or pass SciPy ``scipy.sparse`` column vectors if
+SciPy is available in their environment.
+"""
+
+import sys
+import array
+import struct
+
+if sys.version >= '3':
+    basestring = str
+    xrange = range
+    import copyreg as copy_reg
+    long = int
+else:
+    from itertools import izip as zip
+    import copy_reg
+
+import numpy as np
+
+from pyspark import since
+from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \
+    IntegerType, ByteType, BooleanType
+
+
+__all__ = ['Vector', 'DenseVector', 'SparseVector', 'Vectors',
+           'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices']
+
+
+if sys.version_info[:2] == (2, 7):
+    # speed up pickling array in Python 2.7
+    def fast_pickle_array(ar):
+        return array.array, (ar.typecode, ar.tostring())
+    copy_reg.pickle(array.array, fast_pickle_array)
+
+
+# Check whether we have SciPy. MLlib works without it too, but if we have it, some methods,
+# such as _dot and _serialize_double_vector, start to support scipy.sparse matrices.
+
+try:
+    import scipy.sparse
+    _have_scipy = True
+except ImportError:
+    # No SciPy in the environment, but that's okay
+    _have_scipy = False
+
+
+def _convert_to_vector(l):
+    if isinstance(l, Vector):
+        return l
+    elif type(l) in (array.array, np.array, np.ndarray, list, tuple, xrange):
+        return DenseVector(l)
+    elif _have_scipy and scipy.sparse.issparse(l):
+        assert l.shape[1] == 1, "Expected column vector"
+        # Make sure the converted csc_matrix has sorted indices.
+        csc = l.tocsc()
+        if not csc.has_sorted_indices:
+            csc.sort_indices()
+        return SparseVector(l.shape[0], csc.indices, csc.data)
+    else:
+        raise TypeError("Cannot convert type %s into Vector" % type(l))
+
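+
+# A hedged sketch (not part of the upstream source): _convert_to_vector accepts plain
+# Python sequences and NumPy arrays, and (when SciPy is installed) single-column
+# scipy.sparse matrices, which become SparseVector instead.
+def _convert_to_vector_sketch():
+    dense = _convert_to_vector([1.0, 0.0, 3.0])          # -> DenseVector([1.0, 0.0, 3.0])
+    from_np = _convert_to_vector(np.array([1.0, 2.0]))   # -> DenseVector([1.0, 2.0])
+    return dense, from_np
+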
+
+def _vector_size(v):
+    """
+    Returns the size of the vector.
+
+    >>> _vector_size([1., 2., 3.])
+    3
+    >>> _vector_size((1., 2., 3.))
+    3
+    >>> _vector_size(array.array('d', [1., 2., 3.]))
+    3
+    >>> _vector_size(np.zeros(3))
+    3
+    >>> _vector_size(np.zeros((3, 1)))
+    3
+    >>> _vector_size(np.zeros((1, 3)))
+    Traceback (most recent call last):
+        ...
+    ValueError: Cannot treat an ndarray of shape (1, 3) as a vector
+    """
+    if isinstance(v, Vector):
+        return len(v)
+    elif type(v) in (array.array, list, tuple, xrange):
+        return len(v)
+    elif type(v) == np.ndarray:
+        if v.ndim == 1 or (v.ndim == 2 and v.shape[1] == 1):
+            return len(v)
+        else:
+            raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape))
+    elif _have_scipy and scipy.sparse.issparse(v):
+        assert v.shape[1] == 1, "Expected column vector"
+        return v.shape[0]
+    else:
+        raise TypeError("Cannot treat type %s as a vector" % type(v))
+
+
+def _format_float(f, digits=4):
+    s = str(round(f, digits))
+    if '.' in s:
+        s = s[:s.index('.') + 1 + digits]
+    return s
+
+
+def _format_float_list(l):
+    return [_format_float(x) for x in l]
+
+
+def _double_to_long_bits(value):
+    if np.isnan(value):
+        value = float('nan')
+    # pack double into 64 bits, then unpack as long int
+    return struct.unpack('Q', struct.pack('d', value))[0]
+
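+
+# A hedged note (not part of the upstream source): this helper mirrors Java's
+# Double.doubleToLongBits and is used by the vector __hash__ implementations later in
+# this module so that doubles are hashed by their IEEE-754 bit pattern.
+def _double_to_long_bits_sketch():
+    return _double_to_long_bits(1.0) == 0x3FF0000000000000  # expected: True
+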
+
+class VectorUDT(UserDefinedType):
+    """
+    SQL user-defined type (UDT) for Vector.
+    """
+
+    @classmethod
+    def sqlType(cls):
+        return StructType([
+            StructField("type", ByteType(), False),
+            StructField("size", IntegerType(), True),
+            StructField("indices", ArrayType(IntegerType(), False), True),
+            StructField("values", ArrayType(DoubleType(), False), True)])
+
+    @classmethod
+    def module(cls):
+        return "pyspark.ml.linalg"
+
+    @classmethod
+    def scalaUDT(cls):
+        return "org.apache.spark.ml.linalg.VectorUDT"
+
+    def serialize(self, obj):
+        if isinstance(obj, SparseVector):
+            indices = [int(i) for i in obj.indices]
+            values = [float(v) for v in obj.values]
+            return (0, obj.size, indices, values)
+        elif isinstance(obj, DenseVector):
+            values = [float(v) for v in obj]
+            return (1, None, None, values)
+        else:
+            raise TypeError("cannot serialize %r of type %r" % (obj, type(obj)))
+
+    def deserialize(self, datum):
+        assert len(datum) == 4, \
+            "VectorUDT.deserialize given row with length %d but requires 4" % len(datum)
+        tpe = datum[0]
+        if tpe == 0:
+            return SparseVector(datum[1], datum[2], datum[3])
+        elif tpe == 1:
+            return DenseVector(datum[3])
+        else:
+            raise ValueError("do not recognize type %r" % tpe)
+
+    def simpleString(self):
+        return "vector"
+
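+
+# A hedged sketch (not part of the upstream source): the UDT above encodes a
+# SparseVector as (0, size, indices, values) and a DenseVector as
+# (1, None, None, values); deserialize() inverts that encoding.
+def _vector_udt_roundtrip_sketch():
+    udt = VectorUDT()
+    sv = SparseVector(4, [1, 3], [2.0, 5.0])
+    datum = udt.serialize(sv)            # -> (0, 4, [1, 3], [2.0, 5.0])
+    return udt.deserialize(datum) == sv  # expected: True
+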
+
+class MatrixUDT(UserDefinedType):
+    """
+    SQL user-defined type (UDT) for Matrix.
+    """
+
+    @classmethod
+    def sqlType(cls):
+        return StructType([
+            StructField("type", ByteType(), False),
+            StructField("numRows", IntegerType(), False),
+            StructField("numCols", IntegerType(), False),
+            StructField("colPtrs", ArrayType(IntegerType(), False), True),
+            StructField("rowIndices", ArrayType(IntegerType(), False), True),
+            StructField("values", ArrayType(DoubleType(), False), True),
+            StructField("isTransposed", BooleanType(), False)])
+
+    @classmethod
+    def module(cls):
+        return "pyspark.ml.linalg"
+
+    @classmethod
+    def scalaUDT(cls):
+        return "org.apache.spark.ml.linalg.MatrixUDT"
+
+    def serialize(self, obj):
+        if isinstance(obj, SparseMatrix):
+            colPtrs = [int(i) for i in obj.colPtrs]
+            rowIndices = [int(i) for i in obj.rowIndices]
+            values = [float(v) for v in obj.values]
+            return (0, obj.numRows, obj.numCols, colPtrs,
+                    rowIndices, values, bool(obj.isTransposed))
+        elif isinstance(obj, DenseMatrix):
+            values = [float(v) for v in obj.values]
+            return (1, obj.numRows, obj.numCols, None, None, values,
+                    bool(obj.isTransposed))
+        else:
+            raise TypeError("cannot serialize type %r" % (type(obj)))
+
+    def deserialize(self, datum):
+        assert len(datum) == 7, \
+            "MatrixUDT.deserialize given row with length %d but requires 7" % len(datum)
+        tpe = datum[0]
+        if tpe == 0:
+            return SparseMatrix(*datum[1:])
+        elif tpe == 1:
+            return DenseMatrix(datum[1], datum[2], datum[5], datum[6])
+        else:
+            raise ValueError("do not recognize type %r" % tpe)
+
+    def simpleString(self):
+        return "matrix"
+
+
+class Vector(object):
+
+    __UDT__ = VectorUDT()
+
+    """
+    Abstract class for DenseVector and SparseVector
+    """
+    def toArray(self):
+        """
+        Convert the vector into a numpy.ndarray
+
+        :return: numpy.ndarray
+        """
+        raise NotImplementedError
+
+
+class DenseVector(Vector):
+    """
+    A dense vector represented by a value array. We use a numpy array for
+    storage, and arithmetic operations are delegated to the underlying
+    numpy array.
+
+    >>> v = Vectors.dense([1.0, 2.0])
+    >>> u = Vectors.dense([3.0, 4.0])
+    >>> v + u
+    DenseVector([4.0, 6.0])
+    >>> 2 - v
+    DenseVector([1.0, 0.0])
+    >>> v / 2
+    DenseVector([0.5, 1.0])
+    >>> v * u
+    DenseVector([3.0, 8.0])
+    >>> u / v
+    DenseVector([3.0, 2.0])
+    >>> u % 2
+    DenseVector([1.0, 0.0])
+    """
+    def __init__(self, ar):
+        if isinstance(ar, bytes):
+            ar = np.frombuffer(ar, dtype=np.float64)
+        elif not isinstance(ar, np.ndarray):
+            ar = np.array(ar, dtype=np.float64)
+        if ar.dtype != np.float64:
+            ar = ar.astype(np.float64)
+        self.array = ar
+
+    def __reduce__(self):
+        return DenseVector, (self.array.tostring(),)
+
+    def numNonzeros(self):
+        """
+        Number of nonzero elements. This scans all active values and counts nonzeros.
+        """
+        return np.count_nonzero(self.array)
+
+    def norm(self, p):
+        """
+        Calculates the norm of a DenseVector.
+
+        >>> a = DenseVector([0, -1, 2, -3])
+        >>> a.norm(2)
+        3.7...
+        >>> a.norm(1)
+        6.0
+        """
+        return np.linalg.norm(self.array, p)
+
+    def dot(self, other):
+        """
+        Compute the dot product of two Vectors. We support NumPy arrays,
+        lists, SparseVector, and SciPy sparse inputs, as well as a target
+        NumPy array that is either 1- or 2-dimensional.
+        Equivalent to calling numpy.dot of the two vectors.
+
+        >>> dense = DenseVector(array.array('d', [1., 2.]))
+        >>> dense.dot(dense)
+        5.0
+        >>> dense.dot(SparseVector(2, [0, 1], [2., 1.]))
+        4.0
+        >>> dense.dot(range(1, 3))
+        5.0
+        >>> dense.dot(np.array(range(1, 3)))
+        5.0
+        >>> dense.dot([1.,])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> dense.dot(np.reshape([1., 2., 3., 4.], (2, 2), order='F'))
+        array([  5.,  11.])
+        >>> dense.dot(np.reshape([1., 2., 3.], (3, 1), order='F'))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+        if type(other) == np.ndarray:
+            if other.ndim > 1:
+                assert len(self) == other.shape[0], "dimension mismatch"
+            return np.dot(self.array, other)
+        elif _have_scipy and scipy.sparse.issparse(other):
+            assert len(self) == other.shape[0], "dimension mismatch"
+            return other.transpose().dot(self.toArray())
+        else:
+            assert len(self) == _vector_size(other), "dimension mismatch"
+            if isinstance(other, SparseVector):
+                return other.dot(self)
+            elif isinstance(other, Vector):
+                return np.dot(self.toArray(), other.toArray())
+            else:
+                return np.dot(self.toArray(), other)
+
+    def squared_distance(self, other):
+        """
+        Squared distance between two Vectors.
+
+        >>> dense1 = DenseVector(array.array('d', [1., 2.]))
+        >>> dense1.squared_distance(dense1)
+        0.0
+        >>> dense2 = np.array([2., 1.])
+        >>> dense1.squared_distance(dense2)
+        2.0
+        >>> dense3 = [2., 1.]
+        >>> dense1.squared_distance(dense3)
+        2.0
+        >>> sparse1 = SparseVector(2, [0, 1], [2., 1.])
+        >>> dense1.squared_distance(sparse1)
+        2.0
+        >>> dense1.squared_distance([1.,])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> dense1.squared_distance(SparseVector(1, [0,], [1.,]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+        assert len(self) == _vector_size(other), "dimension mismatch"
+        if isinstance(other, SparseVector):
+            return other.squared_distance(self)
+        elif _have_scipy and scipy.sparse.issparse(other):
+            return _convert_to_vector(other).squared_distance(self)
+
+        if isinstance(other, Vector):
+            other = other.toArray()
+        elif not isinstance(other, np.ndarray):
+            other = np.array(other)
+        diff = self.toArray() - other
+        return np.dot(diff, diff)
+
+    def toArray(self):
+        """
+        Returns a numpy.ndarray
+        """
+        return self.array
+
+    @property
+    def values(self):
+        """
+        Returns the underlying numpy.ndarray of values.
+        """
+        return self.array
+
+    def __getitem__(self, item):
+        return self.array[item]
+
+    def __len__(self):
+        return len(self.array)
+
+    def __str__(self):
+        return "[" + ",".join([str(v) for v in self.array]) + "]"
+
+    def __repr__(self):
+        return "DenseVector([%s])" % (', '.join(_format_float(i) for i in self.array))
+
+    def __eq__(self, other):
+        if isinstance(other, DenseVector):
+            return np.array_equal(self.array, other.array)
+        elif isinstance(other, SparseVector):
+            if len(self) != other.size:
+                return False
+            return Vectors._equals(list(xrange(len(self))), self.array, other.indices, other.values)
+        return False
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        size = len(self)
+        result = 31 + size
+        nnz = 0
+        i = 0
+        while i < size and nnz < 128:
+            if self.array[i] != 0:
+                result = 31 * result + i
+                bits = _double_to_long_bits(self.array[i])
+                result = 31 * result + (bits ^ (bits >> 32))
+                nnz += 1
+            i += 1
+        return result
+
+    def __getattr__(self, item):
+        return getattr(self.array, item)
+
+    def _delegate(op):
+        def func(self, other):
+            if isinstance(other, DenseVector):
+                other = other.array
+            return DenseVector(getattr(self.array, op)(other))
+        return func
+
+    __neg__ = _delegate("__neg__")
+    __add__ = _delegate("__add__")
+    __sub__ = _delegate("__sub__")
+    __mul__ = _delegate("__mul__")
+    __div__ = _delegate("__div__")
+    __truediv__ = _delegate("__truediv__")
+    __mod__ = _delegate("__mod__")
+    __radd__ = _delegate("__radd__")
+    __rsub__ = _delegate("__rsub__")
+    __rmul__ = _delegate("__rmul__")
+    __rdiv__ = _delegate("__rdiv__")
+    __rtruediv__ = _delegate("__rtruediv__")
+    __rmod__ = _delegate("__rmod__")
+
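+# Illustrative sketch (not part of the upstream source): __eq__ and __hash__
+# above compare vectors by their values rather than their representation, so a
+# dense vector is equal to (and hashes like) a sparse vector with the same
+# entries, e.g.:
+#
+#     DenseVector([1.0, 0.0]) == SparseVector(2, [0], [1.0])
+#     # -> True
+#     hash(DenseVector([1.0, 0.0])) == hash(SparseVector(2, [0], [1.0]))
+#     # -> True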
+
+class SparseVector(Vector):
+    """
+    A simple sparse vector class for passing data to MLlib. Users may
+    alternatively pass SciPy's C{scipy.sparse} data types.
+    """
+    def __init__(self, size, *args):
+        """
+        Create a sparse vector, using either a dictionary, a list of
+        (index, value) pairs, or two separate arrays of indices and
+        values (sorted by index).
+
+        :param size: Size of the vector.
+        :param args: Active entries, as a dictionary {index: value, ...},
+          a list of tuples [(index, value), ...], or a list of strictly
+          increasing indices and a list of corresponding values [index, ...],
+          [value, ...]. Inactive entries are treated as zeros.
+
+        >>> SparseVector(4, {1: 1.0, 3: 5.5})
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> SparseVector(4, [(1, 1.0), (3, 5.5)])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> SparseVector(4, [1, 3], [1.0, 5.5])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> SparseVector(4, {1:1.0, 6:2.0})
+        Traceback (most recent call last):
+        ...
+        AssertionError: Index 6 is out of the size of vector with size=4
+        >>> SparseVector(4, {-1:1.0})
+        Traceback (most recent call last):
+        ...
+        AssertionError: Contains negative index -1
+        """
+        self.size = int(size)
+        """ Size of the vector. """
+        assert 1 <= len(args) <= 2, "must pass either 2 or 3 arguments (including size)"
+        if len(args) == 1:
+            pairs = args[0]
+            if type(pairs) == dict:
+                pairs = pairs.items()
+            pairs = sorted(pairs)
+            self.indices = np.array([p[0] for p in pairs], dtype=np.int32)
+            """ A list of indices corresponding to active entries. """
+            self.values = np.array([p[1] for p in pairs], dtype=np.float64)
+            """ A list of values corresponding to active entries. """
+        else:
+            if isinstance(args[0], bytes):
+                assert isinstance(args[1], bytes), "values should be bytes too"
+                if args[0]:
+                    self.indices = np.frombuffer(args[0], np.int32)
+                    self.values = np.frombuffer(args[1], np.float64)
+                else:
+                    # np.frombuffer() doesn't work well with an empty string in older versions
+                    self.indices = np.array([], dtype=np.int32)
+                    self.values = np.array([], dtype=np.float64)
+            else:
+                self.indices = np.array(args[0], dtype=np.int32)
+                self.values = np.array(args[1], dtype=np.float64)
+            assert len(self.indices) == len(self.values), "index and value arrays not same length"
+            for i in xrange(len(self.indices) - 1):
+                if self.indices[i] >= self.indices[i + 1]:
+                    raise TypeError(
+                        "Indices %s and %s are not strictly increasing"
+                        % (self.indices[i], self.indices[i + 1]))
+
+        if self.indices.size > 0:
+            assert np.max(self.indices) < self.size, \
+                "Index %d is out of the size of vector with size=%d" \
+                % (np.max(self.indices), self.size)
+            assert np.min(self.indices) >= 0, \
+                "Contains negative index %d" % (np.min(self.indices))
+
+    def numNonzeros(self):
+        """
+        Number of nonzero elements. This scans all active values and counts nonzeros.
+        """
+        return np.count_nonzero(self.values)
+
+    def norm(self, p):
+        """
+        Calculates the norm of a SparseVector.
+
+        >>> a = SparseVector(4, [0, 1], [3., -4.])
+        >>> a.norm(1)
+        7.0
+        >>> a.norm(2)
+        5.0
+        """
+        return np.linalg.norm(self.values, p)
+
+    def __reduce__(self):
+        return (
+            SparseVector,
+            (self.size, self.indices.tostring(), self.values.tostring()))
+
+    def dot(self, other):
+        """
+        Dot product with a SparseVector or 1- or 2-dimensional Numpy array.
+
+        >>> a = SparseVector(4, [1, 3], [3.0, 4.0])
+        >>> a.dot(a)
+        25.0
+        >>> a.dot(array.array('d', [1., 2., 3., 4.]))
+        22.0
+        >>> b = SparseVector(4, [2], [1.0])
+        >>> a.dot(b)
+        0.0
+        >>> a.dot(np.array([[1, 1], [2, 2], [3, 3], [4, 4]]))
+        array([ 22.,  22.])
+        >>> a.dot([1., 2., 3.])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> a.dot(np.array([1., 2.]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> a.dot(DenseVector([1., 2.]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> a.dot(np.zeros((3, 2)))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+
+        if isinstance(other, np.ndarray):
+            if other.ndim not in [2, 1]:
+                raise ValueError("Cannot call dot with %d-dimensional array" % other.ndim)
+            assert len(self) == other.shape[0], "dimension mismatch"
+            return np.dot(self.values, other[self.indices])
+
+        assert len(self) == _vector_size(other), "dimension mismatch"
+
+        if isinstance(other, DenseVector):
+            return np.dot(other.array[self.indices], self.values)
+
+        elif isinstance(other, SparseVector):
+            # Find out common indices.
+            self_cmind = np.in1d(self.indices, other.indices, assume_unique=True)
+            self_values = self.values[self_cmind]
+            if self_values.size == 0:
+                return 0.0
+            else:
+                other_cmind = np.in1d(other.indices, self.indices, assume_unique=True)
+                return np.dot(self_values, other.values[other_cmind])
+
+        else:
+            return self.dot(_convert_to_vector(other))
+
+    def squared_distance(self, other):
+        """
+        Squared distance from a SparseVector or 1-dimensional NumPy array.
+
+        >>> a = SparseVector(4, [1, 3], [3.0, 4.0])
+        >>> a.squared_distance(a)
+        0.0
+        >>> a.squared_distance(array.array('d', [1., 2., 3., 4.]))
+        11.0
+        >>> a.squared_distance(np.array([1., 2., 3., 4.]))
+        11.0
+        >>> b = SparseVector(4, [2], [1.0])
+        >>> a.squared_distance(b)
+        26.0
+        >>> b.squared_distance(a)
+        26.0
+        >>> b.squared_distance([1., 2.])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> b.squared_distance(SparseVector(3, [1,], [1.0,]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+        assert len(self) == _vector_size(other), "dimension mismatch"
+
+        if isinstance(other, np.ndarray) or isinstance(other, DenseVector):
+            if isinstance(other, np.ndarray) and other.ndim != 1:
+                raise Exception("Cannot call squared_distance with %d-dimensional array" %
+                                other.ndim)
+            if isinstance(other, DenseVector):
+                other = other.array
+            sparse_ind = np.zeros(other.size, dtype=bool)
+            sparse_ind[self.indices] = True
+            dist = other[sparse_ind] - self.values
+            result = np.dot(dist, dist)
+
+            other_ind = other[~sparse_ind]
+            result += np.dot(other_ind, other_ind)
+            return result
+
+        elif isinstance(other, SparseVector):
+            result = 0.0
+            i, j = 0, 0
+            while i < len(self.indices) and j < len(other.indices):
+                if self.indices[i] == other.indices[j]:
+                    diff = self.values[i] - other.values[j]
+                    result += diff * diff
+                    i += 1
+                    j += 1
+                elif self.indices[i] < other.indices[j]:
+                    result += self.values[i] * self.values[i]
+                    i += 1
+                else:
+                    result += other.values[j] * other.values[j]
+                    j += 1
+            while i < len(self.indices):
+                result += self.values[i] * self.values[i]
+                i += 1
+            while j < len(other.indices):
+                result += other.values[j] * other.values[j]
+                j += 1
+            return result
+        else:
+            return self.squared_distance(_convert_to_vector(other))
+
+    def toArray(self):
+        """
+        Returns a copy of this SparseVector as a 1-dimensional NumPy array.
+        """
+        arr = np.zeros((self.size,), dtype=np.float64)
+        arr[self.indices] = self.values
+        return arr
+
+    def __len__(self):
+        return self.size
+
+    def __str__(self):
+        inds = "[" + ",".join([str(i) for i in self.indices]) + "]"
+        vals = "[" + ",".join([str(v) for v in self.values]) + "]"
+        return "(" + ",".join((str(self.size), inds, vals)) + ")"
+
+    def __repr__(self):
+        inds = self.indices
+        vals = self.values
+        entries = ", ".join(["{0}: {1}".format(inds[i], _format_float(vals[i]))
+                             for i in xrange(len(inds))])
+        return "SparseVector({0}, {{{1}}})".format(self.size, entries)
+
+    def __eq__(self, other):
+        if isinstance(other, SparseVector):
+            return other.size == self.size and np.array_equal(other.indices, self.indices) \
+                and np.array_equal(other.values, self.values)
+        elif isinstance(other, DenseVector):
+            if self.size != len(other):
+                return False
+            return Vectors._equals(self.indices, self.values, list(xrange(len(other))), other.array)
+        return False
+
+    def __getitem__(self, index):
+        inds = self.indices
+        vals = self.values
+        if not isinstance(index, int):
+            raise TypeError(
+                "Indices must be of type integer, got type %s" % type(index))
+
+        if index >= self.size or index < -self.size:
+            raise IndexError("Index %d out of bounds." % index)
+        if index < 0:
+            index += self.size
+
+        if (inds.size == 0) or (index > inds.item(-1)):
+            return 0.
+
+        insert_index = np.searchsorted(inds, index)
+        row_ind = inds[insert_index]
+        if row_ind == index:
+            return vals[insert_index]
+        return 0.
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        result = 31 + self.size
+        nnz = 0
+        i = 0
+        while i < len(self.values) and nnz < 128:
+            if self.values[i] != 0:
+                result = 31 * result + int(self.indices[i])
+                bits = _double_to_long_bits(self.values[i])
+                result = 31 * result + (bits ^ (bits >> 32))
+                nnz += 1
+            i += 1
+        return result
+
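+# Illustrative sketch (not part of the upstream source): __getitem__ above
+# returns the stored value for active indices, 0.0 for inactive ones, and
+# accepts negative indices, e.g.:
+#
+#     sv = SparseVector(4, [1, 3], [2.0, 4.0])
+#     sv[1], sv[2], sv[-1]
+#     # -> (2.0, 0.0, 4.0)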
+
+class Vectors(object):
+
+    """
+    Factory methods for working with vectors.
+
+    .. note:: Dense vectors are simply represented as NumPy array objects,
+        so there is no need to convert them for use in MLlib. For sparse vectors,
+        the factory methods in this class create an MLlib-compatible type, or users
+        can pass in SciPy's C{scipy.sparse} column vectors.
+    """
+
+    @staticmethod
+    def sparse(size, *args):
+        """
+        Create a sparse vector, using either a dictionary, a list of
+        (index, value) pairs, or two separate arrays of indices and
+        values (sorted by index).
+
+        :param size: Size of the vector.
+        :param args: Non-zero entries, as a dictionary, list of tuples,
+                     or two sorted lists containing indices and values.
+
+        >>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        """
+        return SparseVector(size, *args)
+
+    @staticmethod
+    def dense(*elements):
+        """
+        Create a dense vector of 64-bit floats from a Python list or numbers.
+
+        >>> Vectors.dense([1, 2, 3])
+        DenseVector([1.0, 2.0, 3.0])
+        >>> Vectors.dense(1.0, 2.0)
+        DenseVector([1.0, 2.0])
+        """
+        if len(elements) == 1 and not isinstance(elements[0], (float, int, long)):
+            # it's list, numpy.array or other iterable object.
+            elements = elements[0]
+        return DenseVector(elements)
+
+    @staticmethod
+    def squared_distance(v1, v2):
+        """
+        Squared distance between two vectors.
+        v1 and v2 can be of type SparseVector, DenseVector, np.ndarray
+        or array.array.
+
+        >>> a = Vectors.sparse(4, [(0, 1), (3, 4)])
+        >>> b = Vectors.dense([2, 5, 4, 1])
+        >>> a.squared_distance(b)
+        51.0
+        """
+        v1, v2 = _convert_to_vector(v1), _convert_to_vector(v2)
+        return v1.squared_distance(v2)
+
+    @staticmethod
+    def norm(vector, p):
+        """
+        Find the norm of the given vector.
+        """
+        return _convert_to_vector(vector).norm(p)
+
+    @staticmethod
+    def zeros(size):
+        return DenseVector(np.zeros(size))
+
+    @staticmethod
+    def _equals(v1_indices, v1_values, v2_indices, v2_values):
+        """
+        Check equality between sparse/dense vectors;
+        v1_indices and v2_indices are assumed to be strictly increasing.
+        """
+        v1_size = len(v1_values)
+        v2_size = len(v2_values)
+        k1 = 0
+        k2 = 0
+        all_equal = True
+        while all_equal:
+            while k1 < v1_size and v1_values[k1] == 0:
+                k1 += 1
+            while k2 < v2_size and v2_values[k2] == 0:
+                k2 += 1
+
+            if k1 >= v1_size or k2 >= v2_size:
+                return k1 >= v1_size and k2 >= v2_size
+
+            all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
+            k1 += 1
+            k2 += 1
+        return all_equal
+
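+# Illustrative sketch (not part of the upstream source): _equals above skips
+# explicitly stored zeros, so a sparse vector that stores a zero still compares
+# equal to its dense counterpart, e.g.:
+#
+#     Vectors.sparse(3, [0, 1], [1.0, 0.0]) == Vectors.dense([1.0, 0.0, 0.0])
+#     # -> True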
+
+class Matrix(object):
+
+    __UDT__ = MatrixUDT()
+
+    """
+    Represents a local matrix.
+    """
+    def __init__(self, numRows, numCols, isTransposed=False):
+        self.numRows = numRows
+        self.numCols = numCols
+        self.isTransposed = isTransposed
+
+    def toArray(self):
+        """
+        Returns its elements in a NumPy ndarray.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def _convert_to_array(array_like, dtype):
+        """
+        Convert Matrix attributes that are array-like or a buffer to a NumPy array.
+        """
+        if isinstance(array_like, bytes):
+            return np.frombuffer(array_like, dtype=dtype)
+        return np.asarray(array_like, dtype=dtype)
+
+
+class DenseMatrix(Matrix):
+    """
+    Column-major dense matrix.
+    """
+    def __init__(self, numRows, numCols, values, isTransposed=False):
+        Matrix.__init__(self, numRows, numCols, isTransposed)
+        values = self._convert_to_array(values, np.float64)
+        assert len(values) == numRows * numCols
+        self.values = values
+
+    def __reduce__(self):
+        return DenseMatrix, (
+            self.numRows, self.numCols, self.values.tostring(),
+            int(self.isTransposed))
+
+    def __str__(self):
+        """
+        Pretty printing of a DenseMatrix
+
+        >>> dm = DenseMatrix(2, 2, range(4))
+        >>> print(dm)
+        DenseMatrix([[ 0.,  2.],
+                     [ 1.,  3.]])
+        >>> dm = DenseMatrix(2, 2, range(4), isTransposed=True)
+        >>> print(dm)
+        DenseMatrix([[ 0.,  1.],
+                     [ 2.,  3.]])
+        """
+        # Inspired by __repr__ in scipy matrices.
+        array_lines = repr(self.toArray()).splitlines()
+
+        # We need to adjust six spaces which is the difference in number
+        # of letters between "DenseMatrix" and "array"
+        x = '\n'.join([(" " * 6 + line) for line in array_lines[1:]])
+        return array_lines[0].replace("array", "DenseMatrix") + "\n" + x
+
+    def __repr__(self):
+        """
+        Representation of a DenseMatrix
+
+        >>> dm = DenseMatrix(2, 2, range(4))
+        >>> dm
+        DenseMatrix(2, 2, [0.0, 1.0, 2.0, 3.0], False)
+        """
+        # If the number of values is less than seventeen, return them as is.
+        # Otherwise return the first eight and the last eight values.
+        if len(self.values) < 17:
+            entries = _format_float_list(self.values)
+        else:
+            entries = (
+                _format_float_list(self.values[:8]) +
+                ["..."] +
+                _format_float_list(self.values[-8:])
+            )
+
+        entries = ", ".join(entries)
+        return "DenseMatrix({0}, {1}, [{2}], {3})".format(
+            self.numRows, self.numCols, entries, self.isTransposed)
+
+    def toArray(self):
+        """
+        Return a numpy.ndarray
+
+        >>> m = DenseMatrix(2, 2, range(4))
+        >>> m.toArray()
+        array([[ 0.,  2.],
+               [ 1.,  3.]])
+        """
+        if self.isTransposed:
+            return np.asfortranarray(
+                self.values.reshape((self.numRows, self.numCols)))
+        else:
+            return self.values.reshape((self.numRows, self.numCols), order='F')
+
+    def toSparse(self):
+        """Convert to SparseMatrix"""
+        if self.isTransposed:
+            values = np.ravel(self.toArray(), order='F')
+        else:
+            values = self.values
+        indices = np.nonzero(values)[0]
+        colCounts = np.bincount(indices // self.numRows)
+        colPtrs = np.cumsum(np.hstack(
+            (0, colCounts, np.zeros(self.numCols - colCounts.size))))
+        values = values[indices]
+        rowIndices = indices % self.numRows
+
+        return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values)
+
+    def __getitem__(self, indices):
+        i, j = indices
+        if i < 0 or i >= self.numRows:
+            raise IndexError("Row index %d is out of range [0, %d)"
+                             % (i, self.numRows))
+        if j >= self.numCols or j < 0:
+            raise IndexError("Column index %d is out of range [0, %d)"
+                             % (j, self.numCols))
+
+        if self.isTransposed:
+            return self.values[i * self.numCols + j]
+        else:
+            return self.values[i + j * self.numRows]
+
+    def __eq__(self, other):
+        if (not isinstance(other, DenseMatrix) or
+                self.numRows != other.numRows or
+                self.numCols != other.numCols):
+            return False
+
+        self_values = np.ravel(self.toArray(), order='F')
+        other_values = np.ravel(other.toArray(), order='F')
+        return all(self_values == other_values)
+
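+# Illustrative sketch (not part of the upstream source): DenseMatrix stores its
+# values in column-major order (unless isTransposed is set), which __getitem__
+# and toSparse() above rely on, e.g.:
+#
+#     dm = DenseMatrix(2, 2, [1.0, 0.0, 0.0, 2.0])
+#     dm[0, 0], dm[0, 1]
+#     # -> (1.0, 0.0)
+#     dm.toSparse().values
+#     # -> array([ 1.,  2.])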
+
+class SparseMatrix(Matrix):
+    """Sparse Matrix stored in CSC format."""
+    def __init__(self, numRows, numCols, colPtrs, rowIndices, values,
+                 isTransposed=False):
+        Matrix.__init__(self, numRows, numCols, isTransposed)
+        self.colPtrs = self._convert_to_array(colPtrs, np.int32)
+        self.rowIndices = self._convert_to_array(rowIndices, np.int32)
+        self.values = self._convert_to_array(values, np.float64)
+
+        if self.isTransposed:
+            if self.colPtrs.size != numRows + 1:
+                raise ValueError("Expected colPtrs of size %d, got %d."
+                                 % (numRows + 1, self.colPtrs.size))
+        else:
+            if self.colPtrs.size != numCols + 1:
+                raise ValueError("Expected colPtrs of size %d, got %d."
+                                 % (numCols + 1, self.colPtrs.size))
+        if self.rowIndices.size != self.values.size:
+            raise ValueError("Expected rowIndices of length %d, got %d."
+                             % (self.values.size, self.rowIndices.size))
+
+    def __str__(self):
+        """
+        Pretty printing of a SparseMatrix
+
+        >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        >>> print(sm1)
+        2 X 2 CSCMatrix
+        (0,0) 2.0
+        (1,0) 3.0
+        (1,1) 4.0
+        >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
+        >>> print(sm1)
+        2 X 2 CSRMatrix
+        (0,0) 2.0
+        (0,1) 3.0
+        (1,1) 4.0
+        """
+        spstr = "{0} X {1} ".format(self.numRows, self.numCols)
+        if self.isTransposed:
+            spstr += "CSRMatrix\n"
+        else:
+            spstr += "CSCMatrix\n"
+
+        cur_col = 0
+        smlist = []
+
+        # Display first 16 values.
+        if len(self.values) <= 16:
+            zipindval = zip(self.rowIndices, self.values)
+        else:
+            zipindval = zip(self.rowIndices[:16], self.values[:16])
+        for i, (rowInd, value) in enumerate(zipindval):
+            if self.colPtrs[cur_col + 1] <= i:
+                cur_col += 1
+            if self.isTransposed:
+                smlist.append('({0},{1}) {2}'.format(
+                    cur_col, rowInd, _format_float(value)))
+            else:
+                smlist.append('({0},{1}) {2}'.format(
+                    rowInd, cur_col, _format_float(value)))
+        spstr += "\n".join(smlist)
+
+        if len(self.values) > 16:
+            spstr += "\n.." * 2
+        return spstr
+
+    def __repr__(self):
+        """
+        Representation of a SparseMatrix
+
+        >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        >>> sm1
+        SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2.0, 3.0, 4.0], False)
+        """
+        rowIndices = list(self.rowIndices)
+        colPtrs = list(self.colPtrs)
+
+        if len(self.values) <= 16:
+            values = _format_float_list(self.values)
+
+        else:
+            values = (
+                _format_float_list(self.values[:8]) +
+                ["..."] +
+                _format_float_list(self.values[-8:])
+            )
+            rowIndices = rowIndices[:8] + ["..."] + rowIndices[-8:]
+
+        if len(self.colPtrs) > 16:
+            colPtrs = colPtrs[:8] + ["..."] + colPtrs[-8:]
+
+        values = ", ".join(values)
+        rowIndices = ", ".join([str(ind) for ind in rowIndices])
+        colPtrs = ", ".join([str(ptr) for ptr in colPtrs])
+        return "SparseMatrix({0}, {1}, [{2}], [{3}], [{4}], {5})".format(
+            self.numRows, self.numCols, colPtrs, rowIndices,
+            values, self.isTransposed)
+
+    def __reduce__(self):
+        return SparseMatrix, (
+            self.numRows, self.numCols, self.colPtrs.tostring(),
+            self.rowIndices.tostring(), self.values.tostring(),
+            int(self.isTransposed))
+
+    def __getitem__(self, indices):
+        i, j = indices
+        if i < 0 or i >= self.numRows:
+            raise IndexError("Row index %d is out of range [0, %d)"
+                             % (i, self.numRows))
+        if j < 0 or j >= self.numCols:
+            raise IndexError("Column index %d is out of range [0, %d)"
+                             % (j, self.numCols))
+
+        # If a CSR matrix is given, then the row index should be searched
+        # for in colPtrs, and the column index should be searched for in the
+        # corresponding slice obtained from rowIndices.
+        if self.isTransposed:
+            j, i = i, j
+
+        colStart = self.colPtrs[j]
+        colEnd = self.colPtrs[j + 1]
+        nz = self.rowIndices[colStart: colEnd]
+        ind = np.searchsorted(nz, i) + colStart
+        if ind < colEnd and self.rowIndices[ind] == i:
+            return self.values[ind]
+        else:
+            return 0.0
+
+    def toArray(self):
+        """
+        Return a numpy.ndarray
+        """
+        A = np.zeros((self.numRows, self.numCols), dtype=np.float64, order='F')
+        for k in xrange(self.colPtrs.size - 1):
+            startptr = self.colPtrs[k]
+            endptr = self.colPtrs[k + 1]
+            if self.isTransposed:
+                A[k, self.rowIndices[startptr:endptr]] = self.values[startptr:endptr]
+            else:
+                A[self.rowIndices[startptr:endptr], k] = self.values[startptr:endptr]
+        return A
+
+    def toDense(self):
+        densevals = np.ravel(self.toArray(), order='F')
+        return DenseMatrix(self.numRows, self.numCols, densevals)
+
+    # TODO: More efficient implementation:
+    def __eq__(self, other):
+        return np.all(self.toArray() == other.toArray())
+
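+# Illustrative sketch (not part of the upstream source): in the CSC layout used
+# above, column k holds values[colPtrs[k]:colPtrs[k + 1]], so lookups behave
+# like:
+#
+#     sm = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2.0, 3.0, 4.0])
+#     sm[1, 1], sm[0, 1]
+#     # -> (4.0, 0.0)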
+
+class Matrices(object):
+    @staticmethod
+    def dense(numRows, numCols, values):
+        """
+        Create a DenseMatrix
+        """
+        return DenseMatrix(numRows, numCols, values)
+
+    @staticmethod
+    def sparse(numRows, numCols, colPtrs, rowIndices, values):
+        """
+        Create a SparseMatrix
+        """
+        return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)
+
+
+def _test():
+    import doctest
+    try:
+        # NumPy 1.14+ changed its string format.
+        np.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..043c25cf9feb4ae72dbc0ad59bd486c2bb9e450a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/__init__.py
@@ -0,0 +1,511 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import array
+import sys
+if sys.version > '3':
+    basestring = str
+    xrange = range
+    unicode = str
+
+from abc import ABCMeta
+import copy
+import numpy as np
+
+from py4j.java_gateway import JavaObject
+
+from pyspark.ml.linalg import DenseVector, Vector, Matrix
+from pyspark.ml.util import Identifiable
+
+
+__all__ = ['Param', 'Params', 'TypeConverters']
+
+
+class Param(object):
+    """
+    A param with self-contained documentation.
+
+    .. versionadded:: 1.3.0
+    """
+
+    def __init__(self, parent, name, doc, typeConverter=None):
+        if not isinstance(parent, Identifiable):
+            raise TypeError("Parent must be an Identifiable but got type %s." % type(parent))
+        self.parent = parent.uid
+        self.name = str(name)
+        self.doc = str(doc)
+        self.typeConverter = TypeConverters.identity if typeConverter is None else typeConverter
+
+    def _copy_new_parent(self, parent):
+        """Copy the current param to a new parent, must be a dummy param."""
+        if self.parent == "undefined":
+            param = copy.copy(self)
+            param.parent = parent.uid
+            return param
+        else:
+            raise ValueError("Cannot copy from non-dummy parent %s." % parent)
+
+    def __str__(self):
+        return str(self.parent) + "__" + self.name
+
+    def __repr__(self):
+        return "Param(parent=%r, name=%r, doc=%r)" % (self.parent, self.name, self.doc)
+
+    def __hash__(self):
+        return hash(str(self))
+
+    def __eq__(self, other):
+        if isinstance(other, Param):
+            return self.parent == other.parent and self.name == other.name
+        else:
+            return False
+
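+# Illustrative sketch (not part of the upstream source): params are declared on
+# the class against the dummy parent and re-parented when an instance is
+# constructed (see Params._copy_params below), e.g.:
+#
+#     maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0).",
+#                     typeConverter=TypeConverters.toInt)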
+
+class TypeConverters(object):
+    """
+    .. note:: DeveloperApi
+
+    Factory methods for common type conversion functions for `Param.typeConverter`.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @staticmethod
+    def _is_numeric(value):
+        vtype = type(value)
+        return vtype in [int, float, np.float64, np.int64] or vtype.__name__ == 'long'
+
+    @staticmethod
+    def _is_integer(value):
+        return TypeConverters._is_numeric(value) and float(value).is_integer()
+
+    @staticmethod
+    def _can_convert_to_list(value):
+        vtype = type(value)
+        return vtype in [list, np.ndarray, tuple, xrange, array.array] or isinstance(value, Vector)
+
+    @staticmethod
+    def _can_convert_to_string(value):
+        vtype = type(value)
+        return isinstance(value, basestring) or vtype in [np.unicode_, np.string_, np.str_]
+
+    @staticmethod
+    def identity(value):
+        """
+        Dummy converter that just returns value.
+        """
+        return value
+
+    @staticmethod
+    def toList(value):
+        """
+        Convert a value to a list, if possible.
+        """
+        if type(value) == list:
+            return value
+        elif type(value) in [np.ndarray, tuple, xrange, array.array]:
+            return list(value)
+        elif isinstance(value, Vector):
+            return list(value.toArray())
+        else:
+            raise TypeError("Could not convert %s to list" % value)
+
+    @staticmethod
+    def toListFloat(value):
+        """
+        Convert a value to list of floats, if possible.
+        """
+        if TypeConverters._can_convert_to_list(value):
+            value = TypeConverters.toList(value)
+            if all(map(lambda v: TypeConverters._is_numeric(v), value)):
+                return [float(v) for v in value]
+        raise TypeError("Could not convert %s to list of floats" % value)
+
+    @staticmethod
+    def toListInt(value):
+        """
+        Convert a value to list of ints, if possible.
+        """
+        if TypeConverters._can_convert_to_list(value):
+            value = TypeConverters.toList(value)
+            if all(map(lambda v: TypeConverters._is_integer(v), value)):
+                return [int(v) for v in value]
+        raise TypeError("Could not convert %s to list of ints" % value)
+
+    @staticmethod
+    def toListString(value):
+        """
+        Convert a value to list of strings, if possible.
+        """
+        if TypeConverters._can_convert_to_list(value):
+            value = TypeConverters.toList(value)
+            if all(map(lambda v: TypeConverters._can_convert_to_string(v), value)):
+                return [TypeConverters.toString(v) for v in value]
+        raise TypeError("Could not convert %s to list of strings" % value)
+
+    @staticmethod
+    def toVector(value):
+        """
+        Convert a value to a MLlib Vector, if possible.
+        """
+        if isinstance(value, Vector):
+            return value
+        elif TypeConverters._can_convert_to_list(value):
+            value = TypeConverters.toList(value)
+            if all(map(lambda v: TypeConverters._is_numeric(v), value)):
+                return DenseVector(value)
+        raise TypeError("Could not convert %s to vector" % value)
+
+    @staticmethod
+    def toMatrix(value):
+        """
+        Convert a value to a MLlib Matrix, if possible.
+        """
+        if isinstance(value, Matrix):
+            return value
+        raise TypeError("Could not convert %s to matrix" % value)
+
+    @staticmethod
+    def toFloat(value):
+        """
+        Convert a value to a float, if possible.
+        """
+        if TypeConverters._is_numeric(value):
+            return float(value)
+        else:
+            raise TypeError("Could not convert %s to float" % value)
+
+    @staticmethod
+    def toInt(value):
+        """
+        Convert a value to an int, if possible.
+        """
+        if TypeConverters._is_integer(value):
+            return int(value)
+        else:
+            raise TypeError("Could not convert %s to int" % value)
+
+    @staticmethod
+    def toString(value):
+        """
+        Convert a value to a string, if possible.
+        """
+        if isinstance(value, basestring):
+            return value
+        elif type(value) in [np.string_, np.str_]:
+            return str(value)
+        elif type(value) == np.unicode_:
+            return unicode(value)
+        else:
+            raise TypeError("Could not convert %s to string type" % type(value))
+
+    @staticmethod
+    def toBoolean(value):
+        """
+        Convert a value to a boolean, if possible.
+        """
+        if type(value) == bool:
+            return value
+        else:
+            raise TypeError("Boolean Param requires value of type bool. Found %s." % type(value))
+
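+# Illustrative sketch (not part of the upstream source): the converters coerce
+# compatible inputs and raise TypeError otherwise, e.g.:
+#
+#     TypeConverters.toInt(3.0)             # -> 3
+#     TypeConverters.toListFloat(range(2))  # -> [0.0, 1.0]
+#     TypeConverters.toVector([1, 2])       # -> DenseVector([1.0, 2.0])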
+
+class Params(Identifiable):
+    """
+    Components that take parameters. This also provides an internal
+    param map to store parameter values attached to the instance.
+
+    .. versionadded:: 1.3.0
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(self):
+        super(Params, self).__init__()
+        #: internal param map for user-supplied values
+        self._paramMap = {}
+
+        #: internal param map for default values
+        self._defaultParamMap = {}
+
+        #: value returned by :py:func:`params`
+        self._params = None
+
+        # Copy the params from the class to the object
+        self._copy_params()
+
+    def _copy_params(self):
+        """
+        Copy all params defined on the class to the current object.
+        """
+        cls = type(self)
+        src_name_attrs = [(x, getattr(cls, x)) for x in dir(cls)]
+        src_params = list(filter(lambda nameAttr: isinstance(nameAttr[1], Param), src_name_attrs))
+        for name, param in src_params:
+            setattr(self, name, param._copy_new_parent(self))
+
+    @property
+    def params(self):
+        """
+        Returns all params ordered by name. The default implementation
+        uses :py:func:`dir` to get all attributes of type
+        :py:class:`Param`.
+        """
+        if self._params is None:
+            self._params = list(filter(lambda attr: isinstance(attr, Param),
+                                       [getattr(self, x) for x in dir(self) if x != "params" and
+                                        not isinstance(getattr(type(self), x, None), property)]))
+        return self._params
+
+    def explainParam(self, param):
+        """
+        Explains a single param and returns its name, doc, and optional
+        default value and user-supplied value in a string.
+        """
+        param = self._resolveParam(param)
+        values = []
+        if self.isDefined(param):
+            if param in self._defaultParamMap:
+                values.append("default: %s" % self._defaultParamMap[param])
+            if param in self._paramMap:
+                values.append("current: %s" % self._paramMap[param])
+        else:
+            values.append("undefined")
+        valueStr = "(" + ", ".join(values) + ")"
+        return "%s: %s %s" % (param.name, param.doc, valueStr)
+
+    def explainParams(self):
+        """
+        Returns the documentation of all params with their optional
+        default values and user-supplied values.
+        """
+        return "\n".join([self.explainParam(param) for param in self.params])
+
+    def getParam(self, paramName):
+        """
+        Gets a param by its name.
+        """
+        param = getattr(self, paramName)
+        if isinstance(param, Param):
+            return param
+        else:
+            raise ValueError("Cannot find param with name %s." % paramName)
+
+    def isSet(self, param):
+        """
+        Checks whether a param is explicitly set by user.
+        """
+        param = self._resolveParam(param)
+        return param in self._paramMap
+
+    def hasDefault(self, param):
+        """
+        Checks whether a param has a default value.
+        """
+        param = self._resolveParam(param)
+        return param in self._defaultParamMap
+
+    def isDefined(self, param):
+        """
+        Checks whether a param is explicitly set by user or has
+        a default value.
+        """
+        return self.isSet(param) or self.hasDefault(param)
+
+    def hasParam(self, paramName):
+        """
+        Tests whether this instance contains a param with a given
+        (string) name.
+        """
+        if isinstance(paramName, basestring):
+            p = getattr(self, paramName, None)
+            return isinstance(p, Param)
+        else:
+            raise TypeError("hasParam(): paramName must be a string")
+
+    def getOrDefault(self, param):
+        """
+        Gets the value of a param in the user-supplied param map or its
+        default value. Raises an error if neither is set.
+        """
+        param = self._resolveParam(param)
+        if param in self._paramMap:
+            return self._paramMap[param]
+        else:
+            return self._defaultParamMap[param]
+
+    def extractParamMap(self, extra=None):
+        """
+        Extracts the embedded default param values and user-supplied
+        values, and then merges them with extra values from input into
+        a flat param map, where the latter value is used if there are
+        conflicts, i.e., with ordering: default param values <
+        user-supplied values < extra.
+
+        :param extra: extra param values
+        :return: merged param map
+        """
+        if extra is None:
+            extra = dict()
+        paramMap = self._defaultParamMap.copy()
+        paramMap.update(self._paramMap)
+        paramMap.update(extra)
+        return paramMap
+
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with the same uid and some
+        extra params. The default implementation creates a
+        shallow copy using :py:func:`copy.copy`, and then copies the
+        embedded and extra parameters over and returns the copy.
+        Subclasses should override this method if the default approach
+        is not sufficient.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        that = copy.copy(self)
+        that._paramMap = {}
+        that._defaultParamMap = {}
+        return self._copyValues(that, extra)
+
+    def set(self, param, value):
+        """
+        Sets a parameter in the embedded param map.
+        """
+        self._shouldOwn(param)
+        try:
+            value = param.typeConverter(value)
+        except ValueError as e:
+            raise ValueError('Invalid param value given for param "%s". %s' % (param.name, e))
+        self._paramMap[param] = value
+
+    def _shouldOwn(self, param):
+        """
+        Validates that the input param belongs to this Params instance.
+        """
+        if not (self.uid == param.parent and self.hasParam(param.name)):
+            raise ValueError("Param %r does not belong to %r." % (param, self))
+
+    def _resolveParam(self, param):
+        """
+        Resolves a param and validates the ownership.
+
+        :param param: param name or the param instance, which must
+                      belong to this Params instance
+        :return: resolved param instance
+        """
+        if isinstance(param, Param):
+            self._shouldOwn(param)
+            return param
+        elif isinstance(param, basestring):
+            return self.getParam(param)
+        else:
+            raise ValueError("Cannot resolve %r as a param." % param)
+
+    @staticmethod
+    def _dummy():
+        """
+        Returns a dummy Params instance used as a placeholder to
+        generate docs.
+        """
+        dummy = Params()
+        dummy.uid = "undefined"
+        return dummy
+
+    def _set(self, **kwargs):
+        """
+        Sets user-supplied params.
+        """
+        for param, value in kwargs.items():
+            p = getattr(self, param)
+            if value is not None:
+                try:
+                    value = p.typeConverter(value)
+                except TypeError as e:
+                    raise TypeError('Invalid param value given for param "%s". %s' % (p.name, e))
+            self._paramMap[p] = value
+        return self
+
+    def _clear(self, param):
+        """
+        Clears a param from the param map if it has been explicitly set.
+        """
+        if self.isSet(param):
+            del self._paramMap[param]
+
+    def _setDefault(self, **kwargs):
+        """
+        Sets default params.
+        """
+        for param, value in kwargs.items():
+            p = getattr(self, param)
+            if value is not None and not isinstance(value, JavaObject):
+                try:
+                    value = p.typeConverter(value)
+                except TypeError as e:
+                    raise TypeError('Invalid default param value given for param "%s". %s'
+                                    % (p.name, e))
+            self._defaultParamMap[p] = value
+        return self
+
+    def _copyValues(self, to, extra=None):
+        """
+        Copies param values from this instance to another instance for
+        params shared by them.
+
+        :param to: the target instance
+        :param extra: extra params to be copied
+        :return: the target instance with param values copied
+        """
+        paramMap = self._paramMap.copy()
+        if extra is not None:
+            paramMap.update(extra)
+        for param in self.params:
+            # copy default params
+            if param in self._defaultParamMap and to.hasParam(param.name):
+                to._defaultParamMap[to.getParam(param.name)] = self._defaultParamMap[param]
+            # copy explicitly set params
+            if param in paramMap and to.hasParam(param.name):
+                to._set(**{param.name: paramMap[param]})
+        return to
+
+    def _resetUid(self, newUid):
+        """
+        Changes the uid of this instance. This updates both
+        the stored uid and the parent uid of params and param maps.
+        This is used by persistence (loading).
+
+        :param newUid: new uid to use, which is converted to unicode
+        :return: same instance, but with the uid and Param.parent values
+                 updated, including within param maps
+        """
+        newUid = unicode(newUid)
+        self.uid = newUid
+        newDefaultParamMap = dict()
+        newParamMap = dict()
+        for param in self.params:
+            newParam = copy.copy(param)
+            newParam.parent = newUid
+            if param in self._defaultParamMap:
+                newDefaultParamMap[newParam] = self._defaultParamMap[param]
+            if param in self._paramMap:
+                newParamMap[newParam] = self._paramMap[param]
+            param.parent = newUid
+        self._defaultParamMap = newDefaultParamMap
+        self._paramMap = newParamMap
+        return self
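+# Illustrative sketch (not part of the upstream source): a minimal, hypothetical
+# Params subclass showing how a param flows through defaults and user-supplied
+# values, e.g.:
+#
+#     class HasFoo(Params):
+#         foo = Param(Params._dummy(), "foo", "foo doc",
+#                     typeConverter=TypeConverters.toInt)
+#
+#         def __init__(self):
+#             super(HasFoo, self).__init__()
+#             self._setDefault(foo=1)
+#
+#     hf = HasFoo()
+#     hf.getOrDefault(hf.foo)              # -> 1 (default value)
+#     hf._set(foo=5).getOrDefault(hf.foo)  # -> 5 (user-supplied value wins)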
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/_shared_params_code_gen.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/_shared_params_code_gen.py
new file mode 100644
index 0000000000000000000000000000000000000000..e45ba840b412b438b3d4b5523cd2b4bd44cf4d01
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/_shared_params_code_gen.py
@@ -0,0 +1,215 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+header = """#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#"""
+
+# Code generator for shared params (shared.py). Run under this folder with:
+# python _shared_params_code_gen.py > shared.py
+
+
+def _gen_param_header(name, doc, defaultValueStr, typeConverter):
+    """
+    Generates the header part for a shared param class.
+
+    :param name: param name
+    :param doc: param doc
+    :param defaultValueStr: string representation of the default value
+    :param typeConverter: name of the TypeConverter function, as a string
+    """
+    template = '''class Has$Name(Params):
+    """
+    Mixin for param $name: $doc
+    """
+
+    $name = Param(Params._dummy(), "$name", "$doc", typeConverter=$typeConverter)
+
+    def __init__(self):
+        super(Has$Name, self).__init__()'''
+
+    if defaultValueStr is not None:
+        template += '''
+        self._setDefault($name=$defaultValueStr)'''
+
+    Name = name[0].upper() + name[1:]
+    if typeConverter is None:
+        typeConverter = str(None)
+    return template \
+        .replace("$name", name) \
+        .replace("$Name", Name) \
+        .replace("$doc", doc) \
+        .replace("$defaultValueStr", str(defaultValueStr)) \
+        .replace("$typeConverter", typeConverter)
+
+
+def _gen_param_code(name, doc, defaultValueStr):
+    """
+    Generates Python code for the setter/getter methods of a shared param.
+
+    :param name: param name
+    :param doc: param doc
+    :param defaultValueStr: string representation of the default value
+    :return: code string
+    """
+    # TODO: How to correctly inherit instance attributes?
+    template = '''
+    def set$Name(self, value):
+        """
+        Sets the value of :py:attr:`$name`.
+        """
+        return self._set($name=value)
+
+    def get$Name(self):
+        """
+        Gets the value of $name or its default value.
+        """
+        return self.getOrDefault(self.$name)'''
+
+    Name = name[0].upper() + name[1:]
+    return template \
+        .replace("$name", name) \
+        .replace("$Name", Name) \
+        .replace("$doc", doc) \
+        .replace("$defaultValueStr", str(defaultValueStr))
+
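+# Illustrative sketch (not part of the upstream source): for the "maxIter" entry
+# in the table below, the two generators emit roughly:
+#
+#     class HasMaxIter(Params):
+#         maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0).",
+#                         typeConverter=TypeConverters.toInt)
+#
+#         def __init__(self):
+#             super(HasMaxIter, self).__init__()
+#
+#         def setMaxIter(self, value):
+#             return self._set(maxIter=value)
+#
+#         def getMaxIter(self):
+#             return self.getOrDefault(self.maxIter)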
+if __name__ == "__main__":
+    print(header)
+    print("\n# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.\n")
+    print("from pyspark.ml.param import *\n\n")
+    shared = [
+        ("maxIter", "max number of iterations (>= 0).", None, "TypeConverters.toInt"),
+        ("regParam", "regularization parameter (>= 0).", None, "TypeConverters.toFloat"),
+        ("featuresCol", "features column name.", "'features'", "TypeConverters.toString"),
+        ("labelCol", "label column name.", "'label'", "TypeConverters.toString"),
+        ("predictionCol", "prediction column name.", "'prediction'", "TypeConverters.toString"),
+        ("probabilityCol", "Column name for predicted class conditional probabilities. " +
+         "Note: Not all models output well-calibrated probability estimates! These probabilities " +
+         "should be treated as confidences, not precise probabilities.", "'probability'",
+         "TypeConverters.toString"),
+        ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name.", "'rawPrediction'",
+         "TypeConverters.toString"),
+        ("inputCol", "input column name.", None, "TypeConverters.toString"),
+        ("inputCols", "input column names.", None, "TypeConverters.toListString"),
+        ("outputCol", "output column name.", "self.uid + '__output'", "TypeConverters.toString"),
+        ("outputCols", "output column names.", None, "TypeConverters.toListString"),
+        ("numFeatures", "number of features.", None, "TypeConverters.toInt"),
+        ("checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). " +
+         "E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: " +
+         "this setting will be ignored if the checkpoint directory is not set in the SparkContext.",
+         None, "TypeConverters.toInt"),
+        ("seed", "random seed.", "hash(type(self).__name__)", "TypeConverters.toInt"),
+        ("tol", "the convergence tolerance for iterative algorithms (>= 0).", None,
+         "TypeConverters.toFloat"),
+        ("stepSize", "Step size to be used for each iteration of optimization (>= 0).", None,
+         "TypeConverters.toFloat"),
+        ("handleInvalid", "how to handle invalid entries. Options are skip (which will filter " +
+         "out rows with bad values), or error (which will throw an error). More options may be " +
+         "added later.", None, "TypeConverters.toString"),
+        ("elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " +
+         "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.", "0.0",
+         "TypeConverters.toFloat"),
+        ("fitIntercept", "whether to fit an intercept term.", "True", "TypeConverters.toBoolean"),
+        ("standardization", "whether to standardize the training features before fitting the " +
+         "model.", "True", "TypeConverters.toBoolean"),
+        ("thresholds", "Thresholds in multi-class classification to adjust the probability of " +
+         "predicting each class. Array must have length equal to the number of classes, with " +
+         "values > 0, excepting that at most one value may be 0. " +
+         "The class with largest value p/t is predicted, where p is the original " +
+         "probability of that class and t is the class's threshold.", None,
+         "TypeConverters.toListFloat"),
+        ("threshold", "threshold in binary classification prediction, in range [0, 1]",
+         "0.5", "TypeConverters.toFloat"),
+        ("weightCol", "weight column name. If this is not set or empty, we treat " +
+         "all instance weights as 1.0.", None, "TypeConverters.toString"),
+        ("solver", "the solver algorithm for optimization. If this is not set or empty, " +
+         "default value is 'auto'.", "'auto'", "TypeConverters.toString"),
+        ("varianceCol", "column name for the biased sample variance of prediction.",
+         None, "TypeConverters.toString"),
+        ("aggregationDepth", "suggested depth for treeAggregate (>= 2).", "2",
+         "TypeConverters.toInt"),
+        ("parallelism", "the number of threads to use when running parallel algorithms (>= 1).",
+         "1", "TypeConverters.toInt"),
+        ("collectSubModels", "Param for whether to collect a list of sub-models trained during " +
+         "tuning. If set to false, then only the single best sub-model will be available after " +
+         "fitting. If set to true, then all sub-models will be available. Warning: For large " +
+         "models, collecting all sub-models can cause OOMs on the Spark driver.",
+         "False", "TypeConverters.toBoolean"),
+        ("loss", "the loss function to be optimized.", None, "TypeConverters.toString"),
+        ("distanceMeasure", "the distance measure. Supported options: 'euclidean' and 'cosine'.",
+         "'euclidean'", "TypeConverters.toString")]
+
+    code = []
+    for name, doc, defaultValueStr, typeConverter in shared:
+        param_code = _gen_param_header(name, doc, defaultValueStr, typeConverter)
+        code.append(param_code + "\n" + _gen_param_code(name, doc, defaultValueStr))
+
+    decisionTreeParams = [
+        ("maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; " +
+         "depth 1 means 1 internal node + 2 leaf nodes.", "TypeConverters.toInt"),
+        ("maxBins", "Max number of bins for" +
+         " discretizing continuous features.  Must be >=2 and >= number of categories for any" +
+         " categorical feature.", "TypeConverters.toInt"),
+        ("minInstancesPerNode", "Minimum number of instances each child must have after split. " +
+         "If a split causes the left or right child to have fewer than minInstancesPerNode, the " +
+         "split will be discarded as invalid. Should be >= 1.", "TypeConverters.toInt"),
+        ("minInfoGain", "Minimum information gain for a split to be considered at a tree node.",
+         "TypeConverters.toFloat"),
+        ("maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation. If too small," +
+         " then 1 node will be split per iteration, and its aggregates may exceed this size.",
+         "TypeConverters.toInt"),
+        ("cacheNodeIds", "If false, the algorithm will pass trees to executors to match " +
+         "instances with nodes. If true, the algorithm will cache node IDs for each instance. " +
+         "Caching can speed up training of deeper trees. Users can set how often should the " +
+         "cache be checkpointed or disable it by setting checkpointInterval.",
+         "TypeConverters.toBoolean")]
+
+    decisionTreeCode = '''class DecisionTreeParams(Params):
+    """
+    Mixin for Decision Tree parameters.
+    """
+
+    $dummyPlaceHolders
+
+    def __init__(self):
+        super(DecisionTreeParams, self).__init__()'''
+    dtParamMethods = ""
+    dummyPlaceholders = ""
+    paramTemplate = """$name = Param($owner, "$name", "$doc", typeConverter=$typeConverterStr)"""
+    for name, doc, typeConverterStr in decisionTreeParams:
+        if typeConverterStr is None:
+            typeConverterStr = str(None)
+        variable = paramTemplate.replace("$name", name).replace("$doc", doc) \
+            .replace("$typeConverterStr", typeConverterStr)
+        dummyPlaceholders += variable.replace("$owner", "Params._dummy()") + "\n    "
+        dtParamMethods += _gen_param_code(name, doc, None) + "\n"
+    code.append(decisionTreeCode.replace("$dummyPlaceHolders", dummyPlaceholders) + "\n" +
+                dtParamMethods)
+    print("\n\n\n".join(code))
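+
+# A minimal sketch of the substitution the helpers above perform, assuming the
+# module can be imported as `_shared_params_code_gen`; the param name and doc
+# string are taken from the `shared` list:
+#
+#     from _shared_params_code_gen import _gen_param_code
+#
+#     snippet = _gen_param_code("maxIter", "max number of iterations (>= 0).", None)
+#     assert "def setMaxIter(self, value):" in snippet
+#     assert "return self.getOrDefault(self.maxIter)" in snippet
+#
+# Generating that text for every entry in `shared` (plus the DecisionTreeParams
+# block) is what produces the shared.py module below.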
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/shared.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/shared.py
new file mode 100644
index 0000000000000000000000000000000000000000..618f5bf0a810361a11567cb418adb0808c4fae83
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/param/shared.py
@@ -0,0 +1,816 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.
+
+from pyspark.ml.param import *
+
+
+class HasMaxIter(Params):
+    """
+    Mixin for param maxIter: max number of iterations (>= 0).
+    """
+
+    maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0).", typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(HasMaxIter, self).__init__()
+
+    def setMaxIter(self, value):
+        """
+        Sets the value of :py:attr:`maxIter`.
+        """
+        return self._set(maxIter=value)
+
+    def getMaxIter(self):
+        """
+        Gets the value of maxIter or its default value.
+        """
+        return self.getOrDefault(self.maxIter)
+
+
+class HasRegParam(Params):
+    """
+    Mixin for param regParam: regularization parameter (>= 0).
+    """
+
+    regParam = Param(Params._dummy(), "regParam", "regularization parameter (>= 0).", typeConverter=TypeConverters.toFloat)
+
+    def __init__(self):
+        super(HasRegParam, self).__init__()
+
+    def setRegParam(self, value):
+        """
+        Sets the value of :py:attr:`regParam`.
+        """
+        return self._set(regParam=value)
+
+    def getRegParam(self):
+        """
+        Gets the value of regParam or its default value.
+        """
+        return self.getOrDefault(self.regParam)
+
+
+class HasFeaturesCol(Params):
+    """
+    Mixin for param featuresCol: features column name.
+    """
+
+    featuresCol = Param(Params._dummy(), "featuresCol", "features column name.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasFeaturesCol, self).__init__()
+        self._setDefault(featuresCol='features')
+
+    def setFeaturesCol(self, value):
+        """
+        Sets the value of :py:attr:`featuresCol`.
+        """
+        return self._set(featuresCol=value)
+
+    def getFeaturesCol(self):
+        """
+        Gets the value of featuresCol or its default value.
+        """
+        return self.getOrDefault(self.featuresCol)
+
+
+class HasLabelCol(Params):
+    """
+    Mixin for param labelCol: label column name.
+    """
+
+    labelCol = Param(Params._dummy(), "labelCol", "label column name.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasLabelCol, self).__init__()
+        self._setDefault(labelCol='label')
+
+    def setLabelCol(self, value):
+        """
+        Sets the value of :py:attr:`labelCol`.
+        """
+        return self._set(labelCol=value)
+
+    def getLabelCol(self):
+        """
+        Gets the value of labelCol or its default value.
+        """
+        return self.getOrDefault(self.labelCol)
+
+
+class HasPredictionCol(Params):
+    """
+    Mixin for param predictionCol: prediction column name.
+    """
+
+    predictionCol = Param(Params._dummy(), "predictionCol", "prediction column name.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasPredictionCol, self).__init__()
+        self._setDefault(predictionCol='prediction')
+
+    def setPredictionCol(self, value):
+        """
+        Sets the value of :py:attr:`predictionCol`.
+        """
+        return self._set(predictionCol=value)
+
+    def getPredictionCol(self):
+        """
+        Gets the value of predictionCol or its default value.
+        """
+        return self.getOrDefault(self.predictionCol)
+
+
+class HasProbabilityCol(Params):
+    """
+    Mixin for param probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.
+    """
+
+    probabilityCol = Param(Params._dummy(), "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasProbabilityCol, self).__init__()
+        self._setDefault(probabilityCol='probability')
+
+    def setProbabilityCol(self, value):
+        """
+        Sets the value of :py:attr:`probabilityCol`.
+        """
+        return self._set(probabilityCol=value)
+
+    def getProbabilityCol(self):
+        """
+        Gets the value of probabilityCol or its default value.
+        """
+        return self.getOrDefault(self.probabilityCol)
+
+
+class HasRawPredictionCol(Params):
+    """
+    Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name.
+    """
+
+    rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction (a.k.a. confidence) column name.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasRawPredictionCol, self).__init__()
+        self._setDefault(rawPredictionCol='rawPrediction')
+
+    def setRawPredictionCol(self, value):
+        """
+        Sets the value of :py:attr:`rawPredictionCol`.
+        """
+        return self._set(rawPredictionCol=value)
+
+    def getRawPredictionCol(self):
+        """
+        Gets the value of rawPredictionCol or its default value.
+        """
+        return self.getOrDefault(self.rawPredictionCol)
+
+
+class HasInputCol(Params):
+    """
+    Mixin for param inputCol: input column name.
+    """
+
+    inputCol = Param(Params._dummy(), "inputCol", "input column name.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasInputCol, self).__init__()
+
+    def setInputCol(self, value):
+        """
+        Sets the value of :py:attr:`inputCol`.
+        """
+        return self._set(inputCol=value)
+
+    def getInputCol(self):
+        """
+        Gets the value of inputCol or its default value.
+        """
+        return self.getOrDefault(self.inputCol)
+
+
+class HasInputCols(Params):
+    """
+    Mixin for param inputCols: input column names.
+    """
+
+    inputCols = Param(Params._dummy(), "inputCols", "input column names.", typeConverter=TypeConverters.toListString)
+
+    def __init__(self):
+        super(HasInputCols, self).__init__()
+
+    def setInputCols(self, value):
+        """
+        Sets the value of :py:attr:`inputCols`.
+        """
+        return self._set(inputCols=value)
+
+    def getInputCols(self):
+        """
+        Gets the value of inputCols or its default value.
+        """
+        return self.getOrDefault(self.inputCols)
+
+
+class HasOutputCol(Params):
+    """
+    Mixin for param outputCol: output column name.
+    """
+
+    outputCol = Param(Params._dummy(), "outputCol", "output column name.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasOutputCol, self).__init__()
+        self._setDefault(outputCol=self.uid + '__output')
+
+    def setOutputCol(self, value):
+        """
+        Sets the value of :py:attr:`outputCol`.
+        """
+        return self._set(outputCol=value)
+
+    def getOutputCol(self):
+        """
+        Gets the value of outputCol or its default value.
+        """
+        return self.getOrDefault(self.outputCol)
+
+
+class HasOutputCols(Params):
+    """
+    Mixin for param outputCols: output column names.
+    """
+
+    outputCols = Param(Params._dummy(), "outputCols", "output column names.", typeConverter=TypeConverters.toListString)
+
+    def __init__(self):
+        super(HasOutputCols, self).__init__()
+
+    def setOutputCols(self, value):
+        """
+        Sets the value of :py:attr:`outputCols`.
+        """
+        return self._set(outputCols=value)
+
+    def getOutputCols(self):
+        """
+        Gets the value of outputCols or its default value.
+        """
+        return self.getOrDefault(self.outputCols)
+
+
+class HasNumFeatures(Params):
+    """
+    Mixin for param numFeatures: number of features.
+    """
+
+    numFeatures = Param(Params._dummy(), "numFeatures", "number of features.", typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(HasNumFeatures, self).__init__()
+
+    def setNumFeatures(self, value):
+        """
+        Sets the value of :py:attr:`numFeatures`.
+        """
+        return self._set(numFeatures=value)
+
+    def getNumFeatures(self):
+        """
+        Gets the value of numFeatures or its default value.
+        """
+        return self.getOrDefault(self.numFeatures)
+
+
+class HasCheckpointInterval(Params):
+    """
+    Mixin for param checkpointInterval: set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext.
+    """
+
+    checkpointInterval = Param(Params._dummy(), "checkpointInterval", "set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext.", typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(HasCheckpointInterval, self).__init__()
+
+    def setCheckpointInterval(self, value):
+        """
+        Sets the value of :py:attr:`checkpointInterval`.
+        """
+        return self._set(checkpointInterval=value)
+
+    def getCheckpointInterval(self):
+        """
+        Gets the value of checkpointInterval or its default value.
+        """
+        return self.getOrDefault(self.checkpointInterval)
+
+
+class HasSeed(Params):
+    """
+    Mixin for param seed: random seed.
+    """
+
+    seed = Param(Params._dummy(), "seed", "random seed.", typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(HasSeed, self).__init__()
+        self._setDefault(seed=hash(type(self).__name__))
+
+    def setSeed(self, value):
+        """
+        Sets the value of :py:attr:`seed`.
+        """
+        return self._set(seed=value)
+
+    def getSeed(self):
+        """
+        Gets the value of seed or its default value.
+        """
+        return self.getOrDefault(self.seed)
+
+
+class HasTol(Params):
+    """
+    Mixin for param tol: the convergence tolerance for iterative algorithms (>= 0).
+    """
+
+    tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms (>= 0).", typeConverter=TypeConverters.toFloat)
+
+    def __init__(self):
+        super(HasTol, self).__init__()
+
+    def setTol(self, value):
+        """
+        Sets the value of :py:attr:`tol`.
+        """
+        return self._set(tol=value)
+
+    def getTol(self):
+        """
+        Gets the value of tol or its default value.
+        """
+        return self.getOrDefault(self.tol)
+
+
+class HasStepSize(Params):
+    """
+    Mixin for param stepSize: Step size to be used for each iteration of optimization (>= 0).
+    """
+
+    stepSize = Param(Params._dummy(), "stepSize", "Step size to be used for each iteration of optimization (>= 0).", typeConverter=TypeConverters.toFloat)
+
+    def __init__(self):
+        super(HasStepSize, self).__init__()
+
+    def setStepSize(self, value):
+        """
+        Sets the value of :py:attr:`stepSize`.
+        """
+        return self._set(stepSize=value)
+
+    def getStepSize(self):
+        """
+        Gets the value of stepSize or its default value.
+        """
+        return self.getOrDefault(self.stepSize)
+
+
+class HasHandleInvalid(Params):
+    """
+    Mixin for param handleInvalid: how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an error). More options may be added later.
+    """
+
+    handleInvalid = Param(Params._dummy(), "handleInvalid", "how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an error). More options may be added later.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasHandleInvalid, self).__init__()
+
+    def setHandleInvalid(self, value):
+        """
+        Sets the value of :py:attr:`handleInvalid`.
+        """
+        return self._set(handleInvalid=value)
+
+    def getHandleInvalid(self):
+        """
+        Gets the value of handleInvalid or its default value.
+        """
+        return self.getOrDefault(self.handleInvalid)
+
+
+class HasElasticNetParam(Params):
+    """
+    Mixin for param elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.
+    """
+
+    elasticNetParam = Param(Params._dummy(), "elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.", typeConverter=TypeConverters.toFloat)
+
+    def __init__(self):
+        super(HasElasticNetParam, self).__init__()
+        self._setDefault(elasticNetParam=0.0)
+
+    def setElasticNetParam(self, value):
+        """
+        Sets the value of :py:attr:`elasticNetParam`.
+        """
+        return self._set(elasticNetParam=value)
+
+    def getElasticNetParam(self):
+        """
+        Gets the value of elasticNetParam or its default value.
+        """
+        return self.getOrDefault(self.elasticNetParam)
+
+
+class HasFitIntercept(Params):
+    """
+    Mixin for param fitIntercept: whether to fit an intercept term.
+    """
+
+    fitIntercept = Param(Params._dummy(), "fitIntercept", "whether to fit an intercept term.", typeConverter=TypeConverters.toBoolean)
+
+    def __init__(self):
+        super(HasFitIntercept, self).__init__()
+        self._setDefault(fitIntercept=True)
+
+    def setFitIntercept(self, value):
+        """
+        Sets the value of :py:attr:`fitIntercept`.
+        """
+        return self._set(fitIntercept=value)
+
+    def getFitIntercept(self):
+        """
+        Gets the value of fitIntercept or its default value.
+        """
+        return self.getOrDefault(self.fitIntercept)
+
+
+class HasStandardization(Params):
+    """
+    Mixin for param standardization: whether to standardize the training features before fitting the model.
+    """
+
+    standardization = Param(Params._dummy(), "standardization", "whether to standardize the training features before fitting the model.", typeConverter=TypeConverters.toBoolean)
+
+    def __init__(self):
+        super(HasStandardization, self).__init__()
+        self._setDefault(standardization=True)
+
+    def setStandardization(self, value):
+        """
+        Sets the value of :py:attr:`standardization`.
+        """
+        return self._set(standardization=value)
+
+    def getStandardization(self):
+        """
+        Gets the value of standardization or its default value.
+        """
+        return self.getOrDefault(self.standardization)
+
+
+class HasThresholds(Params):
+    """
+    Mixin for param thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0, excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold.
+    """
+
+    thresholds = Param(Params._dummy(), "thresholds", "Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0, excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold.", typeConverter=TypeConverters.toListFloat)
+
+    def __init__(self):
+        super(HasThresholds, self).__init__()
+
+    def setThresholds(self, value):
+        """
+        Sets the value of :py:attr:`thresholds`.
+        """
+        return self._set(thresholds=value)
+
+    def getThresholds(self):
+        """
+        Gets the value of thresholds or its default value.
+        """
+        return self.getOrDefault(self.thresholds)
+
+
+class HasThreshold(Params):
+    """
+    Mixin for param threshold: threshold in binary classification prediction, in range [0, 1]
+    """
+
+    threshold = Param(Params._dummy(), "threshold", "threshold in binary classification prediction, in range [0, 1]", typeConverter=TypeConverters.toFloat)
+
+    def __init__(self):
+        super(HasThreshold, self).__init__()
+        self._setDefault(threshold=0.5)
+
+    def setThreshold(self, value):
+        """
+        Sets the value of :py:attr:`threshold`.
+        """
+        return self._set(threshold=value)
+
+    def getThreshold(self):
+        """
+        Gets the value of threshold or its default value.
+        """
+        return self.getOrDefault(self.threshold)
+
+
+class HasWeightCol(Params):
+    """
+    Mixin for param weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0.
+    """
+
+    weightCol = Param(Params._dummy(), "weightCol", "weight column name. If this is not set or empty, we treat all instance weights as 1.0.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasWeightCol, self).__init__()
+
+    def setWeightCol(self, value):
+        """
+        Sets the value of :py:attr:`weightCol`.
+        """
+        return self._set(weightCol=value)
+
+    def getWeightCol(self):
+        """
+        Gets the value of weightCol or its default value.
+        """
+        return self.getOrDefault(self.weightCol)
+
+
+class HasSolver(Params):
+    """
+    Mixin for param solver: the solver algorithm for optimization. If this is not set or empty, default value is 'auto'.
+    """
+
+    solver = Param(Params._dummy(), "solver", "the solver algorithm for optimization. If this is not set or empty, default value is 'auto'.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasSolver, self).__init__()
+        self._setDefault(solver='auto')
+
+    def setSolver(self, value):
+        """
+        Sets the value of :py:attr:`solver`.
+        """
+        return self._set(solver=value)
+
+    def getSolver(self):
+        """
+        Gets the value of solver or its default value.
+        """
+        return self.getOrDefault(self.solver)
+
+
+class HasVarianceCol(Params):
+    """
+    Mixin for param varianceCol: column name for the biased sample variance of prediction.
+    """
+
+    varianceCol = Param(Params._dummy(), "varianceCol", "column name for the biased sample variance of prediction.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasVarianceCol, self).__init__()
+
+    def setVarianceCol(self, value):
+        """
+        Sets the value of :py:attr:`varianceCol`.
+        """
+        return self._set(varianceCol=value)
+
+    def getVarianceCol(self):
+        """
+        Gets the value of varianceCol or its default value.
+        """
+        return self.getOrDefault(self.varianceCol)
+
+
+class HasAggregationDepth(Params):
+    """
+    Mixin for param aggregationDepth: suggested depth for treeAggregate (>= 2).
+    """
+
+    aggregationDepth = Param(Params._dummy(), "aggregationDepth", "suggested depth for treeAggregate (>= 2).", typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(HasAggregationDepth, self).__init__()
+        self._setDefault(aggregationDepth=2)
+
+    def setAggregationDepth(self, value):
+        """
+        Sets the value of :py:attr:`aggregationDepth`.
+        """
+        return self._set(aggregationDepth=value)
+
+    def getAggregationDepth(self):
+        """
+        Gets the value of aggregationDepth or its default value.
+        """
+        return self.getOrDefault(self.aggregationDepth)
+
+
+class HasParallelism(Params):
+    """
+    Mixin for param parallelism: the number of threads to use when running parallel algorithms (>= 1).
+    """
+
+    parallelism = Param(Params._dummy(), "parallelism", "the number of threads to use when running parallel algorithms (>= 1).", typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(HasParallelism, self).__init__()
+        self._setDefault(parallelism=1)
+
+    def setParallelism(self, value):
+        """
+        Sets the value of :py:attr:`parallelism`.
+        """
+        return self._set(parallelism=value)
+
+    def getParallelism(self):
+        """
+        Gets the value of parallelism or its default value.
+        """
+        return self.getOrDefault(self.parallelism)
+
+
+class HasCollectSubModels(Params):
+    """
+    Mixin for param collectSubModels: Param for whether to collect a list of sub-models trained during tuning. If set to false, then only the single best sub-model will be available after fitting. If set to true, then all sub-models will be available. Warning: For large models, collecting all sub-models can cause OOMs on the Spark driver.
+    """
+
+    collectSubModels = Param(Params._dummy(), "collectSubModels", "Param for whether to collect a list of sub-models trained during tuning. If set to false, then only the single best sub-model will be available after fitting. If set to true, then all sub-models will be available. Warning: For large models, collecting all sub-models can cause OOMs on the Spark driver.", typeConverter=TypeConverters.toBoolean)
+
+    def __init__(self):
+        super(HasCollectSubModels, self).__init__()
+        self._setDefault(collectSubModels=False)
+
+    def setCollectSubModels(self, value):
+        """
+        Sets the value of :py:attr:`collectSubModels`.
+        """
+        return self._set(collectSubModels=value)
+
+    def getCollectSubModels(self):
+        """
+        Gets the value of collectSubModels or its default value.
+        """
+        return self.getOrDefault(self.collectSubModels)
+
+
+class HasLoss(Params):
+    """
+    Mixin for param loss: the loss function to be optimized.
+    """
+
+    loss = Param(Params._dummy(), "loss", "the loss function to be optimized.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasLoss, self).__init__()
+
+    def setLoss(self, value):
+        """
+        Sets the value of :py:attr:`loss`.
+        """
+        return self._set(loss=value)
+
+    def getLoss(self):
+        """
+        Gets the value of loss or its default value.
+        """
+        return self.getOrDefault(self.loss)
+
+
+class DecisionTreeParams(Params):
+    """
+    Mixin for Decision Tree parameters.
+    """
+
+    maxDepth = Param(Params._dummy(), "maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.", typeConverter=TypeConverters.toInt)
+    maxBins = Param(Params._dummy(), "maxBins", "Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.", typeConverter=TypeConverters.toInt)
+    minInstancesPerNode = Param(Params._dummy(), "minInstancesPerNode", "Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1.", typeConverter=TypeConverters.toInt)
+    minInfoGain = Param(Params._dummy(), "minInfoGain", "Minimum information gain for a split to be considered at a tree node.", typeConverter=TypeConverters.toFloat)
+    maxMemoryInMB = Param(Params._dummy(), "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation. If too small, then 1 node will be split per iteration, and its aggregates may exceed this size.", typeConverter=TypeConverters.toInt)
+    cacheNodeIds = Param(Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. Users can set how often should the cache be checkpointed or disable it by setting checkpointInterval.", typeConverter=TypeConverters.toBoolean)
+
+
+    def __init__(self):
+        super(DecisionTreeParams, self).__init__()
+
+    def setMaxDepth(self, value):
+        """
+        Sets the value of :py:attr:`maxDepth`.
+        """
+        return self._set(maxDepth=value)
+
+    def getMaxDepth(self):
+        """
+        Gets the value of maxDepth or its default value.
+        """
+        return self.getOrDefault(self.maxDepth)
+
+    def setMaxBins(self, value):
+        """
+        Sets the value of :py:attr:`maxBins`.
+        """
+        return self._set(maxBins=value)
+
+    def getMaxBins(self):
+        """
+        Gets the value of maxBins or its default value.
+        """
+        return self.getOrDefault(self.maxBins)
+
+    def setMinInstancesPerNode(self, value):
+        """
+        Sets the value of :py:attr:`minInstancesPerNode`.
+        """
+        return self._set(minInstancesPerNode=value)
+
+    def getMinInstancesPerNode(self):
+        """
+        Gets the value of minInstancesPerNode or its default value.
+        """
+        return self.getOrDefault(self.minInstancesPerNode)
+
+    def setMinInfoGain(self, value):
+        """
+        Sets the value of :py:attr:`minInfoGain`.
+        """
+        return self._set(minInfoGain=value)
+
+    def getMinInfoGain(self):
+        """
+        Gets the value of minInfoGain or its default value.
+        """
+        return self.getOrDefault(self.minInfoGain)
+
+    def setMaxMemoryInMB(self, value):
+        """
+        Sets the value of :py:attr:`maxMemoryInMB`.
+        """
+        return self._set(maxMemoryInMB=value)
+
+    def getMaxMemoryInMB(self):
+        """
+        Gets the value of maxMemoryInMB or its default value.
+        """
+        return self.getOrDefault(self.maxMemoryInMB)
+
+    def setCacheNodeIds(self, value):
+        """
+        Sets the value of :py:attr:`cacheNodeIds`.
+        """
+        return self._set(cacheNodeIds=value)
+
+    def getCacheNodeIds(self):
+        """
+        Gets the value of cacheNodeIds or its default value.
+        """
+        return self.getOrDefault(self.cacheNodeIds)
+
+
+class HasDistanceMeasure(Params):
+    """
+    Mixin for param distanceMeasure: the distance measure. Supported options: 'euclidean' and 'cosine'.
+    """
+
+    distanceMeasure = Param(Params._dummy(), "distanceMeasure", "the distance measure. Supported options: 'euclidean' and 'cosine'.", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(HasDistanceMeasure, self).__init__()
+        self._setDefault(distanceMeasure='euclidean')
+
+    def setDistanceMeasure(self, value):
+        """
+        Sets the value of :py:attr:`distanceMeasure`.
+        """
+        return self._set(distanceMeasure=value)
+
+    def getDistanceMeasure(self):
+        """
+        Gets the value of distanceMeasure or its default value.
+        """
+        return self.getOrDefault(self.distanceMeasure)
+
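+
+# A minimal usage sketch for the mixins above. `_MyParams` is illustrative only
+# and is not part of pyspark; any Params subclass can mix these in the same way:
+#
+#     class _MyParams(HasMaxIter, HasFeaturesCol):
+#         pass
+#
+#     p = _MyParams()
+#     p.getFeaturesCol()    # 'features' -- default installed by HasFeaturesCol.__init__
+#     p.setMaxIter(20)      # _set() returns self, so setters can be chained
+#     p.getMaxIter()        # 20
+#
+# Params without a default (e.g. maxIter before the setMaxIter call above) raise
+# an error from getOrDefault until a value has been set explicitly.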
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/pipeline.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..097530230cbca90795dbed83047f49906362f699
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/pipeline.py
@@ -0,0 +1,390 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import os
+
+if sys.version > '3':
+    basestring = str
+
+from pyspark import since, keyword_only, SparkContext
+from pyspark.ml.base import Estimator, Model, Transformer
+from pyspark.ml.param import Param, Params
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaParams
+from pyspark.ml.common import inherit_doc
+
+
+@inherit_doc
+class Pipeline(Estimator, MLReadable, MLWritable):
+    """
+    A simple pipeline, which acts as an estimator. A Pipeline consists
+    of a sequence of stages, each of which is either an
+    :py:class:`Estimator` or a :py:class:`Transformer`. When
+    :py:meth:`Pipeline.fit` is called, the stages are executed in
+    order. If a stage is an :py:class:`Estimator`, its
+    :py:meth:`Estimator.fit` method will be called on the input
+    dataset to fit a model. Then the model, which is a transformer,
+    will be used to transform the dataset as the input to the next
+    stage. If a stage is a :py:class:`Transformer`, its
+    :py:meth:`Transformer.transform` method will be called to produce
+    the dataset for the next stage. The fitted model from a
+    :py:class:`Pipeline` is a :py:class:`PipelineModel`, which
+    consists of fitted models and transformers, corresponding to the
+    pipeline stages. If stages is an empty list, the pipeline acts as an
+    identity transformer.
+
+    .. versionadded:: 1.3.0
+    """
+
+    stages = Param(Params._dummy(), "stages", "a list of pipeline stages")
+
+    @keyword_only
+    def __init__(self, stages=None):
+        """
+        __init__(self, stages=None)
+        """
+        super(Pipeline, self).__init__()
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @since("1.3.0")
+    def setStages(self, value):
+        """
+        Set pipeline stages.
+
+        :param value: a list of transformers or estimators
+        :return: the pipeline instance
+        """
+        return self._set(stages=value)
+
+    @since("1.3.0")
+    def getStages(self):
+        """
+        Get pipeline stages.
+        """
+        return self.getOrDefault(self.stages)
+
+    @keyword_only
+    @since("1.3.0")
+    def setParams(self, stages=None):
+        """
+        setParams(self, stages=None)
+        Sets params for Pipeline.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _fit(self, dataset):
+        stages = self.getStages()
+        for stage in stages:
+            if not (isinstance(stage, Estimator) or isinstance(stage, Transformer)):
+                raise TypeError(
+                    "Cannot recognize a pipeline stage of type %s." % type(stage))
+        indexOfLastEstimator = -1
+        for i, stage in enumerate(stages):
+            if isinstance(stage, Estimator):
+                indexOfLastEstimator = i
+        transformers = []
+        for i, stage in enumerate(stages):
+            if i <= indexOfLastEstimator:
+                if isinstance(stage, Transformer):
+                    transformers.append(stage)
+                    dataset = stage.transform(dataset)
+                else:  # must be an Estimator
+                    model = stage.fit(dataset)
+                    transformers.append(model)
+                    if i < indexOfLastEstimator:
+                        dataset = model.transform(dataset)
+            else:
+                transformers.append(stage)
+        return PipelineModel(transformers)
+
+    @since("1.4.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance.
+
+        :param extra: extra parameters
+        :returns: new instance
+        """
+        if extra is None:
+            extra = dict()
+        that = Params.copy(self, extra)
+        stages = [stage.copy(extra) for stage in that.getStages()]
+        return that.setStages(stages)
+
+    @since("2.0.0")
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        allStagesAreJava = PipelineSharedReadWrite.checkStagesForJava(self.getStages())
+        if allStagesAreJava:
+            return JavaMLWriter(self)
+        return PipelineWriter(self)
+
+    @classmethod
+    @since("2.0.0")
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        return PipelineReader(cls)
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java Pipeline, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+        # Create a new instance of this stage.
+        py_stage = cls()
+        # Load information from java_stage to the instance.
+        py_stages = [JavaParams._from_java(s) for s in java_stage.getStages()]
+        py_stage.setStages(py_stages)
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java Pipeline.  Used for ML persistence.
+
+        :return: Java object equivalent to this instance.
+        """
+
+        gateway = SparkContext._gateway
+        cls = SparkContext._jvm.org.apache.spark.ml.PipelineStage
+        java_stages = gateway.new_array(cls, len(self.getStages()))
+        for idx, stage in enumerate(self.getStages()):
+            java_stages[idx] = stage._to_java()
+
+        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.Pipeline", self.uid)
+        _java_obj.setStages(java_stages)
+
+        return _java_obj
+
+
+@inherit_doc
+class PipelineWriter(MLWriter):
+    """
+    (Private) Specialization of :py:class:`MLWriter` for :py:class:`Pipeline` types
+    """
+
+    def __init__(self, instance):
+        super(PipelineWriter, self).__init__()
+        self.instance = instance
+
+    def saveImpl(self, path):
+        stages = self.instance.getStages()
+        PipelineSharedReadWrite.validateStages(stages)
+        PipelineSharedReadWrite.saveImpl(self.instance, stages, self.sc, path)
+
+
+@inherit_doc
+class PipelineReader(MLReader):
+    """
+    (Private) Specialization of :py:class:`MLReader` for :py:class:`Pipeline` types
+    """
+
+    def __init__(self, cls):
+        super(PipelineReader, self).__init__()
+        self.cls = cls
+
+    def load(self, path):
+        metadata = DefaultParamsReader.loadMetadata(path, self.sc)
+        if 'language' not in metadata['paramMap'] or metadata['paramMap']['language'] != 'Python':
+            return JavaMLReader(self.cls).load(path)
+        else:
+            uid, stages = PipelineSharedReadWrite.load(metadata, self.sc, path)
+            return Pipeline(stages=stages)._resetUid(uid)
+
+
+@inherit_doc
+class PipelineModelWriter(MLWriter):
+    """
+    (Private) Specialization of :py:class:`MLWriter` for :py:class:`PipelineModel` types
+    """
+
+    def __init__(self, instance):
+        super(PipelineModelWriter, self).__init__()
+        self.instance = instance
+
+    def saveImpl(self, path):
+        stages = self.instance.stages
+        PipelineSharedReadWrite.validateStages(stages)
+        PipelineSharedReadWrite.saveImpl(self.instance, stages, self.sc, path)
+
+
+@inherit_doc
+class PipelineModelReader(MLReader):
+    """
+    (Private) Specialization of :py:class:`MLReader` for :py:class:`PipelineModel` types
+    """
+
+    def __init__(self, cls):
+        super(PipelineModelReader, self).__init__()
+        self.cls = cls
+
+    def load(self, path):
+        metadata = DefaultParamsReader.loadMetadata(path, self.sc)
+        if 'language' not in metadata['paramMap'] or metadata['paramMap']['language'] != 'Python':
+            return JavaMLReader(self.cls).load(path)
+        else:
+            uid, stages = PipelineSharedReadWrite.load(metadata, self.sc, path)
+            return PipelineModel(stages=stages)._resetUid(uid)
+
+
+@inherit_doc
+class PipelineModel(Model, MLReadable, MLWritable):
+    """
+    Represents a compiled pipeline with transformers and fitted models.
+
+    .. versionadded:: 1.3.0
+    """
+
+    def __init__(self, stages):
+        super(PipelineModel, self).__init__()
+        self.stages = stages
+
+    def _transform(self, dataset):
+        for t in self.stages:
+            dataset = t.transform(dataset)
+        return dataset
+
+    @since("1.4.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance.
+
+        :param extra: extra parameters
+        :returns: new instance
+        """
+        if extra is None:
+            extra = dict()
+        stages = [stage.copy(extra) for stage in self.stages]
+        return PipelineModel(stages)
+
+    @since("2.0.0")
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        allStagesAreJava = PipelineSharedReadWrite.checkStagesForJava(self.stages)
+        if allStagesAreJava:
+            return JavaMLWriter(self)
+        return PipelineModelWriter(self)
+
+    @classmethod
+    @since("2.0.0")
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        return PipelineModelReader(cls)
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java PipelineModel, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+        # Load information from java_stage to the instance.
+        py_stages = [JavaParams._from_java(s) for s in java_stage.stages()]
+        # Create a new instance of this stage.
+        py_stage = cls(py_stages)
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java PipelineModel.  Used for ML persistence.
+
+        :return: Java object equivalent to this instance.
+        """
+
+        gateway = SparkContext._gateway
+        cls = SparkContext._jvm.org.apache.spark.ml.Transformer
+        java_stages = gateway.new_array(cls, len(self.stages))
+        for idx, stage in enumerate(self.stages):
+            java_stages[idx] = stage._to_java()
+
+        _java_obj =\
+            JavaParams._new_java_obj("org.apache.spark.ml.PipelineModel", self.uid, java_stages)
+
+        return _java_obj
+
+
+@inherit_doc
+class PipelineSharedReadWrite():
+    """
+    .. note:: DeveloperApi
+
+    Functions for :py:class:`MLReader` and :py:class:`MLWriter` shared between
+    :py:class:`Pipeline` and :py:class:`PipelineModel`
+
+    .. versionadded:: 2.3.0
+    """
+
+    @staticmethod
+    def checkStagesForJava(stages):
+        return all(isinstance(stage, JavaMLWritable) for stage in stages)
+
+    @staticmethod
+    def validateStages(stages):
+        """
+        Check that all stages are Writable
+        """
+        for stage in stages:
+            if not isinstance(stage, MLWritable):
+                raise ValueError("Pipeline write will fail on this pipeline "
+                                 "because stage %s of type %s is not MLWritable"
+                                 % (stage.uid, type(stage)))
+
+    @staticmethod
+    def saveImpl(instance, stages, sc, path):
+        """
+        Save metadata and stages for a :py:class:`Pipeline` or :py:class:`PipelineModel`
+        - save metadata to path/metadata
+        - save stages to stages/IDX_UID
+        """
+        stageUids = [stage.uid for stage in stages]
+        jsonParams = {'stageUids': stageUids, 'language': 'Python'}
+        DefaultParamsWriter.saveMetadata(instance, path, sc, paramMap=jsonParams)
+        stagesDir = os.path.join(path, "stages")
+        for index, stage in enumerate(stages):
+            stage.write().save(PipelineSharedReadWrite
+                               .getStagePath(stage.uid, index, len(stages), stagesDir))
+
+    @staticmethod
+    def load(metadata, sc, path):
+        """
+        Load metadata and stages for a :py:class:`Pipeline` or :py:class:`PipelineModel`
+
+        :return: (UID, list of stages)
+        """
+        stagesDir = os.path.join(path, "stages")
+        stageUids = metadata['paramMap']['stageUids']
+        stages = []
+        for index, stageUid in enumerate(stageUids):
+            stagePath = \
+                PipelineSharedReadWrite.getStagePath(stageUid, index, len(stageUids), stagesDir)
+            stage = DefaultParamsReader.loadParamsInstance(stagePath, sc)
+            stages.append(stage)
+        return (metadata['uid'], stages)
+
+    @staticmethod
+    def getStagePath(stageUid, stageIdx, numStages, stagesDir):
+        """
+        Get path for saving the given stage.
+        """
+        stageIdxDigits = len(str(numStages))
+        stageDir = str(stageIdx).zfill(stageIdxDigits) + "_" + stageUid
+        stagePath = os.path.join(stagesDir, stageDir)
+        return stagePath
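+
+# A minimal end-to-end sketch of Pipeline/PipelineModel, assuming an active
+# SparkSession and DataFrames `training` (with 'text' and 'label' columns) and
+# `test` (with a 'text' column):
+#
+#     from pyspark.ml import Pipeline
+#     from pyspark.ml.feature import HashingTF, Tokenizer
+#     from pyspark.ml.classification import LogisticRegression
+#
+#     tokenizer = Tokenizer(inputCol="text", outputCol="words")
+#     hashingTF = HashingTF(inputCol="words", outputCol="features")
+#     lr = LogisticRegression(maxIter=10)
+#     pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
+#
+#     model = pipeline.fit(training)        # fits estimator stages in order
+#     predictions = model.transform(test)   # applies every fitted stage in order
+#     model.save("/tmp/spark-pipeline-model")
+#
+# Because every stage above is Java-backed, write() delegates to JavaMLWriter;
+# the pure-Python path (PipelineSharedReadWrite.saveImpl) instead writes metadata
+# under path/metadata and each stage under path/stages/IDX_UID via getStagePath.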
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/recommendation.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/recommendation.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8eae9bd268d33445e7a785ceeab6784b10e7fab
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/recommendation.py
@@ -0,0 +1,485 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+from pyspark import since, keyword_only
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaEstimator, JavaModel
+from pyspark.ml.param.shared import *
+from pyspark.ml.common import inherit_doc
+
+
+__all__ = ['ALS', 'ALSModel']
+
+
+@inherit_doc
+class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, HasRegParam, HasSeed,
+          JavaMLWritable, JavaMLReadable):
+    """
+    Alternating Least Squares (ALS) matrix factorization.
+
+    ALS attempts to estimate the ratings matrix `R` as the product of
+    two lower-rank matrices, `X` and `Y`, i.e. `X * Yt = R`. Typically
+    these approximations are called 'factor' matrices. The general
+    approach is iterative. During each iteration, one of the factor
+    matrices is held constant, while the other is solved for using least
+    squares. The newly-solved factor matrix is then held constant while
+    solving for the other factor matrix.
+
+    This is a blocked implementation of the ALS factorization algorithm
+    that groups the two sets of factors (referred to as "users" and
+    "products") into blocks and reduces communication by only sending
+    one copy of each user vector to each product block on each
+    iteration, and only for the product blocks that need that user's
+    feature vector. This is achieved by pre-computing some information
+    about the ratings matrix to determine the "out-links" of each user
+    (which blocks of products it will contribute to) and "in-link"
+    information for each product (which of the feature vectors it
+    receives from each user block it will depend on). This allows us to
+    send only an array of feature vectors between each user block and
+    product block, and have the product block find the users' ratings
+    and update the products based on these messages.
+
+    For implicit preference data, the algorithm used is based on
+    `"Collaborative Filtering for Implicit Feedback Datasets",
+    <http://dx.doi.org/10.1109/ICDM.2008.22>`_, adapted for the blocked
+    approach used here.
+
+    Essentially instead of finding the low-rank approximations to the
+    rating matrix `R`, this finds the approximations for a preference
+    matrix `P` where the elements of `P` are 1 if r > 0 and 0 if r <= 0.
+    The ratings then act as 'confidence' values related to strength of
+    indicated user preferences rather than explicit ratings given to
+    items.
+
+    >>> df = spark.createDataFrame(
+    ...     [(0, 0, 4.0), (0, 1, 2.0), (1, 1, 3.0), (1, 2, 4.0), (2, 1, 1.0), (2, 2, 5.0)],
+    ...     ["user", "item", "rating"])
+    >>> als = ALS(rank=10, maxIter=5, seed=0)
+    >>> model = als.fit(df)
+    >>> model.rank
+    10
+    >>> model.userFactors.orderBy("id").collect()
+    [Row(id=0, features=[...]), Row(id=1, ...), Row(id=2, ...)]
+    >>> test = spark.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"])
+    >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0])
+    >>> predictions[0]
+    Row(user=0, item=2, prediction=-0.13807615637779236)
+    >>> predictions[1]
+    Row(user=1, item=0, prediction=2.6258413791656494)
+    >>> predictions[2]
+    Row(user=2, item=0, prediction=-1.5018409490585327)
+    >>> user_recs = model.recommendForAllUsers(3)
+    >>> user_recs.where(user_recs.user == 0)\
+        .select("recommendations.item", "recommendations.rating").collect()
+    [Row(item=[0, 1, 2], rating=[3.910..., 1.992..., -0.138...])]
+    >>> item_recs = model.recommendForAllItems(3)
+    >>> item_recs.where(item_recs.item == 2)\
+        .select("recommendations.user", "recommendations.rating").collect()
+    [Row(user=[2, 1, 0], rating=[4.901..., 3.981..., -0.138...])]
+    >>> user_subset = df.where(df.user == 2)
+    >>> user_subset_recs = model.recommendForUserSubset(user_subset, 3)
+    >>> user_subset_recs.select("recommendations.item", "recommendations.rating").first()
+    Row(item=[2, 1, 0], rating=[4.901..., 1.056..., -1.501...])
+    >>> item_subset = df.where(df.item == 0)
+    >>> item_subset_recs = model.recommendForItemSubset(item_subset, 3)
+    >>> item_subset_recs.select("recommendations.user", "recommendations.rating").first()
+    Row(user=[0, 1, 2], rating=[3.910..., 2.625..., -1.501...])
+    >>> als_path = temp_path + "/als"
+    >>> als.save(als_path)
+    >>> als2 = ALS.load(als_path)
+    >>> als.getMaxIter()
+    5
+    >>> model_path = temp_path + "/als_model"
+    >>> model.save(model_path)
+    >>> model2 = ALSModel.load(model_path)
+    >>> model.rank == model2.rank
+    True
+    >>> sorted(model.userFactors.collect()) == sorted(model2.userFactors.collect())
+    True
+    >>> sorted(model.itemFactors.collect()) == sorted(model2.itemFactors.collect())
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    rank = Param(Params._dummy(), "rank", "rank of the factorization",
+                 typeConverter=TypeConverters.toInt)
+    numUserBlocks = Param(Params._dummy(), "numUserBlocks", "number of user blocks",
+                          typeConverter=TypeConverters.toInt)
+    numItemBlocks = Param(Params._dummy(), "numItemBlocks", "number of item blocks",
+                          typeConverter=TypeConverters.toInt)
+    implicitPrefs = Param(Params._dummy(), "implicitPrefs", "whether to use implicit preference",
+                          typeConverter=TypeConverters.toBoolean)
+    alpha = Param(Params._dummy(), "alpha", "alpha for implicit preference",
+                  typeConverter=TypeConverters.toFloat)
+    userCol = Param(Params._dummy(), "userCol", "column name for user ids. Ids must be within " +
+                    "the integer value range.", typeConverter=TypeConverters.toString)
+    itemCol = Param(Params._dummy(), "itemCol", "column name for item ids. Ids must be within " +
+                    "the integer value range.", typeConverter=TypeConverters.toString)
+    ratingCol = Param(Params._dummy(), "ratingCol", "column name for ratings",
+                      typeConverter=TypeConverters.toString)
+    nonnegative = Param(Params._dummy(), "nonnegative",
+                        "whether to use nonnegative constraint for least squares",
+                        typeConverter=TypeConverters.toBoolean)
+    intermediateStorageLevel = Param(Params._dummy(), "intermediateStorageLevel",
+                                     "StorageLevel for intermediate datasets. Cannot be 'NONE'.",
+                                     typeConverter=TypeConverters.toString)
+    finalStorageLevel = Param(Params._dummy(), "finalStorageLevel",
+                              "StorageLevel for ALS model factors.",
+                              typeConverter=TypeConverters.toString)
+    coldStartStrategy = Param(Params._dummy(), "coldStartStrategy", "strategy for dealing with " +
+                              "unknown or new users/items at prediction time. This may be useful " +
+                              "in cross-validation or production scenarios, for handling " +
+                              "user/item ids the model has not seen in the training data. " +
+                              "Supported values: 'nan', 'drop'.",
+                              typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
+                 implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None,
+                 ratingCol="rating", nonnegative=False, checkpointInterval=10,
+                 intermediateStorageLevel="MEMORY_AND_DISK",
+                 finalStorageLevel="MEMORY_AND_DISK", coldStartStrategy="nan"):
+        """
+        __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+                 implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, \
+                 ratingCol="rating", nonnegative=False, checkpointInterval=10, \
+                 intermediateStorageLevel="MEMORY_AND_DISK", \
+                 finalStorageLevel="MEMORY_AND_DISK", coldStartStrategy="nan")
+        """
+        super(ALS, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.recommendation.ALS", self.uid)
+        self._setDefault(rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
+                         implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item",
+                         ratingCol="rating", nonnegative=False, checkpointInterval=10,
+                         intermediateStorageLevel="MEMORY_AND_DISK",
+                         finalStorageLevel="MEMORY_AND_DISK", coldStartStrategy="nan")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10,
+                  implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None,
+                  ratingCol="rating", nonnegative=False, checkpointInterval=10,
+                  intermediateStorageLevel="MEMORY_AND_DISK",
+                  finalStorageLevel="MEMORY_AND_DISK", coldStartStrategy="nan"):
+        """
+        setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \
+                 implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, \
+                 ratingCol="rating", nonnegative=False, checkpointInterval=10, \
+                 intermediateStorageLevel="MEMORY_AND_DISK", \
+                 finalStorageLevel="MEMORY_AND_DISK", coldStartStrategy="nan")
+        Sets params for ALS.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return ALSModel(java_model)
+
+    @since("1.4.0")
+    def setRank(self, value):
+        """
+        Sets the value of :py:attr:`rank`.
+        """
+        return self._set(rank=value)
+
+    @since("1.4.0")
+    def getRank(self):
+        """
+        Gets the value of rank or its default value.
+        """
+        return self.getOrDefault(self.rank)
+
+    @since("1.4.0")
+    def setNumUserBlocks(self, value):
+        """
+        Sets the value of :py:attr:`numUserBlocks`.
+        """
+        return self._set(numUserBlocks=value)
+
+    @since("1.4.0")
+    def getNumUserBlocks(self):
+        """
+        Gets the value of numUserBlocks or its default value.
+        """
+        return self.getOrDefault(self.numUserBlocks)
+
+    @since("1.4.0")
+    def setNumItemBlocks(self, value):
+        """
+        Sets the value of :py:attr:`numItemBlocks`.
+        """
+        return self._set(numItemBlocks=value)
+
+    @since("1.4.0")
+    def getNumItemBlocks(self):
+        """
+        Gets the value of numItemBlocks or its default value.
+        """
+        return self.getOrDefault(self.numItemBlocks)
+
+    @since("1.4.0")
+    def setNumBlocks(self, value):
+        """
+        Sets both :py:attr:`numUserBlocks` and :py:attr:`numItemBlocks` to the given value.
+        """
+        self._set(numUserBlocks=value)
+        return self._set(numItemBlocks=value)
+
+    @since("1.4.0")
+    def setImplicitPrefs(self, value):
+        """
+        Sets the value of :py:attr:`implicitPrefs`.
+        """
+        return self._set(implicitPrefs=value)
+
+    @since("1.4.0")
+    def getImplicitPrefs(self):
+        """
+        Gets the value of implicitPrefs or its default value.
+        """
+        return self.getOrDefault(self.implicitPrefs)
+
+    @since("1.4.0")
+    def setAlpha(self, value):
+        """
+        Sets the value of :py:attr:`alpha`.
+        """
+        return self._set(alpha=value)
+
+    @since("1.4.0")
+    def getAlpha(self):
+        """
+        Gets the value of alpha or its default value.
+        """
+        return self.getOrDefault(self.alpha)
+
+    @since("1.4.0")
+    def setUserCol(self, value):
+        """
+        Sets the value of :py:attr:`userCol`.
+        """
+        return self._set(userCol=value)
+
+    @since("1.4.0")
+    def getUserCol(self):
+        """
+        Gets the value of userCol or its default value.
+        """
+        return self.getOrDefault(self.userCol)
+
+    @since("1.4.0")
+    def setItemCol(self, value):
+        """
+        Sets the value of :py:attr:`itemCol`.
+        """
+        return self._set(itemCol=value)
+
+    @since("1.4.0")
+    def getItemCol(self):
+        """
+        Gets the value of itemCol or its default value.
+        """
+        return self.getOrDefault(self.itemCol)
+
+    @since("1.4.0")
+    def setRatingCol(self, value):
+        """
+        Sets the value of :py:attr:`ratingCol`.
+        """
+        return self._set(ratingCol=value)
+
+    @since("1.4.0")
+    def getRatingCol(self):
+        """
+        Gets the value of ratingCol or its default value.
+        """
+        return self.getOrDefault(self.ratingCol)
+
+    @since("1.4.0")
+    def setNonnegative(self, value):
+        """
+        Sets the value of :py:attr:`nonnegative`.
+        """
+        return self._set(nonnegative=value)
+
+    @since("1.4.0")
+    def getNonnegative(self):
+        """
+        Gets the value of nonnegative or its default value.
+        """
+        return self.getOrDefault(self.nonnegative)
+
+    @since("2.0.0")
+    def setIntermediateStorageLevel(self, value):
+        """
+        Sets the value of :py:attr:`intermediateStorageLevel`.
+        """
+        return self._set(intermediateStorageLevel=value)
+
+    @since("2.0.0")
+    def getIntermediateStorageLevel(self):
+        """
+        Gets the value of intermediateStorageLevel or its default value.
+        """
+        return self.getOrDefault(self.intermediateStorageLevel)
+
+    @since("2.0.0")
+    def setFinalStorageLevel(self, value):
+        """
+        Sets the value of :py:attr:`finalStorageLevel`.
+        """
+        return self._set(finalStorageLevel=value)
+
+    @since("2.0.0")
+    def getFinalStorageLevel(self):
+        """
+        Gets the value of finalStorageLevel or its default value.
+        """
+        return self.getOrDefault(self.finalStorageLevel)
+
+    @since("2.2.0")
+    def setColdStartStrategy(self, value):
+        """
+        Sets the value of :py:attr:`coldStartStrategy`.
+        """
+        return self._set(coldStartStrategy=value)
+
+    @since("2.2.0")
+    def getColdStartStrategy(self):
+        """
+        Gets the value of coldStartStrategy or its default value.
+        """
+        return self.getOrDefault(self.coldStartStrategy)
+
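+# A minimal usage sketch (illustrative comment only, not executed): fitting ALS on a
+# hypothetical `ratings` DataFrame with "user", "item" and "rating" columns, then
+# scoring a held-out `test` DataFrame while dropping ids unseen during training.
+#
+#   als = ALS(rank=10, maxIter=10, regParam=0.1, coldStartStrategy="drop")
+#   model = als.fit(ratings)
+#   predictions = model.transform(test)          # rows that would be NaN are dropped
+#   top3PerUser = model.recommendForAllUsers(3)  # (user, recommendations) per user
+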
+
+class ALSModel(JavaModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by ALS.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("1.4.0")
+    def rank(self):
+        """rank of the matrix factorization model"""
+        return self._call_java("rank")
+
+    @property
+    @since("1.4.0")
+    def userFactors(self):
+        """
+        a DataFrame that stores user factors in two columns: `id` and
+        `features`
+        """
+        return self._call_java("userFactors")
+
+    @property
+    @since("1.4.0")
+    def itemFactors(self):
+        """
+        a DataFrame that stores item factors in two columns: `id` and
+        `features`
+        """
+        return self._call_java("itemFactors")
+
+    @since("2.2.0")
+    def recommendForAllUsers(self, numItems):
+        """
+        Returns top `numItems` items recommended for each user, for all users.
+
+        :param numItems: max number of recommendations for each user
+        :return: a DataFrame of (userCol, recommendations), where recommendations are
+                 stored as an array of (itemCol, rating) Rows.
+        """
+        return self._call_java("recommendForAllUsers", numItems)
+
+    @since("2.2.0")
+    def recommendForAllItems(self, numUsers):
+        """
+        Returns top `numUsers` users recommended for each item, for all items.
+
+        :param numUsers: max number of recommendations for each item
+        :return: a DataFrame of (itemCol, recommendations), where recommendations are
+                 stored as an array of (userCol, rating) Rows.
+        """
+        return self._call_java("recommendForAllItems", numUsers)
+
+    @since("2.3.0")
+    def recommendForUserSubset(self, dataset, numItems):
+        """
+        Returns top `numItems` items recommended for each user id in the input data set. Note that
+        if there are duplicate ids in the input dataset, only one set of recommendations per unique
+        id will be returned.
+
+        :param dataset: a Dataset containing a column of user ids. The column name must match
+                        `userCol`.
+        :param numItems: max number of recommendations for each user
+        :return: a DataFrame of (userCol, recommendations), where recommendations are
+                 stored as an array of (itemCol, rating) Rows.
+        """
+        return self._call_java("recommendForUserSubset", dataset, numItems)
+
+    @since("2.3.0")
+    def recommendForItemSubset(self, dataset, numUsers):
+        """
+        Returns top `numUsers` users recommended for each item id in the input data set. Note that
+        if there are duplicate ids in the input dataset, only one set of recommendations per unique
+        id will be returned.
+
+        :param dataset: a Dataset containing a column of item ids. The column name must match
+                        `itemCol`.
+        :param numUsers: max number of recommendations for each item
+        :return: a DataFrame of (itemCol, recommendations), where recommendations are
+                 stored as an array of (userCol, rating) Rows.
+        """
+        return self._call_java("recommendForItemSubset", dataset, numUsers)
+
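+# A small sketch (illustrative comment only, not executed): restricting
+# recommendations to a subset of ids, reusing the hypothetical `model` and
+# `ratings` from the sketch above.
+#
+#   someUsers = ratings.select("user").distinct().limit(3)
+#   model.recommendForUserSubset(someUsers, 5)   # top 5 items for those users
+#   someItems = ratings.select("item").distinct().limit(3)
+#   model.recommendForItemSubset(someItems, 5)   # top 5 users for those items
+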
+
+if __name__ == "__main__":
+    import doctest
+    import pyspark.ml.recommendation
+    from pyspark.sql import SparkSession
+    globs = pyspark.ml.recommendation.__dict__.copy()
+    # Create a local SparkSession for running this module's doctests:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.recommendation tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    import tempfile
+    temp_path = tempfile.mkdtemp()
+    globs['temp_path'] = temp_path
+    try:
+        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        spark.stop()
+    finally:
+        from shutil import rmtree
+        try:
+            rmtree(temp_path)
+        except OSError:
+            pass
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/regression.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/regression.py
new file mode 100644
index 0000000000000000000000000000000000000000..98f436135184736c83ad3a1055e957085142fd3f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/regression.py
@@ -0,0 +1,1874 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import warnings
+
+from pyspark import since, keyword_only
+from pyspark.ml.param.shared import *
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaWrapper
+from pyspark.ml.common import inherit_doc
+from pyspark.sql import DataFrame
+
+
+__all__ = ['AFTSurvivalRegression', 'AFTSurvivalRegressionModel',
+           'DecisionTreeRegressor', 'DecisionTreeRegressionModel',
+           'GBTRegressor', 'GBTRegressionModel',
+           'GeneralizedLinearRegression', 'GeneralizedLinearRegressionModel',
+           'GeneralizedLinearRegressionSummary', 'GeneralizedLinearRegressionTrainingSummary',
+           'IsotonicRegression', 'IsotonicRegressionModel',
+           'LinearRegression', 'LinearRegressionModel',
+           'LinearRegressionSummary', 'LinearRegressionTrainingSummary',
+           'RandomForestRegressor', 'RandomForestRegressionModel']
+
+
+@inherit_doc
+class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
+                       HasRegParam, HasTol, HasElasticNetParam, HasFitIntercept,
+                       HasStandardization, HasSolver, HasWeightCol, HasAggregationDepth, HasLoss,
+                       JavaMLWritable, JavaMLReadable):
+    """
+    Linear regression.
+
+    The learning objective is to minimize the specified loss function, with regularization.
+    This supports two kinds of loss:
+
+    * squaredError (a.k.a. squared loss)
+    * huber (a hybrid of squared error for relatively small errors and absolute error for \
+    relatively large ones, and we estimate the scale parameter from training data)
+
+    This supports multiple types of regularization:
+
+    * none (a.k.a. ordinary least squares)
+    * L2 (ridge regression)
+    * L1 (Lasso)
+    * L2 + L1 (elastic net)
+
+    Note: Fitting with huber loss only supports none and L2 regularization.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (1.0, 2.0, Vectors.dense(1.0)),
+    ...     (0.0, 2.0, Vectors.sparse(1, [], []))], ["label", "weight", "features"])
+    >>> lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", weightCol="weight")
+    >>> model = lr.fit(df)
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> abs(model.transform(test0).head().prediction - (-1.0)) < 0.001
+    True
+    >>> abs(model.coefficients[0] - 1.0) < 0.001
+    True
+    >>> abs(model.intercept - 0.0) < 0.001
+    True
+    >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
+    >>> abs(model.transform(test1).head().prediction - 1.0) < 0.001
+    True
+    >>> lr.setParams("vector")
+    Traceback (most recent call last):
+        ...
+    TypeError: Method setParams forces keyword arguments.
+    >>> lr_path = temp_path + "/lr"
+    >>> lr.save(lr_path)
+    >>> lr2 = LinearRegression.load(lr_path)
+    >>> lr2.getMaxIter()
+    5
+    >>> model_path = temp_path + "/lr_model"
+    >>> model.save(model_path)
+    >>> model2 = LinearRegressionModel.load(model_path)
+    >>> model.coefficients[0] == model2.coefficients[0]
+    True
+    >>> model.intercept == model2.intercept
+    True
+    >>> model.numFeatures
+    1
+    >>> model.write().format("pmml").save(model_path + "_2")
+
+    .. versionadded:: 1.4.0
+    """
+
+    solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " +
+                   "options: auto, normal, l-bfgs.", typeConverter=TypeConverters.toString)
+
+    loss = Param(Params._dummy(), "loss", "The loss function to be optimized. Supported " +
+                 "options: squaredError, huber.", typeConverter=TypeConverters.toString)
+
+    epsilon = Param(Params._dummy(), "epsilon", "The shape parameter to control the amount of " +
+                    "robustness. Must be > 1.0. Only valid when loss is huber",
+                    typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+                 standardization=True, solver="auto", weightCol=None, aggregationDepth=2,
+                 loss="squaredError", epsilon=1.35):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
+                 standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \
+                 loss="squaredError", epsilon=1.35)
+        """
+        super(LinearRegression, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.regression.LinearRegression", self.uid)
+        self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, loss="squaredError", epsilon=1.35)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True,
+                  standardization=True, solver="auto", weightCol=None, aggregationDepth=2,
+                  loss="squaredError", epsilon=1.35):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \
+                  standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \
+                  loss="squaredError", epsilon=1.35)
+        Sets params for linear regression.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return LinearRegressionModel(java_model)
+
+    @since("2.3.0")
+    def setEpsilon(self, value):
+        """
+        Sets the value of :py:attr:`epsilon`.
+        """
+        return self._set(epsilon=value)
+
+    @since("2.3.0")
+    def getEpsilon(self):
+        """
+        Gets the value of epsilon or its default value.
+        """
+        return self.getOrDefault(self.epsilon)
+
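+# A minimal sketch (illustrative comment only, not executed): robust regression
+# with the huber loss, assuming a hypothetical `train` DataFrame with "label" and
+# "features" columns. Huber supports only none/L2 regularization, so
+# elasticNetParam is left at 0.0.
+#
+#   lr = LinearRegression(loss="huber", epsilon=1.35, regParam=0.1, elasticNetParam=0.0)
+#   model = lr.fit(train)
+#   model.coefficients, model.intercept, model.scale
+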
+
+class LinearRegressionModel(JavaModel, JavaPredictionModel, GeneralJavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by :class:`LinearRegression`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def coefficients(self):
+        """
+        Model coefficients.
+        """
+        return self._call_java("coefficients")
+
+    @property
+    @since("1.4.0")
+    def intercept(self):
+        """
+        Model intercept.
+        """
+        return self._call_java("intercept")
+
+    @property
+    @since("2.3.0")
+    def scale(self):
+        r"""
+        The value by which :math:`\|y - X'w\|` is scaled down when loss is "huber", otherwise 1.0.
+        """
+        return self._call_java("scale")
+
+    @property
+    @since("2.0.0")
+    def summary(self):
+        """
+        Gets summary (e.g. residuals, mse, r-squared) of the model on the
+        training set. An exception is thrown if no training summary exists
+        (i.e. `trainingSummary is None`).
+        """
+        if self.hasSummary:
+            java_lrt_summary = self._call_java("summary")
+            return LinearRegressionTrainingSummary(java_lrt_summary)
+        else:
+            raise RuntimeError("No training summary available for this %s" %
+                               self.__class__.__name__)
+
+    @property
+    @since("2.0.0")
+    def hasSummary(self):
+        """
+        Indicates whether a training summary exists for this model
+        instance.
+        """
+        return self._call_java("hasSummary")
+
+    @since("2.0.0")
+    def evaluate(self, dataset):
+        """
+        Evaluates the model on a test dataset.
+
+        :param dataset:
+          Test dataset to evaluate model on, where dataset is an
+          instance of :py:class:`pyspark.sql.DataFrame`
+        """
+        if not isinstance(dataset, DataFrame):
+            raise ValueError("dataset must be a DataFrame but got %s." % type(dataset))
+        java_lr_summary = self._call_java("evaluate", dataset)
+        return LinearRegressionSummary(java_lr_summary)
+
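+# A minimal sketch (illustrative comment only, not executed): reading training
+# metrics and re-evaluating on held-out data, assuming a fitted
+# LinearRegressionModel `model` and a hypothetical `test` DataFrame.
+#
+#   if model.hasSummary:
+#       print(model.summary.rootMeanSquaredError, model.summary.r2)
+#   testSummary = model.evaluate(test)
+#   print(testSummary.meanAbsoluteError)
+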
+
+class LinearRegressionSummary(JavaWrapper):
+    """
+    .. note:: Experimental
+
+    Linear regression results evaluated on a dataset.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def predictions(self):
+        """
+        DataFrame produced by the model's `transform` method.
+        """
+        return self._call_java("predictions")
+
+    @property
+    @since("2.0.0")
+    def predictionCol(self):
+        """
+        Field in "predictions" which gives the predicted value of
+        the label at each instance.
+        """
+        return self._call_java("predictionCol")
+
+    @property
+    @since("2.0.0")
+    def labelCol(self):
+        """
+        Field in "predictions" which gives the true label of each
+        instance.
+        """
+        return self._call_java("labelCol")
+
+    @property
+    @since("2.0.0")
+    def featuresCol(self):
+        """
+        Field in "predictions" which gives the features of each instance
+        as a vector.
+        """
+        return self._call_java("featuresCol")
+
+    @property
+    @since("2.0.0")
+    def explainedVariance(self):
+        r"""
+        Returns the explained variance regression score.
+        explainedVariance = :math:`1 - \frac{variance(y - \hat{y})}{variance(y)}`
+
+        .. seealso:: `Wikipedia explain variation
+            <http://en.wikipedia.org/wiki/Explained_variation>`_
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LinearRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("explainedVariance")
+
+    @property
+    @since("2.0.0")
+    def meanAbsoluteError(self):
+        """
+        Returns the mean absolute error, which is a risk function
+        corresponding to the expected value of the absolute error
+        loss or l1-norm loss.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LinearRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("meanAbsoluteError")
+
+    @property
+    @since("2.0.0")
+    def meanSquaredError(self):
+        """
+        Returns the mean squared error, which is a risk function
+        corresponding to the expected value of the squared error
+        loss or quadratic loss.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LinearRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("meanSquaredError")
+
+    @property
+    @since("2.0.0")
+    def rootMeanSquaredError(self):
+        """
+        Returns the root mean squared error, which is defined as the
+        square root of the mean squared error.
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LinearRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("rootMeanSquaredError")
+
+    @property
+    @since("2.0.0")
+    def r2(self):
+        """
+        Returns R^2, the coefficient of determination.
+
+        .. seealso:: `Wikipedia coefficient of determination
+            <http://en.wikipedia.org/wiki/Coefficient_of_determination>`_
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LinearRegression.weightCol`. This will change in later Spark
+            versions.
+        """
+        return self._call_java("r2")
+
+    @property
+    @since("2.4.0")
+    def r2adj(self):
+        """
+        Returns Adjusted R^2, the adjusted coefficient of determination.
+
+        .. seealso:: `Wikipedia coefficient of determination, Adjusted R^2
+            <https://en.wikipedia.org/wiki/Coefficient_of_determination#Adjusted_R2>`_
+
+        .. note:: This ignores instance weights (setting all to 1.0) from
+            `LinearRegression.weightCol`. This will change in later Spark versions.
+        """
+        return self._call_java("r2adj")
+
+    @property
+    @since("2.0.0")
+    def residuals(self):
+        """
+        Residuals (label - predicted value)
+        """
+        return self._call_java("residuals")
+
+    @property
+    @since("2.0.0")
+    def numInstances(self):
+        """
+        Number of instances in the `predictions` DataFrame.
+        """
+        return self._call_java("numInstances")
+
+    @property
+    @since("2.2.0")
+    def degreesOfFreedom(self):
+        """
+        Degrees of freedom.
+        """
+        return self._call_java("degreesOfFreedom")
+
+    @property
+    @since("2.0.0")
+    def devianceResiduals(self):
+        """
+        The weighted residuals, the usual residuals rescaled by the
+        square root of the instance weights.
+        """
+        return self._call_java("devianceResiduals")
+
+    @property
+    @since("2.0.0")
+    def coefficientStandardErrors(self):
+        """
+        Standard error of estimated coefficients and intercept.
+        This value is only available when using the "normal" solver.
+
+        If :py:attr:`LinearRegression.fitIntercept` is set to True,
+        then the last element returned corresponds to the intercept.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
+        """
+        return self._call_java("coefficientStandardErrors")
+
+    @property
+    @since("2.0.0")
+    def tValues(self):
+        """
+        T-statistic of estimated coefficients and intercept.
+        This value is only available when using the "normal" solver.
+
+        If :py:attr:`LinearRegression.fitIntercept` is set to True,
+        then the last element returned corresponds to the intercept.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
+        """
+        return self._call_java("tValues")
+
+    @property
+    @since("2.0.0")
+    def pValues(self):
+        """
+        Two-sided p-value of estimated coefficients and intercept.
+        This value is only available when using the "normal" solver.
+
+        If :py:attr:`LinearRegression.fitIntercept` is set to True,
+        then the last element returned corresponds to the intercept.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
+        """
+        return self._call_java("pValues")
+
+
+@inherit_doc
+class LinearRegressionTrainingSummary(LinearRegressionSummary):
+    """
+    .. note:: Experimental
+
+    Linear regression training results. Currently, the training summary ignores the
+    training weights except for the objective trace.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def objectiveHistory(self):
+        """
+        Objective function (scaled loss + regularization) at each
+        iteration.
+        This value is only available when using the "l-bfgs" solver.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
+        """
+        return self._call_java("objectiveHistory")
+
+    @property
+    @since("2.0.0")
+    def totalIterations(self):
+        """
+        Number of training iterations until termination.
+        This value is only available when using the "l-bfgs" solver.
+
+        .. seealso:: :py:attr:`LinearRegression.solver`
+        """
+        return self._call_java("totalIterations")
+
+
+@inherit_doc
+class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
+                         HasWeightCol, JavaMLWritable, JavaMLReadable):
+    """
+    Isotonic regression, currently implemented using a parallelized pool adjacent
+    violators algorithm. Only a univariate (single feature) algorithm is supported.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> ir = IsotonicRegression()
+    >>> model = ir.fit(df)
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> model.transform(test0).head().prediction
+    0.0
+    >>> model.boundaries
+    DenseVector([0.0, 1.0])
+    >>> ir_path = temp_path + "/ir"
+    >>> ir.save(ir_path)
+    >>> ir2 = IsotonicRegression.load(ir_path)
+    >>> ir2.getIsotonic()
+    True
+    >>> model_path = temp_path + "/ir_model"
+    >>> model.save(model_path)
+    >>> model2 = IsotonicRegressionModel.load(model_path)
+    >>> model.boundaries == model2.boundaries
+    True
+    >>> model.predictions == model2.predictions
+    True
+
+    .. versionadded:: 1.6.0
+    """
+
+    isotonic = \
+        Param(Params._dummy(), "isotonic",
+              "whether the output sequence should be isotonic/increasing (true) or " +
+              "antitonic/decreasing (false).", typeConverter=TypeConverters.toBoolean)
+    featureIndex = \
+        Param(Params._dummy(), "featureIndex",
+              "The index of the feature if featuresCol is a vector column, no effect otherwise.",
+              typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 weightCol=None, isotonic=True, featureIndex=0):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 weightCol=None, isotonic=True, featureIndex=0)
+        """
+        super(IsotonicRegression, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.regression.IsotonicRegression", self.uid)
+        self._setDefault(isotonic=True, featureIndex=0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  weightCol=None, isotonic=True, featureIndex=0):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  weightCol=None, isotonic=True, featureIndex=0)
+        Set the params for IsotonicRegression.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return IsotonicRegressionModel(java_model)
+
+    def setIsotonic(self, value):
+        """
+        Sets the value of :py:attr:`isotonic`.
+        """
+        return self._set(isotonic=value)
+
+    def getIsotonic(self):
+        """
+        Gets the value of isotonic or its default value.
+        """
+        return self.getOrDefault(self.isotonic)
+
+    def setFeatureIndex(self, value):
+        """
+        Sets the value of :py:attr:`featureIndex`.
+        """
+        return self._set(featureIndex=value)
+
+    def getFeatureIndex(self):
+        """
+        Gets the value of featureIndex or its default value.
+        """
+        return self.getOrDefault(self.featureIndex)
+
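+# A small sketch (illustrative comment only, not executed): fitting an antitonic
+# (decreasing) sequence on a hypothetical `df`, choosing which element of a
+# multi-element "features" vector column to use.
+#
+#   ir = IsotonicRegression(isotonic=False, featureIndex=1)
+#   model = ir.fit(df)
+#   model.boundaries, model.predictions
+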
+
+class IsotonicRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by :class:`IsotonicRegression`.
+
+    .. versionadded:: 1.6.0
+    """
+
+    @property
+    @since("1.6.0")
+    def boundaries(self):
+        """
+        Boundaries in increasing order for which predictions are known.
+        """
+        return self._call_java("boundaries")
+
+    @property
+    @since("1.6.0")
+    def predictions(self):
+        """
+        Predictions associated with the boundaries at the same index, monotone because of isotonic
+        regression.
+        """
+        return self._call_java("predictions")
+
+
+class TreeEnsembleParams(DecisionTreeParams):
+    """
+    Mixin for parameters of Decision Tree-based ensemble algorithms.
+    """
+
+    subsamplingRate = Param(Params._dummy(), "subsamplingRate", "Fraction of the training data " +
+                            "used for learning each decision tree, in range (0, 1].",
+                            typeConverter=TypeConverters.toFloat)
+
+    supportedFeatureSubsetStrategies = ["auto", "all", "onethird", "sqrt", "log2"]
+
+    featureSubsetStrategy = \
+        Param(Params._dummy(), "featureSubsetStrategy",
+              "The number of features to consider for splits at each tree node. Supported " +
+              "options: 'auto' (choose automatically for task: If numTrees == 1, set to " +
+              "'all'. If numTrees > 1 (forest), set to 'sqrt' for classification and to " +
+              "'onethird' for regression), 'all' (use all features), 'onethird' (use " +
+              "1/3 of the features), 'sqrt' (use sqrt(number of features)), 'log2' (use " +
+              "log2(number of features)), 'n' (when n is in the range (0, 1.0], use " +
+              "n * number of features. When n is in the range (1, number of features), use" +
+              " n features). default = 'auto'", typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(TreeEnsembleParams, self).__init__()
+
+    @since("1.4.0")
+    def setSubsamplingRate(self, value):
+        """
+        Sets the value of :py:attr:`subsamplingRate`.
+        """
+        return self._set(subsamplingRate=value)
+
+    @since("1.4.0")
+    def getSubsamplingRate(self):
+        """
+        Gets the value of subsamplingRate or its default value.
+        """
+        return self.getOrDefault(self.subsamplingRate)
+
+    @since("1.4.0")
+    def setFeatureSubsetStrategy(self, value):
+        """
+        Sets the value of :py:attr:`featureSubsetStrategy`.
+
+        .. note:: Deprecated in 2.4.0 and will be removed in 3.0.0.
+        """
+        return self._set(featureSubsetStrategy=value)
+
+    @since("1.4.0")
+    def getFeatureSubsetStrategy(self):
+        """
+        Gets the value of featureSubsetStrategy or its default value.
+        """
+        return self.getOrDefault(self.featureSubsetStrategy)
+
+
+class TreeRegressorParams(Params):
+    """
+    Private class to track supported impurity measures.
+    """
+
+    supportedImpurities = ["variance"]
+    impurity = Param(Params._dummy(), "impurity",
+                     "Criterion used for information gain calculation (case-insensitive). " +
+                     "Supported options: " +
+                     ", ".join(supportedImpurities), typeConverter=TypeConverters.toString)
+
+    def __init__(self):
+        super(TreeRegressorParams, self).__init__()
+
+    @since("1.4.0")
+    def setImpurity(self, value):
+        """
+        Sets the value of :py:attr:`impurity`.
+        """
+        return self._set(impurity=value)
+
+    @since("1.4.0")
+    def getImpurity(self):
+        """
+        Gets the value of impurity or its default value.
+        """
+        return self.getOrDefault(self.impurity)
+
+
+class RandomForestParams(TreeEnsembleParams):
+    """
+    Private class to track supported random forest parameters.
+    """
+
+    numTrees = Param(Params._dummy(), "numTrees", "Number of trees to train (>= 1).",
+                     typeConverter=TypeConverters.toInt)
+
+    def __init__(self):
+        super(RandomForestParams, self).__init__()
+
+    @since("1.4.0")
+    def setNumTrees(self, value):
+        """
+        Sets the value of :py:attr:`numTrees`.
+        """
+        return self._set(numTrees=value)
+
+    @since("1.4.0")
+    def getNumTrees(self):
+        """
+        Gets the value of numTrees or its default value.
+        """
+        return self.getOrDefault(self.numTrees)
+
+
+class GBTParams(TreeEnsembleParams):
+    """
+    Private class to track supported GBT params.
+    """
+    supportedLossTypes = ["squared", "absolute"]
+
+
+@inherit_doc
+class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
+                            DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval,
+                            HasSeed, JavaMLWritable, JavaMLReadable, HasVarianceCol):
+    """
+    `Decision tree <http://en.wikipedia.org/wiki/Decision_tree_learning>`_
+    learning algorithm for regression.
+    It supports both continuous and categorical features.
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> dt = DecisionTreeRegressor(maxDepth=2, varianceCol="variance")
+    >>> model = dt.fit(df)
+    >>> model.depth
+    1
+    >>> model.numNodes
+    3
+    >>> model.featureImportances
+    SparseVector(1, {0: 1.0})
+    >>> model.numFeatures
+    1
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> model.transform(test0).head().prediction
+    0.0
+    >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
+    >>> model.transform(test1).head().prediction
+    1.0
+    >>> dtr_path = temp_path + "/dtr"
+    >>> dt.save(dtr_path)
+    >>> dt2 = DecisionTreeRegressor.load(dtr_path)
+    >>> dt2.getMaxDepth()
+    2
+    >>> model_path = temp_path + "/dtr_model"
+    >>> model.save(model_path)
+    >>> model2 = DecisionTreeRegressionModel.load(model_path)
+    >>> model.numNodes == model2.numNodes
+    True
+    >>> model.depth == model2.depth
+    True
+    >>> model.transform(test1).head().variance
+    0.0
+
+    .. versionadded:: 1.4.0
+    """
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance",
+                 seed=None, varianceCol=None):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+                 impurity="variance", seed=None, varianceCol=None)
+        """
+        super(DecisionTreeRegressor, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.regression.DecisionTreeRegressor", self.uid)
+        self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                         maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                         impurity="variance")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                  impurity="variance", seed=None, varianceCol=None):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+                  impurity="variance", seed=None, varianceCol=None)
+        Sets params for the DecisionTreeRegressor.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return DecisionTreeRegressionModel(java_model)
+
+
+@inherit_doc
+class DecisionTreeModel(JavaModel, JavaPredictionModel):
+    """
+    Abstraction for Decision Tree models.
+
+    .. versionadded:: 1.5.0
+    """
+
+    @property
+    @since("1.5.0")
+    def numNodes(self):
+        """Return number of nodes of the decision tree."""
+        return self._call_java("numNodes")
+
+    @property
+    @since("1.5.0")
+    def depth(self):
+        """Return depth of the decision tree."""
+        return self._call_java("depth")
+
+    @property
+    @since("2.0.0")
+    def toDebugString(self):
+        """Full description of model."""
+        return self._call_java("toDebugString")
+
+    def __repr__(self):
+        return self._call_java("toString")
+
+
+@inherit_doc
+class TreeEnsembleModel(JavaModel):
+    """
+    (private abstraction)
+
+    Represents a tree ensemble model.
+    """
+
+    @property
+    @since("2.0.0")
+    def trees(self):
+        """Trees in this ensemble. Warning: These have null parent Estimators."""
+        return [DecisionTreeModel(m) for m in list(self._call_java("trees"))]
+
+    @property
+    @since("2.0.0")
+    def getNumTrees(self):
+        """Number of trees in ensemble."""
+        return self._call_java("getNumTrees")
+
+    @property
+    @since("1.5.0")
+    def treeWeights(self):
+        """Return the weights for each tree"""
+        return list(self._call_java("javaTreeWeights"))
+
+    @property
+    @since("2.0.0")
+    def totalNumNodes(self):
+        """Total number of nodes, summed over all trees in the ensemble."""
+        return self._call_java("totalNumNodes")
+
+    @property
+    @since("2.0.0")
+    def toDebugString(self):
+        """Full description of model."""
+        return self._call_java("toDebugString")
+
+    def __repr__(self):
+        return self._call_java("toString")
+
+
+@inherit_doc
+class DecisionTreeRegressionModel(DecisionTreeModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by :class:`DecisionTreeRegressor`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def featureImportances(self):
+        """
+        Estimate of the importance of each feature.
+
+        This generalizes the idea of "Gini" importance to other losses,
+        following the explanation of Gini importance from "Random Forests" documentation
+        by Leo Breiman and Adele Cutler, and following the implementation from scikit-learn.
+
+        This feature importance is calculated as follows:
+          - importance(feature j) = sum (over nodes which split on feature j) of the gain,
+            where gain is scaled by the number of instances passing through node
+          - Normalize importances for tree to sum to 1.
+
+        .. note:: Feature importance for single decision trees can have high variance due to
+              correlated predictor variables. Consider using a :py:class:`RandomForestRegressor`
+              to determine feature importance instead.
+        """
+        return self._call_java("featureImportances")
+
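+# A small sketch (illustrative comment only, not executed): turning the importance
+# vector of a fitted tree `model` into a {feature index: importance} dict.
+#
+#   imp = model.featureImportances             # a (possibly sparse) ml Vector
+#   dict(zip(range(model.numFeatures), imp.toArray().tolist()))
+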
+
+@inherit_doc
+class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed,
+                            RandomForestParams, TreeRegressorParams, HasCheckpointInterval,
+                            JavaMLWritable, JavaMLReadable):
+    """
+    `Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_
+    learning algorithm for regression.
+    It supports both continuous and categorical features.
+
+    >>> from numpy import allclose
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> rf = RandomForestRegressor(numTrees=2, maxDepth=2, seed=42)
+    >>> model = rf.fit(df)
+    >>> model.featureImportances
+    SparseVector(1, {0: 1.0})
+    >>> allclose(model.treeWeights, [1.0, 1.0])
+    True
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> model.transform(test0).head().prediction
+    0.0
+    >>> model.numFeatures
+    1
+    >>> model.trees
+    [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...]
+    >>> model.getNumTrees
+    2
+    >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
+    >>> model.transform(test1).head().prediction
+    0.5
+    >>> rfr_path = temp_path + "/rfr"
+    >>> rf.save(rfr_path)
+    >>> rf2 = RandomForestRegressor.load(rfr_path)
+    >>> rf2.getNumTrees()
+    2
+    >>> model_path = temp_path + "/rfr_model"
+    >>> model.save(model_path)
+    >>> model2 = RandomForestRegressionModel.load(model_path)
+    >>> model.featureImportances == model2.featureImportances
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                 impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20,
+                 featureSubsetStrategy="auto"):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                 maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+                 impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20, \
+                 featureSubsetStrategy="auto")
+        """
+        super(RandomForestRegressor, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.regression.RandomForestRegressor", self.uid)
+        self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                         maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                         impurity="variance", subsamplingRate=1.0, numTrees=20,
+                         featureSubsetStrategy="auto")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
+                  impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20,
+                  featureSubsetStrategy="auto"):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                  maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \
+                  impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20, \
+                  featureSubsetStrategy="auto")
+        Sets params for RandomForestRegressor.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return RandomForestRegressionModel(java_model)
+
+    @since("2.4.0")
+    def setFeatureSubsetStrategy(self, value):
+        """
+        Sets the value of :py:attr:`featureSubsetStrategy`.
+        """
+        return self._set(featureSubsetStrategy=value)
+
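+# A small sketch (illustrative comment only, not executed): controlling how many
+# features are considered per split, assuming a hypothetical `train` DataFrame.
+# Fractional strings in (0, 1] are also accepted, e.g. "0.5" uses half the features.
+#
+#   rf = RandomForestRegressor(numTrees=50, featureSubsetStrategy="onethird")
+#   model = rf.fit(train)
+#   rf.setFeatureSubsetStrategy("0.5")           # dedicated setter added in 2.4.0
+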
+
+class RandomForestRegressionModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable,
+                                  JavaMLReadable):
+    """
+    Model fitted by :class:`RandomForestRegressor`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def trees(self):
+        """Trees in this ensemble. Warning: These have null parent Estimators."""
+        return [DecisionTreeRegressionModel(m) for m in list(self._call_java("trees"))]
+
+    @property
+    @since("2.0.0")
+    def featureImportances(self):
+        """
+        Estimate of the importance of each feature.
+
+        Each feature's importance is the average of its importance across all trees in the ensemble.
+        The importance vector is normalized to sum to 1. This method is suggested by Hastie et al.
+        (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)
+        and follows the implementation from scikit-learn.
+
+        .. seealso:: :py:attr:`DecisionTreeRegressionModel.featureImportances`
+        """
+        return self._call_java("featureImportances")
+
+
+@inherit_doc
+class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
+                   GBTParams, HasCheckpointInterval, HasStepSize, HasSeed, JavaMLWritable,
+                   JavaMLReadable, TreeRegressorParams):
+    """
+    `Gradient-Boosted Trees (GBTs) <http://en.wikipedia.org/wiki/Gradient_boosting>`_
+    learning algorithm for regression.
+    It supports both continuous and categorical features.
+
+    >>> from numpy import allclose
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0)),
+    ...     (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
+    >>> gbt = GBTRegressor(maxIter=5, maxDepth=2, seed=42)
+    >>> print(gbt.getImpurity())
+    variance
+    >>> print(gbt.getFeatureSubsetStrategy())
+    all
+    >>> model = gbt.fit(df)
+    >>> model.featureImportances
+    SparseVector(1, {0: 1.0})
+    >>> model.numFeatures
+    1
+    >>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
+    True
+    >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
+    >>> model.transform(test0).head().prediction
+    0.0
+    >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
+    >>> model.transform(test1).head().prediction
+    1.0
+    >>> gbtr_path = temp_path + "/gbtr"
+    >>> gbt.save(gbtr_path)
+    >>> gbt2 = GBTRegressor.load(gbtr_path)
+    >>> gbt2.getMaxDepth()
+    2
+    >>> model_path = temp_path + "/gbtr_model"
+    >>> model.save(model_path)
+    >>> model2 = GBTRegressionModel.load(model_path)
+    >>> model.featureImportances == model2.featureImportances
+    True
+    >>> model.treeWeights == model2.treeWeights
+    True
+    >>> model.trees
+    [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...]
+    >>> validation = spark.createDataFrame([(0.0, Vectors.dense(-1.0))],
+    ...              ["label", "features"])
+    >>> model.evaluateEachIteration(validation, "squared")
+    [0.0, 0.0, 0.0, 0.0, 0.0]
+
+    .. versionadded:: 1.4.0
+    """
+
+    lossType = Param(Params._dummy(), "lossType",
+                     "Loss function which GBT tries to minimize (case-insensitive). " +
+                     "Supported options: " + ", ".join(GBTParams.supportedLossTypes),
+                     typeConverter=TypeConverters.toString)
+
+    stepSize = Param(Params._dummy(), "stepSize",
+                     "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " +
+                     "the contribution of each estimator.",
+                     typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                 maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
+                 checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None,
+                 impurity="variance", featureSubsetStrategy="all"):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                 maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
+                 checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \
+                 impurity="variance", featureSubsetStrategy="all")
+        """
+        super(GBTRegressor, self).__init__()
+        self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid)
+        self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                         maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
+                         checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1,
+                         impurity="variance", featureSubsetStrategy="all")
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
+                  maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0,
+                  checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None,
+                  impurity="variance", featureSubsetStrategy="all"):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \
+                  maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \
+                  checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \
+                  impurity="variance", featureSubsetStrategy="all")
+        Sets params for Gradient Boosted Tree Regression.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return GBTRegressionModel(java_model)
+
+    @since("1.4.0")
+    def setLossType(self, value):
+        """
+        Sets the value of :py:attr:`lossType`.
+        """
+        return self._set(lossType=value)
+
+    @since("1.4.0")
+    def getLossType(self):
+        """
+        Gets the value of lossType or its default value.
+        """
+        return self.getOrDefault(self.lossType)
+
+    @since("2.4.0")
+    def setFeatureSubsetStrategy(self, value):
+        """
+        Sets the value of :py:attr:`featureSubsetStrategy`.
+        """
+        return self._set(featureSubsetStrategy=value)
+
+
+class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWritable, JavaMLReadable):
+    """
+    Model fitted by :class:`GBTRegressor`.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @property
+    @since("2.0.0")
+    def featureImportances(self):
+        """
+        Estimate of the importance of each feature.
+
+        Each feature's importance is the average of its importance across all trees in the ensemble.
+        The importance vector is normalized to sum to 1. This method is suggested by Hastie et al.
+        (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)
+        and follows the implementation from scikit-learn.
+
+        .. seealso:: :py:attr:`DecisionTreeRegressionModel.featureImportances`
+        """
+        return self._call_java("featureImportances")
+
+    @property
+    @since("2.0.0")
+    def trees(self):
+        """Trees in this ensemble. Warning: These have null parent Estimators."""
+        return [DecisionTreeRegressionModel(m) for m in list(self._call_java("trees"))]
+
+    @since("2.4.0")
+    def evaluateEachIteration(self, dataset, loss):
+        """
+        Method to compute error or loss for every iteration of gradient boosting.
+
+        :param dataset:
+            Test dataset to evaluate model on, where dataset is an
+            instance of :py:class:`pyspark.sql.DataFrame`
+        :param loss:
+            The loss function used to compute error.
+            Supported options: squared, absolute
+        """
+        return self._call_java("evaluateEachIteration", dataset, loss)
+
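+# A small sketch (illustrative comment only, not executed): using the per-iteration
+# loss on a hypothetical held-out `validation` DataFrame to pick a number of
+# boosting iterations.
+#
+#   losses = model.evaluateEachIteration(validation, "squared")
+#   bestNumIterations = losses.index(min(losses)) + 1
+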
+
+@inherit_doc
+class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
+                            HasFitIntercept, HasMaxIter, HasTol, HasAggregationDepth,
+                            JavaMLWritable, JavaMLReadable):
+    """
+    .. note:: Experimental
+
+    Accelerated Failure Time (AFT) Model Survival Regression
+
+    Fit a parametric AFT survival regression model based on the Weibull distribution
+    of the survival time.
+
+    .. seealso:: `AFT Model <https://en.wikipedia.org/wiki/Accelerated_failure_time_model>`_
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(1.0), 1.0),
+    ...     (1e-40, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"])
+    >>> aftsr = AFTSurvivalRegression()
+    >>> model = aftsr.fit(df)
+    >>> model.predict(Vectors.dense(6.3))
+    1.0
+    >>> model.predictQuantiles(Vectors.dense(6.3))
+    DenseVector([0.0101, 0.0513, 0.1054, 0.2877, 0.6931, 1.3863, 2.3026, 2.9957, 4.6052])
+    >>> model.transform(df).show()
+    +-------+---------+------+----------+
+    |  label| features|censor|prediction|
+    +-------+---------+------+----------+
+    |    1.0|    [1.0]|   1.0|       1.0|
+    |1.0E-40|(1,[],[])|   0.0|       1.0|
+    +-------+---------+------+----------+
+    ...
+    >>> aftsr_path = temp_path + "/aftsr"
+    >>> aftsr.save(aftsr_path)
+    >>> aftsr2 = AFTSurvivalRegression.load(aftsr_path)
+    >>> aftsr2.getMaxIter()
+    100
+    >>> model_path = temp_path + "/aftsr_model"
+    >>> model.save(model_path)
+    >>> model2 = AFTSurvivalRegressionModel.load(model_path)
+    >>> model.coefficients == model2.coefficients
+    True
+    >>> model.intercept == model2.intercept
+    True
+    >>> model.scale == model2.scale
+    True
+
+    .. versionadded:: 1.6.0
+    """
+
+    censorCol = Param(Params._dummy(), "censorCol",
+                      "censor column name. The value of this column could be 0 or 1. " +
+                      "If the value is 1, it means the event has occurred i.e. " +
+                      "uncensored; otherwise censored.", typeConverter=TypeConverters.toString)
+    quantileProbabilities = \
+        Param(Params._dummy(), "quantileProbabilities",
+              "quantile probabilities array. Values of the quantile probabilities array " +
+              "should be in the range (0, 1) and the array should be non-empty.",
+              typeConverter=TypeConverters.toListFloat)
+    quantilesCol = Param(Params._dummy(), "quantilesCol",
+                         "quantiles column name. This column will output quantiles of " +
+                         "corresponding quantileProbabilities if it is set.",
+                         typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                 fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor",
+                 quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]),
+                 quantilesCol=None, aggregationDepth=2):
+        """
+        __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                 fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", \
+                 quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], \
+                 quantilesCol=None, aggregationDepth=2)
+        """
+        super(AFTSurvivalRegression, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.regression.AFTSurvivalRegression", self.uid)
+        self._setDefault(censorCol="censor",
+                         quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99],
+                         maxIter=100, tol=1E-6)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("1.6.0")
+    def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
+                  fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor",
+                  quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]),
+                  quantilesCol=None, aggregationDepth=2):
+        """
+        setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
+                  fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", \
+                  quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], \
+                  quantilesCol=None, aggregationDepth=2)
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return AFTSurvivalRegressionModel(java_model)
+
+    @since("1.6.0")
+    def setCensorCol(self, value):
+        """
+        Sets the value of :py:attr:`censorCol`.
+        """
+        return self._set(censorCol=value)
+
+    @since("1.6.0")
+    def getCensorCol(self):
+        """
+        Gets the value of censorCol or its default value.
+        """
+        return self.getOrDefault(self.censorCol)
+
+    @since("1.6.0")
+    def setQuantileProbabilities(self, value):
+        """
+        Sets the value of :py:attr:`quantileProbabilities`.
+        """
+        return self._set(quantileProbabilities=value)
+
+    @since("1.6.0")
+    def getQuantileProbabilities(self):
+        """
+        Gets the value of quantileProbabilities or its default value.
+        """
+        return self.getOrDefault(self.quantileProbabilities)
+
+    @since("1.6.0")
+    def setQuantilesCol(self, value):
+        """
+        Sets the value of :py:attr:`quantilesCol`.
+        """
+        return self._set(quantilesCol=value)
+
+    @since("1.6.0")
+    def getQuantilesCol(self):
+        """
+        Gets the value of quantilesCol or its default value.
+        """
+        return self.getOrDefault(self.quantilesCol)
+
+
+class AFTSurvivalRegressionModel(JavaModel, JavaMLWritable, JavaMLReadable):
+    """
+    .. note:: Experimental
+
+    Model fitted by :class:`AFTSurvivalRegression`.
+
+    .. versionadded:: 1.6.0
+    """
+
+    @property
+    @since("2.0.0")
+    def coefficients(self):
+        """
+        Model coefficients.
+        """
+        return self._call_java("coefficients")
+
+    @property
+    @since("1.6.0")
+    def intercept(self):
+        """
+        Model intercept.
+        """
+        return self._call_java("intercept")
+
+    @property
+    @since("1.6.0")
+    def scale(self):
+        """
+        Model scale parameter.
+        """
+        return self._call_java("scale")
+
+    @since("2.0.0")
+    def predictQuantiles(self, features):
+        """
+        Predicted quantiles.
+        """
+        return self._call_java("predictQuantiles", features)
+
+    @since("2.0.0")
+    def predict(self, features):
+        """
+        Predicted value.
+        """
+        return self._call_java("predict", features)
+
+
+@inherit_doc
+class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, HasPredictionCol,
+                                  HasFitIntercept, HasMaxIter, HasTol, HasRegParam, HasWeightCol,
+                                  HasSolver, JavaMLWritable, JavaMLReadable):
+    """
+    .. note:: Experimental
+
+    Generalized Linear Regression.
+
+    Fit a Generalized Linear Model specified by giving a symbolic description of the linear
+    predictor (link function) and a description of the error distribution (family). It supports
+    "gaussian", "binomial", "poisson", "gamma" and "tweedie" as family. Valid link functions for
+    each family are listed below. The first link function of each family is the default one.
+
+    * "gaussian" -> "identity", "log", "inverse"
+
+    * "binomial" -> "logit", "probit", "cloglog"
+
+    * "poisson"  -> "log", "identity", "sqrt"
+
+    * "gamma"    -> "inverse", "identity", "log"
+
+    * "tweedie"  -> power link function specified through "linkPower". \
+                    The default link power in the tweedie family is 1 - variancePower.
+
+    .. seealso:: `GLM <https://en.wikipedia.org/wiki/Generalized_linear_model>`_
+
+    >>> from pyspark.ml.linalg import Vectors
+    >>> df = spark.createDataFrame([
+    ...     (1.0, Vectors.dense(0.0, 0.0)),
+    ...     (1.0, Vectors.dense(1.0, 2.0)),
+    ...     (2.0, Vectors.dense(0.0, 0.0)),
+    ...     (2.0, Vectors.dense(1.0, 1.0)),], ["label", "features"])
+    >>> glr = GeneralizedLinearRegression(family="gaussian", link="identity", linkPredictionCol="p")
+    >>> model = glr.fit(df)
+    >>> transformed = model.transform(df)
+    >>> abs(transformed.head().prediction - 1.5) < 0.001
+    True
+    >>> abs(transformed.head().p - 1.5) < 0.001
+    True
+    >>> model.coefficients
+    DenseVector([1.5..., -1.0...])
+    >>> model.numFeatures
+    2
+    >>> abs(model.intercept - 1.5) < 0.001
+    True
+    >>> glr_path = temp_path + "/glr"
+    >>> glr.save(glr_path)
+    >>> glr2 = GeneralizedLinearRegression.load(glr_path)
+    >>> glr.getFamily() == glr2.getFamily()
+    True
+    >>> model_path = temp_path + "/glr_model"
+    >>> model.save(model_path)
+    >>> model2 = GeneralizedLinearRegressionModel.load(model_path)
+    >>> model.intercept == model2.intercept
+    True
+    >>> model.coefficients[0] == model2.coefficients[0]
+    True
+
+    .. versionadded:: 2.0.0
+    """
+
+    family = Param(Params._dummy(), "family", "The name of family which is a description of " +
+                   "the error distribution to be used in the model. Supported options: " +
+                   "gaussian (default), binomial, poisson, gamma and tweedie.",
+                   typeConverter=TypeConverters.toString)
+    link = Param(Params._dummy(), "link", "The name of link function which provides the " +
+                 "relationship between the linear predictor and the mean of the distribution " +
+                 "function. Supported options: identity, log, inverse, logit, probit, cloglog " +
+                 "and sqrt.", typeConverter=TypeConverters.toString)
+    linkPredictionCol = Param(Params._dummy(), "linkPredictionCol", "link prediction (linear " +
+                              "predictor) column name", typeConverter=TypeConverters.toString)
+    variancePower = Param(Params._dummy(), "variancePower", "The power in the variance function " +
+                          "of the Tweedie distribution which characterizes the relationship " +
+                          "between the variance and mean of the distribution. Only applicable " +
+                          "for the Tweedie family. Supported values: 0 and [1, Inf).",
+                          typeConverter=TypeConverters.toFloat)
+    linkPower = Param(Params._dummy(), "linkPower", "The index in the power link function. " +
+                      "Only applicable to the Tweedie family.",
+                      typeConverter=TypeConverters.toFloat)
+    solver = Param(Params._dummy(), "solver", "The solver algorithm for optimization. Supported " +
+                   "options: irls.", typeConverter=TypeConverters.toString)
+    offsetCol = Param(Params._dummy(), "offsetCol", "The offset column name. If this is not set " +
+                      "or empty, we treat all instance offsets as 0.0",
+                      typeConverter=TypeConverters.toString)
+
+    @keyword_only
+    def __init__(self, labelCol="label", featuresCol="features", predictionCol="prediction",
+                 family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6,
+                 regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None,
+                 variancePower=0.0, linkPower=None, offsetCol=None):
+        """
+        __init__(self, labelCol="label", featuresCol="features", predictionCol="prediction", \
+                 family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \
+                 regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \
+                 variancePower=0.0, linkPower=None, offsetCol=None)
+        """
+        super(GeneralizedLinearRegression, self).__init__()
+        self._java_obj = self._new_java_obj(
+            "org.apache.spark.ml.regression.GeneralizedLinearRegression", self.uid)
+        self._setDefault(family="gaussian", maxIter=25, tol=1e-6, regParam=0.0, solver="irls",
+                         variancePower=0.0)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    @since("2.0.0")
+    def setParams(self, labelCol="label", featuresCol="features", predictionCol="prediction",
+                  family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6,
+                  regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None,
+                  variancePower=0.0, linkPower=None, offsetCol=None):
+        """
+        setParams(self, labelCol="label", featuresCol="features", predictionCol="prediction", \
+                  family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \
+                  regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \
+                  variancePower=0.0, linkPower=None, offsetCol=None)
+        Sets params for generalized linear regression.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def _create_model(self, java_model):
+        return GeneralizedLinearRegressionModel(java_model)
+
+    @since("2.0.0")
+    def setFamily(self, value):
+        """
+        Sets the value of :py:attr:`family`.
+        """
+        return self._set(family=value)
+
+    @since("2.0.0")
+    def getFamily(self):
+        """
+        Gets the value of family or its default value.
+        """
+        return self.getOrDefault(self.family)
+
+    @since("2.0.0")
+    def setLinkPredictionCol(self, value):
+        """
+        Sets the value of :py:attr:`linkPredictionCol`.
+        """
+        return self._set(linkPredictionCol=value)
+
+    @since("2.0.0")
+    def getLinkPredictionCol(self):
+        """
+        Gets the value of linkPredictionCol or its default value.
+        """
+        return self.getOrDefault(self.linkPredictionCol)
+
+    @since("2.0.0")
+    def setLink(self, value):
+        """
+        Sets the value of :py:attr:`link`.
+        """
+        return self._set(link=value)
+
+    @since("2.0.0")
+    def getLink(self):
+        """
+        Gets the value of link or its default value.
+        """
+        return self.getOrDefault(self.link)
+
+    @since("2.2.0")
+    def setVariancePower(self, value):
+        """
+        Sets the value of :py:attr:`variancePower`.
+        """
+        return self._set(variancePower=value)
+
+    @since("2.2.0")
+    def getVariancePower(self):
+        """
+        Gets the value of variancePower or its default value.
+        """
+        return self.getOrDefault(self.variancePower)
+
+    @since("2.2.0")
+    def setLinkPower(self, value):
+        """
+        Sets the value of :py:attr:`linkPower`.
+        """
+        return self._set(linkPower=value)
+
+    @since("2.2.0")
+    def getLinkPower(self):
+        """
+        Gets the value of linkPower or its default value.
+        """
+        return self.getOrDefault(self.linkPower)
+
+    @since("2.3.0")
+    def setOffsetCol(self, value):
+        """
+        Sets the value of :py:attr:`offsetCol`.
+        """
+        return self._set(offsetCol=value)
+
+    @since("2.3.0")
+    def getOffsetCol(self):
+        """
+        Gets the value of offsetCol or its default value.
+        """
+        return self.getOrDefault(self.offsetCol)
+
+
+class GeneralizedLinearRegressionModel(JavaModel, JavaPredictionModel, JavaMLWritable,
+                                       JavaMLReadable):
+    """
+    .. note:: Experimental
+
+    Model fitted by :class:`GeneralizedLinearRegression`.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def coefficients(self):
+        """
+        Model coefficients.
+        """
+        return self._call_java("coefficients")
+
+    @property
+    @since("2.0.0")
+    def intercept(self):
+        """
+        Model intercept.
+        """
+        return self._call_java("intercept")
+
+    @property
+    @since("2.0.0")
+    def summary(self):
+        """
+        Gets summary (e.g. residuals, deviance, pValues) of model on
+        training set. An exception is thrown if
+        `trainingSummary is None`.
+        """
+        if self.hasSummary:
+            java_glrt_summary = self._call_java("summary")
+            return GeneralizedLinearRegressionTrainingSummary(java_glrt_summary)
+        else:
+            raise RuntimeError("No training summary available for this %s" %
+                               self.__class__.__name__)
+
+    @property
+    @since("2.0.0")
+    def hasSummary(self):
+        """
+        Indicates whether a training summary exists for this model
+        instance.
+        """
+        return self._call_java("hasSummary")
+
+    @since("2.0.0")
+    def evaluate(self, dataset):
+        """
+        Evaluates the model on a test dataset.
+
+        :param dataset:
+          Test dataset to evaluate model on, where dataset is an
+          instance of :py:class:`pyspark.sql.DataFrame`
+        """
+        if not isinstance(dataset, DataFrame):
+            raise ValueError("dataset must be a DataFrame but got %s." % type(dataset))
+        java_glr_summary = self._call_java("evaluate", dataset)
+        return GeneralizedLinearRegressionSummary(java_glr_summary)
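+
+# An illustrative usage sketch (hypothetical; not part of the upstream module), assuming the
+# fitted `model` and DataFrame `df` from the GeneralizedLinearRegression doctest. evaluate()
+# returns a GeneralizedLinearRegressionSummary whose statistics can be inspected directly.
+#
+# >>> summary = model.evaluate(df)
+# >>> summary.deviance, summary.aic
+# >>> summary.residuals("pearson").show()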
+
+
+class GeneralizedLinearRegressionSummary(JavaWrapper):
+    """
+    .. note:: Experimental
+
+    Generalized linear regression results evaluated on a dataset.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def predictions(self):
+        """
+        Predictions output by the model's `transform` method.
+        """
+        return self._call_java("predictions")
+
+    @property
+    @since("2.0.0")
+    def predictionCol(self):
+        """
+        Field in :py:attr:`predictions` which gives the predicted value of each instance.
+        This is set to a new column name if the original model's `predictionCol` is not set.
+        """
+        return self._call_java("predictionCol")
+
+    @property
+    @since("2.2.0")
+    def numInstances(self):
+        """
+        Number of instances in DataFrame predictions.
+        """
+        return self._call_java("numInstances")
+
+    @property
+    @since("2.0.0")
+    def rank(self):
+        """
+        The numeric rank of the fitted linear model.
+        """
+        return self._call_java("rank")
+
+    @property
+    @since("2.0.0")
+    def degreesOfFreedom(self):
+        """
+        Degrees of freedom.
+        """
+        return self._call_java("degreesOfFreedom")
+
+    @property
+    @since("2.0.0")
+    def residualDegreeOfFreedom(self):
+        """
+        The residual degrees of freedom.
+        """
+        return self._call_java("residualDegreeOfFreedom")
+
+    @property
+    @since("2.0.0")
+    def residualDegreeOfFreedomNull(self):
+        """
+        The residual degrees of freedom for the null model.
+        """
+        return self._call_java("residualDegreeOfFreedomNull")
+
+    @since("2.0.0")
+    def residuals(self, residualsType="deviance"):
+        """
+        Get the residuals of the fitted model by type.
+
+        :param residualsType: The type of residuals which should be returned.
+                              Supported options: deviance (default), pearson, working, and response.
+        """
+        return self._call_java("residuals", residualsType)
+
+    @property
+    @since("2.0.0")
+    def nullDeviance(self):
+        """
+        The deviance for the null model.
+        """
+        return self._call_java("nullDeviance")
+
+    @property
+    @since("2.0.0")
+    def deviance(self):
+        """
+        The deviance for the fitted model.
+        """
+        return self._call_java("deviance")
+
+    @property
+    @since("2.0.0")
+    def dispersion(self):
+        """
+        The dispersion of the fitted model.
+        It is taken as 1.0 for the "binomial" and "poisson" families, and otherwise
+        estimated by the residual Pearson's Chi-Squared statistic (which is defined as
+        sum of the squares of the Pearson residuals) divided by the residual degrees of freedom.
+        """
+        return self._call_java("dispersion")
+
+    @property
+    @since("2.0.0")
+    def aic(self):
+        """
+        Akaike's "An Information Criterion"(AIC) for the fitted model.
+        """
+        return self._call_java("aic")
+
+
+@inherit_doc
+class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSummary):
+    """
+    .. note:: Experimental
+
+    Generalized linear regression training results.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @property
+    @since("2.0.0")
+    def numIterations(self):
+        """
+        Number of training iterations.
+        """
+        return self._call_java("numIterations")
+
+    @property
+    @since("2.0.0")
+    def solver(self):
+        """
+        The numeric solver used for training.
+        """
+        return self._call_java("solver")
+
+    @property
+    @since("2.0.0")
+    def coefficientStandardErrors(self):
+        """
+        Standard error of estimated coefficients and intercept.
+
+        If :py:attr:`GeneralizedLinearRegression.fitIntercept` is set to True,
+        then the last element returned corresponds to the intercept.
+        """
+        return self._call_java("coefficientStandardErrors")
+
+    @property
+    @since("2.0.0")
+    def tValues(self):
+        """
+        T-statistic of estimated coefficients and intercept.
+
+        If :py:attr:`GeneralizedLinearRegression.fitIntercept` is set to True,
+        then the last element returned corresponds to the intercept.
+        """
+        return self._call_java("tValues")
+
+    @property
+    @since("2.0.0")
+    def pValues(self):
+        """
+        Two-sided p-value of estimated coefficients and intercept.
+
+        If :py:attr:`GeneralizedLinearRegression.fitIntercept` is set to True,
+        then the last element returned corresponds to the intercept.
+        """
+        return self._call_java("pValues")
+
+    def __repr__(self):
+        return self._call_java("toString")
+
+
+if __name__ == "__main__":
+    import doctest
+    import pyspark.ml.regression
+    from pyspark.sql import SparkSession
+    globs = pyspark.ml.regression.__dict__.copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.regression tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    import tempfile
+    temp_path = tempfile.mkdtemp()
+    globs['temp_path'] = temp_path
+    try:
+        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        spark.stop()
+    finally:
+        from shutil import rmtree
+        try:
+            rmtree(temp_path)
+        except OSError:
+            pass
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/stat.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/stat.py
new file mode 100644
index 0000000000000000000000000000000000000000..370154fc6d62a2630d4e6e26ed5b8085df198e49
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/stat.py
@@ -0,0 +1,414 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+from pyspark import since, SparkContext
+from pyspark.ml.common import _java2py, _py2java
+from pyspark.ml.wrapper import JavaWrapper, _jvm
+from pyspark.sql.column import Column, _to_seq
+from pyspark.sql.functions import lit
+
+
+class ChiSquareTest(object):
+    """
+    .. note:: Experimental
+
+    Conduct Pearson's independence test for every feature against the label. For each feature,
+    the (feature, label) pairs are converted into a contingency matrix for which the Chi-squared
+    statistic is computed. All label and feature values must be categorical.
+
+    The null hypothesis is that the occurrence of the outcomes is statistically independent.
+
+    .. versionadded:: 2.2.0
+
+    """
+    @staticmethod
+    @since("2.2.0")
+    def test(dataset, featuresCol, labelCol):
+        """
+        Perform a Pearson's independence test using dataset.
+
+        :param dataset:
+          DataFrame of categorical labels and categorical features.
+          Real-valued features will be treated as categorical for each distinct value.
+        :param featuresCol:
+          Name of features column in dataset, of type `Vector` (`VectorUDT`).
+        :param labelCol:
+          Name of label column in dataset, of any numerical type.
+        :return:
+          DataFrame containing the test result for every feature against the label.
+          This DataFrame will contain a single Row with the following fields:
+          - `pValues: Vector`
+          - `degreesOfFreedom: Array[Int]`
+          - `statistics: Vector`
+          Each of these fields has one value per feature.
+
+        >>> from pyspark.ml.linalg import Vectors
+        >>> from pyspark.ml.stat import ChiSquareTest
+        >>> dataset = [[0, Vectors.dense([0, 0, 1])],
+        ...            [0, Vectors.dense([1, 0, 1])],
+        ...            [1, Vectors.dense([2, 1, 1])],
+        ...            [1, Vectors.dense([3, 1, 1])]]
+        >>> dataset = spark.createDataFrame(dataset, ["label", "features"])
+        >>> chiSqResult = ChiSquareTest.test(dataset, 'features', 'label')
+        >>> chiSqResult.select("degreesOfFreedom").collect()[0]
+        Row(degreesOfFreedom=[3, 1, 0])
+        """
+        sc = SparkContext._active_spark_context
+        javaTestObj = _jvm().org.apache.spark.ml.stat.ChiSquareTest
+        args = [_py2java(sc, arg) for arg in (dataset, featuresCol, labelCol)]
+        return _java2py(sc, javaTestObj.test(*args))
+
+
+class Correlation(object):
+    """
+    .. note:: Experimental
+
+    Compute the correlation matrix for the input dataset of Vectors using the specified method.
+    Methods currently supported: `pearson` (default), `spearman`.
+
+    .. note:: For Spearman, a rank correlation, we need to create an RDD[Double] for each column
+      and sort it in order to retrieve the ranks and then join the columns back into an RDD[Vector],
+      which is fairly costly. Cache the input Dataset before calling corr with `method = 'spearman'`
+      to avoid recomputing the common lineage.
+
+    .. versionadded:: 2.2.0
+
+    """
+    @staticmethod
+    @since("2.2.0")
+    def corr(dataset, column, method="pearson"):
+        """
+        Compute the correlation matrix with specified method using dataset.
+
+        :param dataset:
+          A Dataset or a DataFrame.
+        :param column:
+          The name of the column of vectors for which the correlation coefficient needs
+          to be computed. This must be a column of the dataset, and it must contain
+          Vector objects.
+        :param method:
+          String specifying the method to use for computing correlation.
+          Supported: `pearson` (default), `spearman`.
+        :return:
+          A DataFrame that contains the correlation matrix of the column of vectors. This
+          DataFrame contains a single row and a single column of name
+          '$METHODNAME($COLUMN)'.
+
+        >>> from pyspark.ml.linalg import Vectors
+        >>> from pyspark.ml.stat import Correlation
+        >>> dataset = [[Vectors.dense([1, 0, 0, -2])],
+        ...            [Vectors.dense([4, 5, 0, 3])],
+        ...            [Vectors.dense([6, 7, 0, 8])],
+        ...            [Vectors.dense([9, 0, 0, 1])]]
+        >>> dataset = spark.createDataFrame(dataset, ['features'])
+        >>> pearsonCorr = Correlation.corr(dataset, 'features', 'pearson').collect()[0][0]
+        >>> print(str(pearsonCorr).replace('nan', 'NaN'))
+        DenseMatrix([[ 1.        ,  0.0556...,         NaN,  0.4004...],
+                     [ 0.0556...,  1.        ,         NaN,  0.9135...],
+                     [        NaN,         NaN,  1.        ,         NaN],
+                     [ 0.4004...,  0.9135...,         NaN,  1.        ]])
+        >>> spearmanCorr = Correlation.corr(dataset, 'features', method='spearman').collect()[0][0]
+        >>> print(str(spearmanCorr).replace('nan', 'NaN'))
+        DenseMatrix([[ 1.        ,  0.1054...,         NaN,  0.4       ],
+                     [ 0.1054...,  1.        ,         NaN,  0.9486... ],
+                     [        NaN,         NaN,  1.        ,         NaN],
+                     [ 0.4       ,  0.9486... ,         NaN,  1.        ]])
+        """
+        sc = SparkContext._active_spark_context
+        javaCorrObj = _jvm().org.apache.spark.ml.stat.Correlation
+        args = [_py2java(sc, arg) for arg in (dataset, column, method)]
+        return _java2py(sc, javaCorrObj.corr(*args))
+
+
+class KolmogorovSmirnovTest(object):
+    """
+    .. note:: Experimental
+
+    Conduct the two-sided Kolmogorov-Smirnov (KS) test for data sampled from a continuous
+    distribution.
+
+    By comparing the largest difference between the empirical cumulative
+    distribution of the sample data and the theoretical distribution we can provide a test for the
+    the null hypothesis that the sample data comes from that theoretical distribution.
+
+    .. versionadded:: 2.4.0
+
+    """
+    @staticmethod
+    @since("2.4.0")
+    def test(dataset, sampleCol, distName, *params):
+        """
+        Conduct a one-sample, two-sided Kolmogorov-Smirnov test for probability distribution
+        equality. Currently supports the normal distribution, taking as parameters the mean and
+        standard deviation.
+
+        :param dataset:
+          a Dataset or a DataFrame containing the sample of data to test.
+        :param sampleCol:
+          Name of sample column in dataset, of any numerical type.
+        :param distName:
+          a `string` name for a theoretical distribution; currently only "norm" is supported.
+        :param params:
+          a list of `Double` values specifying the parameters to be used for the theoretical
+          distribution. For "norm" distribution, the parameters includes mean and variance.
+        :return:
+          A DataFrame that contains the Kolmogorov-Smirnov test result for the input sampled data.
+          This DataFrame will contain a single Row with the following fields:
+          - `pValue: Double`
+          - `statistic: Double`
+
+        >>> from pyspark.ml.stat import KolmogorovSmirnovTest
+        >>> dataset = [[-1.0], [0.0], [1.0]]
+        >>> dataset = spark.createDataFrame(dataset, ['sample'])
+        >>> ksResult = KolmogorovSmirnovTest.test(dataset, 'sample', 'norm', 0.0, 1.0).first()
+        >>> round(ksResult.pValue, 3)
+        1.0
+        >>> round(ksResult.statistic, 3)
+        0.175
+        >>> dataset = [[2.0], [3.0], [4.0]]
+        >>> dataset = spark.createDataFrame(dataset, ['sample'])
+        >>> ksResult = KolmogorovSmirnovTest.test(dataset, 'sample', 'norm', 3.0, 1.0).first()
+        >>> round(ksResult.pValue, 3)
+        1.0
+        >>> round(ksResult.statistic, 3)
+        0.175
+        """
+        sc = SparkContext._active_spark_context
+        javaTestObj = _jvm().org.apache.spark.ml.stat.KolmogorovSmirnovTest
+        dataset = _py2java(sc, dataset)
+        params = [float(param) for param in params]
+        return _java2py(sc, javaTestObj.test(dataset, sampleCol, distName,
+                                             _jvm().PythonUtils.toSeq(params)))
+
+
+class Summarizer(object):
+    """
+    .. note:: Experimental
+
+    Tools for vectorized statistics on MLlib Vectors.
+    The methods in this package provide various statistics for Vectors contained inside DataFrames.
+    This class lets users pick the statistics they would like to extract for a given column.
+
+    >>> from pyspark.ml.stat import Summarizer
+    >>> from pyspark.sql import Row
+    >>> from pyspark.ml.linalg import Vectors
+    >>> summarizer = Summarizer.metrics("mean", "count")
+    >>> df = sc.parallelize([Row(weight=1.0, features=Vectors.dense(1.0, 1.0, 1.0)),
+    ...                      Row(weight=0.0, features=Vectors.dense(1.0, 2.0, 3.0))]).toDF()
+    >>> df.select(summarizer.summary(df.features, df.weight)).show(truncate=False)
+    +-----------------------------------+
+    |aggregate_metrics(features, weight)|
+    +-----------------------------------+
+    |[[1.0,1.0,1.0], 1]                 |
+    +-----------------------------------+
+    <BLANKLINE>
+    >>> df.select(summarizer.summary(df.features)).show(truncate=False)
+    +--------------------------------+
+    |aggregate_metrics(features, 1.0)|
+    +--------------------------------+
+    |[[1.0,1.5,2.0], 2]              |
+    +--------------------------------+
+    <BLANKLINE>
+    >>> df.select(Summarizer.mean(df.features, df.weight)).show(truncate=False)
+    +--------------+
+    |mean(features)|
+    +--------------+
+    |[1.0,1.0,1.0] |
+    +--------------+
+    <BLANKLINE>
+    >>> df.select(Summarizer.mean(df.features)).show(truncate=False)
+    +--------------+
+    |mean(features)|
+    +--------------+
+    |[1.0,1.5,2.0] |
+    +--------------+
+    <BLANKLINE>
+
+    .. versionadded:: 2.4.0
+
+    """
+    @staticmethod
+    @since("2.4.0")
+    def mean(col, weightCol=None):
+        """
+        return a column of mean summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "mean")
+
+    @staticmethod
+    @since("2.4.0")
+    def variance(col, weightCol=None):
+        """
+        return a column of variance summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "variance")
+
+    @staticmethod
+    @since("2.4.0")
+    def count(col, weightCol=None):
+        """
+        return a column of count summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "count")
+
+    @staticmethod
+    @since("2.4.0")
+    def numNonZeros(col, weightCol=None):
+        """
+        return a column of numNonZeros summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "numNonZeros")
+
+    @staticmethod
+    @since("2.4.0")
+    def max(col, weightCol=None):
+        """
+        return a column of max summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "max")
+
+    @staticmethod
+    @since("2.4.0")
+    def min(col, weightCol=None):
+        """
+        return a column of min summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "min")
+
+    @staticmethod
+    @since("2.4.0")
+    def normL1(col, weightCol=None):
+        """
+        return a column of normL1 summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "normL1")
+
+    @staticmethod
+    @since("2.4.0")
+    def normL2(col, weightCol=None):
+        """
+        return a column of normL2 summary
+        """
+        return Summarizer._get_single_metric(col, weightCol, "normL2")
+
+    @staticmethod
+    def _check_param(featuresCol, weightCol):
+        if weightCol is None:
+            weightCol = lit(1.0)
+        if not isinstance(featuresCol, Column) or not isinstance(weightCol, Column):
+            raise TypeError("featureCol and weightCol should be a Column")
+        return featuresCol, weightCol
+
+    @staticmethod
+    def _get_single_metric(col, weightCol, metric):
+        col, weightCol = Summarizer._check_param(col, weightCol)
+        return Column(JavaWrapper._new_java_obj("org.apache.spark.ml.stat.Summarizer." + metric,
+                                                col._jc, weightCol._jc))
+
+    @staticmethod
+    @since("2.4.0")
+    def metrics(*metrics):
+        """
+        Given a list of metrics, provides a builder that in turn computes metrics from a column.
+
+        See the documentation of :py:class:`Summarizer` for an example.
+
+        The following metrics are accepted (case sensitive):
+         - mean: a vector that contains the coefficient-wise mean.
+         - variance: a vector that contains the coefficient-wise variance.
+         - count: the count of all vectors seen.
+         - numNonzeros: a vector with the number of non-zeros for each coefficient.
+         - max: the maximum for each coefficient.
+         - min: the minimum for each coefficient.
+         - normL2: the Euclidean norm for each coefficient.
+         - normL1: the L1 norm of each coefficient (sum of the absolute values).
+
+        :param metrics:
+         the names of the metrics to compute.
+        :return:
+         an object of :py:class:`pyspark.ml.stat.SummaryBuilder`
+
+        Note: Currently, the performance of this interface is about 2x~3x slower than using the RDD
+        interface.
+        """
+        sc = SparkContext._active_spark_context
+        js = JavaWrapper._new_java_obj("org.apache.spark.ml.stat.Summarizer.metrics",
+                                       _to_seq(sc, metrics))
+        return SummaryBuilder(js)
+
+
+class SummaryBuilder(JavaWrapper):
+    """
+    .. note:: Experimental
+
+    A builder object that provides summary statistics about a given column.
+
+    Users should not directly create such builders, but instead use one of the methods in
+    :py:class:`pyspark.ml.stat.Summarizer`
+
+    .. versionadded:: 2.4.0
+
+    """
+    def __init__(self, jSummaryBuilder):
+        super(SummaryBuilder, self).__init__(jSummaryBuilder)
+
+    @since("2.4.0")
+    def summary(self, featuresCol, weightCol=None):
+        """
+        Returns an aggregate object that contains the summary of the column with the requested
+        metrics.
+
+        :param featuresCol:
+         a column that contains features Vector objects.
+        :param weightCol:
+         a column that contains the weight value. The default weight is 1.0.
+        :return:
+         an aggregate column that contains the statistics. The exact content of this
+         structure is determined during the creation of the builder.
+        """
+        featuresCol, weightCol = Summarizer._check_param(featuresCol, weightCol)
+        return Column(self._java_obj.summary(featuresCol._jc, weightCol._jc))
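+
+# An illustrative usage sketch (hypothetical; not part of the upstream module), assuming the
+# weighted DataFrame `df` from the Summarizer doctest. A builder from Summarizer.metrics
+# computes several statistics in a single pass over the column.
+#
+# >>> builder = Summarizer.metrics("min", "max", "normL1")
+# >>> df.select(builder.summary(df.features, df.weight).alias("stats")).show(truncate=False)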
+
+
+if __name__ == "__main__":
+    import doctest
+    import numpy
+    import pyspark.ml.stat
+    from pyspark.sql import SparkSession
+    try:
+        # NumPy 1.14+ changed its string format.
+        numpy.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+
+    globs = pyspark.ml.stat.__dict__.copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder \
+        .master("local[2]") \
+        .appName("ml.stat tests") \
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+
+    failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/tests.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/tests.py
new file mode 100755
index 0000000000000000000000000000000000000000..821e037af02715c6f7ab67d8e48a57604096d980
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/tests.py
@@ -0,0 +1,2762 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Unit tests for MLlib Python DataFrame-based APIs.
+"""
+import sys
+if sys.version > '3':
+    xrange = range
+    basestring = str
+
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
+
+from shutil import rmtree
+import tempfile
+import array as pyarray
+import numpy as np
+from numpy import abs, all, arange, array, array_equal, inf, ones, tile, zeros
+import inspect
+import py4j
+
+from pyspark import keyword_only, SparkContext
+from pyspark.ml import Estimator, Model, Pipeline, PipelineModel, Transformer, UnaryTransformer
+from pyspark.ml.classification import *
+from pyspark.ml.clustering import *
+from pyspark.ml.common import _java2py, _py2java
+from pyspark.ml.evaluation import BinaryClassificationEvaluator, ClusteringEvaluator, \
+    MulticlassClassificationEvaluator, RegressionEvaluator
+from pyspark.ml.feature import *
+from pyspark.ml.fpm import FPGrowth, FPGrowthModel
+from pyspark.ml.image import ImageSchema
+from pyspark.ml.linalg import DenseMatrix, DenseVector, Matrices, MatrixUDT, \
+    SparseMatrix, SparseVector, Vector, VectorUDT, Vectors
+from pyspark.ml.param import Param, Params, TypeConverters
+from pyspark.ml.param.shared import HasInputCol, HasMaxIter, HasSeed
+from pyspark.ml.recommendation import ALS
+from pyspark.ml.regression import DecisionTreeRegressor, GeneralizedLinearRegression, \
+    LinearRegression
+from pyspark.ml.stat import ChiSquareTest
+from pyspark.ml.tuning import *
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaParams, JavaWrapper
+from pyspark.serializers import PickleSerializer
+from pyspark.sql import DataFrame, Row, SparkSession, HiveContext
+from pyspark.sql.functions import rand
+from pyspark.sql.types import DoubleType, IntegerType
+from pyspark.storagelevel import *
+from pyspark.tests import QuietTest, ReusedPySparkTestCase as PySparkTestCase
+
+ser = PickleSerializer()
+
+
+class MLlibTestCase(unittest.TestCase):
+    def setUp(self):
+        self.sc = SparkContext('local[4]', "MLlib tests")
+        self.spark = SparkSession(self.sc)
+
+    def tearDown(self):
+        self.spark.stop()
+
+
+class SparkSessionTestCase(PySparkTestCase):
+    @classmethod
+    def setUpClass(cls):
+        PySparkTestCase.setUpClass()
+        cls.spark = SparkSession(cls.sc)
+
+    @classmethod
+    def tearDownClass(cls):
+        PySparkTestCase.tearDownClass()
+        cls.spark.stop()
+
+
+class MockDataset(DataFrame):
+
+    def __init__(self):
+        self.index = 0
+
+
+class HasFake(Params):
+
+    def __init__(self):
+        super(HasFake, self).__init__()
+        self.fake = Param(self, "fake", "fake param")
+
+    def getFake(self):
+        return self.getOrDefault(self.fake)
+
+
+class MockTransformer(Transformer, HasFake):
+
+    def __init__(self):
+        super(MockTransformer, self).__init__()
+        self.dataset_index = None
+
+    def _transform(self, dataset):
+        self.dataset_index = dataset.index
+        dataset.index += 1
+        return dataset
+
+
+class MockUnaryTransformer(UnaryTransformer, DefaultParamsReadable, DefaultParamsWritable):
+
+    shift = Param(Params._dummy(), "shift", "The amount by which to shift " +
+                  "data in a DataFrame",
+                  typeConverter=TypeConverters.toFloat)
+
+    def __init__(self, shiftVal=1):
+        super(MockUnaryTransformer, self).__init__()
+        self._setDefault(shift=1)
+        self._set(shift=shiftVal)
+
+    def getShift(self):
+        return self.getOrDefault(self.shift)
+
+    def setShift(self, shift):
+        self._set(shift=shift)
+
+    def createTransformFunc(self):
+        shiftVal = self.getShift()
+        return lambda x: x + shiftVal
+
+    def outputDataType(self):
+        return DoubleType()
+
+    def validateInputType(self, inputType):
+        if inputType != DoubleType():
+            raise TypeError("Bad input type: {}. ".format(inputType) +
+                            "Requires Double.")
+
+
+class MockEstimator(Estimator, HasFake):
+
+    def __init__(self):
+        super(MockEstimator, self).__init__()
+        self.dataset_index = None
+
+    def _fit(self, dataset):
+        self.dataset_index = dataset.index
+        model = MockModel()
+        self._copyValues(model)
+        return model
+
+
+class MockModel(MockTransformer, Model, HasFake):
+    pass
+
+
+class JavaWrapperMemoryTests(SparkSessionTestCase):
+
+    def test_java_object_gets_detached(self):
+        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
+                                         (0.0, 2.0, Vectors.sparse(1, [], []))],
+                                        ["label", "weight", "features"])
+        lr = LinearRegression(maxIter=1, regParam=0.0, solver="normal", weightCol="weight",
+                              fitIntercept=False)
+
+        model = lr.fit(df)
+        summary = model.summary
+
+        self.assertIsInstance(model, JavaWrapper)
+        self.assertIsInstance(summary, JavaWrapper)
+        self.assertIsInstance(model, JavaParams)
+        self.assertNotIsInstance(summary, JavaParams)
+
+        error_no_object = 'Target Object ID does not exist for this gateway'
+
+        self.assertIn("LinearRegression_", model._java_obj.toString())
+        self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString())
+
+        model.__del__()
+
+        with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object):
+            model._java_obj.toString()
+        self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString())
+
+        try:
+            summary.__del__()
+        except:
+            pass
+
+        with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object):
+            model._java_obj.toString()
+        with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object):
+            summary._java_obj.toString()
+
+
+class ParamTypeConversionTests(PySparkTestCase):
+    """
+    Test that param type conversion happens.
+    """
+
+    def test_int(self):
+        lr = LogisticRegression(maxIter=5.0)
+        self.assertEqual(lr.getMaxIter(), 5)
+        self.assertTrue(type(lr.getMaxIter()) == int)
+        self.assertRaises(TypeError, lambda: LogisticRegression(maxIter="notAnInt"))
+        self.assertRaises(TypeError, lambda: LogisticRegression(maxIter=5.1))
+
+    def test_float(self):
+        lr = LogisticRegression(tol=1)
+        self.assertEqual(lr.getTol(), 1.0)
+        self.assertTrue(type(lr.getTol()) == float)
+        self.assertRaises(TypeError, lambda: LogisticRegression(tol="notAFloat"))
+
+    def test_vector(self):
+        ewp = ElementwiseProduct(scalingVec=[1, 3])
+        self.assertEqual(ewp.getScalingVec(), DenseVector([1.0, 3.0]))
+        ewp = ElementwiseProduct(scalingVec=np.array([1.2, 3.4]))
+        self.assertEqual(ewp.getScalingVec(), DenseVector([1.2, 3.4]))
+        self.assertRaises(TypeError, lambda: ElementwiseProduct(scalingVec=["a", "b"]))
+
+    def test_list(self):
+        l = [0, 1]
+        for lst_like in [l, np.array(l), DenseVector(l), SparseVector(len(l),
+                         range(len(l)), l), pyarray.array('l', l), xrange(2), tuple(l)]:
+            converted = TypeConverters.toList(lst_like)
+            self.assertEqual(type(converted), list)
+            self.assertListEqual(converted, l)
+
+    def test_list_int(self):
+        for indices in [[1.0, 2.0], np.array([1.0, 2.0]), DenseVector([1.0, 2.0]),
+                        SparseVector(2, {0: 1.0, 1: 2.0}), xrange(1, 3), (1.0, 2.0),
+                        pyarray.array('d', [1.0, 2.0])]:
+            vs = VectorSlicer(indices=indices)
+            self.assertListEqual(vs.getIndices(), [1, 2])
+            self.assertTrue(all([type(v) == int for v in vs.getIndices()]))
+        self.assertRaises(TypeError, lambda: VectorSlicer(indices=["a", "b"]))
+
+    def test_list_float(self):
+        b = Bucketizer(splits=[1, 4])
+        self.assertEqual(b.getSplits(), [1.0, 4.0])
+        self.assertTrue(all([type(v) == float for v in b.getSplits()]))
+        self.assertRaises(TypeError, lambda: Bucketizer(splits=["a", 1.0]))
+
+    def test_list_string(self):
+        for labels in [np.array(['a', u'b']), ['a', u'b'], np.array(['a', 'b'])]:
+            idx_to_string = IndexToString(labels=labels)
+            self.assertListEqual(idx_to_string.getLabels(), ['a', 'b'])
+        self.assertRaises(TypeError, lambda: IndexToString(labels=['a', 2]))
+
+    def test_string(self):
+        lr = LogisticRegression()
+        for col in ['features', u'features', np.str_('features')]:
+            lr.setFeaturesCol(col)
+            self.assertEqual(lr.getFeaturesCol(), 'features')
+        self.assertRaises(TypeError, lambda: LogisticRegression(featuresCol=2.3))
+
+    def test_bool(self):
+        self.assertRaises(TypeError, lambda: LogisticRegression(fitIntercept=1))
+        self.assertRaises(TypeError, lambda: LogisticRegression(fitIntercept="false"))
+
+
+class PipelineTests(PySparkTestCase):
+
+    def test_pipeline(self):
+        dataset = MockDataset()
+        estimator0 = MockEstimator()
+        transformer1 = MockTransformer()
+        estimator2 = MockEstimator()
+        transformer3 = MockTransformer()
+        pipeline = Pipeline(stages=[estimator0, transformer1, estimator2, transformer3])
+        pipeline_model = pipeline.fit(dataset, {estimator0.fake: 0, transformer1.fake: 1})
+        model0, transformer1, model2, transformer3 = pipeline_model.stages
+        self.assertEqual(0, model0.dataset_index)
+        self.assertEqual(0, model0.getFake())
+        self.assertEqual(1, transformer1.dataset_index)
+        self.assertEqual(1, transformer1.getFake())
+        self.assertEqual(2, dataset.index)
+        self.assertIsNone(model2.dataset_index, "The last model shouldn't be called in fit.")
+        self.assertIsNone(transformer3.dataset_index,
+                          "The last transformer shouldn't be called in fit.")
+        dataset = pipeline_model.transform(dataset)
+        self.assertEqual(2, model0.dataset_index)
+        self.assertEqual(3, transformer1.dataset_index)
+        self.assertEqual(4, model2.dataset_index)
+        self.assertEqual(5, transformer3.dataset_index)
+        self.assertEqual(6, dataset.index)
+
+    def test_identity_pipeline(self):
+        dataset = MockDataset()
+
+        def doTransform(pipeline):
+            pipeline_model = pipeline.fit(dataset)
+            return pipeline_model.transform(dataset)
+        # check that empty pipeline did not perform any transformation
+        self.assertEqual(dataset.index, doTransform(Pipeline(stages=[])).index)
+        # check that failure to set stages param will raise KeyError for missing param
+        self.assertRaises(KeyError, lambda: doTransform(Pipeline()))
+
+
+class TestParams(HasMaxIter, HasInputCol, HasSeed):
+    """
+    A subclass of Params mixed with HasMaxIter, HasInputCol and HasSeed.
+    """
+    @keyword_only
+    def __init__(self, seed=None):
+        super(TestParams, self).__init__()
+        self._setDefault(maxIter=10)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    def setParams(self, seed=None):
+        """
+        setParams(self, seed=None)
+        Sets params for this test.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+
+class OtherTestParams(HasMaxIter, HasInputCol, HasSeed):
+    """
+    A subclass of Params mixed with HasMaxIter, HasInputCol and HasSeed.
+    """
+    @keyword_only
+    def __init__(self, seed=None):
+        super(OtherTestParams, self).__init__()
+        self._setDefault(maxIter=10)
+        kwargs = self._input_kwargs
+        self.setParams(**kwargs)
+
+    @keyword_only
+    def setParams(self, seed=None):
+        """
+        setParams(self, seed=None)
+        Sets params for this test.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+
+class HasThrowableProperty(Params):
+
+    def __init__(self):
+        super(HasThrowableProperty, self).__init__()
+        self.p = Param(self, "none", "empty param")
+
+    @property
+    def test_property(self):
+        raise RuntimeError("Test property to raise error when invoked")
+
+
+class ParamTests(SparkSessionTestCase):
+
+    def test_copy_new_parent(self):
+        testParams = TestParams()
+        # Copying an instantiated param should fail
+        with self.assertRaises(ValueError):
+            testParams.maxIter._copy_new_parent(testParams)
+        # Copying a dummy param should succeed
+        TestParams.maxIter._copy_new_parent(testParams)
+        maxIter = testParams.maxIter
+        self.assertEqual(maxIter.name, "maxIter")
+        self.assertEqual(maxIter.doc, "max number of iterations (>= 0).")
+        self.assertTrue(maxIter.parent == testParams.uid)
+
+    def test_param(self):
+        testParams = TestParams()
+        maxIter = testParams.maxIter
+        self.assertEqual(maxIter.name, "maxIter")
+        self.assertEqual(maxIter.doc, "max number of iterations (>= 0).")
+        self.assertTrue(maxIter.parent == testParams.uid)
+
+    def test_hasparam(self):
+        testParams = TestParams()
+        self.assertTrue(all([testParams.hasParam(p.name) for p in testParams.params]))
+        self.assertFalse(testParams.hasParam("notAParameter"))
+        self.assertTrue(testParams.hasParam(u"maxIter"))
+
+    def test_resolveparam(self):
+        testParams = TestParams()
+        self.assertEqual(testParams._resolveParam(testParams.maxIter), testParams.maxIter)
+        self.assertEqual(testParams._resolveParam("maxIter"), testParams.maxIter)
+
+        self.assertEqual(testParams._resolveParam(u"maxIter"), testParams.maxIter)
+        if sys.version_info[0] >= 3:
+            # In Python 3, it is allowed to get/set attributes with non-ascii characters.
+            e_cls = AttributeError
+        else:
+            e_cls = UnicodeEncodeError
+        self.assertRaises(e_cls, lambda: testParams._resolveParam(u"ì•„"))
+
+    def test_params(self):
+        testParams = TestParams()
+        maxIter = testParams.maxIter
+        inputCol = testParams.inputCol
+        seed = testParams.seed
+
+        params = testParams.params
+        self.assertEqual(params, [inputCol, maxIter, seed])
+
+        self.assertTrue(testParams.hasParam(maxIter.name))
+        self.assertTrue(testParams.hasDefault(maxIter))
+        self.assertFalse(testParams.isSet(maxIter))
+        self.assertTrue(testParams.isDefined(maxIter))
+        self.assertEqual(testParams.getMaxIter(), 10)
+        testParams.setMaxIter(100)
+        self.assertTrue(testParams.isSet(maxIter))
+        self.assertEqual(testParams.getMaxIter(), 100)
+
+        self.assertTrue(testParams.hasParam(inputCol.name))
+        self.assertFalse(testParams.hasDefault(inputCol))
+        self.assertFalse(testParams.isSet(inputCol))
+        self.assertFalse(testParams.isDefined(inputCol))
+        with self.assertRaises(KeyError):
+            testParams.getInputCol()
+
+        otherParam = Param(Params._dummy(), "otherParam", "Parameter used to test that " +
+                           "set raises an error for a non-member parameter.",
+                           typeConverter=TypeConverters.toString)
+        with self.assertRaises(ValueError):
+            testParams.set(otherParam, "value")
+
+        # Since the default is normally random, set it to a known number for debug str
+        testParams._setDefault(seed=41)
+        testParams.setSeed(43)
+
+        self.assertEqual(
+            testParams.explainParams(),
+            "\n".join(["inputCol: input column name. (undefined)",
+                       "maxIter: max number of iterations (>= 0). (default: 10, current: 100)",
+                       "seed: random seed. (default: 41, current: 43)"]))
+
+    def test_kmeans_param(self):
+        algo = KMeans()
+        self.assertEqual(algo.getInitMode(), "k-means||")
+        algo.setK(10)
+        self.assertEqual(algo.getK(), 10)
+        algo.setInitSteps(10)
+        self.assertEqual(algo.getInitSteps(), 10)
+        self.assertEqual(algo.getDistanceMeasure(), "euclidean")
+        algo.setDistanceMeasure("cosine")
+        self.assertEqual(algo.getDistanceMeasure(), "cosine")
+
+    def test_hasseed(self):
+        noSeedSpecd = TestParams()
+        withSeedSpecd = TestParams(seed=42)
+        other = OtherTestParams()
+        # Check that we no longer use 42 as the magic number
+        self.assertNotEqual(noSeedSpecd.getSeed(), 42)
+        origSeed = noSeedSpecd.getSeed()
+        # Check that we only compute the seed once
+        self.assertEqual(noSeedSpecd.getSeed(), origSeed)
+        # Check that a specified seed is honored
+        self.assertEqual(withSeedSpecd.getSeed(), 42)
+        # Check that a different class has a different seed
+        self.assertNotEqual(other.getSeed(), noSeedSpecd.getSeed())
+
+    def test_param_property_error(self):
+        param_store = HasThrowableProperty()
+        self.assertRaises(RuntimeError, lambda: param_store.test_property)
+        params = param_store.params  # should not invoke the property 'test_property'
+        self.assertEqual(len(params), 1)
+
+    def test_word2vec_param(self):
+        model = Word2Vec().setWindowSize(6)
+        # Check windowSize is set properly
+        self.assertEqual(model.getWindowSize(), 6)
+
+    def test_copy_param_extras(self):
+        tp = TestParams(seed=42)
+        extra = {tp.getParam(TestParams.inputCol.name): "copy_input"}
+        tp_copy = tp.copy(extra=extra)
+        self.assertEqual(tp.uid, tp_copy.uid)
+        self.assertEqual(tp.params, tp_copy.params)
+        for k, v in extra.items():
+            self.assertTrue(tp_copy.isDefined(k))
+            self.assertEqual(tp_copy.getOrDefault(k), v)
+        copied_no_extra = {}
+        for k, v in tp_copy._paramMap.items():
+            if k not in extra:
+                copied_no_extra[k] = v
+        self.assertEqual(tp._paramMap, copied_no_extra)
+        self.assertEqual(tp._defaultParamMap, tp_copy._defaultParamMap)
+
+    def test_logistic_regression_check_thresholds(self):
+        self.assertIsInstance(
+            LogisticRegression(threshold=0.5, thresholds=[0.5, 0.5]),
+            LogisticRegression
+        )
+
+        self.assertRaisesRegexp(
+            ValueError,
+            "Logistic Regression getThreshold found inconsistent.*$",
+            LogisticRegression, threshold=0.42, thresholds=[0.5, 0.5]
+        )
+
+    def test_preserve_set_state(self):
+        dataset = self.spark.createDataFrame([(0.5,)], ["data"])
+        binarizer = Binarizer(inputCol="data")
+        self.assertFalse(binarizer.isSet("threshold"))
+        binarizer.transform(dataset)
+        binarizer._transfer_params_from_java()
+        self.assertFalse(binarizer.isSet("threshold"),
+                         "Params not explicitly set should remain unset after transform")
+
+    def test_default_params_transferred(self):
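+        # Defaults that were never explicitly set should still be sent to the JVM side.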
+        dataset = self.spark.createDataFrame([(0.5,)], ["data"])
+        binarizer = Binarizer(inputCol="data")
+        # intentionally change the pyspark default, but don't set it
+        binarizer._defaultParamMap[binarizer.outputCol] = "my_default"
+        result = binarizer.transform(dataset).select("my_default").collect()
+        self.assertFalse(binarizer.isSet(binarizer.outputCol))
+        self.assertEqual(result[0][0], 1.0)
+
+    @staticmethod
+    def check_params(test_self, py_stage, check_params_exist=True):
+        """
+        Checks common requirements for Params.params:
+          - set of params exist in Java and Python and are ordered by names
+          - param parent has the same UID as the object's UID
+          - default param value from Java matches value in Python
+          - optionally check if all params from Java also exist in Python
+        """
+        py_stage_str = "%s %s" % (type(py_stage), py_stage)
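+        # Stages without a Java counterpart (pure Python stages) are skipped.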
+        if not hasattr(py_stage, "_to_java"):
+            return
+        java_stage = py_stage._to_java()
+        if java_stage is None:
+            return
+        test_self.assertEqual(py_stage.uid, java_stage.uid(), msg=py_stage_str)
+        if check_params_exist:
+            param_names = [p.name for p in py_stage.params]
+            java_params = list(java_stage.params())
+            java_param_names = [jp.name() for jp in java_params]
+            test_self.assertEqual(
+                param_names, sorted(java_param_names),
+                "Param list in Python does not match Java for %s:\nJava = %s\nPython = %s"
+                % (py_stage_str, java_param_names, param_names))
+        for p in py_stage.params:
+            test_self.assertEqual(p.parent, py_stage.uid)
+            java_param = java_stage.getParam(p.name)
+            py_has_default = py_stage.hasDefault(p)
+            java_has_default = java_stage.hasDefault(java_param)
+            test_self.assertEqual(py_has_default, java_has_default,
+                                  "Default value mismatch of param %s for Params %s"
+                                  % (p.name, str(py_stage)))
+            if py_has_default:
+                if p.name == "seed":
+                    continue  # Random seeds between Spark and PySpark are different
+                java_default = _java2py(test_self.sc,
+                                        java_stage.clear(java_param).getOrDefault(java_param))
+                py_stage._clear(p)
+                py_default = py_stage.getOrDefault(p)
+                # equality test for NaN is always False
+                if isinstance(java_default, float) and np.isnan(java_default):
+                    java_default = "NaN"
+                    py_default = "NaN" if np.isnan(py_default) else "not NaN"
+                test_self.assertEqual(
+                    java_default, py_default,
+                    "Java default %s != python default %s of param %s for Params %s"
+                    % (str(java_default), str(py_default), p.name, str(py_stage)))
+
+
+class EvaluatorTests(SparkSessionTestCase):
+
+    def test_java_params(self):
+        """
+        This tests a bug fixed by SPARK-18274, which caused multiple copies
+        of a Params instance in Python to be linked to the same Java instance.
+        """
+        evaluator = RegressionEvaluator(metricName="r2")
+        df = self.spark.createDataFrame([Row(label=1.0, prediction=1.1)])
+        evaluator.evaluate(df)
+        self.assertEqual(evaluator._java_obj.getMetricName(), "r2")
+        evaluatorCopy = evaluator.copy({evaluator.metricName: "mae"})
+        evaluator.evaluate(df)
+        evaluatorCopy.evaluate(df)
+        self.assertEqual(evaluator._java_obj.getMetricName(), "r2")
+        self.assertEqual(evaluatorCopy._java_obj.getMetricName(), "mae")
+
+    def test_clustering_evaluator_with_cosine_distance(self):
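+        # ClusteringEvaluator uses the silhouette metric by default; only the distance
+        # measure is switched to cosine here.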
+        featureAndPredictions = map(lambda x: (Vectors.dense(x[0]), x[1]),
+                                    [([1.0, 1.0], 1.0), ([10.0, 10.0], 1.0), ([1.0, 0.5], 2.0),
+                                     ([10.0, 4.4], 2.0), ([-1.0, 1.0], 3.0), ([-100.0, 90.0], 3.0)])
+        dataset = self.spark.createDataFrame(featureAndPredictions, ["features", "prediction"])
+        evaluator = ClusteringEvaluator(predictionCol="prediction", distanceMeasure="cosine")
+        self.assertEqual(evaluator.getDistanceMeasure(), "cosine")
+        self.assertTrue(np.isclose(evaluator.evaluate(dataset), 0.992671213, atol=1e-5))
+
+
+class FeatureTests(SparkSessionTestCase):
+
+    def test_binarizer(self):
+        b0 = Binarizer()
+        self.assertListEqual(b0.params, [b0.inputCol, b0.outputCol, b0.threshold])
+        self.assertTrue(all([not b0.isSet(p) for p in b0.params]))
+        self.assertTrue(b0.hasDefault(b0.threshold))
+        self.assertEqual(b0.getThreshold(), 0.0)
+        b0.setParams(inputCol="input", outputCol="output").setThreshold(1.0)
+        self.assertTrue(all([b0.isSet(p) for p in b0.params]))
+        self.assertEqual(b0.getThreshold(), 1.0)
+        self.assertEqual(b0.getInputCol(), "input")
+        self.assertEqual(b0.getOutputCol(), "output")
+
+        b0c = b0.copy({b0.threshold: 2.0})
+        self.assertEqual(b0c.uid, b0.uid)
+        self.assertListEqual(b0c.params, b0.params)
+        self.assertEqual(b0c.getThreshold(), 2.0)
+
+        b1 = Binarizer(threshold=2.0, inputCol="input", outputCol="output")
+        self.assertNotEqual(b1.uid, b0.uid)
+        self.assertEqual(b1.getThreshold(), 2.0)
+        self.assertEqual(b1.getInputCol(), "input")
+        self.assertEqual(b1.getOutputCol(), "output")
+
+    def test_idf(self):
+        dataset = self.spark.createDataFrame([
+            (DenseVector([1.0, 2.0]),),
+            (DenseVector([0.0, 1.0]),),
+            (DenseVector([3.0, 0.2]),)], ["tf"])
+        idf0 = IDF(inputCol="tf")
+        self.assertListEqual(idf0.params, [idf0.inputCol, idf0.minDocFreq, idf0.outputCol])
+        idf0m = idf0.fit(dataset, {idf0.outputCol: "idf"})
+        self.assertEqual(idf0m.uid, idf0.uid,
+                         "Model should inherit the UID from its parent estimator.")
+        output = idf0m.transform(dataset)
+        self.assertIsNotNone(output.head().idf)
+        # Test that parameters were transferred to the Python model
+        ParamTests.check_params(self, idf0m)
+
+    def test_ngram(self):
+        dataset = self.spark.createDataFrame([
+            Row(input=["a", "b", "c", "d", "e"])])
+        ngram0 = NGram(n=4, inputCol="input", outputCol="output")
+        self.assertEqual(ngram0.getN(), 4)
+        self.assertEqual(ngram0.getInputCol(), "input")
+        self.assertEqual(ngram0.getOutputCol(), "output")
+        transformedDF = ngram0.transform(dataset)
+        self.assertEqual(transformedDF.head().output, ["a b c d", "b c d e"])
+
+    def test_stopwordsremover(self):
+        dataset = self.spark.createDataFrame([Row(input=["a", "panda"])])
+        stopWordRemover = StopWordsRemover(inputCol="input", outputCol="output")
+        # Default
+        self.assertEqual(stopWordRemover.getInputCol(), "input")
+        transformedDF = stopWordRemover.transform(dataset)
+        self.assertEqual(transformedDF.head().output, ["panda"])
+        self.assertEqual(type(stopWordRemover.getStopWords()), list)
+        self.assertTrue(isinstance(stopWordRemover.getStopWords()[0], basestring))
+        # Custom
+        stopwords = ["panda"]
+        stopWordRemover.setStopWords(stopwords)
+        self.assertEqual(stopWordRemover.getInputCol(), "input")
+        self.assertEqual(stopWordRemover.getStopWords(), stopwords)
+        transformedDF = stopWordRemover.transform(dataset)
+        self.assertEqual(transformedDF.head().output, ["a"])
+        # with language selection
+        stopwords = StopWordsRemover.loadDefaultStopWords("turkish")
+        dataset = self.spark.createDataFrame([Row(input=["acaba", "ama", "biri"])])
+        stopWordRemover.setStopWords(stopwords)
+        self.assertEqual(stopWordRemover.getStopWords(), stopwords)
+        transformedDF = stopWordRemover.transform(dataset)
+        self.assertEqual(transformedDF.head().output, [])
+        # with locale
+        stopwords = ["BELKİ"]
+        dataset = self.spark.createDataFrame([Row(input=["belki"])])
+        stopWordRemover.setStopWords(stopwords).setLocale("tr")
+        self.assertEqual(stopWordRemover.getStopWords(), stopwords)
+        transformedDF = stopWordRemover.transform(dataset)
+        self.assertEqual(transformedDF.head().output, [])
+
+    def test_count_vectorizer_with_binary(self):
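+        # With binary=True, all non-zero term counts are set to 1.0 (see "expected" column).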
+        dataset = self.spark.createDataFrame([
+            (0, "a a a b b c".split(' '), SparseVector(3, {0: 1.0, 1: 1.0, 2: 1.0}),),
+            (1, "a a".split(' '), SparseVector(3, {0: 1.0}),),
+            (2, "a b".split(' '), SparseVector(3, {0: 1.0, 1: 1.0}),),
+            (3, "c".split(' '), SparseVector(3, {2: 1.0}),)], ["id", "words", "expected"])
+        cv = CountVectorizer(binary=True, inputCol="words", outputCol="features")
+        model = cv.fit(dataset)
+
+        transformedList = model.transform(dataset).select("features", "expected").collect()
+
+        for r in transformedList:
+            feature, expected = r
+            self.assertEqual(feature, expected)
+
+    def test_count_vectorizer_with_maxDF(self):
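+        # Terms appearing in more than maxDF documents are dropped from the vocabulary; "a"
+        # appears in all four rows, so it is excluded for both maxDF=3 and maxDF=0.75.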
+        dataset = self.spark.createDataFrame([
+            (0, "a b c d".split(' '), SparseVector(3, {0: 1.0, 1: 1.0, 2: 1.0}),),
+            (1, "a b c".split(' '), SparseVector(3, {0: 1.0, 1: 1.0}),),
+            (2, "a b".split(' '), SparseVector(3, {0: 1.0}),),
+            (3, "a".split(' '), SparseVector(3,  {}),)], ["id", "words", "expected"])
+        cv = CountVectorizer(inputCol="words", outputCol="features")
+        model1 = cv.setMaxDF(3).fit(dataset)
+        self.assertEqual(model1.vocabulary, ['b', 'c', 'd'])
+
+        transformedList1 = model1.transform(dataset).select("features", "expected").collect()
+
+        for r in transformedList1:
+            feature, expected = r
+            self.assertEqual(feature, expected)
+
+        model2 = cv.setMaxDF(0.75).fit(dataset)
+        self.assertEqual(model2.vocabulary, ['b', 'c', 'd'])
+
+        transformedList2 = model2.transform(dataset).select("features", "expected").collect()
+
+        for r in transformedList2:
+            feature, expected = r
+            self.assertEqual(feature, expected)
+
+    def test_count_vectorizer_from_vocab(self):
+        model = CountVectorizerModel.from_vocabulary(["a", "b", "c"], inputCol="words",
+                                                     outputCol="features", minTF=2)
+        self.assertEqual(model.vocabulary, ["a", "b", "c"])
+        self.assertEqual(model.getMinTF(), 2)
+
+        dataset = self.spark.createDataFrame([
+            (0, "a a a b b c".split(' '), SparseVector(3, {0: 3.0, 1: 2.0}),),
+            (1, "a a".split(' '), SparseVector(3, {0: 2.0}),),
+            (2, "a b".split(' '), SparseVector(3, {}),)], ["id", "words", "expected"])
+
+        transformed_list = model.transform(dataset).select("features", "expected").collect()
+
+        for r in transformed_list:
+            feature, expected = r
+            self.assertEqual(feature, expected)
+
+        # Test an empty vocabulary
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(Exception, "vocabSize.*invalid.*0"):
+                CountVectorizerModel.from_vocabulary([], inputCol="words")
+
+        # Test that a model with default settings can transform
+        model_default = CountVectorizerModel.from_vocabulary(["a", "b", "c"], inputCol="words")
+        transformed_list = model_default.transform(dataset)\
+            .select(model_default.getOrDefault(model_default.outputCol)).collect()
+        self.assertEqual(len(transformed_list), 3)
+
+    def test_rformula_force_index_label(self):
+        df = self.spark.createDataFrame([
+            (1.0, 1.0, "a"),
+            (0.0, 2.0, "b"),
+            (1.0, 0.0, "a")], ["y", "x", "s"])
+        # Does not index the label by default since it is a numeric type.
+        rf = RFormula(formula="y ~ x + s")
+        model = rf.fit(df)
+        transformedDF = model.transform(df)
+        self.assertEqual(transformedDF.head().label, 1.0)
+        # Force to index label.
+        rf2 = RFormula(formula="y ~ x + s").setForceIndexLabel(True)
+        model2 = rf2.fit(df)
+        transformedDF2 = model2.transform(df)
+        self.assertEqual(transformedDF2.head().label, 0.0)
+
+    def test_rformula_string_indexer_order_type(self):
+        df = self.spark.createDataFrame([
+            (1.0, 1.0, "a"),
+            (0.0, 2.0, "b"),
+            (1.0, 0.0, "a")], ["y", "x", "s"])
+        rf = RFormula(formula="y ~ x + s", stringIndexerOrderType="alphabetDesc")
+        self.assertEqual(rf.getStringIndexerOrderType(), 'alphabetDesc')
+        transformedDF = rf.fit(df).transform(df)
+        observed = transformedDF.select("features").collect()
+        expected = [[1.0, 0.0], [2.0, 1.0], [0.0, 0.0]]
+        for i in range(0, len(expected)):
+            self.assertTrue(all(observed[i]["features"].toArray() == expected[i]))
+
+    def test_string_indexer_handle_invalid(self):
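+        # handleInvalid="keep" gives unseen/null labels an extra index; "skip" drops such rows.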
+        df = self.spark.createDataFrame([
+            (0, "a"),
+            (1, "d"),
+            (2, None)], ["id", "label"])
+
+        si1 = StringIndexer(inputCol="label", outputCol="indexed", handleInvalid="keep",
+                            stringOrderType="alphabetAsc")
+        model1 = si1.fit(df)
+        td1 = model1.transform(df)
+        actual1 = td1.select("id", "indexed").collect()
+        expected1 = [Row(id=0, indexed=0.0), Row(id=1, indexed=1.0), Row(id=2, indexed=2.0)]
+        self.assertEqual(actual1, expected1)
+
+        si2 = si1.setHandleInvalid("skip")
+        model2 = si2.fit(df)
+        td2 = model2.transform(df)
+        actual2 = td2.select("id", "indexed").collect()
+        expected2 = [Row(id=0, indexed=0.0), Row(id=1, indexed=1.0)]
+        self.assertEqual(actual2, expected2)
+
+    def test_string_indexer_from_labels(self):
+        model = StringIndexerModel.from_labels(["a", "b", "c"], inputCol="label",
+                                               outputCol="indexed", handleInvalid="keep")
+        self.assertEqual(model.labels, ["a", "b", "c"])
+
+        df1 = self.spark.createDataFrame([
+            (0, "a"),
+            (1, "c"),
+            (2, None),
+            (3, "b"),
+            (4, "b")], ["id", "label"])
+
+        result1 = model.transform(df1)
+        actual1 = result1.select("id", "indexed").collect()
+        expected1 = [Row(id=0, indexed=0.0), Row(id=1, indexed=2.0), Row(id=2, indexed=3.0),
+                     Row(id=3, indexed=1.0), Row(id=4, indexed=1.0)]
+        self.assertEqual(actual1, expected1)
+
+        model_empty_labels = StringIndexerModel.from_labels(
+            [], inputCol="label", outputCol="indexed", handleInvalid="keep")
+        actual2 = model_empty_labels.transform(df1).select("id", "indexed").collect()
+        expected2 = [Row(id=0, indexed=0.0), Row(id=1, indexed=0.0), Row(id=2, indexed=0.0),
+                     Row(id=3, indexed=0.0), Row(id=4, indexed=0.0)]
+        self.assertEqual(actual2, expected2)
+
+        # Test that a model with default settings can transform
+        model_default = StringIndexerModel.from_labels(["a", "b", "c"], inputCol="label")
+        df2 = self.spark.createDataFrame([
+            (0, "a"),
+            (1, "c"),
+            (2, "b"),
+            (3, "b"),
+            (4, "b")], ["id", "label"])
+        transformed_list = model_default.transform(df2)\
+            .select(model_default.getOrDefault(model_default.outputCol)).collect()
+        self.assertEqual(len(transformed_list), 5)
+
+    def test_vector_size_hint(self):
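+        # handleInvalid="skip" drops rows whose vectors do not match the declared size of 3.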
+        df = self.spark.createDataFrame(
+            [(0, Vectors.dense([0.0, 10.0, 0.5])),
+             (1, Vectors.dense([1.0, 11.0, 0.5, 0.6])),
+             (2, Vectors.dense([2.0, 12.0]))],
+            ["id", "vector"])
+
+        sizeHint = VectorSizeHint(
+            inputCol="vector",
+            handleInvalid="skip")
+        sizeHint.setSize(3)
+        self.assertEqual(sizeHint.getSize(), 3)
+
+        output = sizeHint.transform(df).head().vector
+        expected = DenseVector([0.0, 10.0, 0.5])
+        self.assertEqual(output, expected)
+
+
+class HasInducedError(Params):
+
+    def __init__(self):
+        super(HasInducedError, self).__init__()
+        self.inducedError = Param(self, "inducedError",
+                                  "Uniformly-distributed error added to feature")
+
+    def getInducedError(self):
+        return self.getOrDefault(self.inducedError)
+
+
+class InducedErrorModel(Model, HasInducedError):
+
+    def __init__(self):
+        super(InducedErrorModel, self).__init__()
+
+    def _transform(self, dataset):
+        return dataset.withColumn("prediction",
+                                  dataset.feature + (rand(0) * self.getInducedError()))
+
+
+class InducedErrorEstimator(Estimator, HasInducedError):
+
+    def __init__(self, inducedError=1.0):
+        super(InducedErrorEstimator, self).__init__()
+        self._set(inducedError=inducedError)
+
+    def _fit(self, dataset):
+        model = InducedErrorModel()
+        self._copyValues(model)
+        return model
+
+
+class CrossValidatorTests(SparkSessionTestCase):
+
+    def test_copy(self):
+        dataset = self.spark.createDataFrame([
+            (10, 10.0),
+            (50, 50.0),
+            (100, 100.0),
+            (500, 500.0)] * 10,
+            ["feature", "label"])
+
+        iee = InducedErrorEstimator()
+        evaluator = RegressionEvaluator(metricName="rmse")
+
+        grid = (ParamGridBuilder()
+                .addGrid(iee.inducedError, [100.0, 0.0, 10000.0])
+                .build())
+        cv = CrossValidator(estimator=iee, estimatorParamMaps=grid, evaluator=evaluator)
+        cvCopied = cv.copy()
+        self.assertEqual(cv.getEstimator().uid, cvCopied.getEstimator().uid)
+
+        cvModel = cv.fit(dataset)
+        cvModelCopied = cvModel.copy()
+        for index in range(len(cvModel.avgMetrics)):
+            self.assertTrue(abs(cvModel.avgMetrics[index] - cvModelCopied.avgMetrics[index])
+                            < 0.0001)
+
+    def test_fit_minimize_metric(self):
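+        # RMSE is minimized, so the grid point with zero induced error should be selected.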
+        dataset = self.spark.createDataFrame([
+            (10, 10.0),
+            (50, 50.0),
+            (100, 100.0),
+            (500, 500.0)] * 10,
+            ["feature", "label"])
+
+        iee = InducedErrorEstimator()
+        evaluator = RegressionEvaluator(metricName="rmse")
+
+        grid = (ParamGridBuilder()
+                .addGrid(iee.inducedError, [100.0, 0.0, 10000.0])
+                .build())
+        cv = CrossValidator(estimator=iee, estimatorParamMaps=grid, evaluator=evaluator)
+        cvModel = cv.fit(dataset)
+        bestModel = cvModel.bestModel
+        bestModelMetric = evaluator.evaluate(bestModel.transform(dataset))
+
+        self.assertEqual(0.0, bestModel.getOrDefault('inducedError'),
+                         "Best model should have zero induced error")
+        self.assertEqual(0.0, bestModelMetric, "Best model has RMSE of 0")
+
+    def test_fit_maximize_metric(self):
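+        # r2 is maximized, so the grid point with zero induced error should be selected.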
+        dataset = self.spark.createDataFrame([
+            (10, 10.0),
+            (50, 50.0),
+            (100, 100.0),
+            (500, 500.0)] * 10,
+            ["feature", "label"])
+
+        iee = InducedErrorEstimator()
+        evaluator = RegressionEvaluator(metricName="r2")
+
+        grid = (ParamGridBuilder()
+                .addGrid(iee.inducedError, [100.0, 0.0, 10000.0])
+                .build())
+        cv = CrossValidator(estimator=iee, estimatorParamMaps=grid, evaluator=evaluator)
+        cvModel = cv.fit(dataset)
+        bestModel = cvModel.bestModel
+        bestModelMetric = evaluator.evaluate(bestModel.transform(dataset))
+
+        self.assertEqual(0.0, bestModel.getOrDefault('inducedError'),
+                         "Best model should have zero induced error")
+        self.assertEqual(1.0, bestModelMetric, "Best model has R-squared of 1")
+
+    def test_param_grid_type_coercion(self):
+        lr = LogisticRegression(maxIter=10)
+        paramGrid = ParamGridBuilder().addGrid(lr.regParam, [0.5, 1]).build()
+        for param in paramGrid:
+            for v in param.values():
+                self.assertEqual(type(v), float)
+
+    def test_save_load_trained_model(self):
+        # This tests saving and loading the trained model only.
+        # Save/load for CrossValidator will be added later: SPARK-13786
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+        evaluator = BinaryClassificationEvaluator()
+        cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
+        cvModel = cv.fit(dataset)
+        lrModel = cvModel.bestModel
+
+        cvModelPath = temp_path + "/cvModel"
+        lrModel.save(cvModelPath)
+        loadedLrModel = LogisticRegressionModel.load(cvModelPath)
+        self.assertEqual(loadedLrModel.uid, lrModel.uid)
+        self.assertEqual(loadedLrModel.intercept, lrModel.intercept)
+
+    def test_save_load_simple_estimator(self):
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+        evaluator = BinaryClassificationEvaluator()
+
+        # test save/load of CrossValidator
+        cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
+        cvModel = cv.fit(dataset)
+        cvPath = temp_path + "/cv"
+        cv.save(cvPath)
+        loadedCV = CrossValidator.load(cvPath)
+        self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid)
+        self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid)
+        self.assertEqual(loadedCV.getEstimatorParamMaps(), cv.getEstimatorParamMaps())
+
+        # test save/load of CrossValidatorModel
+        cvModelPath = temp_path + "/cvModel"
+        cvModel.save(cvModelPath)
+        loadedModel = CrossValidatorModel.load(cvModelPath)
+        self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid)
+
+    def test_parallel_evaluation(self):
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [5, 6]).build()
+        evaluator = BinaryClassificationEvaluator()
+
+        # Fit the same CrossValidator with parallelism 1 and 2 and compare the metrics
+        cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
+        cv.setParallelism(1)
+        cvSerialModel = cv.fit(dataset)
+        cv.setParallelism(2)
+        cvParallelModel = cv.fit(dataset)
+        self.assertEqual(cvSerialModel.avgMetrics, cvParallelModel.avgMetrics)
+
+    def test_expose_sub_models(self):
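+        # With collectSubModels=True, one sub-model is kept per (fold, param map) pair.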
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+        evaluator = BinaryClassificationEvaluator()
+
+        numFolds = 3
+        cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator,
+                            numFolds=numFolds, collectSubModels=True)
+
+        def checkSubModels(subModels):
+            self.assertEqual(len(subModels), numFolds)
+            for i in range(numFolds):
+                self.assertEqual(len(subModels[i]), len(grid))
+
+        cvModel = cv.fit(dataset)
+        checkSubModels(cvModel.subModels)
+
+        # Test that the default value of the "persistSubModels" option is "true"
+        testSubPath = temp_path + "/testCrossValidatorSubModels"
+        savingPathWithSubModels = testSubPath + "cvModel3"
+        cvModel.save(savingPathWithSubModels)
+        cvModel3 = CrossValidatorModel.load(savingPathWithSubModels)
+        checkSubModels(cvModel3.subModels)
+        cvModel4 = cvModel3.copy()
+        checkSubModels(cvModel4.subModels)
+
+        savingPathWithoutSubModels = testSubPath + "cvModel2"
+        cvModel.write().option("persistSubModels", "false").save(savingPathWithoutSubModels)
+        cvModel2 = CrossValidatorModel.load(savingPathWithoutSubModels)
+        self.assertEqual(cvModel2.subModels, None)
+
+        for i in range(numFolds):
+            for j in range(len(grid)):
+                self.assertEqual(cvModel.subModels[i][j].uid, cvModel3.subModels[i][j].uid)
+
+    def test_save_load_nested_estimator(self):
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+
+        ova = OneVsRest(classifier=LogisticRegression())
+        lr1 = LogisticRegression().setMaxIter(100)
+        lr2 = LogisticRegression().setMaxIter(150)
+        grid = ParamGridBuilder().addGrid(ova.classifier, [lr1, lr2]).build()
+        evaluator = MulticlassClassificationEvaluator()
+
+        # test save/load of CrossValidator
+        cv = CrossValidator(estimator=ova, estimatorParamMaps=grid, evaluator=evaluator)
+        cvModel = cv.fit(dataset)
+        cvPath = temp_path + "/cv"
+        cv.save(cvPath)
+        loadedCV = CrossValidator.load(cvPath)
+        self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid)
+        self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid)
+
+        originalParamMap = cv.getEstimatorParamMaps()
+        loadedParamMap = loadedCV.getEstimatorParamMaps()
+        for i, param in enumerate(loadedParamMap):
+            for p in param:
+                if p.name == "classifier":
+                    self.assertEqual(param[p].uid, originalParamMap[i][p].uid)
+                else:
+                    self.assertEqual(param[p], originalParamMap[i][p])
+
+        # test save/load of CrossValidatorModel
+        cvModelPath = temp_path + "/cvModel"
+        cvModel.save(cvModelPath)
+        loadedModel = CrossValidatorModel.load(cvModelPath)
+        self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid)
+
+
+class TrainValidationSplitTests(SparkSessionTestCase):
+
+    def test_fit_minimize_metric(self):
+        dataset = self.spark.createDataFrame([
+            (10, 10.0),
+            (50, 50.0),
+            (100, 100.0),
+            (500, 500.0)] * 10,
+            ["feature", "label"])
+
+        iee = InducedErrorEstimator()
+        evaluator = RegressionEvaluator(metricName="rmse")
+
+        grid = ParamGridBuilder() \
+            .addGrid(iee.inducedError, [100.0, 0.0, 10000.0]) \
+            .build()
+        tvs = TrainValidationSplit(estimator=iee, estimatorParamMaps=grid, evaluator=evaluator)
+        tvsModel = tvs.fit(dataset)
+        bestModel = tvsModel.bestModel
+        bestModelMetric = evaluator.evaluate(bestModel.transform(dataset))
+        validationMetrics = tvsModel.validationMetrics
+
+        self.assertEqual(0.0, bestModel.getOrDefault('inducedError'),
+                         "Best model should have zero induced error")
+        self.assertEqual(0.0, bestModelMetric, "Best model has RMSE of 0")
+        self.assertEqual(len(grid), len(validationMetrics),
+                         "validationMetrics should have the same size as the parameter grid")
+        self.assertEqual(0.0, min(validationMetrics))
+
+    def test_fit_maximize_metric(self):
+        dataset = self.spark.createDataFrame([
+            (10, 10.0),
+            (50, 50.0),
+            (100, 100.0),
+            (500, 500.0)] * 10,
+            ["feature", "label"])
+
+        iee = InducedErrorEstimator()
+        evaluator = RegressionEvaluator(metricName="r2")
+
+        grid = ParamGridBuilder() \
+            .addGrid(iee.inducedError, [100.0, 0.0, 10000.0]) \
+            .build()
+        tvs = TrainValidationSplit(estimator=iee, estimatorParamMaps=grid, evaluator=evaluator)
+        tvsModel = tvs.fit(dataset)
+        bestModel = tvsModel.bestModel
+        bestModelMetric = evaluator.evaluate(bestModel.transform(dataset))
+        validationMetrics = tvsModel.validationMetrics
+
+        self.assertEqual(0.0, bestModel.getOrDefault('inducedError'),
+                         "Best model should have zero induced error")
+        self.assertEqual(1.0, bestModelMetric, "Best model has R-squared of 1")
+        self.assertEqual(len(grid), len(validationMetrics),
+                         "validationMetrics should have the same size as the parameter grid")
+        self.assertEqual(1.0, max(validationMetrics))
+
+    def test_save_load_trained_model(self):
+        # This tests saving and loading the trained model only.
+        # Save/load for TrainValidationSplit will be added later: SPARK-13786
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+        evaluator = BinaryClassificationEvaluator()
+        tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
+        tvsModel = tvs.fit(dataset)
+        lrModel = tvsModel.bestModel
+
+        tvsModelPath = temp_path + "/tvsModel"
+        lrModel.save(tvsModelPath)
+        loadedLrModel = LogisticRegressionModel.load(tvsModelPath)
+        self.assertEqual(loadedLrModel.uid, lrModel.uid)
+        self.assertEqual(loadedLrModel.intercept, lrModel.intercept)
+
+    def test_save_load_simple_estimator(self):
+        # Test save/load of TrainValidationSplit and the trained TrainValidationSplitModel.
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+        evaluator = BinaryClassificationEvaluator()
+        tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
+        tvsModel = tvs.fit(dataset)
+
+        tvsPath = temp_path + "/tvs"
+        tvs.save(tvsPath)
+        loadedTvs = TrainValidationSplit.load(tvsPath)
+        self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid)
+        self.assertEqual(loadedTvs.getEvaluator().uid, tvs.getEvaluator().uid)
+        self.assertEqual(loadedTvs.getEstimatorParamMaps(), tvs.getEstimatorParamMaps())
+
+        tvsModelPath = temp_path + "/tvsModel"
+        tvsModel.save(tvsModelPath)
+        loadedModel = TrainValidationSplitModel.load(tvsModelPath)
+        self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid)
+
+    def test_parallel_evaluation(self):
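+        # Validation metrics should be identical regardless of the parallelism level.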
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [5, 6]).build()
+        evaluator = BinaryClassificationEvaluator()
+        tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator)
+        tvs.setParallelism(1)
+        tvsSerialModel = tvs.fit(dataset)
+        tvs.setParallelism(2)
+        tvsParallelModel = tvs.fit(dataset)
+        self.assertEqual(tvsSerialModel.validationMetrics, tvsParallelModel.validationMetrics)
+
+    def test_expose_sub_models(self):
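+        # With collectSubModels=True, one sub-model is kept per param map.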
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+        lr = LogisticRegression()
+        grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+        evaluator = BinaryClassificationEvaluator()
+        tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator,
+                                   collectSubModels=True)
+        tvsModel = tvs.fit(dataset)
+        self.assertEqual(len(tvsModel.subModels), len(grid))
+
+        # Test that the default value of the "persistSubModels" option is "true"
+        testSubPath = temp_path + "/testTrainValidationSplitSubModels"
+        savingPathWithSubModels = testSubPath + "cvModel3"
+        tvsModel.save(savingPathWithSubModels)
+        tvsModel3 = TrainValidationSplitModel.load(savingPathWithSubModels)
+        self.assertEqual(len(tvsModel3.subModels), len(grid))
+        tvsModel4 = tvsModel3.copy()
+        self.assertEqual(len(tvsModel4.subModels), len(grid))
+
+        savingPathWithoutSubModels = testSubPath + "cvModel2"
+        tvsModel.write().option("persistSubModels", "false").save(savingPathWithoutSubModels)
+        tvsModel2 = TrainValidationSplitModel.load(savingPathWithoutSubModels)
+        self.assertEqual(tvsModel2.subModels, None)
+
+        for i in range(len(grid)):
+            self.assertEqual(tvsModel.subModels[i].uid, tvsModel3.subModels[i].uid)
+
+    def test_save_load_nested_estimator(self):
+        # Test save/load of TrainValidationSplit with a nested estimator (OneVsRest).
+        temp_path = tempfile.mkdtemp()
+        dataset = self.spark.createDataFrame(
+            [(Vectors.dense([0.0]), 0.0),
+             (Vectors.dense([0.4]), 1.0),
+             (Vectors.dense([0.5]), 0.0),
+             (Vectors.dense([0.6]), 1.0),
+             (Vectors.dense([1.0]), 1.0)] * 10,
+            ["features", "label"])
+        ova = OneVsRest(classifier=LogisticRegression())
+        lr1 = LogisticRegression().setMaxIter(100)
+        lr2 = LogisticRegression().setMaxIter(150)
+        grid = ParamGridBuilder().addGrid(ova.classifier, [lr1, lr2]).build()
+        evaluator = MulticlassClassificationEvaluator()
+
+        tvs = TrainValidationSplit(estimator=ova, estimatorParamMaps=grid, evaluator=evaluator)
+        tvsModel = tvs.fit(dataset)
+        tvsPath = temp_path + "/tvs"
+        tvs.save(tvsPath)
+        loadedTvs = TrainValidationSplit.load(tvsPath)
+        self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid)
+        self.assertEqual(loadedTvs.getEvaluator().uid, tvs.getEvaluator().uid)
+
+        originalParamMap = tvs.getEstimatorParamMaps()
+        loadedParamMap = loadedTvs.getEstimatorParamMaps()
+        for i, param in enumerate(loadedParamMap):
+            for p in param:
+                if p.name == "classifier":
+                    self.assertEqual(param[p].uid, originalParamMap[i][p].uid)
+                else:
+                    self.assertEqual(param[p], originalParamMap[i][p])
+
+        tvsModelPath = temp_path + "/tvsModel"
+        tvsModel.save(tvsModelPath)
+        loadedModel = TrainValidationSplitModel.load(tvsModelPath)
+        self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid)
+
+    def test_copy(self):
+        dataset = self.spark.createDataFrame([
+            (10, 10.0),
+            (50, 50.0),
+            (100, 100.0),
+            (500, 500.0)] * 10,
+            ["feature", "label"])
+
+        iee = InducedErrorEstimator()
+        evaluator = RegressionEvaluator(metricName="r2")
+
+        grid = ParamGridBuilder() \
+            .addGrid(iee.inducedError, [100.0, 0.0, 10000.0]) \
+            .build()
+        tvs = TrainValidationSplit(estimator=iee, estimatorParamMaps=grid, evaluator=evaluator)
+        tvsModel = tvs.fit(dataset)
+        tvsCopied = tvs.copy()
+        tvsModelCopied = tvsModel.copy()
+
+        self.assertEqual(tvs.getEstimator().uid, tvsCopied.getEstimator().uid,
+                         "Copied TrainValidationSplit should have the same estimator uid")
+
+        self.assertEqual(tvsModel.bestModel.uid, tvsModelCopied.bestModel.uid)
+        self.assertEqual(len(tvsModel.validationMetrics),
+                         len(tvsModelCopied.validationMetrics),
+                         "Copied validationMetrics should have the same size as the original")
+        for index in range(len(tvsModel.validationMetrics)):
+            self.assertEqual(tvsModel.validationMetrics[index],
+                             tvsModelCopied.validationMetrics[index])
+
+
+class PersistenceTest(SparkSessionTestCase):
+
+    def test_linear_regression(self):
+        lr = LinearRegression(maxIter=1)
+        path = tempfile.mkdtemp()
+        lr_path = path + "/lr"
+        lr.save(lr_path)
+        lr2 = LinearRegression.load(lr_path)
+        self.assertEqual(lr.uid, lr2.uid)
+        self.assertEqual(type(lr.uid), type(lr2.uid))
+        self.assertEqual(lr2.uid, lr2.maxIter.parent,
+                         "Loaded LinearRegression instance uid (%s) did not match Param's uid (%s)"
+                         % (lr2.uid, lr2.maxIter.parent))
+        self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
+                         "Loaded LinearRegression instance default params did not match " +
+                         "original defaults")
+        try:
+            rmtree(path)
+        except OSError:
+            pass
+
+    def test_linear_regression_pmml_basic(self):
+        # Most of the validation is done on the Scala side; here we just check
+        # that we output text rather than Parquet (i.e. that the format flag
+        # was respected).
+        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
+                                         (0.0, 2.0, Vectors.sparse(1, [], []))],
+                                        ["label", "weight", "features"])
+        lr = LinearRegression(maxIter=1)
+        model = lr.fit(df)
+        path = tempfile.mkdtemp()
+        lr_path = path + "/lr-pmml"
+        model.write().format("pmml").save(lr_path)
+        pmml_text_list = self.sc.textFile(lr_path).collect()
+        pmml_text = "\n".join(pmml_text_list)
+        self.assertIn("Apache Spark", pmml_text)
+        self.assertIn("PMML", pmml_text)
+
+    def test_logistic_regression(self):
+        lr = LogisticRegression(maxIter=1)
+        path = tempfile.mkdtemp()
+        lr_path = path + "/logreg"
+        lr.save(lr_path)
+        lr2 = LogisticRegression.load(lr_path)
+        self.assertEqual(lr2.uid, lr2.maxIter.parent,
+                         "Loaded LogisticRegression instance uid (%s) "
+                         "did not match Param's uid (%s)"
+                         % (lr2.uid, lr2.maxIter.parent))
+        self.assertEqual(lr._defaultParamMap[lr.maxIter], lr2._defaultParamMap[lr2.maxIter],
+                         "Loaded LogisticRegression instance default params did not match " +
+                         "original defaults")
+        try:
+            rmtree(path)
+        except OSError:
+            pass
+
+    def _compare_params(self, m1, m2, param):
+        """
+        Compare 2 ML Params instances for the given param, and assert both have
+        the same param value and parent. The param must be a parameter of m1.
+        """
+        # Prevent a KeyError when a param appears in neither the paramMap nor the defaultParamMap.
+        if m1.isDefined(param):
+            paramValue1 = m1.getOrDefault(param)
+            paramValue2 = m2.getOrDefault(m2.getParam(param.name))
+            if isinstance(paramValue1, Params):
+                self._compare_pipelines(paramValue1, paramValue2)
+            else:
+                self.assertEqual(paramValue1, paramValue2)  # for general types param
+            # Assert parents are equal
+            self.assertEqual(param.parent, m2.getParam(param.name).parent)
+        else:
+            # If the param is not defined on m1, then it should not be defined
+            # on m2 either. See SPARK-14931.
+            self.assertFalse(m2.isDefined(m2.getParam(param.name)))
+
+    def _compare_pipelines(self, m1, m2):
+        """
+        Compare 2 ML types, asserting that they are equivalent.
+        This currently supports:
+         - basic types
+         - Pipeline, PipelineModel
+         - OneVsRest, OneVsRestModel
+        This checks:
+         - uid
+         - type
+         - Param values and parents
+        """
+        self.assertEqual(m1.uid, m2.uid)
+        self.assertEqual(type(m1), type(m2))
+        if isinstance(m1, JavaParams) or isinstance(m1, Transformer):
+            self.assertEqual(len(m1.params), len(m2.params))
+            for p in m1.params:
+                self._compare_params(m1, m2, p)
+        elif isinstance(m1, Pipeline):
+            self.assertEqual(len(m1.getStages()), len(m2.getStages()))
+            for s1, s2 in zip(m1.getStages(), m2.getStages()):
+                self._compare_pipelines(s1, s2)
+        elif isinstance(m1, PipelineModel):
+            self.assertEqual(len(m1.stages), len(m2.stages))
+            for s1, s2 in zip(m1.stages, m2.stages):
+                self._compare_pipelines(s1, s2)
+        elif isinstance(m1, OneVsRest) or isinstance(m1, OneVsRestModel):
+            for p in m1.params:
+                self._compare_params(m1, m2, p)
+            if isinstance(m1, OneVsRestModel):
+                self.assertEqual(len(m1.models), len(m2.models))
+                for x, y in zip(m1.models, m2.models):
+                    self._compare_pipelines(x, y)
+        else:
+            raise RuntimeError("_compare_pipelines does not yet support type: %s" % type(m1))
+
+    def test_pipeline_persistence(self):
+        """
+        Pipeline[HashingTF, PCA]
+        """
+        temp_path = tempfile.mkdtemp()
+
+        try:
+            df = self.spark.createDataFrame([(["a", "b", "c"],), (["c", "d", "e"],)], ["words"])
+            tf = HashingTF(numFeatures=10, inputCol="words", outputCol="features")
+            pca = PCA(k=2, inputCol="features", outputCol="pca_features")
+            pl = Pipeline(stages=[tf, pca])
+            model = pl.fit(df)
+
+            pipeline_path = temp_path + "/pipeline"
+            pl.save(pipeline_path)
+            loaded_pipeline = Pipeline.load(pipeline_path)
+            self._compare_pipelines(pl, loaded_pipeline)
+
+            model_path = temp_path + "/pipeline-model"
+            model.save(model_path)
+            loaded_model = PipelineModel.load(model_path)
+            self._compare_pipelines(model, loaded_model)
+        finally:
+            try:
+                rmtree(temp_path)
+            except OSError:
+                pass
+
+    def test_nested_pipeline_persistence(self):
+        """
+        Pipeline[HashingTF, Pipeline[PCA]]
+        """
+        temp_path = tempfile.mkdtemp()
+
+        try:
+            df = self.spark.createDataFrame([(["a", "b", "c"],), (["c", "d", "e"],)], ["words"])
+            tf = HashingTF(numFeatures=10, inputCol="words", outputCol="features")
+            pca = PCA(k=2, inputCol="features", outputCol="pca_features")
+            p0 = Pipeline(stages=[pca])
+            pl = Pipeline(stages=[tf, p0])
+            model = pl.fit(df)
+
+            pipeline_path = temp_path + "/pipeline"
+            pl.save(pipeline_path)
+            loaded_pipeline = Pipeline.load(pipeline_path)
+            self._compare_pipelines(pl, loaded_pipeline)
+
+            model_path = temp_path + "/pipeline-model"
+            model.save(model_path)
+            loaded_model = PipelineModel.load(model_path)
+            self._compare_pipelines(model, loaded_model)
+        finally:
+            try:
+                rmtree(temp_path)
+            except OSError:
+                pass
+
+    def test_python_transformer_pipeline_persistence(self):
+        """
+        Pipeline[MockUnaryTransformer, Binarizer]
+        """
+        temp_path = tempfile.mkdtemp()
+
+        try:
+            df = self.spark.range(0, 10).toDF('input')
+            tf = MockUnaryTransformer(shiftVal=2)\
+                .setInputCol("input").setOutputCol("shiftedInput")
+            tf2 = Binarizer(threshold=6, inputCol="shiftedInput", outputCol="binarized")
+            pl = Pipeline(stages=[tf, tf2])
+            model = pl.fit(df)
+
+            pipeline_path = temp_path + "/pipeline"
+            pl.save(pipeline_path)
+            loaded_pipeline = Pipeline.load(pipeline_path)
+            self._compare_pipelines(pl, loaded_pipeline)
+
+            model_path = temp_path + "/pipeline-model"
+            model.save(model_path)
+            loaded_model = PipelineModel.load(model_path)
+            self._compare_pipelines(model, loaded_model)
+        finally:
+            try:
+                rmtree(temp_path)
+            except OSError:
+                pass
+
+    def test_onevsrest(self):
+        temp_path = tempfile.mkdtemp()
+        df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
+                                         (1.0, Vectors.sparse(2, [], [])),
+                                         (2.0, Vectors.dense(0.5, 0.5))] * 10,
+                                        ["label", "features"])
+        lr = LogisticRegression(maxIter=5, regParam=0.01)
+        ovr = OneVsRest(classifier=lr)
+        model = ovr.fit(df)
+        ovrPath = temp_path + "/ovr"
+        ovr.save(ovrPath)
+        loadedOvr = OneVsRest.load(ovrPath)
+        self._compare_pipelines(ovr, loadedOvr)
+        modelPath = temp_path + "/ovrModel"
+        model.save(modelPath)
+        loadedModel = OneVsRestModel.load(modelPath)
+        self._compare_pipelines(model, loadedModel)
+
+    def test_decisiontree_classifier(self):
+        dt = DecisionTreeClassifier(maxDepth=1)
+        path = tempfile.mkdtemp()
+        dtc_path = path + "/dtc"
+        dt.save(dtc_path)
+        dt2 = DecisionTreeClassifier.load(dtc_path)
+        self.assertEqual(dt2.uid, dt2.maxDepth.parent,
+                         "Loaded DecisionTreeClassifier instance uid (%s) "
+                         "did not match Param's uid (%s)"
+                         % (dt2.uid, dt2.maxDepth.parent))
+        self.assertEqual(dt._defaultParamMap[dt.maxDepth], dt2._defaultParamMap[dt2.maxDepth],
+                         "Loaded DecisionTreeClassifier instance default params did not match " +
+                         "original defaults")
+        try:
+            rmtree(path)
+        except OSError:
+            pass
+
+    def test_decisiontree_regressor(self):
+        dt = DecisionTreeRegressor(maxDepth=1)
+        path = tempfile.mkdtemp()
+        dtr_path = path + "/dtr"
+        dt.save(dtr_path)
+        dt2 = DecisionTreeRegressor.load(dtr_path)
+        self.assertEqual(dt2.uid, dt2.maxDepth.parent,
+                         "Loaded DecisionTreeRegressor instance uid (%s) "
+                         "did not match Param's uid (%s)"
+                         % (dt2.uid, dt2.maxDepth.parent))
+        self.assertEqual(dt._defaultParamMap[dt.maxDepth], dt2._defaultParamMap[dt2.maxDepth],
+                         "Loaded DecisionTreeRegressor instance default params did not match " +
+                         "original defaults")
+        try:
+            rmtree(path)
+        except OSError:
+            pass
+
+    def test_default_read_write(self):
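+        # Round-trip through DefaultParamsWriter / DefaultParamsReadable, including overwrite().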
+        temp_path = tempfile.mkdtemp()
+
+        lr = LogisticRegression()
+        lr.setMaxIter(50)
+        lr.setThreshold(.75)
+        writer = DefaultParamsWriter(lr)
+
+        savePath = temp_path + "/lr"
+        writer.save(savePath)
+
+        reader = DefaultParamsReadable.read()
+        lr2 = reader.load(savePath)
+
+        self.assertEqual(lr.uid, lr2.uid)
+        self.assertEqual(lr.extractParamMap(), lr2.extractParamMap())
+
+        # test overwrite
+        lr.setThreshold(.8)
+        writer.overwrite().save(savePath)
+
+        reader = DefaultParamsReadable.read()
+        lr3 = reader.load(savePath)
+
+        self.assertEqual(lr.uid, lr3.uid)
+        self.assertEqual(lr.extractParamMap(), lr3.extractParamMap())
+
+    def test_default_read_write_default_params(self):
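+        # Default param values should be persisted in the `defaultParamMap` metadata section.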
+        lr = LogisticRegression()
+        self.assertFalse(lr.isSet(lr.getParam("threshold")))
+
+        lr.setMaxIter(50)
+        lr.setThreshold(.75)
+
+        # `threshold` is set by the user; the default param `predictionCol` is not.
+        self.assertTrue(lr.isSet(lr.getParam("threshold")))
+        self.assertFalse(lr.isSet(lr.getParam("predictionCol")))
+        self.assertTrue(lr.hasDefault(lr.getParam("predictionCol")))
+
+        writer = DefaultParamsWriter(lr)
+        metadata = json.loads(writer._get_metadata_to_save(lr, self.sc))
+        self.assertTrue("defaultParamMap" in metadata)
+
+        reader = DefaultParamsReadable.read()
+        metadataStr = json.dumps(metadata, separators=[',', ':'])
+        loadedMetadata = reader._parseMetaData(metadataStr)
+        reader.getAndSetParams(lr, loadedMetadata)
+
+        self.assertTrue(lr.isSet(lr.getParam("threshold")))
+        self.assertFalse(lr.isSet(lr.getParam("predictionCol")))
+        self.assertTrue(lr.hasDefault(lr.getParam("predictionCol")))
+
+        # Manually create metadata without the `defaultParamMap` section.
+        del metadata['defaultParamMap']
+        metadataStr = json.dumps(metadata, separators=[',', ':'])
+        loadedMetadata = reader._parseMetaData(metadataStr)
+        with self.assertRaisesRegexp(AssertionError, "`defaultParamMap` section not found"):
+            reader.getAndSetParams(lr, loadedMetadata)
+
+        # Prior to 2.4.0, the metadata did not have a `defaultParamMap` section.
+        metadata['sparkVersion'] = '2.3.0'
+        metadataStr = json.dumps(metadata, separators=[',', ':'])
+        loadedMetadata = reader._parseMetaData(metadataStr)
+        reader.getAndSetParams(lr, loadedMetadata)
+
+
+class LDATest(SparkSessionTestCase):
+
+    def _compare(self, m1, m2):
+        """
+        Temp method for comparing instances.
+        TODO: Replace with generic implementation once SPARK-14706 is merged.
+        """
+        self.assertEqual(m1.uid, m2.uid)
+        self.assertEqual(type(m1), type(m2))
+        self.assertEqual(len(m1.params), len(m2.params))
+        for p in m1.params:
+            if m1.isDefined(p):
+                self.assertEqual(m1.getOrDefault(p), m2.getOrDefault(p))
+                self.assertEqual(p.parent, m2.getParam(p.name).parent)
+        if isinstance(m1, LDAModel):
+            self.assertEqual(m1.vocabSize(), m2.vocabSize())
+            self.assertEqual(m1.topicsMatrix(), m2.topicsMatrix())
+
+    def test_persistence(self):
+        # Test save/load for LDA, LocalLDAModel, DistributedLDAModel.
+        df = self.spark.createDataFrame([
+            [1, Vectors.dense([0.0, 1.0])],
+            [2, Vectors.sparse(2, {0: 1.0})],
+        ], ["id", "features"])
+        # Fit model
+        lda = LDA(k=2, seed=1, optimizer="em")
+        distributedModel = lda.fit(df)
+        self.assertTrue(distributedModel.isDistributed())
+        localModel = distributedModel.toLocal()
+        self.assertFalse(localModel.isDistributed())
+        # Define paths
+        path = tempfile.mkdtemp()
+        lda_path = path + "/lda"
+        dist_model_path = path + "/distLDAModel"
+        local_model_path = path + "/localLDAModel"
+        # Test LDA
+        lda.save(lda_path)
+        lda2 = LDA.load(lda_path)
+        self._compare(lda, lda2)
+        # Test DistributedLDAModel
+        distributedModel.save(dist_model_path)
+        distributedModel2 = DistributedLDAModel.load(dist_model_path)
+        self._compare(distributedModel, distributedModel2)
+        # Test LocalLDAModel
+        localModel.save(local_model_path)
+        localModel2 = LocalLDAModel.load(local_model_path)
+        self._compare(localModel, localModel2)
+        # Clean up
+        try:
+            rmtree(path)
+        except OSError:
+            pass
+
+
+class TrainingSummaryTest(SparkSessionTestCase):
+
+    def test_linear_regression_summary(self):
+        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
+                                         (0.0, 2.0, Vectors.sparse(1, [], []))],
+                                        ["label", "weight", "features"])
+        lr = LinearRegression(maxIter=5, regParam=0.0, solver="normal", weightCol="weight",
+                              fitIntercept=False)
+        model = lr.fit(df)
+        self.assertTrue(model.hasSummary)
+        s = model.summary
+        # test that api is callable and returns expected types
+        self.assertGreater(s.totalIterations, 0)
+        self.assertTrue(isinstance(s.predictions, DataFrame))
+        self.assertEqual(s.predictionCol, "prediction")
+        self.assertEqual(s.labelCol, "label")
+        self.assertEqual(s.featuresCol, "features")
+        objHist = s.objectiveHistory
+        self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float))
+        self.assertAlmostEqual(s.explainedVariance, 0.25, 2)
+        self.assertAlmostEqual(s.meanAbsoluteError, 0.0)
+        self.assertAlmostEqual(s.meanSquaredError, 0.0)
+        self.assertAlmostEqual(s.rootMeanSquaredError, 0.0)
+        self.assertAlmostEqual(s.r2, 1.0, 2)
+        self.assertAlmostEqual(s.r2adj, 1.0, 2)
+        self.assertTrue(isinstance(s.residuals, DataFrame))
+        self.assertEqual(s.numInstances, 2)
+        self.assertEqual(s.degreesOfFreedom, 1)
+        devResiduals = s.devianceResiduals
+        self.assertTrue(isinstance(devResiduals, list) and isinstance(devResiduals[0], float))
+        coefStdErr = s.coefficientStandardErrors
+        self.assertTrue(isinstance(coefStdErr, list) and isinstance(coefStdErr[0], float))
+        tValues = s.tValues
+        self.assertTrue(isinstance(tValues, list) and isinstance(tValues[0], float))
+        pValues = s.pValues
+        self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float))
+        # test evaluation (with training dataset) produces a summary with same values
+        # one check is enough to verify a summary is returned
+        # The child class LinearRegressionTrainingSummary runs full test
+        sameSummary = model.evaluate(df)
+        self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance)
+
+    def test_glr_summary(self):
+        from pyspark.ml.linalg import Vectors
+        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
+                                         (0.0, 2.0, Vectors.sparse(1, [], []))],
+                                        ["label", "weight", "features"])
+        glr = GeneralizedLinearRegression(family="gaussian", link="identity", weightCol="weight",
+                                          fitIntercept=False)
+        model = glr.fit(df)
+        self.assertTrue(model.hasSummary)
+        s = model.summary
+        # test that api is callable and returns expected types
+        self.assertEqual(s.numIterations, 1)  # this should default to a single iteration of WLS
+        self.assertTrue(isinstance(s.predictions, DataFrame))
+        self.assertEqual(s.predictionCol, "prediction")
+        self.assertEqual(s.numInstances, 2)
+        self.assertTrue(isinstance(s.residuals(), DataFrame))
+        self.assertTrue(isinstance(s.residuals("pearson"), DataFrame))
+        coefStdErr = s.coefficientStandardErrors
+        self.assertTrue(isinstance(coefStdErr, list) and isinstance(coefStdErr[0], float))
+        tValues = s.tValues
+        self.assertTrue(isinstance(tValues, list) and isinstance(tValues[0], float))
+        pValues = s.pValues
+        self.assertTrue(isinstance(pValues, list) and isinstance(pValues[0], float))
+        self.assertEqual(s.degreesOfFreedom, 1)
+        self.assertEqual(s.residualDegreeOfFreedom, 1)
+        self.assertEqual(s.residualDegreeOfFreedomNull, 2)
+        self.assertEqual(s.rank, 1)
+        self.assertTrue(isinstance(s.solver, basestring))
+        self.assertTrue(isinstance(s.aic, float))
+        self.assertTrue(isinstance(s.deviance, float))
+        self.assertTrue(isinstance(s.nullDeviance, float))
+        self.assertTrue(isinstance(s.dispersion, float))
+        # test evaluation (with training dataset) produces a summary with same values
+        # one check is enough to verify a summary is returned
+        # The child class GeneralizedLinearRegressionTrainingSummary runs full test
+        sameSummary = model.evaluate(df)
+        self.assertAlmostEqual(sameSummary.deviance, s.deviance)
+
+    def test_binary_logistic_regression_summary(self):
+        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
+                                         (0.0, 2.0, Vectors.sparse(1, [], []))],
+                                        ["label", "weight", "features"])
+        lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False)
+        model = lr.fit(df)
+        self.assertTrue(model.hasSummary)
+        s = model.summary
+        # test that api is callable and returns expected types
+        self.assertTrue(isinstance(s.predictions, DataFrame))
+        self.assertEqual(s.probabilityCol, "probability")
+        self.assertEqual(s.labelCol, "label")
+        self.assertEqual(s.featuresCol, "features")
+        self.assertEqual(s.predictionCol, "prediction")
+        objHist = s.objectiveHistory
+        self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float))
+        self.assertGreater(s.totalIterations, 0)
+        self.assertTrue(isinstance(s.labels, list))
+        self.assertTrue(isinstance(s.truePositiveRateByLabel, list))
+        self.assertTrue(isinstance(s.falsePositiveRateByLabel, list))
+        self.assertTrue(isinstance(s.precisionByLabel, list))
+        self.assertTrue(isinstance(s.recallByLabel, list))
+        self.assertTrue(isinstance(s.fMeasureByLabel(), list))
+        self.assertTrue(isinstance(s.fMeasureByLabel(1.0), list))
+        self.assertTrue(isinstance(s.roc, DataFrame))
+        self.assertAlmostEqual(s.areaUnderROC, 1.0, 2)
+        self.assertTrue(isinstance(s.pr, DataFrame))
+        self.assertTrue(isinstance(s.fMeasureByThreshold, DataFrame))
+        self.assertTrue(isinstance(s.precisionByThreshold, DataFrame))
+        self.assertTrue(isinstance(s.recallByThreshold, DataFrame))
+        self.assertAlmostEqual(s.accuracy, 1.0, 2)
+        self.assertAlmostEqual(s.weightedTruePositiveRate, 1.0, 2)
+        self.assertAlmostEqual(s.weightedFalsePositiveRate, 0.0, 2)
+        self.assertAlmostEqual(s.weightedRecall, 1.0, 2)
+        self.assertAlmostEqual(s.weightedPrecision, 1.0, 2)
+        self.assertAlmostEqual(s.weightedFMeasure(), 1.0, 2)
+        self.assertAlmostEqual(s.weightedFMeasure(1.0), 1.0, 2)
+        # test evaluation (with training dataset) produces a summary with same values
+        # one check is enough to verify a summary is returned, Scala version runs full test
+        sameSummary = model.evaluate(df)
+        self.assertAlmostEqual(sameSummary.areaUnderROC, s.areaUnderROC)
+
+    def test_multiclass_logistic_regression_summary(self):
+        df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)),
+                                         (0.0, 2.0, Vectors.sparse(1, [], [])),
+                                         (2.0, 2.0, Vectors.dense(2.0)),
+                                         (2.0, 2.0, Vectors.dense(1.9))],
+                                        ["label", "weight", "features"])
+        lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False)
+        model = lr.fit(df)
+        self.assertTrue(model.hasSummary)
+        s = model.summary
+        # test that api is callable and returns expected types
+        self.assertTrue(isinstance(s.predictions, DataFrame))
+        self.assertEqual(s.probabilityCol, "probability")
+        self.assertEqual(s.labelCol, "label")
+        self.assertEqual(s.featuresCol, "features")
+        self.assertEqual(s.predictionCol, "prediction")
+        objHist = s.objectiveHistory
+        self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float))
+        self.assertGreater(s.totalIterations, 0)
+        self.assertTrue(isinstance(s.labels, list))
+        self.assertTrue(isinstance(s.truePositiveRateByLabel, list))
+        self.assertTrue(isinstance(s.falsePositiveRateByLabel, list))
+        self.assertTrue(isinstance(s.precisionByLabel, list))
+        self.assertTrue(isinstance(s.recallByLabel, list))
+        self.assertTrue(isinstance(s.fMeasureByLabel(), list))
+        self.assertTrue(isinstance(s.fMeasureByLabel(1.0), list))
+        self.assertAlmostEqual(s.accuracy, 0.75, 2)
+        self.assertAlmostEqual(s.weightedTruePositiveRate, 0.75, 2)
+        self.assertAlmostEqual(s.weightedFalsePositiveRate, 0.25, 2)
+        self.assertAlmostEqual(s.weightedRecall, 0.75, 2)
+        self.assertAlmostEqual(s.weightedPrecision, 0.583, 2)
+        self.assertAlmostEqual(s.weightedFMeasure(), 0.65, 2)
+        self.assertAlmostEqual(s.weightedFMeasure(1.0), 0.65, 2)
+        # test evaluation (with training dataset) produces a summary with same values
+        # one check is enough to verify a summary is returned, Scala version runs full test
+        sameSummary = model.evaluate(df)
+        self.assertAlmostEqual(sameSummary.accuracy, s.accuracy)
+
+    def test_gaussian_mixture_summary(self):
+        data = [(Vectors.dense(1.0),), (Vectors.dense(5.0),), (Vectors.dense(10.0),),
+                (Vectors.sparse(1, [], []),)]
+        df = self.spark.createDataFrame(data, ["features"])
+        gmm = GaussianMixture(k=2)
+        model = gmm.fit(df)
+        self.assertTrue(model.hasSummary)
+        s = model.summary
+        self.assertTrue(isinstance(s.predictions, DataFrame))
+        self.assertEqual(s.probabilityCol, "probability")
+        self.assertTrue(isinstance(s.probability, DataFrame))
+        self.assertEqual(s.featuresCol, "features")
+        self.assertEqual(s.predictionCol, "prediction")
+        self.assertTrue(isinstance(s.cluster, DataFrame))
+        self.assertEqual(len(s.clusterSizes), 2)
+        self.assertEqual(s.k, 2)
+        self.assertEqual(s.numIter, 3)
+
+    def test_bisecting_kmeans_summary(self):
+        data = [(Vectors.dense(1.0),), (Vectors.dense(5.0),), (Vectors.dense(10.0),),
+                (Vectors.sparse(1, [], []),)]
+        df = self.spark.createDataFrame(data, ["features"])
+        bkm = BisectingKMeans(k=2)
+        model = bkm.fit(df)
+        self.assertTrue(model.hasSummary)
+        s = model.summary
+        self.assertTrue(isinstance(s.predictions, DataFrame))
+        self.assertEqual(s.featuresCol, "features")
+        self.assertEqual(s.predictionCol, "prediction")
+        self.assertTrue(isinstance(s.cluster, DataFrame))
+        self.assertEqual(len(s.clusterSizes), 2)
+        self.assertEqual(s.k, 2)
+        self.assertEqual(s.numIter, 20)
+
+    def test_kmeans_summary(self):
+        data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
+                (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)]
+        df = self.spark.createDataFrame(data, ["features"])
+        kmeans = KMeans(k=2, seed=1)
+        model = kmeans.fit(df)
+        self.assertTrue(model.hasSummary)
+        s = model.summary
+        self.assertTrue(isinstance(s.predictions, DataFrame))
+        self.assertEqual(s.featuresCol, "features")
+        self.assertEqual(s.predictionCol, "prediction")
+        self.assertTrue(isinstance(s.cluster, DataFrame))
+        self.assertEqual(len(s.clusterSizes), 2)
+        self.assertEqual(s.k, 2)
+        self.assertEqual(s.numIter, 1)
+
+
+class KMeansTests(SparkSessionTestCase):
+
+    def test_kmeans_cosine_distance(self):
+        data = [(Vectors.dense([1.0, 1.0]),), (Vectors.dense([10.0, 10.0]),),
+                (Vectors.dense([1.0, 0.5]),), (Vectors.dense([10.0, 4.4]),),
+                (Vectors.dense([-1.0, 1.0]),), (Vectors.dense([-100.0, 90.0]),)]
+        df = self.spark.createDataFrame(data, ["features"])
+        kmeans = KMeans(k=3, seed=1, distanceMeasure="cosine")
+        model = kmeans.fit(df)
+        result = model.transform(df).collect()
+        self.assertTrue(result[0].prediction == result[1].prediction)
+        self.assertTrue(result[2].prediction == result[3].prediction)
+        self.assertTrue(result[4].prediction == result[5].prediction)
+
+
+class OneVsRestTests(SparkSessionTestCase):
+
+    def test_copy(self):
+        df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
+                                         (1.0, Vectors.sparse(2, [], [])),
+                                         (2.0, Vectors.dense(0.5, 0.5))],
+                                        ["label", "features"])
+        lr = LogisticRegression(maxIter=5, regParam=0.01)
+        ovr = OneVsRest(classifier=lr)
+        ovr1 = ovr.copy({lr.maxIter: 10})
+        self.assertEqual(ovr.getClassifier().getMaxIter(), 5)
+        self.assertEqual(ovr1.getClassifier().getMaxIter(), 10)
+        model = ovr.fit(df)
+        model1 = model.copy({model.predictionCol: "indexed"})
+        self.assertEqual(model1.getPredictionCol(), "indexed")
+
+    def test_output_columns(self):
+        df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
+                                         (1.0, Vectors.sparse(2, [], [])),
+                                         (2.0, Vectors.dense(0.5, 0.5))],
+                                        ["label", "features"])
+        lr = LogisticRegression(maxIter=5, regParam=0.01)
+        ovr = OneVsRest(classifier=lr, parallelism=1)
+        model = ovr.fit(df)
+        output = model.transform(df)
+        self.assertEqual(output.columns, ["label", "features", "prediction"])
+
+    def test_parallelism_doesnt_change_output(self):
+        df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)),
+                                         (1.0, Vectors.sparse(2, [], [])),
+                                         (2.0, Vectors.dense(0.5, 0.5))],
+                                        ["label", "features"])
+        ovrPar1 = OneVsRest(classifier=LogisticRegression(maxIter=5, regParam=.01), parallelism=1)
+        modelPar1 = ovrPar1.fit(df)
+        ovrPar2 = OneVsRest(classifier=LogisticRegression(maxIter=5, regParam=.01), parallelism=2)
+        modelPar2 = ovrPar2.fit(df)
+        for i, model in enumerate(modelPar1.models):
+            self.assertTrue(np.allclose(model.coefficients.toArray(),
+                                        modelPar2.models[i].coefficients.toArray(), atol=1E-4))
+            self.assertTrue(np.allclose(model.intercept, modelPar2.models[i].intercept, atol=1E-4))
+
+    def test_support_for_weightCol(self):
+        df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8), 1.0),
+                                         (1.0, Vectors.sparse(2, [], []), 1.0),
+                                         (2.0, Vectors.dense(0.5, 0.5), 1.0)],
+                                        ["label", "features", "weight"])
+        # classifier inherits hasWeightCol
+        lr = LogisticRegression(maxIter=5, regParam=0.01)
+        ovr = OneVsRest(classifier=lr, weightCol="weight")
+        self.assertIsNotNone(ovr.fit(df))
+        # classifier doesn't inherit hasWeightCol
+        dt = DecisionTreeClassifier()
+        ovr2 = OneVsRest(classifier=dt, weightCol="weight")
+        self.assertIsNotNone(ovr2.fit(df))
+
+
+class HashingTFTest(SparkSessionTestCase):
+
+    def test_apply_binary_term_freqs(self):
+
+        df = self.spark.createDataFrame([(0, ["a", "a", "b", "c", "c", "c"])], ["id", "words"])
+        n = 10
+        hashingTF = HashingTF()
+        hashingTF.setInputCol("words").setOutputCol("features").setNumFeatures(n).setBinary(True)
+        output = hashingTF.transform(df)
+        features = output.select("features").first().features.toArray()
+        expected = Vectors.dense([1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]).toArray()
+        for i in range(0, n):
+            self.assertAlmostEqual(features[i], expected[i], 14, "Error at " + str(i) +
+                                   ": expected " + str(expected[i]) + ", got " + str(features[i]))
+
+
+class GeneralizedLinearRegressionTest(SparkSessionTestCase):
+
+    def test_tweedie_distribution(self):
+
+        df = self.spark.createDataFrame(
+            [(1.0, Vectors.dense(0.0, 0.0)),
+             (1.0, Vectors.dense(1.0, 2.0)),
+             (2.0, Vectors.dense(0.0, 0.0)),
+             (2.0, Vectors.dense(1.0, 1.0)), ], ["label", "features"])
+
+        glr = GeneralizedLinearRegression(family="tweedie", variancePower=1.6)
+        model = glr.fit(df)
+        self.assertTrue(np.allclose(model.coefficients.toArray(), [-0.4645, 0.3402], atol=1E-4))
+        self.assertTrue(np.isclose(model.intercept, 0.7841, atol=1E-4))
+
+        model2 = glr.setLinkPower(-1.0).fit(df)
+        self.assertTrue(np.allclose(model2.coefficients.toArray(), [-0.6667, 0.5], atol=1E-4))
+        self.assertTrue(np.isclose(model2.intercept, 0.6667, atol=1E-4))
+
+    def test_offset(self):
+
+        df = self.spark.createDataFrame(
+            [(0.2, 1.0, 2.0, Vectors.dense(0.0, 5.0)),
+             (0.5, 2.1, 0.5, Vectors.dense(1.0, 2.0)),
+             (0.9, 0.4, 1.0, Vectors.dense(2.0, 1.0)),
+             (0.7, 0.7, 0.0, Vectors.dense(3.0, 3.0))], ["label", "weight", "offset", "features"])
+
+        glr = GeneralizedLinearRegression(family="poisson", weightCol="weight", offsetCol="offset")
+        model = glr.fit(df)
+        self.assertTrue(np.allclose(model.coefficients.toArray(), [0.664647, -0.3192581],
+                                    atol=1E-4))
+        self.assertTrue(np.isclose(model.intercept, -1.561613, atol=1E-4))
+
+
+class LinearRegressionTest(SparkSessionTestCase):
+
+    def test_linear_regression_with_huber_loss(self):
+
+        data_path = "data/mllib/sample_linear_regression_data.txt"
+        df = self.spark.read.format("libsvm").load(data_path)
+
+        lir = LinearRegression(loss="huber", epsilon=2.0)
+        model = lir.fit(df)
+
+        expectedCoefficients = [0.136, 0.7648, -0.7761, 2.4236, 0.537,
+                                1.2612, -0.333, -0.5694, -0.6311, 0.6053]
+        expectedIntercept = 0.1607
+        expectedScale = 9.758
+
+        self.assertTrue(
+            np.allclose(model.coefficients.toArray(), expectedCoefficients, atol=1E-3))
+        self.assertTrue(np.isclose(model.intercept, expectedIntercept, atol=1E-3))
+        self.assertTrue(np.isclose(model.scale, expectedScale, atol=1E-3))
+
+
+class LogisticRegressionTest(SparkSessionTestCase):
+
+    def test_binomial_logistic_regression_with_bound(self):
+
+        df = self.spark.createDataFrame(
+            [(1.0, 1.0, Vectors.dense(0.0, 5.0)),
+             (0.0, 2.0, Vectors.dense(1.0, 2.0)),
+             (1.0, 3.0, Vectors.dense(2.0, 1.0)),
+             (0.0, 4.0, Vectors.dense(3.0, 3.0)), ], ["label", "weight", "features"])
+
+        lor = LogisticRegression(regParam=0.01, weightCol="weight",
+                                 lowerBoundsOnCoefficients=Matrices.dense(1, 2, [-1.0, -1.0]),
+                                 upperBoundsOnIntercepts=Vectors.dense(0.0))
+        model = lor.fit(df)
+        self.assertTrue(
+            np.allclose(model.coefficients.toArray(), [-0.2944, -0.0484], atol=1E-4))
+        self.assertTrue(np.isclose(model.intercept, 0.0, atol=1E-4))
+
+    def test_multinomial_logistic_regression_with_bound(self):
+
+        data_path = "data/mllib/sample_multiclass_classification_data.txt"
+        df = self.spark.read.format("libsvm").load(data_path)
+
+        lor = LogisticRegression(regParam=0.01,
+                                 lowerBoundsOnCoefficients=Matrices.dense(3, 4, range(12)),
+                                 upperBoundsOnIntercepts=Vectors.dense(0.0, 0.0, 0.0))
+        model = lor.fit(df)
+        expected = [[4.593, 4.5516, 9.0099, 12.2904],
+                    [1.0, 8.1093, 7.0, 10.0],
+                    [3.041, 5.0, 8.0, 11.0]]
+        for i in range(0, len(expected)):
+            self.assertTrue(
+                np.allclose(model.coefficientMatrix.toArray()[i], expected[i], atol=1E-4))
+        self.assertTrue(
+            np.allclose(model.interceptVector.toArray(), [-0.9057, -1.1392, -0.0033], atol=1E-4))
+
+
+class MultilayerPerceptronClassifierTest(SparkSessionTestCase):
+
+    def test_raw_and_probability_prediction(self):
+
+        data_path = "data/mllib/sample_multiclass_classification_data.txt"
+        df = self.spark.read.format("libsvm").load(data_path)
+
+        mlp = MultilayerPerceptronClassifier(maxIter=100, layers=[4, 5, 4, 3],
+                                             blockSize=128, seed=123)
+        model = mlp.fit(df)
+        test = self.sc.parallelize([Row(features=Vectors.dense(0.1, 0.1, 0.25, 0.25))]).toDF()
+        result = model.transform(test).head()
+        expected_prediction = 2.0
+        expected_probability = [0.0, 0.0, 1.0]
+        expected_rawPrediction = [57.3955, -124.5462, 67.9943]
+        self.assertEqual(result.prediction, expected_prediction)
+        self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4))
+        self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1E-4))
+
+
+class FPGrowthTests(SparkSessionTestCase):
+    def setUp(self):
+        super(FPGrowthTests, self).setUp()
+        self.data = self.spark.createDataFrame(
+            [([1, 2], ), ([1, 2], ), ([1, 2, 3], ), ([1, 3], )],
+            ["items"])
+
+    def test_association_rules(self):
+        fp = FPGrowth()
+        fpm = fp.fit(self.data)
+
+        expected_association_rules = self.spark.createDataFrame(
+            [([3], [1], 1.0, 1.0), ([2], [1], 1.0, 1.0)],
+            ["antecedent", "consequent", "confidence", "lift"]
+        )
+        actual_association_rules = fpm.associationRules
+
+        self.assertEqual(actual_association_rules.subtract(expected_association_rules).count(), 0)
+        self.assertEqual(expected_association_rules.subtract(actual_association_rules).count(), 0)
+
+    def test_freq_itemsets(self):
+        fp = FPGrowth()
+        fpm = fp.fit(self.data)
+
+        expected_freq_itemsets = self.spark.createDataFrame(
+            [([1], 4), ([2], 3), ([2, 1], 3), ([3], 2), ([3, 1], 2)],
+            ["items", "freq"]
+        )
+        actual_freq_itemsets = fpm.freqItemsets
+
+        self.assertEqual(actual_freq_itemsets.subtract(expected_freq_itemsets).count(), 0)
+        self.assertEqual(expected_freq_itemsets.subtract(actual_freq_itemsets).count(), 0)
+
+    def tearDown(self):
+        del self.data
+
+
+class ImageReaderTest(SparkSessionTestCase):
+
+    def test_read_images(self):
+        data_path = 'data/mllib/images/origin/kittens'
+        df = ImageSchema.readImages(data_path, recursive=True, dropImageFailures=True)
+        self.assertEqual(df.count(), 4)
+        first_row = df.take(1)[0][0]
+        array = ImageSchema.toNDArray(first_row)
+        self.assertEqual(len(array), first_row[1])
+        self.assertEqual(ImageSchema.toImage(array, origin=first_row[0]), first_row)
+        self.assertEqual(df.schema, ImageSchema.imageSchema)
+        self.assertEqual(df.schema["image"].dataType, ImageSchema.columnSchema)
+        expected = {'CV_8UC3': 16, 'Undefined': -1, 'CV_8U': 0, 'CV_8UC1': 0, 'CV_8UC4': 24}
+        self.assertEqual(ImageSchema.ocvTypes, expected)
+        expected = ['origin', 'height', 'width', 'nChannels', 'mode', 'data']
+        self.assertEqual(ImageSchema.imageFields, expected)
+        self.assertEqual(ImageSchema.undefinedImageType, "Undefined")
+
+        with QuietTest(self.sc):
+            self.assertRaisesRegexp(
+                TypeError,
+                "image argument should be pyspark.sql.types.Row; however",
+                lambda: ImageSchema.toNDArray("a"))
+
+        with QuietTest(self.sc):
+            self.assertRaisesRegexp(
+                ValueError,
+                "image argument should have attributes specified in",
+                lambda: ImageSchema.toNDArray(Row(a=1)))
+
+        with QuietTest(self.sc):
+            self.assertRaisesRegexp(
+                TypeError,
+                "array argument should be numpy.ndarray; however, it got",
+                lambda: ImageSchema.toImage("a"))
+
+
+class ImageReaderTest2(PySparkTestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        super(ImageReaderTest2, cls).setUpClass()
+        cls.hive_available = True
+        # Note that here we enable Hive's support.
+        cls.spark = None
+        try:
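+            # Probe the JVM for Hive classes; if this raises, Hive support is not
+            # on the classpath and the tests in this class are skipped in setUp.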
+            cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+        except py4j.protocol.Py4JError:
+            cls.tearDownClass()
+            cls.hive_available = False
+        except TypeError:
+            cls.tearDownClass()
+            cls.hive_available = False
+        if cls.hive_available:
+            cls.spark = HiveContext._createForTesting(cls.sc)
+
+    def setUp(self):
+        if not self.hive_available:
+            self.skipTest("Hive is not available.")
+
+    @classmethod
+    def tearDownClass(cls):
+        super(ImageReaderTest2, cls).tearDownClass()
+        if cls.spark is not None:
+            cls.spark.sparkSession.stop()
+            cls.spark = None
+
+    def test_read_images_multiple_times(self):
+        # This test case is to check if `ImageSchema.readImages` tries to
+        # initiate Hive client multiple times. See SPARK-22651.
+        data_path = 'data/mllib/images/origin/kittens'
+        ImageSchema.readImages(data_path, recursive=True, dropImageFailures=True)
+        ImageSchema.readImages(data_path, recursive=True, dropImageFailures=True)
+
+
+class ALSTest(SparkSessionTestCase):
+
+    def test_storage_levels(self):
+        df = self.spark.createDataFrame(
+            [(0, 0, 4.0), (0, 1, 2.0), (1, 1, 3.0), (1, 2, 4.0), (2, 1, 1.0), (2, 2, 5.0)],
+            ["user", "item", "rating"])
+        als = ALS().setMaxIter(1).setRank(1)
+        # test default params
+        als.fit(df)
+        self.assertEqual(als.getIntermediateStorageLevel(), "MEMORY_AND_DISK")
+        self.assertEqual(als._java_obj.getIntermediateStorageLevel(), "MEMORY_AND_DISK")
+        self.assertEqual(als.getFinalStorageLevel(), "MEMORY_AND_DISK")
+        self.assertEqual(als._java_obj.getFinalStorageLevel(), "MEMORY_AND_DISK")
+        # test non-default params
+        als.setIntermediateStorageLevel("MEMORY_ONLY_2")
+        als.setFinalStorageLevel("DISK_ONLY")
+        als.fit(df)
+        self.assertEqual(als.getIntermediateStorageLevel(), "MEMORY_ONLY_2")
+        self.assertEqual(als._java_obj.getIntermediateStorageLevel(), "MEMORY_ONLY_2")
+        self.assertEqual(als.getFinalStorageLevel(), "DISK_ONLY")
+        self.assertEqual(als._java_obj.getFinalStorageLevel(), "DISK_ONLY")
+
+
+class DefaultValuesTests(PySparkTestCase):
+    """
+    Test :py:class:`JavaParams` classes to see if their default Param values match
+    those in their Scala counterparts.
+    """
+
+    def test_java_params(self):
+        import pyspark.ml.feature
+        import pyspark.ml.classification
+        import pyspark.ml.clustering
+        import pyspark.ml.evaluation
+        import pyspark.ml.pipeline
+        import pyspark.ml.recommendation
+        import pyspark.ml.regression
+
+        modules = [pyspark.ml.feature, pyspark.ml.classification, pyspark.ml.clustering,
+                   pyspark.ml.evaluation, pyspark.ml.pipeline, pyspark.ml.recommendation,
+                   pyspark.ml.regression]
+        for module in modules:
+            for name, cls in inspect.getmembers(module, inspect.isclass):
+                if not name.endswith('Model') and not name.endswith('Params')\
+                        and issubclass(cls, JavaParams) and not inspect.isabstract(cls):
+                    # NOTE: disable check_params_exist until there is parity with Scala API
+                    ParamTests.check_params(self, cls(), check_params_exist=False)
+
+        # Additional classes that need explicit construction
+        from pyspark.ml.feature import CountVectorizerModel, StringIndexerModel
+        ParamTests.check_params(self, CountVectorizerModel.from_vocabulary(['a'], 'input'),
+                                check_params_exist=False)
+        ParamTests.check_params(self, StringIndexerModel.from_labels(['a', 'b'], 'input'),
+                                check_params_exist=False)
+
+
+def _squared_distance(a, b):
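+    # Dispatch to whichever operand is an ml.linalg Vector (squared_distance is a
+    # Vector method), so the comparison also works when the other side is a plain
+    # list, array.array, or numpy array.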
+    if isinstance(a, Vector):
+        return a.squared_distance(b)
+    else:
+        return b.squared_distance(a)
+
+
+class VectorTests(MLlibTestCase):
+
+    def _test_serialize(self, v):
+        self.assertEqual(v, ser.loads(ser.dumps(v)))
+        jvec = self.sc._jvm.org.apache.spark.ml.python.MLSerDe.loads(bytearray(ser.dumps(v)))
+        nv = ser.loads(bytes(self.sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(jvec)))
+        self.assertEqual(v, nv)
+        vs = [v] * 100
+        jvecs = self.sc._jvm.org.apache.spark.ml.python.MLSerDe.loads(bytearray(ser.dumps(vs)))
+        nvs = ser.loads(bytes(self.sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(jvecs)))
+        self.assertEqual(vs, nvs)
+
+    def test_serialize(self):
+        self._test_serialize(DenseVector(range(10)))
+        self._test_serialize(DenseVector(array([1., 2., 3., 4.])))
+        self._test_serialize(DenseVector(pyarray.array('d', range(10))))
+        self._test_serialize(SparseVector(4, {1: 1, 3: 2}))
+        self._test_serialize(SparseVector(3, {}))
+        self._test_serialize(DenseMatrix(2, 3, range(6)))
+        sm1 = SparseMatrix(
+            3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])
+        self._test_serialize(sm1)
+
+    def test_dot(self):
+        sv = SparseVector(4, {1: 1, 3: 2})
+        dv = DenseVector(array([1., 2., 3., 4.]))
+        lst = DenseVector([1, 2, 3, 4])
+        mat = array([[1., 2., 3., 4.],
+                     [1., 2., 3., 4.],
+                     [1., 2., 3., 4.],
+                     [1., 2., 3., 4.]])
+        arr = pyarray.array('d', [0, 1, 2, 3])
+        self.assertEqual(10.0, sv.dot(dv))
+        self.assertTrue(array_equal(array([3., 6., 9., 12.]), sv.dot(mat)))
+        self.assertEqual(30.0, dv.dot(dv))
+        self.assertTrue(array_equal(array([10., 20., 30., 40.]), dv.dot(mat)))
+        self.assertEqual(30.0, lst.dot(dv))
+        self.assertTrue(array_equal(array([10., 20., 30., 40.]), lst.dot(mat)))
+        self.assertEqual(7.0, sv.dot(arr))
+
+    def test_squared_distance(self):
+        sv = SparseVector(4, {1: 1, 3: 2})
+        dv = DenseVector(array([1., 2., 3., 4.]))
+        lst = DenseVector([4, 3, 2, 1])
+        lst1 = [4, 3, 2, 1]
+        arr = pyarray.array('d', [0, 2, 1, 3])
+        narr = array([0, 2, 1, 3])
+        self.assertEqual(15.0, _squared_distance(sv, dv))
+        self.assertEqual(25.0, _squared_distance(sv, lst))
+        self.assertEqual(20.0, _squared_distance(dv, lst))
+        self.assertEqual(15.0, _squared_distance(dv, sv))
+        self.assertEqual(25.0, _squared_distance(lst, sv))
+        self.assertEqual(20.0, _squared_distance(lst, dv))
+        self.assertEqual(0.0, _squared_distance(sv, sv))
+        self.assertEqual(0.0, _squared_distance(dv, dv))
+        self.assertEqual(0.0, _squared_distance(lst, lst))
+        self.assertEqual(25.0, _squared_distance(sv, lst1))
+        self.assertEqual(3.0, _squared_distance(sv, arr))
+        self.assertEqual(3.0, _squared_distance(sv, narr))
+
+    def test_hash(self):
+        v1 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v2 = SparseVector(4, [(1, 1.0), (3, 5.5)])
+        v3 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v4 = SparseVector(4, [(1, 1.0), (3, 2.5)])
+        self.assertEqual(hash(v1), hash(v2))
+        self.assertEqual(hash(v1), hash(v3))
+        self.assertEqual(hash(v2), hash(v3))
+        self.assertFalse(hash(v1) == hash(v4))
+        self.assertFalse(hash(v2) == hash(v4))
+
+    def test_eq(self):
+        v1 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v2 = SparseVector(4, [(1, 1.0), (3, 5.5)])
+        v3 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v4 = SparseVector(6, [(1, 1.0), (3, 5.5)])
+        v5 = DenseVector([0.0, 1.0, 0.0, 2.5])
+        v6 = SparseVector(4, [(1, 1.0), (3, 2.5)])
+        self.assertEqual(v1, v2)
+        self.assertEqual(v1, v3)
+        self.assertFalse(v2 == v4)
+        self.assertFalse(v1 == v5)
+        self.assertFalse(v1 == v6)
+
+    def test_equals(self):
+        indices = [1, 2, 4]
+        values = [1., 3., 2.]
+        self.assertTrue(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 0., 2.]))
+        self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 1., 0., 2.]))
+        self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 0., 2.]))
+        self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 2., 2.]))
+
+    def test_conversion(self):
+        # numpy arrays should be automatically upcast to float64
+        # tests for fix of [SPARK-5089]
+        v = array([1, 2, 3, 4], dtype='float64')
+        dv = DenseVector(v)
+        self.assertTrue(dv.array.dtype == 'float64')
+        v = array([1, 2, 3, 4], dtype='float32')
+        dv = DenseVector(v)
+        self.assertTrue(dv.array.dtype == 'float64')
+
+    def test_sparse_vector_indexing(self):
+        sv = SparseVector(5, {1: 1, 3: 2})
+        self.assertEqual(sv[0], 0.)
+        self.assertEqual(sv[3], 2.)
+        self.assertEqual(sv[1], 1.)
+        self.assertEqual(sv[2], 0.)
+        self.assertEqual(sv[4], 0.)
+        self.assertEqual(sv[-1], 0.)
+        self.assertEqual(sv[-2], 2.)
+        self.assertEqual(sv[-3], 0.)
+        self.assertEqual(sv[-5], 0.)
+        for ind in [5, -6]:
+            self.assertRaises(IndexError, sv.__getitem__, ind)
+        for ind in [7.8, '1']:
+            self.assertRaises(TypeError, sv.__getitem__, ind)
+
+        zeros = SparseVector(4, {})
+        self.assertEqual(zeros[0], 0.0)
+        self.assertEqual(zeros[3], 0.0)
+        for ind in [4, -5]:
+            self.assertRaises(IndexError, zeros.__getitem__, ind)
+
+        empty = SparseVector(0, {})
+        for ind in [-1, 0, 1]:
+            self.assertRaises(IndexError, empty.__getitem__, ind)
+
+    def test_sparse_vector_iteration(self):
+        self.assertListEqual(list(SparseVector(3, [], [])), [0.0, 0.0, 0.0])
+        self.assertListEqual(list(SparseVector(5, [0, 3], [1.0, 2.0])), [1.0, 0.0, 0.0, 2.0, 0.0])
+
+    def test_matrix_indexing(self):
+        mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
+        expected = [[0, 6], [1, 8], [4, 10]]
+        for i in range(3):
+            for j in range(2):
+                self.assertEqual(mat[i, j], expected[i][j])
+
+        for i, j in [(-1, 0), (4, 1), (3, 4)]:
+            self.assertRaises(IndexError, mat.__getitem__, (i, j))
+
+    def test_repr_dense_matrix(self):
+        mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
+        self.assertTrue(
+            repr(mat),
+            'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)')
+
+        mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10], True)
+        self.assertTrue(
+            repr(mat),
+            'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)')
+
+        mat = DenseMatrix(6, 3, zeros(18))
+        self.assertTrue(
+            repr(mat),
+            'DenseMatrix(6, 3, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..., \
+                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], False)')
+
+    def test_repr_sparse_matrix(self):
+        sm1t = SparseMatrix(
+            3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0],
+            isTransposed=True)
+        self.assertTrue(
+            repr(sm1t),
+            'SparseMatrix(3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], True)')
+
+        indices = tile(arange(6), 3)
+        values = ones(18)
+        sm = SparseMatrix(6, 3, [0, 6, 12, 18], indices, values)
+        self.assertTrue(
+            repr(sm), "SparseMatrix(6, 3, [0, 6, 12, 18], \
+                [0, 1, 2, 3, 4, 5, 0, 1, ..., 4, 5, 0, 1, 2, 3, 4, 5], \
+                [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..., \
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], False)")
+
+        self.assertTrue(
+            str(sm),
+            "6 X 3 CSCMatrix\n\
+            (0,0) 1.0\n(1,0) 1.0\n(2,0) 1.0\n(3,0) 1.0\n(4,0) 1.0\n(5,0) 1.0\n\
+            (0,1) 1.0\n(1,1) 1.0\n(2,1) 1.0\n(3,1) 1.0\n(4,1) 1.0\n(5,1) 1.0\n\
+            (0,2) 1.0\n(1,2) 1.0\n(2,2) 1.0\n(3,2) 1.0\n..\n..")
+
+        sm = SparseMatrix(1, 18, zeros(19), [], [])
+        self.assertTrue(
+            repr(sm),
+            'SparseMatrix(1, 18, \
+                [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0], [], [], False)')
+
+    def test_sparse_matrix(self):
+        # Test sparse matrix creation.
+        sm1 = SparseMatrix(
+            3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])
+        self.assertEqual(sm1.numRows, 3)
+        self.assertEqual(sm1.numCols, 4)
+        self.assertEqual(sm1.colPtrs.tolist(), [0, 2, 2, 4, 4])
+        self.assertEqual(sm1.rowIndices.tolist(), [1, 2, 1, 2])
+        self.assertEqual(sm1.values.tolist(), [1.0, 2.0, 4.0, 5.0])
+        self.assertTrue(
+            repr(sm1),
+            'SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0], False)')
+
+        # Test indexing
+        expected = [
+            [0, 0, 0, 0],
+            [1, 0, 4, 0],
+            [2, 0, 5, 0]]
+
+        for i in range(3):
+            for j in range(4):
+                self.assertEqual(expected[i][j], sm1[i, j])
+        self.assertTrue(array_equal(sm1.toArray(), expected))
+
+        for i, j in [(-1, 1), (4, 3), (3, 5)]:
+            self.assertRaises(IndexError, sm1.__getitem__, (i, j))
+
+        # Test conversion to dense and sparse.
+        smnew = sm1.toDense().toSparse()
+        self.assertEqual(sm1.numRows, smnew.numRows)
+        self.assertEqual(sm1.numCols, smnew.numCols)
+        self.assertTrue(array_equal(sm1.colPtrs, smnew.colPtrs))
+        self.assertTrue(array_equal(sm1.rowIndices, smnew.rowIndices))
+        self.assertTrue(array_equal(sm1.values, smnew.values))
+
+        sm1t = SparseMatrix(
+            3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0],
+            isTransposed=True)
+        self.assertEqual(sm1t.numRows, 3)
+        self.assertEqual(sm1t.numCols, 4)
+        self.assertEqual(sm1t.colPtrs.tolist(), [0, 2, 3, 5])
+        self.assertEqual(sm1t.rowIndices.tolist(), [0, 1, 2, 0, 2])
+        self.assertEqual(sm1t.values.tolist(), [3.0, 2.0, 4.0, 9.0, 8.0])
+
+        expected = [
+            [3, 2, 0, 0],
+            [0, 0, 4, 0],
+            [9, 0, 8, 0]]
+
+        for i in range(3):
+            for j in range(4):
+                self.assertEqual(expected[i][j], sm1t[i, j])
+        self.assertTrue(array_equal(sm1t.toArray(), expected))
+
+    def test_dense_matrix_is_transposed(self):
+        mat1 = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True)
+        mat = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9])
+        self.assertEqual(mat1, mat)
+
+        expected = [[0, 4], [1, 6], [3, 9]]
+        for i in range(3):
+            for j in range(2):
+                self.assertEqual(mat1[i, j], expected[i][j])
+        self.assertTrue(array_equal(mat1.toArray(), expected))
+
+        sm = mat1.toSparse()
+        self.assertTrue(array_equal(sm.rowIndices, [1, 2, 0, 1, 2]))
+        self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5]))
+        self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9]))
+
+    def test_norms(self):
+        a = DenseVector([0, 2, 3, -1])
+        self.assertAlmostEqual(a.norm(2), 3.742, 3)
+        self.assertEqual(a.norm(1), 6)
+        self.assertEqual(a.norm(inf), 3)
+        a = SparseVector(4, [0, 2], [3, -4])
+        self.assertAlmostEqual(a.norm(2), 5)
+        self.assertEqual(a.norm(1), 7)
+        self.assertEqual(a.norm(inf), 4)
+
+        tmp = SparseVector(4, [0, 2], [3, 0])
+        self.assertEqual(tmp.numNonzeros(), 1)
+
+
+class VectorUDTTests(MLlibTestCase):
+
+    dv0 = DenseVector([])
+    dv1 = DenseVector([1.0, 2.0])
+    sv0 = SparseVector(2, [], [])
+    sv1 = SparseVector(2, [1], [2.0])
+    udt = VectorUDT()
+
+    def test_json_schema(self):
+        self.assertEqual(VectorUDT.fromJson(self.udt.jsonValue()), self.udt)
+
+    def test_serialization(self):
+        for v in [self.dv0, self.dv1, self.sv0, self.sv1]:
+            self.assertEqual(v, self.udt.deserialize(self.udt.serialize(v)))
+
+    def test_infer_schema(self):
+        rdd = self.sc.parallelize([Row(label=1.0, features=self.dv1),
+                                   Row(label=0.0, features=self.sv1)])
+        df = rdd.toDF()
+        schema = df.schema
+        field = [f for f in schema.fields if f.name == "features"][0]
+        self.assertEqual(field.dataType, self.udt)
+        vectors = df.rdd.map(lambda p: p.features).collect()
+        self.assertEqual(len(vectors), 2)
+        for v in vectors:
+            if isinstance(v, SparseVector):
+                self.assertEqual(v, self.sv1)
+            elif isinstance(v, DenseVector):
+                self.assertEqual(v, self.dv1)
+            else:
+                raise TypeError("expecting a vector but got %r of type %r" % (v, type(v)))
+
+
+class MatrixUDTTests(MLlibTestCase):
+
+    dm1 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10])
+    dm2 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10], isTransposed=True)
+    sm1 = SparseMatrix(1, 1, [0, 1], [0], [2.0])
+    sm2 = SparseMatrix(2, 1, [0, 0, 1], [0], [5.0], isTransposed=True)
+    udt = MatrixUDT()
+
+    def test_json_schema(self):
+        self.assertEqual(MatrixUDT.fromJson(self.udt.jsonValue()), self.udt)
+
+    def test_serialization(self):
+        for m in [self.dm1, self.dm2, self.sm1, self.sm2]:
+            self.assertEqual(m, self.udt.deserialize(self.udt.serialize(m)))
+
+    def test_infer_schema(self):
+        rdd = self.sc.parallelize([("dense", self.dm1), ("sparse", self.sm1)])
+        df = rdd.toDF()
+        schema = df.schema
+        self.assertEqual(schema.fields[1].dataType, self.udt)
+        matrices = df.rdd.map(lambda x: x._2).collect()
+        self.assertEqual(len(matrices), 2)
+        for m in matrices:
+            if isinstance(m, DenseMatrix):
+                self.assertTrue(array_equal(m.toArray(), self.dm1.toArray()))
+            elif isinstance(m, SparseMatrix):
+                self.assertTrue(array_equal(m.toArray(), self.sm1.toArray()))
+            else:
+                raise ValueError("Expected a matrix but got type %r" % type(m))
+
+
+class WrapperTests(MLlibTestCase):
+
+    def test_new_java_array(self):
+        # test array of strings
+        str_list = ["a", "b", "c"]
+        java_class = self.sc._gateway.jvm.java.lang.String
+        java_array = JavaWrapper._new_java_array(str_list, java_class)
+        self.assertEqual(_java2py(self.sc, java_array), str_list)
+        # test array of integers
+        int_list = [1, 2, 3]
+        java_class = self.sc._gateway.jvm.java.lang.Integer
+        java_array = JavaWrapper._new_java_array(int_list, java_class)
+        self.assertEqual(_java2py(self.sc, java_array), int_list)
+        # test array of floats
+        float_list = [0.1, 0.2, 0.3]
+        java_class = self.sc._gateway.jvm.java.lang.Double
+        java_array = JavaWrapper._new_java_array(float_list, java_class)
+        self.assertEqual(_java2py(self.sc, java_array), float_list)
+        # test array of bools
+        bool_list = [False, True, True]
+        java_class = self.sc._gateway.jvm.java.lang.Boolean
+        java_array = JavaWrapper._new_java_array(bool_list, java_class)
+        self.assertEqual(_java2py(self.sc, java_array), bool_list)
+        # test array of Java DenseVectors
+        v1 = DenseVector([0.0, 1.0])
+        v2 = DenseVector([1.0, 0.0])
+        vec_java_list = [_py2java(self.sc, v1), _py2java(self.sc, v2)]
+        java_class = self.sc._gateway.jvm.org.apache.spark.ml.linalg.DenseVector
+        java_array = JavaWrapper._new_java_array(vec_java_list, java_class)
+        self.assertEqual(_java2py(self.sc, java_array), [v1, v2])
+        # test empty array
+        java_class = self.sc._gateway.jvm.java.lang.Integer
+        java_array = JavaWrapper._new_java_array([], java_class)
+        self.assertEqual(_java2py(self.sc, java_array), [])
+
+
+class ChiSquareTestTests(SparkSessionTestCase):
+
+    def test_chisquaretest(self):
+        data = [[0, Vectors.dense([0, 1, 2])],
+                [1, Vectors.dense([1, 1, 1])],
+                [2, Vectors.dense([2, 1, 0])]]
+        df = self.spark.createDataFrame(data, ['label', 'feat'])
+        res = ChiSquareTest.test(df, 'feat', 'label')
+        # This line is hitting the collect bug described in #17218, commented for now.
+        # pValues = res.select("degreesOfFreedom").collect()
+        self.assertIsInstance(res, DataFrame)
+        fieldNames = set(field.name for field in res.schema.fields)
+        expectedFields = ["pValues", "degreesOfFreedom", "statistics"]
+        self.assertTrue(all(field in fieldNames for field in expectedFields))
+
+
+class UnaryTransformerTests(SparkSessionTestCase):
+
+    def test_unary_transformer_validate_input_type(self):
+        shiftVal = 3
+        transformer = MockUnaryTransformer(shiftVal=shiftVal)\
+            .setInputCol("input").setOutputCol("output")
+
+        # should not raise any errors
+        transformer.validateInputType(DoubleType())
+
+        with self.assertRaises(TypeError):
+            # passing the wrong input type should raise an error
+            transformer.validateInputType(IntegerType())
+
+    def test_unary_transformer_transform(self):
+        shiftVal = 3
+        transformer = MockUnaryTransformer(shiftVal=shiftVal)\
+            .setInputCol("input").setOutputCol("output")
+
+        df = self.spark.range(0, 10).toDF('input')
+        df = df.withColumn("input", df.input.cast(dataType="double"))
+
+        transformed_df = transformer.transform(df)
+        results = transformed_df.select("input", "output").collect()
+
+        for res in results:
+            self.assertEqual(res.input + shiftVal, res.output)
+
+
+class EstimatorTest(unittest.TestCase):
+
+    def testDefaultFitMultiple(self):
+        N = 4
+        data = MockDataset()
+        estimator = MockEstimator()
+        params = [{estimator.fake: i} for i in range(N)]
+        modelIter = estimator.fitMultiple(data, params)
+        indexList = []
+        for index, model in modelIter:
+            self.assertEqual(model.getFake(), index)
+            indexList.append(index)
+        self.assertEqual(sorted(indexList), list(range(N)))
+
+
+if __name__ == "__main__":
+    from pyspark.ml.tests import *
+    if xmlrunner:
+        unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    else:
+        unittest.main(verbosity=2)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/tuning.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/tuning.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f4abf5157335bfa14e525b445a822859efb97ec
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/tuning.py
@@ -0,0 +1,785 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import itertools
+import sys
+from multiprocessing.pool import ThreadPool
+
+import numpy as np
+
+from pyspark import since, keyword_only
+from pyspark.ml import Estimator, Model
+from pyspark.ml.common import _py2java
+from pyspark.ml.param import Params, Param, TypeConverters
+from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed
+from pyspark.ml.util import *
+from pyspark.ml.wrapper import JavaParams
+from pyspark.sql.functions import rand
+
+__all__ = ['ParamGridBuilder', 'CrossValidator', 'CrossValidatorModel', 'TrainValidationSplit',
+           'TrainValidationSplitModel']
+
+
+def _parallelFitTasks(est, train, eva, validation, epm, collectSubModel):
+    """
+    Creates a list of callables which can be called from different threads to fit and evaluate
+    an estimator in parallel. Each callable returns an `(index, metric, subModel)` tuple.
+
+    :param est: Estimator, the estimator to be fit.
+    :param train: DataFrame, training data set, used for fitting.
+    :param eva: Evaluator, used to compute `metric`.
+    :param validation: DataFrame, validation data set, used for evaluation.
+    :param epm: Sequence of ParamMap, param maps to be used during fitting & evaluation.
+    :param collectSubModel: Whether to collect the fitted sub models.
+    :return: a list of callables; each returns `(index, metric, subModel)`, where `index`
+        is an index into `epm` and `metric` is the associated metric value.
+    """
+    modelIter = est.fitMultiple(train, epm)
+
+    def singleTask():
+        index, model = next(modelIter)
+        metric = eva.evaluate(model.transform(validation, epm[index]))
+        return index, metric, model if collectSubModel else None
+
+    return [singleTask] * len(epm)
+
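+# Illustrative sketch of how these tasks are consumed (names here are only for
+# illustration; CrossValidator._fit below does the real work): the callables are
+# run on a thread pool and each (index, metric, subModel) result is aggregated.
+#
+#     tasks = _parallelFitTasks(est, train, eva, validation, epm, False)
+#     pool = ThreadPool(processes=min(parallelism, len(epm)))
+#     for index, metric, _ in pool.imap_unordered(lambda f: f(), tasks):
+#         metrics[index] += metric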
+
+class ParamGridBuilder(object):
+    r"""
+    Builder for a param grid used in grid search-based model selection.
+
+    >>> from pyspark.ml.classification import LogisticRegression
+    >>> lr = LogisticRegression()
+    >>> output = ParamGridBuilder() \
+    ...     .baseOn({lr.labelCol: 'l'}) \
+    ...     .baseOn([lr.predictionCol, 'p']) \
+    ...     .addGrid(lr.regParam, [1.0, 2.0]) \
+    ...     .addGrid(lr.maxIter, [1, 5]) \
+    ...     .build()
+    >>> expected = [
+    ...     {lr.regParam: 1.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'},
+    ...     {lr.regParam: 2.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'},
+    ...     {lr.regParam: 1.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'},
+    ...     {lr.regParam: 2.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}]
+    >>> len(output) == len(expected)
+    True
+    >>> all([m in expected for m in output])
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    def __init__(self):
+        self._param_grid = {}
+
+    @since("1.4.0")
+    def addGrid(self, param, values):
+        """
+        Adds the given list of candidate values for the given param
+        (overwriting any values previously added for that param).
+        """
+        self._param_grid[param] = values
+
+        return self
+
+    @since("1.4.0")
+    def baseOn(self, *args):
+        """
+        Sets the given parameters in this grid to fixed values.
+        Accepts either a parameter dictionary or a list of (parameter, value) pairs.
+        """
+        if isinstance(args[0], dict):
+            self.baseOn(*args[0].items())
+        else:
+            for (param, value) in args:
+                self.addGrid(param, [value])
+
+        return self
+
+    @since("1.4.0")
+    def build(self):
+        """
+        Builds and returns all combinations of parameters specified
+        by the param grid.
+        """
+        keys = self._param_grid.keys()
+        grid_values = self._param_grid.values()
+
+        def to_key_value_pairs(keys, values):
+            return [(key, key.typeConverter(value)) for key, value in zip(keys, values)]
+
+        return [dict(to_key_value_pairs(keys, prod)) for prod in itertools.product(*grid_values)]
+
+
+class ValidatorParams(HasSeed):
+    """
+    Common params for TrainValidationSplit and CrossValidator.
+    """
+
+    estimator = Param(Params._dummy(), "estimator", "estimator to be cross-validated")
+    estimatorParamMaps = Param(Params._dummy(), "estimatorParamMaps", "estimator param maps")
+    evaluator = Param(
+        Params._dummy(), "evaluator",
+        "evaluator used to select hyper-parameters that maximize the validator metric")
+
+    def setEstimator(self, value):
+        """
+        Sets the value of :py:attr:`estimator`.
+        """
+        return self._set(estimator=value)
+
+    def getEstimator(self):
+        """
+        Gets the value of estimator or its default value.
+        """
+        return self.getOrDefault(self.estimator)
+
+    def setEstimatorParamMaps(self, value):
+        """
+        Sets the value of :py:attr:`estimatorParamMaps`.
+        """
+        return self._set(estimatorParamMaps=value)
+
+    def getEstimatorParamMaps(self):
+        """
+        Gets the value of estimatorParamMaps or its default value.
+        """
+        return self.getOrDefault(self.estimatorParamMaps)
+
+    def setEvaluator(self, value):
+        """
+        Sets the value of :py:attr:`evaluator`.
+        """
+        return self._set(evaluator=value)
+
+    def getEvaluator(self):
+        """
+        Gets the value of evaluator or its default value.
+        """
+        return self.getOrDefault(self.evaluator)
+
+    @classmethod
+    def _from_java_impl(cls, java_stage):
+        """
+        Return Python estimator, estimatorParamMaps, and evaluator from a Java ValidatorParams.
+        """
+
+        # Load information from java_stage to the instance.
+        estimator = JavaParams._from_java(java_stage.getEstimator())
+        evaluator = JavaParams._from_java(java_stage.getEvaluator())
+        epms = [estimator._transfer_param_map_from_java(epm)
+                for epm in java_stage.getEstimatorParamMaps()]
+        return estimator, epms, evaluator
+
+    def _to_java_impl(self):
+        """
+        Return Java estimator, estimatorParamMaps, and evaluator from this Python instance.
+        """
+
+        gateway = SparkContext._gateway
+        cls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap
+
+        java_epms = gateway.new_array(cls, len(self.getEstimatorParamMaps()))
+        for idx, epm in enumerate(self.getEstimatorParamMaps()):
+            java_epms[idx] = self.getEstimator()._transfer_param_map_to_java(epm)
+
+        java_estimator = self.getEstimator()._to_java()
+        java_evaluator = self.getEvaluator()._to_java()
+        return java_estimator, java_epms, java_evaluator
+
+
+class CrossValidator(Estimator, ValidatorParams, HasParallelism, HasCollectSubModels,
+                     MLReadable, MLWritable):
+    """
+
+    K-fold cross validation performs model selection by splitting the dataset into a set of
+    non-overlapping, randomly partitioned folds which are used as separate training and test
+    datasets. For example, with k=3 folds, K-fold cross validation will generate 3
+    (training, test) dataset pairs, each of which uses 2/3 of the data for training and 1/3 for
+    testing. Each fold is used as the test set exactly once.
+
+
+    >>> from pyspark.ml.classification import LogisticRegression
+    >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator
+    >>> from pyspark.ml.linalg import Vectors
+    >>> dataset = spark.createDataFrame(
+    ...     [(Vectors.dense([0.0]), 0.0),
+    ...      (Vectors.dense([0.4]), 1.0),
+    ...      (Vectors.dense([0.5]), 0.0),
+    ...      (Vectors.dense([0.6]), 1.0),
+    ...      (Vectors.dense([1.0]), 1.0)] * 10,
+    ...     ["features", "label"])
+    >>> lr = LogisticRegression()
+    >>> grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+    >>> evaluator = BinaryClassificationEvaluator()
+    >>> cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator,
+    ...     parallelism=2)
+    >>> cvModel = cv.fit(dataset)
+    >>> cvModel.avgMetrics[0]
+    0.5
+    >>> evaluator.evaluate(cvModel.transform(dataset))
+    0.8333...
+
+    .. versionadded:: 1.4.0
+    """
+
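+    # Illustrative sketch (not part of the original module) of how ``_fit`` below assigns rows
+    # to folds: each row gets a uniform random value in [0, 1), and fold ``i`` uses the rows
+    # whose value falls in [i/k, (i+1)/k) as its validation set. Assuming a DataFrame ``df``
+    # and k=3:
+    #
+    #     from pyspark.sql.functions import rand
+    #     df_r = df.select("*", rand(seed=42).alias("_rand"))
+    #     folds = [df_r.filter((df_r["_rand"] >= i / 3.0) & (df_r["_rand"] < (i + 1) / 3.0))
+    #              for i in range(3)]  # folds[i] is the validation split for round i
+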
+    numFolds = Param(Params._dummy(), "numFolds", "number of folds for cross validation",
+                     typeConverter=TypeConverters.toInt)
+
+    @keyword_only
+    def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,
+                 seed=None, parallelism=1, collectSubModels=False):
+        """
+        __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,\
+                 seed=None, parallelism=1, collectSubModels=False)
+        """
+        super(CrossValidator, self).__init__()
+        self._setDefault(numFolds=3, parallelism=1)
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+
+    @keyword_only
+    @since("1.4.0")
+    def setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,
+                  seed=None, parallelism=1, collectSubModels=False):
+        """
+        setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3,\
+                  seed=None, parallelism=1, collectSubModels=False)
+        Sets params for cross validator.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("1.4.0")
+    def setNumFolds(self, value):
+        """
+        Sets the value of :py:attr:`numFolds`.
+        """
+        return self._set(numFolds=value)
+
+    @since("1.4.0")
+    def getNumFolds(self):
+        """
+        Gets the value of numFolds or its default value.
+        """
+        return self.getOrDefault(self.numFolds)
+
+    def _fit(self, dataset):
+        est = self.getOrDefault(self.estimator)
+        epm = self.getOrDefault(self.estimatorParamMaps)
+        numModels = len(epm)
+        eva = self.getOrDefault(self.evaluator)
+        nFolds = self.getOrDefault(self.numFolds)
+        seed = self.getOrDefault(self.seed)
+        h = 1.0 / nFolds
+        randCol = self.uid + "_rand"
+        df = dataset.select("*", rand(seed).alias(randCol))
+        metrics = [0.0] * numModels
+
+        pool = ThreadPool(processes=min(self.getParallelism(), numModels))
+        subModels = None
+        collectSubModelsParam = self.getCollectSubModels()
+        if collectSubModelsParam:
+            subModels = [[None for j in range(numModels)] for i in range(nFolds)]
+
+        for i in range(nFolds):
+            validateLB = i * h
+            validateUB = (i + 1) * h
+            condition = (df[randCol] >= validateLB) & (df[randCol] < validateUB)
+            validation = df.filter(condition).cache()
+            train = df.filter(~condition).cache()
+
+            tasks = _parallelFitTasks(est, train, eva, validation, epm, collectSubModelsParam)
+            for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks):
+                metrics[j] += (metric / nFolds)
+                if collectSubModelsParam:
+                    subModels[i][j] = subModel
+
+            validation.unpersist()
+            train.unpersist()
+
+        if eva.isLargerBetter():
+            bestIndex = np.argmax(metrics)
+        else:
+            bestIndex = np.argmin(metrics)
+        bestModel = est.fit(dataset, epm[bestIndex])
+        return self._copyValues(CrossValidatorModel(bestModel, metrics, subModels))
+
+    @since("1.4.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with a randomly generated uid
+        and some extra params. This creates a deep copy of
+        the embedded paramMap, and copies the embedded and extra parameters over.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        newCV = Params.copy(self, extra)
+        if self.isSet(self.estimator):
+            newCV.setEstimator(self.getEstimator().copy(extra))
+        # estimatorParamMaps remain the same
+        if self.isSet(self.evaluator):
+            newCV.setEvaluator(self.getEvaluator().copy(extra))
+        return newCV
+
+    @since("2.3.0")
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        return JavaMLWriter(self)
+
+    @classmethod
+    @since("2.3.0")
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        return JavaMLReader(cls)
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java CrossValidator, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+
+        estimator, epms, evaluator = super(CrossValidator, cls)._from_java_impl(java_stage)
+        numFolds = java_stage.getNumFolds()
+        seed = java_stage.getSeed()
+        parallelism = java_stage.getParallelism()
+        collectSubModels = java_stage.getCollectSubModels()
+        # Create a new instance of this stage.
+        py_stage = cls(estimator=estimator, estimatorParamMaps=epms, evaluator=evaluator,
+                       numFolds=numFolds, seed=seed, parallelism=parallelism,
+                       collectSubModels=collectSubModels)
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java CrossValidator. Used for ML persistence.
+
+        :return: Java object equivalent to this instance.
+        """
+
+        estimator, epms, evaluator = super(CrossValidator, self)._to_java_impl()
+
+        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidator", self.uid)
+        _java_obj.setEstimatorParamMaps(epms)
+        _java_obj.setEvaluator(evaluator)
+        _java_obj.setEstimator(estimator)
+        _java_obj.setSeed(self.getSeed())
+        _java_obj.setNumFolds(self.getNumFolds())
+        _java_obj.setParallelism(self.getParallelism())
+        _java_obj.setCollectSubModels(self.getCollectSubModels())
+
+        return _java_obj
+
+
+class CrossValidatorModel(Model, ValidatorParams, MLReadable, MLWritable):
+    """
+
+    CrossValidatorModel contains the model with the highest average cross-validation
+    metric across folds and uses this model to transform input data. CrossValidatorModel
+    also tracks the metrics for each param map evaluated.
+
+    .. versionadded:: 1.4.0
+    """
+
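+    # Illustrative usage sketch (not part of the original module), assuming ``cvModel`` was
+    # produced by ``CrossValidator.fit`` as in the doctest above:
+    #
+    #     best = cvModel.bestModel                  # the model refit on the whole dataset
+    #     scores = cvModel.avgMetrics               # one averaged metric per param map in the grid
+    #     predictions = cvModel.transform(dataset)  # delegates to bestModel.transform
+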
+    def __init__(self, bestModel, avgMetrics=[], subModels=None):
+        super(CrossValidatorModel, self).__init__()
+        #: best model from cross validation
+        self.bestModel = bestModel
+        #: Average cross-validation metrics for each paramMap in
+        #: CrossValidator.estimatorParamMaps, in the corresponding order.
+        self.avgMetrics = avgMetrics
+        #: sub model list from cross validation
+        self.subModels = subModels
+
+    def _transform(self, dataset):
+        return self.bestModel.transform(dataset)
+
+    @since("1.4.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with a randomly generated uid
+        and some extra params. This copies the underlying bestModel,
+        creates a deep copy of the embedded paramMap, and
+        copies the embedded and extra parameters over.
+        It does not copy the extra Params into the subModels.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        bestModel = self.bestModel.copy(extra)
+        avgMetrics = self.avgMetrics
+        subModels = self.subModels
+        return CrossValidatorModel(bestModel, avgMetrics, subModels)
+
+    @since("2.3.0")
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        return JavaMLWriter(self)
+
+    @classmethod
+    @since("2.3.0")
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        return JavaMLReader(cls)
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java CrossValidatorModel, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+        bestModel = JavaParams._from_java(java_stage.bestModel())
+        estimator, epms, evaluator = super(CrossValidatorModel, cls)._from_java_impl(java_stage)
+
+        py_stage = cls(bestModel=bestModel).setEstimator(estimator)
+        py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator)
+
+        if java_stage.hasSubModels():
+            py_stage.subModels = [[JavaParams._from_java(sub_model)
+                                   for sub_model in fold_sub_models]
+                                  for fold_sub_models in java_stage.subModels()]
+
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java CrossValidatorModel. Used for ML persistence.
+
+        :return: Java object equivalent to this instance.
+        """
+
+        sc = SparkContext._active_spark_context
+        # TODO: persist average metrics as well
+        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.CrossValidatorModel",
+                                             self.uid,
+                                             self.bestModel._to_java(),
+                                             _py2java(sc, []))
+        estimator, epms, evaluator = super(CrossValidatorModel, self)._to_java_impl()
+
+        _java_obj.set("evaluator", evaluator)
+        _java_obj.set("estimator", estimator)
+        _java_obj.set("estimatorParamMaps", epms)
+
+        if self.subModels is not None:
+            java_sub_models = [[sub_model._to_java() for sub_model in fold_sub_models]
+                               for fold_sub_models in self.subModels]
+            _java_obj.setSubModels(java_sub_models)
+        return _java_obj
+
+
+class TrainValidationSplit(Estimator, ValidatorParams, HasParallelism, HasCollectSubModels,
+                           MLReadable, MLWritable):
+    """
+    .. note:: Experimental
+
+    Validation for hyper-parameter tuning. Randomly splits the input dataset into train and
+    validation sets, and uses the evaluation metric on the validation set to select the best model.
+    Similar to :class:`CrossValidator`, but only splits the set once.
+
+    >>> from pyspark.ml.classification import LogisticRegression
+    >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator
+    >>> from pyspark.ml.linalg import Vectors
+    >>> dataset = spark.createDataFrame(
+    ...     [(Vectors.dense([0.0]), 0.0),
+    ...      (Vectors.dense([0.4]), 1.0),
+    ...      (Vectors.dense([0.5]), 0.0),
+    ...      (Vectors.dense([0.6]), 1.0),
+    ...      (Vectors.dense([1.0]), 1.0)] * 10,
+    ...     ["features", "label"])
+    >>> lr = LogisticRegression()
+    >>> grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
+    >>> evaluator = BinaryClassificationEvaluator()
+    >>> tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator,
+    ...     parallelism=2)
+    >>> tvsModel = tvs.fit(dataset)
+    >>> evaluator.evaluate(tvsModel.transform(dataset))
+    0.8333...
+
+    .. versionadded:: 2.0.0
+    """
+
+    trainRatio = Param(Params._dummy(), "trainRatio", "Param for ratio between train and "
+                       "validation data. Must be between 0 and 1.",
+                       typeConverter=TypeConverters.toFloat)
+
+    @keyword_only
+    def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75,
+                 parallelism=1, collectSubModels=False, seed=None):
+        """
+        __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75,\
+                 parallelism=1, collectSubModels=False, seed=None)
+        """
+        super(TrainValidationSplit, self).__init__()
+        self._setDefault(trainRatio=0.75, parallelism=1)
+        kwargs = self._input_kwargs
+        self._set(**kwargs)
+
+    @since("2.0.0")
+    @keyword_only
+    def setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75,
+                  parallelism=1, collectSubModels=False, seed=None):
+        """
+        setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, trainRatio=0.75,\
+                  parallelism=1, collectSubModels=False, seed=None)
+        Sets params for the train validation split.
+        """
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    @since("2.0.0")
+    def setTrainRatio(self, value):
+        """
+        Sets the value of :py:attr:`trainRatio`.
+        """
+        return self._set(trainRatio=value)
+
+    @since("2.0.0")
+    def getTrainRatio(self):
+        """
+        Gets the value of trainRatio or its default value.
+        """
+        return self.getOrDefault(self.trainRatio)
+
+    def _fit(self, dataset):
+        est = self.getOrDefault(self.estimator)
+        epm = self.getOrDefault(self.estimatorParamMaps)
+        numModels = len(epm)
+        eva = self.getOrDefault(self.evaluator)
+        tRatio = self.getOrDefault(self.trainRatio)
+        seed = self.getOrDefault(self.seed)
+        randCol = self.uid + "_rand"
+        df = dataset.select("*", rand(seed).alias(randCol))
+        condition = (df[randCol] >= tRatio)
+        validation = df.filter(condition).cache()
+        train = df.filter(~condition).cache()
+
+        subModels = None
+        collectSubModelsParam = self.getCollectSubModels()
+        if collectSubModelsParam:
+            subModels = [None for i in range(numModels)]
+
+        tasks = _parallelFitTasks(est, train, eva, validation, epm, collectSubModelsParam)
+        pool = ThreadPool(processes=min(self.getParallelism(), numModels))
+        metrics = [None] * numModels
+        for j, metric, subModel in pool.imap_unordered(lambda f: f(), tasks):
+            metrics[j] = metric
+            if collectSubModelsParam:
+                subModels[j] = subModel
+
+        train.unpersist()
+        validation.unpersist()
+
+        if eva.isLargerBetter():
+            bestIndex = np.argmax(metrics)
+        else:
+            bestIndex = np.argmin(metrics)
+        bestModel = est.fit(dataset, epm[bestIndex])
+        return self._copyValues(TrainValidationSplitModel(bestModel, metrics, subModels))
+
+    @since("2.0.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with a randomly generated uid
+        and some extra params. This creates a deep copy of
+        the embedded paramMap, and copies the embedded and extra parameters over.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        newTVS = Params.copy(self, extra)
+        if self.isSet(self.estimator):
+            newTVS.setEstimator(self.getEstimator().copy(extra))
+        # estimatorParamMaps remain the same
+        if self.isSet(self.evaluator):
+            newTVS.setEvaluator(self.getEvaluator().copy(extra))
+        return newTVS
+
+    @since("2.3.0")
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        return JavaMLWriter(self)
+
+    @classmethod
+    @since("2.3.0")
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        return JavaMLReader(cls)
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java TrainValidationSplit, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+
+        estimator, epms, evaluator = super(TrainValidationSplit, cls)._from_java_impl(java_stage)
+        trainRatio = java_stage.getTrainRatio()
+        seed = java_stage.getSeed()
+        parallelism = java_stage.getParallelism()
+        collectSubModels = java_stage.getCollectSubModels()
+        # Create a new instance of this stage.
+        py_stage = cls(estimator=estimator, estimatorParamMaps=epms, evaluator=evaluator,
+                       trainRatio=trainRatio, seed=seed, parallelism=parallelism,
+                       collectSubModels=collectSubModels)
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java TrainValidationSplit. Used for ML persistence.
+        :return: Java object equivalent to this instance.
+        """
+
+        estimator, epms, evaluator = super(TrainValidationSplit, self)._to_java_impl()
+
+        _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.tuning.TrainValidationSplit",
+                                             self.uid)
+        _java_obj.setEstimatorParamMaps(epms)
+        _java_obj.setEvaluator(evaluator)
+        _java_obj.setEstimator(estimator)
+        _java_obj.setTrainRatio(self.getTrainRatio())
+        _java_obj.setSeed(self.getSeed())
+        _java_obj.setParallelism(self.getParallelism())
+        _java_obj.setCollectSubModels(self.getCollectSubModels())
+        return _java_obj
+
+
+class TrainValidationSplitModel(Model, ValidatorParams, MLReadable, MLWritable):
+    """
+    .. note:: Experimental
+
+    Model from train validation split.
+
+    .. versionadded:: 2.0.0
+    """
+
+    def __init__(self, bestModel, validationMetrics=[], subModels=None):
+        super(TrainValidationSplitModel, self).__init__()
+        #: best model from train validation split
+        self.bestModel = bestModel
+        #: evaluated validation metrics
+        self.validationMetrics = validationMetrics
+        #: sub models from train validation split
+        self.subModels = subModels
+
+    def _transform(self, dataset):
+        return self.bestModel.transform(dataset)
+
+    @since("2.0.0")
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with a randomly generated uid
+        and some extra params. This copies the underlying bestModel,
+        creates a deep copy of the embedded paramMap, and
+        copies the embedded and extra parameters over.
+        It also creates a shallow copy of the validationMetrics.
+        It does not copy the extra Params into the subModels.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        bestModel = self.bestModel.copy(extra)
+        validationMetrics = list(self.validationMetrics)
+        subModels = self.subModels
+        return TrainValidationSplitModel(bestModel, validationMetrics, subModels)
+
+    @since("2.3.0")
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        return JavaMLWriter(self)
+
+    @classmethod
+    @since("2.3.0")
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        return JavaMLReader(cls)
+
+    @classmethod
+    def _from_java(cls, java_stage):
+        """
+        Given a Java TrainValidationSplitModel, create and return a Python wrapper of it.
+        Used for ML persistence.
+        """
+
+        # Load information from java_stage to the instance.
+        bestModel = JavaParams._from_java(java_stage.bestModel())
+        estimator, epms, evaluator = super(TrainValidationSplitModel,
+                                           cls)._from_java_impl(java_stage)
+        # Create a new instance of this stage.
+        py_stage = cls(bestModel=bestModel).setEstimator(estimator)
+        py_stage = py_stage.setEstimatorParamMaps(epms).setEvaluator(evaluator)
+
+        if java_stage.hasSubModels():
+            py_stage.subModels = [JavaParams._from_java(sub_model)
+                                  for sub_model in java_stage.subModels()]
+
+        py_stage._resetUid(java_stage.uid())
+        return py_stage
+
+    def _to_java(self):
+        """
+        Transfer this instance to a Java TrainValidationSplitModel. Used for ML persistence.
+        :return: Java object equivalent to this instance.
+        """
+
+        sc = SparkContext._active_spark_context
+        # TODO: persist validation metrics as well
+        _java_obj = JavaParams._new_java_obj(
+            "org.apache.spark.ml.tuning.TrainValidationSplitModel",
+            self.uid,
+            self.bestModel._to_java(),
+            _py2java(sc, []))
+        estimator, epms, evaluator = super(TrainValidationSplitModel, self)._to_java_impl()
+
+        _java_obj.set("evaluator", evaluator)
+        _java_obj.set("estimator", estimator)
+        _java_obj.set("estimatorParamMaps", epms)
+
+        if self.subModels is not None:
+            java_sub_models = [sub_model._to_java() for sub_model in self.subModels]
+            _java_obj.setSubModels(java_sub_models)
+
+        return _java_obj
+
+
+if __name__ == "__main__":
+    import doctest
+
+    from pyspark.sql import SparkSession
+    globs = globals().copy()
+
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("ml.tuning tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/util.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..e846834761e493eff7e96786375ee746fadd6baa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/util.py
@@ -0,0 +1,613 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import json
+import sys
+import os
+import time
+import uuid
+import warnings
+
+if sys.version > '3':
+    basestring = str
+    unicode = str
+    long = int
+
+from pyspark import SparkContext, since
+from pyspark.ml.common import inherit_doc
+from pyspark.sql import SparkSession
+from pyspark.util import VersionUtils
+
+
+def _jvm():
+    """
+    Returns the JVM view associated with SparkContext. Must be called
+    after SparkContext is initialized.
+    """
+    jvm = SparkContext._jvm
+    if jvm:
+        return jvm
+    else:
+        raise AttributeError("Cannot load _jvm from SparkContext. Is SparkContext initialized?")
+
+
+class Identifiable(object):
+    """
+    Object with a unique ID.
+    """
+
+    def __init__(self):
+        #: A unique id for the object.
+        self.uid = self._randomUID()
+
+    def __repr__(self):
+        return self.uid
+
+    @classmethod
+    def _randomUID(cls):
+        """
+        Generate a unique unicode id for the object. The default implementation
+        concatenates the class name, "_", and 12 random hex chars.
+        """
+        return unicode(cls.__name__ + "_" + uuid.uuid4().hex[-12:])
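+
+    # A generated uid looks like, e.g., "LogisticRegression_a1b2c3d4e5f6"; the prefix comes from
+    # ``cls.__name__`` and the hex suffix shown here is only illustrative.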
+
+
+@inherit_doc
+class BaseReadWrite(object):
+    """
+    Base class for MLWriter and MLReader. Stores information about the SparkContext
+    and SparkSession.
+
+    .. versionadded:: 2.3.0
+    """
+
+    def __init__(self):
+        self._sparkSession = None
+
+    def context(self, sqlContext):
+        """
+        Sets the Spark SQLContext to use for saving/loading.
+
+        .. note:: Deprecated in 2.1 and will be removed in 3.0, use session instead.
+        """
+        raise NotImplementedError("Read/Write is not yet implemented for type: %s" % type(self))
+
+    def session(self, sparkSession):
+        """
+        Sets the Spark Session to use for saving/loading.
+        """
+        self._sparkSession = sparkSession
+        return self
+
+    @property
+    def sparkSession(self):
+        """
+        Returns the user-specified Spark Session or the default.
+        """
+        if self._sparkSession is None:
+            self._sparkSession = SparkSession.builder.getOrCreate()
+        return self._sparkSession
+
+    @property
+    def sc(self):
+        """
+        Returns the underlying `SparkContext`.
+        """
+        return self.sparkSession.sparkContext
+
+
+@inherit_doc
+class MLWriter(BaseReadWrite):
+    """
+    Utility class that can save ML instances.
+
+    .. versionadded:: 2.0.0
+    """
+
+    def __init__(self):
+        super(MLWriter, self).__init__()
+        self.shouldOverwrite = False
+
+    def _handleOverwrite(self, path):
+        from pyspark.ml.wrapper import JavaWrapper
+
+        _java_obj = JavaWrapper._new_java_obj("org.apache.spark.ml.util.FileSystemOverwrite")
+        wrapper = JavaWrapper(_java_obj)
+        wrapper._call_java("handleOverwrite", path, True, self.sc._jsc.sc())
+
+    def save(self, path):
+        """Save the ML instance to the input path."""
+        if self.shouldOverwrite:
+            self._handleOverwrite(path)
+        self.saveImpl(path)
+
+    def saveImpl(self, path):
+        """
+        save() handles overwriting and then calls this method.  Subclasses should override this
+        method to implement the actual saving of the instance.
+        """
+        raise NotImplementedError("MLWriter is not yet implemented for type: %s" % type(self))
+
+    def overwrite(self):
+        """Overwrites if the output path already exists."""
+        self.shouldOverwrite = True
+        return self
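+
+    # Typical call pattern (an illustrative sketch, not part of the original module), assuming
+    # ``instance`` is any :py:class:`MLWritable` and ``path`` points to a writable location:
+    #
+    #     instance.write().overwrite().save(path)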
+
+
+@inherit_doc
+class GeneralMLWriter(MLWriter):
+    """
+    Utility class that can save ML instances in different formats.
+
+    .. versionadded:: 2.4.0
+    """
+
+    def format(self, source):
+        """
+        Specifies the format of ML export (e.g. "pmml", "internal", or the fully qualified class
+        name for export).
+        """
+        self.source = source
+        return self
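+
+    # Illustrative sketch (not part of the original module): exporting a model in a named format,
+    # assuming ``model.write()`` returns a :py:class:`GeneralMLWriter` and the model supports the
+    # "pmml" format:
+    #
+    #     model.write().format("pmml").save(path)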
+
+
+@inherit_doc
+class JavaMLWriter(MLWriter):
+    """
+    (Private) Specialization of :py:class:`MLWriter` for :py:class:`JavaParams` types
+    """
+
+    def __init__(self, instance):
+        super(JavaMLWriter, self).__init__()
+        _java_obj = instance._to_java()
+        self._jwrite = _java_obj.write()
+
+    def save(self, path):
+        """Save the ML instance to the input path."""
+        if not isinstance(path, basestring):
+            raise TypeError("path should be a basestring, got type %s" % type(path))
+        self._jwrite.save(path)
+
+    def overwrite(self):
+        """Overwrites if the output path already exists."""
+        self._jwrite.overwrite()
+        return self
+
+    def option(self, key, value):
+        self._jwrite.option(key, value)
+        return self
+
+    def context(self, sqlContext):
+        """
+        Sets the SQL context to use for saving.
+
+        .. note:: Deprecated in 2.1 and will be removed in 3.0, use session instead.
+        """
+        warnings.warn(
+            "Deprecated in 2.1 and will be removed in 3.0, use session instead.",
+            DeprecationWarning)
+        self._jwrite.context(sqlContext._ssql_ctx)
+        return self
+
+    def session(self, sparkSession):
+        """Sets the Spark Session to use for saving."""
+        self._jwrite.session(sparkSession._jsparkSession)
+        return self
+
+
+@inherit_doc
+class GeneralJavaMLWriter(JavaMLWriter):
+    """
+    (Private) Specialization of :py:class:`GeneralMLWriter` for :py:class:`JavaParams` types
+    """
+
+    def __init__(self, instance):
+        super(GeneralJavaMLWriter, self).__init__(instance)
+
+    def format(self, source):
+        """
+        Specifies the format of ML export (e.g. "pmml", "internal", or the fully qualified class
+        name for export).
+        """
+        self._jwrite.format(source)
+        return self
+
+
+@inherit_doc
+class MLWritable(object):
+    """
+    Mixin for ML instances that provide :py:class:`MLWriter`.
+
+    .. versionadded:: 2.0.0
+    """
+
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        raise NotImplementedError("MLWritable is not yet implemented for type: %r" % type(self))
+
+    def save(self, path):
+        """Save this ML instance to the given path, a shortcut of 'write().save(path)'."""
+        self.write().save(path)
+
+
+@inherit_doc
+class JavaMLWritable(MLWritable):
+    """
+    (Private) Mixin for ML instances that provide :py:class:`JavaMLWriter`.
+    """
+
+    def write(self):
+        """Returns an MLWriter instance for this ML instance."""
+        return JavaMLWriter(self)
+
+
+@inherit_doc
+class GeneralJavaMLWritable(JavaMLWritable):
+    """
+    (Private) Mixin for ML instances that provide :py:class:`GeneralJavaMLWriter`.
+    """
+
+    def write(self):
+        """Returns a GeneralMLWriter instance for this ML instance."""
+        return GeneralJavaMLWriter(self)
+
+
+@inherit_doc
+class MLReader(BaseReadWrite):
+    """
+    Utility class that can load ML instances.
+
+    .. versionadded:: 2.0.0
+    """
+
+    def __init__(self):
+        super(MLReader, self).__init__()
+
+    def load(self, path):
+        """Load the ML instance from the input path."""
+        raise NotImplementedError("MLReader is not yet implemented for type: %s" % type(self))
+
+
+@inherit_doc
+class JavaMLReader(MLReader):
+    """
+    (Private) Specialization of :py:class:`MLReader` for :py:class:`JavaParams` types
+    """
+
+    def __init__(self, clazz):
+        super(JavaMLReader, self).__init__()
+        self._clazz = clazz
+        self._jread = self._load_java_obj(clazz).read()
+
+    def load(self, path):
+        """Load the ML instance from the input path."""
+        if not isinstance(path, basestring):
+            raise TypeError("path should be a basestring, got type %s" % type(path))
+        java_obj = self._jread.load(path)
+        if not hasattr(self._clazz, "_from_java"):
+            raise NotImplementedError("This Java ML type cannot be loaded into Python currently: %r"
+                                      % self._clazz)
+        return self._clazz._from_java(java_obj)
+
+    def context(self, sqlContext):
+        """
+        Sets the SQL context to use for loading.
+
+        .. note:: Deprecated in 2.1 and will be removed in 3.0, use session instead.
+        """
+        warnings.warn(
+            "Deprecated in 2.1 and will be removed in 3.0, use session instead.",
+            DeprecationWarning)
+        self._jread.context(sqlContext._ssql_ctx)
+        return self
+
+    def session(self, sparkSession):
+        """Sets the Spark Session to use for loading."""
+        self._jread.session(sparkSession._jsparkSession)
+        return self
+
+    @classmethod
+    def _java_loader_class(cls, clazz):
+        """
+        Returns the full class name of the Java ML instance. The default
+        implementation replaces "pyspark" by "org.apache.spark" in
+        the Python full class name.
+        """
+        java_package = clazz.__module__.replace("pyspark", "org.apache.spark")
+        if clazz.__name__ in ("Pipeline", "PipelineModel"):
+            # Remove the last package name "pipeline" for Pipeline and PipelineModel.
+            java_package = ".".join(java_package.split(".")[0:-1])
+        return java_package + "." + clazz.__name__
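+
+    # For example (illustrative, not part of the original module), this mapping turns
+    # "pyspark.ml.classification.LogisticRegression" into
+    # "org.apache.spark.ml.classification.LogisticRegression", and "pyspark.ml.pipeline.Pipeline"
+    # into "org.apache.spark.ml.Pipeline".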
+
+    @classmethod
+    def _load_java_obj(cls, clazz):
+        """Load the peer Java object of the ML instance."""
+        java_class = cls._java_loader_class(clazz)
+        java_obj = _jvm()
+        for name in java_class.split("."):
+            java_obj = getattr(java_obj, name)
+        return java_obj
+
+
+@inherit_doc
+class MLReadable(object):
+    """
+    Mixin for instances that provide :py:class:`MLReader`.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @classmethod
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        raise NotImplementedError("MLReadable.read() not implemented for type: %r" % cls)
+
+    @classmethod
+    def load(cls, path):
+        """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""
+        return cls.read().load(path)
+
+
+@inherit_doc
+class JavaMLReadable(MLReadable):
+    """
+    (Private) Mixin for instances that provide JavaMLReader.
+    """
+
+    @classmethod
+    def read(cls):
+        """Returns an MLReader instance for this class."""
+        return JavaMLReader(cls)
+
+
+@inherit_doc
+class JavaPredictionModel():
+    """
+    (Private) Java Model for prediction tasks (regression and classification).
+    To be mixed in with :py:class:`pyspark.ml.JavaModel`.
+    """
+
+    @property
+    @since("2.1.0")
+    def numFeatures(self):
+        """
+        Returns the number of features the model was trained on. If unknown, returns -1.
+        """
+        return self._call_java("numFeatures")
+
+
+@inherit_doc
+class DefaultParamsWritable(MLWritable):
+    """
+    .. note:: DeveloperApi
+
+    Helper trait for making simple :py:class:`Params` types writable.  If a :py:class:`Params`
+    class stores all data as :py:class:`Param` values, then extending this trait will provide
+    a default implementation of writing saved instances of the class.
+    This only handles simple :py:class:`Param` types; e.g., it will not handle
+    :py:class:`Dataset`. See :py:class:`DefaultParamsReadable`, the counterpart to this trait.
+
+    .. versionadded:: 2.3.0
+    """
+
+    def write(self):
+        """Returns a DefaultParamsWriter instance for this class."""
+        from pyspark.ml.param import Params
+
+        if isinstance(self, Params):
+            return DefaultParamsWriter(self)
+        else:
+            raise TypeError("Cannot use DefaultParamsWritable with type %s because it does not "
+                            "extend Params." % type(self))
+
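+# A minimal sketch (not part of the original module) of a Params-only transformer that gets
+# persistence from DefaultParamsReadable/DefaultParamsWritable; the class and paths below are
+# hypothetical, and an active SparkSession is assumed:
+#
+#     from pyspark.ml import Transformer
+#     from pyspark.ml.param.shared import HasInputCol, HasOutputCol
+#
+#     class NoOpTransformer(Transformer, HasInputCol, HasOutputCol,
+#                           DefaultParamsReadable, DefaultParamsWritable):
+#         def _transform(self, dataset):
+#             return dataset
+#
+#     t = NoOpTransformer()
+#     t.save("/tmp/noop")                    # uses DefaultParamsWriter under the hood
+#     t2 = NoOpTransformer.load("/tmp/noop")
+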
+
+@inherit_doc
+class DefaultParamsWriter(MLWriter):
+    """
+    .. note:: DeveloperApi
+
+    Specialization of :py:class:`MLWriter` for :py:class:`Params` types
+
+    Class for writing Estimators and Transformers whose parameters are JSON-serializable.
+
+    .. versionadded:: 2.3.0
+    """
+
+    def __init__(self, instance):
+        super(DefaultParamsWriter, self).__init__()
+        self.instance = instance
+
+    def saveImpl(self, path):
+        DefaultParamsWriter.saveMetadata(self.instance, path, self.sc)
+
+    @staticmethod
+    def saveMetadata(instance, path, sc, extraMetadata=None, paramMap=None):
+        """
+        Saves metadata + Params to: path + "/metadata"
+        - class
+        - timestamp
+        - sparkVersion
+        - uid
+        - paramMap
+        - defaultParamMap (since 2.4.0)
+        - (optionally, extra metadata)
+        :param extraMetadata:  Extra metadata to be saved at the same level as uid, paramMap, etc.
+        :param paramMap:  If given, this is saved in the "paramMap" field.
+        """
+        metadataPath = os.path.join(path, "metadata")
+        metadataJson = DefaultParamsWriter._get_metadata_to_save(instance,
+                                                                 sc,
+                                                                 extraMetadata,
+                                                                 paramMap)
+        sc.parallelize([metadataJson], 1).saveAsTextFile(metadataPath)
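+
+    # The metadata file holds a single JSON document; an illustrative example of its shape
+    # (hypothetical values), per ``_get_metadata_to_save`` below:
+    #
+    #     {"class": "pyspark.ml.feature.NoOpTransformer", "timestamp": 1540000000000,
+    #      "sparkVersion": "2.4.0", "uid": "NoOpTransformer_0123456789ab",
+    #      "paramMap": {"inputCol": "features"}, "defaultParamMap": {}}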
+
+    @staticmethod
+    def _get_metadata_to_save(instance, sc, extraMetadata=None, paramMap=None):
+        """
+        Helper for :py:meth:`DefaultParamsWriter.saveMetadata` which extracts the JSON to save.
+        This is useful for ensemble models which need to save metadata for many sub-models.
+
+        .. note:: See :py:meth:`DefaultParamsWriter.saveMetadata` for what this includes.
+        """
+        uid = instance.uid
+        cls = instance.__module__ + '.' + instance.__class__.__name__
+
+        # User-supplied param values
+        params = instance._paramMap
+        jsonParams = {}
+        if paramMap is not None:
+            jsonParams = paramMap
+        else:
+            for p in params:
+                jsonParams[p.name] = params[p]
+
+        # Default param values
+        jsonDefaultParams = {}
+        for p in instance._defaultParamMap:
+            jsonDefaultParams[p.name] = instance._defaultParamMap[p]
+
+        basicMetadata = {"class": cls, "timestamp": long(round(time.time() * 1000)),
+                         "sparkVersion": sc.version, "uid": uid, "paramMap": jsonParams,
+                         "defaultParamMap": jsonDefaultParams}
+        if extraMetadata is not None:
+            basicMetadata.update(extraMetadata)
+        return json.dumps(basicMetadata, separators=[',',  ':'])
+
+
+@inherit_doc
+class DefaultParamsReadable(MLReadable):
+    """
+    .. note:: DeveloperApi
+
+    Helper trait for making simple :py:class:`Params` types readable.
+    If a :py:class:`Params` class stores all data as :py:class:`Param` values,
+    then extending this trait will provide a default implementation of reading saved
+    instances of the class. This only handles simple :py:class:`Param` types;
+    e.g., it will not handle :py:class:`Dataset`. See :py:class:`DefaultParamsWritable`,
+    the counterpart to this trait.
+
+    .. versionadded:: 2.3.0
+    """
+
+    @classmethod
+    def read(cls):
+        """Returns a DefaultParamsReader instance for this class."""
+        return DefaultParamsReader(cls)
+
+
+@inherit_doc
+class DefaultParamsReader(MLReader):
+    """
+    .. note:: DeveloperApi
+
+    Specialization of :py:class:`MLReader` for :py:class:`Params` types
+
+    Default :py:class:`MLReader` implementation for transformers and estimators that
+    contain basic (json-serializable) params and no data. This will not handle
+    more complex params or types with data (e.g., models with coefficients).
+
+    .. versionadded:: 2.3.0
+    """
+
+    def __init__(self, cls):
+        super(DefaultParamsReader, self).__init__()
+        self.cls = cls
+
+    @staticmethod
+    def __get_class(clazz):
+        """
+        Loads Python class from its name.
+        """
+        parts = clazz.split('.')
+        module = ".".join(parts[:-1])
+        m = __import__(module)
+        for comp in parts[1:]:
+            m = getattr(m, comp)
+        return m
+
+    def load(self, path):
+        metadata = DefaultParamsReader.loadMetadata(path, self.sc)
+        py_type = DefaultParamsReader.__get_class(metadata['class'])
+        instance = py_type()
+        instance._resetUid(metadata['uid'])
+        DefaultParamsReader.getAndSetParams(instance, metadata)
+        return instance
+
+    @staticmethod
+    def loadMetadata(path, sc, expectedClassName=""):
+        """
+        Load metadata saved using :py:meth:`DefaultParamsWriter.saveMetadata`
+
+        :param expectedClassName:  If non-empty, this is checked against the loaded metadata.
+        """
+        metadataPath = os.path.join(path, "metadata")
+        metadataStr = sc.textFile(metadataPath, 1).first()
+        loadedVals = DefaultParamsReader._parseMetaData(metadataStr, expectedClassName)
+        return loadedVals
+
+    @staticmethod
+    def _parseMetaData(metadataStr, expectedClassName=""):
+        """
+        Parse metadata JSON string produced by :py:meth:`DefaultParamsWriter._get_metadata_to_save`.
+        This is a helper function for :py:meth:`DefaultParamsReader.loadMetadata`.
+
+        :param metadataStr:  JSON string of metadata
+        :param expectedClassName:  If non-empty, this is checked against the loaded metadata.
+        """
+        metadata = json.loads(metadataStr)
+        className = metadata['class']
+        if len(expectedClassName) > 0:
+            assert className == expectedClassName, "Error loading metadata: Expected " + \
+                "class name {} but found class name {}".format(expectedClassName, className)
+        return metadata
+
+    @staticmethod
+    def getAndSetParams(instance, metadata):
+        """
+        Extract Params from metadata, and set them in the instance.
+        """
+        # Set user-supplied param values
+        for paramName in metadata['paramMap']:
+            param = instance.getParam(paramName)
+            paramValue = metadata['paramMap'][paramName]
+            instance.set(param, paramValue)
+
+        # Set default param values
+        majorAndMinorVersions = VersionUtils.majorMinorVersion(metadata['sparkVersion'])
+        major = majorAndMinorVersions[0]
+        minor = majorAndMinorVersions[1]
+
+        # For metadata files written prior to Spark 2.4, there is no `defaultParamMap` section.
+        if major > 2 or (major == 2 and minor >= 4):
+            assert 'defaultParamMap' in metadata, "Error loading metadata: Expected " + \
+                "`defaultParamMap` section not found"
+
+            for paramName in metadata['defaultParamMap']:
+                paramValue = metadata['defaultParamMap'][paramName]
+                instance._setDefault(**{paramName: paramValue})
+
+    @staticmethod
+    def loadParamsInstance(path, sc):
+        """
+        Load a :py:class:`Params` instance from the given path, and return it.
+        This assumes the instance inherits from :py:class:`MLReadable`.
+        """
+        metadata = DefaultParamsReader.loadMetadata(path, sc)
+        pythonClassName = metadata['class'].replace("org.apache.spark", "pyspark")
+        py_type = DefaultParamsReader.__get_class(pythonClassName)
+        instance = py_type.load(path)
+        return instance
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/wrapper.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..d325633195ddb78368913ad5621c8d6c41ee8234
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/ml/wrapper.py
@@ -0,0 +1,347 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from abc import ABCMeta, abstractmethod
+import sys
+if sys.version >= '3':
+    xrange = range
+
+from pyspark import SparkContext
+from pyspark.sql import DataFrame
+from pyspark.ml import Estimator, Transformer, Model
+from pyspark.ml.param import Params
+from pyspark.ml.util import _jvm
+from pyspark.ml.common import inherit_doc, _java2py, _py2java
+
+
+class JavaWrapper(object):
+    """
+    Wrapper class for a Java companion object
+    """
+    def __init__(self, java_obj=None):
+        super(JavaWrapper, self).__init__()
+        self._java_obj = java_obj
+
+    def __del__(self):
+        if SparkContext._active_spark_context and self._java_obj is not None:
+            SparkContext._active_spark_context._gateway.detach(self._java_obj)
+
+    @classmethod
+    def _create_from_java_class(cls, java_class, *args):
+        """
+        Construct this object from given Java classname and arguments
+        """
+        java_obj = JavaWrapper._new_java_obj(java_class, *args)
+        return cls(java_obj)
+
+    def _call_java(self, name, *args):
+        m = getattr(self._java_obj, name)
+        sc = SparkContext._active_spark_context
+        java_args = [_py2java(sc, arg) for arg in args]
+        return _java2py(sc, m(*java_args))
+
+    @staticmethod
+    def _new_java_obj(java_class, *args):
+        """
+        Returns a new Java object.
+        """
+        sc = SparkContext._active_spark_context
+        java_obj = _jvm()
+        for name in java_class.split("."):
+            java_obj = getattr(java_obj, name)
+        java_args = [_py2java(sc, arg) for arg in args]
+        return java_obj(*java_args)
+
+    @staticmethod
+    def _new_java_array(pylist, java_class):
+        """
+        Create a Java array of given java_class type. Useful for
+        calling a method with a Scala Array from Python with Py4J.
+
+        :param pylist:
+          Python list to convert to a Java Array.
+        :param java_class:
+          Java class to specify the type of Array. Should be in the
+          form of sc._gateway.jvm.* (sc is a valid Spark Context).
+        :return:
+          Java Array of converted pylist.
+
+        Example primitive Java classes:
+          - basestring -> sc._gateway.jvm.java.lang.String
+          - int -> sc._gateway.jvm.java.lang.Integer
+          - float -> sc._gateway.jvm.java.lang.Double
+          - bool -> sc._gateway.jvm.java.lang.Boolean
+        """
+        sc = SparkContext._active_spark_context
+        java_array = sc._gateway.new_array(java_class, len(pylist))
+        for i in xrange(len(pylist)):
+            java_array[i] = pylist[i]
+        return java_array
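+
+    # Illustrative call (not part of the original module), assuming an active SparkContext ``sc``:
+    #
+    #     arr = JavaWrapper._new_java_array(["a", "b", "c"], sc._gateway.jvm.java.lang.String)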
+
+
+@inherit_doc
+class JavaParams(JavaWrapper, Params):
+    """
+    Utility class to help create wrapper classes from Java/Scala
+    implementations of pipeline components.
+    """
+    #: The param values in the Java object should be
+    #: synced with the Python wrapper in fit/transform/evaluate/copy.
+
+    __metaclass__ = ABCMeta
+
+    def _make_java_param_pair(self, param, value):
+        """
+        Makes a Java param pair.
+        """
+        sc = SparkContext._active_spark_context
+        param = self._resolveParam(param)
+        java_param = self._java_obj.getParam(param.name)
+        java_value = _py2java(sc, value)
+        return java_param.w(java_value)
+
+    def _transfer_params_to_java(self):
+        """
+        Transforms the embedded params to the companion Java object.
+        """
+        pair_defaults = []
+        for param in self.params:
+            if self.isSet(param):
+                pair = self._make_java_param_pair(param, self._paramMap[param])
+                self._java_obj.set(pair)
+            if self.hasDefault(param):
+                pair = self._make_java_param_pair(param, self._defaultParamMap[param])
+                pair_defaults.append(pair)
+        if len(pair_defaults) > 0:
+            sc = SparkContext._active_spark_context
+            pair_defaults_seq = sc._jvm.PythonUtils.toSeq(pair_defaults)
+            self._java_obj.setDefault(pair_defaults_seq)
+
+    def _transfer_param_map_to_java(self, pyParamMap):
+        """
+        Transforms a Python ParamMap into a Java ParamMap.
+        """
+        paramMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap")
+        for param in self.params:
+            if param in pyParamMap:
+                pair = self._make_java_param_pair(param, pyParamMap[param])
+                paramMap.put([pair])
+        return paramMap
+
+    def _create_params_from_java(self):
+        """
+        SPARK-10931: Temporary fix to create params that are defined in the Java obj but not here
+        """
+        java_params = list(self._java_obj.params())
+        from pyspark.ml.param import Param
+        for java_param in java_params:
+            java_param_name = java_param.name()
+            if not hasattr(self, java_param_name):
+                param = Param(self, java_param_name, java_param.doc())
+                setattr(param, "created_from_java_param", True)
+                setattr(self, java_param_name, param)
+                self._params = None  # need to reset so self.params will discover new params
+
+    def _transfer_params_from_java(self):
+        """
+        Transforms the embedded params from the companion Java object.
+        """
+        sc = SparkContext._active_spark_context
+        for param in self.params:
+            if self._java_obj.hasParam(param.name):
+                java_param = self._java_obj.getParam(param.name)
+                # SPARK-14931: Only transfer params that are explicitly set in Java, to avoid a
+                # mismatch between Java and Python default params.
+                if self._java_obj.isSet(java_param):
+                    value = _java2py(sc, self._java_obj.getOrDefault(java_param))
+                    self._set(**{param.name: value})
+                # SPARK-10931: Temporary fix for params that have a default in Java
+                if self._java_obj.hasDefault(java_param) and not self.isDefined(param):
+                    value = _java2py(sc, self._java_obj.getDefault(java_param)).get()
+                    self._setDefault(**{param.name: value})
+
+    def _transfer_param_map_from_java(self, javaParamMap):
+        """
+        Transforms a Java ParamMap into a Python ParamMap.
+        """
+        sc = SparkContext._active_spark_context
+        paramMap = dict()
+        for pair in javaParamMap.toList():
+            param = pair.param()
+            if self.hasParam(str(param.name())):
+                paramMap[self.getParam(param.name())] = _java2py(sc, pair.value())
+        return paramMap
+
+    @staticmethod
+    def _empty_java_param_map():
+        """
+        Returns an empty Java ParamMap reference.
+        """
+        return _jvm().org.apache.spark.ml.param.ParamMap()
+
+    def _to_java(self):
+        """
+        Transfer this instance's Params to the wrapped Java object, and return the Java object.
+        Used for ML persistence.
+
+        Meta-algorithms such as Pipeline should override this method.
+
+        :return: Java object equivalent to this instance.
+        """
+        self._transfer_params_to_java()
+        return self._java_obj
+
+    @staticmethod
+    def _from_java(java_stage):
+        """
+        Given a Java object, create and return a Python wrapper of it.
+        Used for ML persistence.
+
+        Meta-algorithms such as Pipeline should override this method as a classmethod.
+        """
+        def __get_class(clazz):
+            """
+            Loads Python class from its name.
+            """
+            parts = clazz.split('.')
+            module = ".".join(parts[:-1])
+            m = __import__(module)
+            for comp in parts[1:]:
+                m = getattr(m, comp)
+            return m
+        stage_name = java_stage.getClass().getName().replace("org.apache.spark", "pyspark")
+        # Generate a default new instance from the stage_name class.
+        py_type = __get_class(stage_name)
+        if issubclass(py_type, JavaParams):
+            # Load information from java_stage to the instance.
+            py_stage = py_type()
+            py_stage._java_obj = java_stage
+
+            # SPARK-10931: Temporary fix so that persisted models would own params from Estimator
+            if issubclass(py_type, JavaModel):
+                py_stage._create_params_from_java()
+
+            py_stage._resetUid(java_stage.uid())
+            py_stage._transfer_params_from_java()
+        elif hasattr(py_type, "_from_java"):
+            py_stage = py_type._from_java(java_stage)
+        else:
+            raise NotImplementedError("This Java stage cannot be loaded into Python currently: %r"
+                                      % stage_name)
+        return py_stage
+
+    def copy(self, extra=None):
+        """
+        Creates a copy of this instance with the same uid and some
+        extra params. This implementation first calls Params.copy and
+        then makes a copy of the companion Java pipeline component with
+        extra params. So both the Python wrapper and the Java pipeline
+        component get copied.
+
+        :param extra: Extra parameters to copy to the new instance
+        :return: Copy of this instance
+        """
+        if extra is None:
+            extra = dict()
+        that = super(JavaParams, self).copy(extra)
+        if self._java_obj is not None:
+            that._java_obj = self._java_obj.copy(self._empty_java_param_map())
+            that._transfer_params_to_java()
+        return that
+
+
+@inherit_doc
+class JavaEstimator(JavaParams, Estimator):
+    """
+    Base class for :py:class:`Estimator`s that wrap Java/Scala
+    implementations.
+    """
+
+    __metaclass__ = ABCMeta
+
+    @abstractmethod
+    def _create_model(self, java_model):
+        """
+        Creates a model from the input Java model reference.
+        """
+        raise NotImplementedError()
+
+    def _fit_java(self, dataset):
+        """
+        Fits a Java model to the input dataset.
+
+        :param dataset: input dataset, which is an instance of
+                        :py:class:`pyspark.sql.DataFrame`
+        :return: fitted Java model
+        """
+        self._transfer_params_to_java()
+        return self._java_obj.fit(dataset._jdf)
+
+    def _fit(self, dataset):
+        java_model = self._fit_java(dataset)
+        model = self._create_model(java_model)
+        return self._copyValues(model)
+
+
+@inherit_doc
+class JavaTransformer(JavaParams, Transformer):
+    """
+    Base class for :py:class:`Transformer`s that wrap Java/Scala
+    implementations. Subclasses should ensure they have the transformer Java object
+    available as _java_obj.
+    """
+
+    __metaclass__ = ABCMeta
+
+    def _transform(self, dataset):
+        self._transfer_params_to_java()
+        return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx)
+
+
+@inherit_doc
+class JavaModel(JavaTransformer, Model):
+    """
+    Base class for :py:class:`Model`s that wrap Java/Scala
+    implementations. Subclasses should inherit this class before
+    param mix-ins, because this sets the UID from the Java model.
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(self, java_model=None):
+        """
+        Initialize this instance with a Java model object.
+        Subclasses should call this constructor, initialize params,
+        and then call _transfer_params_from_java.
+
+        This instance can be instantiated without specifying java_model,
+        in which case it is assigned later; that scenario is only used by
+        :py:class:`JavaMLReader` to load models.  This is a bit of a
+        hack, but it is the easiest option, since a proper fix would require
+        MLReader (in pyspark.ml.util) to depend on these wrappers, while
+        these wrappers depend on pyspark.ml.util (both directly and via
+        other ML classes).
+        """
+        super(JavaModel, self).__init__(java_model)
+        if java_model is not None:
+
+            # SPARK-10931: This is a temporary fix to allow models to own params
+            # from estimators. Eventually, these params should be in models through
+            # using common base classes between estimators and models.
+            self._create_params_from_java()
+
+            self._resetUid(java_model.uid())
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae26521ea96bf06075d7d95d869109a636b8e8ba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/__init__.py
@@ -0,0 +1,35 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+RDD-based machine learning APIs for Python (in maintenance mode).
+
+The `pyspark.mllib` package is in maintenance mode as of the Spark 2.0.0 release to encourage
+migration to the DataFrame-based APIs under the `pyspark.ml` package.
+"""
+from __future__ import absolute_import
+
+# MLlib currently needs NumPy 1.4+, so complain if lower
+
+import numpy
+
+ver = [int(x) for x in numpy.version.version.split('.')[:2]]
+if ver < [1, 4]:
+    raise Exception("MLlib requires NumPy 1.4+")
+
+__all__ = ['classification', 'clustering', 'feature', 'fpm', 'linalg', 'random',
+           'recommendation', 'regression', 'stat', 'tree', 'util']
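+
+# Migration sketch (illustrative only): the DataFrame-based counterpart of a
+# typical pyspark.mllib workflow lives under pyspark.ml. Assumes an active
+# SparkSession named `spark` and a DataFrame with "label" and "features"
+# (Vector) columns.
+#
+#   >>> from pyspark.ml.classification import LogisticRegression
+#   >>> from pyspark.ml.linalg import Vectors
+#   >>> df = spark.createDataFrame(
+#   ...     [(0.0, Vectors.dense([0.0, 1.0])), (1.0, Vectors.dense([1.0, 0.0]))],
+#   ...     ["label", "features"])
+#   >>> model = LogisticRegression(maxIter=10).fit(df)
+#   >>> model.transform(df).select("prediction").count()
+#   2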
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/classification.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/classification.py
new file mode 100644
index 0000000000000000000000000000000000000000..e00ed95ef0701dd1692951a45330f3ee9cc95b19
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/classification.py
@@ -0,0 +1,771 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from math import exp
+import sys
+import warnings
+
+import numpy
+from numpy import array
+
+from pyspark import RDD, since
+from pyspark.streaming import DStream
+from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py
+from pyspark.mllib.linalg import DenseVector, SparseVector, _convert_to_vector
+from pyspark.mllib.regression import (
+    LabeledPoint, LinearModel, _regression_train_wrapper,
+    StreamingLinearAlgorithm)
+from pyspark.mllib.util import Saveable, Loader, inherit_doc
+
+
+__all__ = ['LogisticRegressionModel', 'LogisticRegressionWithSGD', 'LogisticRegressionWithLBFGS',
+           'SVMModel', 'SVMWithSGD', 'NaiveBayesModel', 'NaiveBayes',
+           'StreamingLogisticRegressionWithSGD']
+
+
+class LinearClassificationModel(LinearModel):
+    """
+    A private abstract class representing a multiclass classification
+    model. The categories are represented by int values: 0, 1, 2, etc.
+    """
+    def __init__(self, weights, intercept):
+        super(LinearClassificationModel, self).__init__(weights, intercept)
+        self._threshold = None
+
+    @since('1.4.0')
+    def setThreshold(self, value):
+        """
+        Sets the threshold that separates positive predictions from
+        negative predictions. An example with prediction score greater
+        than or equal to this threshold is identified as a positive,
+        and negative otherwise. It is used for binary classification
+        only.
+        """
+        self._threshold = value
+
+    @property
+    @since('1.4.0')
+    def threshold(self):
+        """
+        Returns the threshold (if any) used for converting raw
+        prediction scores into 0/1 predictions. It is used for
+        binary classification only.
+        """
+        return self._threshold
+
+    @since('1.4.0')
+    def clearThreshold(self):
+        """
+        Clears the threshold so that `predict` will output raw
+        prediction scores. It is used for binary classification only.
+        """
+        self._threshold = None
+
+    @since('1.4.0')
+    def predict(self, test):
+        """
+        Predict values for a single data point or an RDD of points
+        using the model trained.
+        """
+        raise NotImplementedError
+
+
+class LogisticRegressionModel(LinearClassificationModel):
+
+    """
+    Classification model trained using Multinomial/Binary Logistic
+    Regression.
+
+    :param weights:
+      Weights computed for every feature.
+    :param intercept:
+      Intercept computed for this model. (Only used in Binary Logistic
+      Regression. In Multinomial Logistic Regression, the intercepts will
+      not be a single value, so the intercepts will be part of the
+      weights.)
+    :param numFeatures:
+      The dimension of the features.
+    :param numClasses:
+      The number of possible outcomes in a k-class classification problem
+      for Multinomial Logistic Regression. By default, binary logistic
+      regression is used, so numClasses is set to 2.
+
+    >>> data = [
+    ...     LabeledPoint(0.0, [0.0, 1.0]),
+    ...     LabeledPoint(1.0, [1.0, 0.0]),
+    ... ]
+    >>> lrm = LogisticRegressionWithSGD.train(sc.parallelize(data), iterations=10)
+    >>> lrm.predict([1.0, 0.0])
+    1
+    >>> lrm.predict([0.0, 1.0])
+    0
+    >>> lrm.predict(sc.parallelize([[1.0, 0.0], [0.0, 1.0]])).collect()
+    [1, 0]
+    >>> lrm.clearThreshold()
+    >>> lrm.predict([0.0, 1.0])
+    0.279...
+
+    >>> sparse_data = [
+    ...     LabeledPoint(0.0, SparseVector(2, {0: 0.0})),
+    ...     LabeledPoint(1.0, SparseVector(2, {1: 1.0})),
+    ...     LabeledPoint(0.0, SparseVector(2, {0: 1.0})),
+    ...     LabeledPoint(1.0, SparseVector(2, {1: 2.0}))
+    ... ]
+    >>> lrm = LogisticRegressionWithSGD.train(sc.parallelize(sparse_data), iterations=10)
+    >>> lrm.predict(array([0.0, 1.0]))
+    1
+    >>> lrm.predict(array([1.0, 0.0]))
+    0
+    >>> lrm.predict(SparseVector(2, {1: 1.0}))
+    1
+    >>> lrm.predict(SparseVector(2, {0: 1.0}))
+    0
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> lrm.save(sc, path)
+    >>> sameModel = LogisticRegressionModel.load(sc, path)
+    >>> sameModel.predict(array([0.0, 1.0]))
+    1
+    >>> sameModel.predict(SparseVector(2, {0: 1.0}))
+    0
+    >>> from shutil import rmtree
+    >>> try:
+    ...    rmtree(path)
+    ... except:
+    ...    pass
+    >>> multi_class_data = [
+    ...     LabeledPoint(0.0, [0.0, 1.0, 0.0]),
+    ...     LabeledPoint(1.0, [1.0, 0.0, 0.0]),
+    ...     LabeledPoint(2.0, [0.0, 0.0, 1.0])
+    ... ]
+    >>> data = sc.parallelize(multi_class_data)
+    >>> mcm = LogisticRegressionWithLBFGS.train(data, iterations=10, numClasses=3)
+    >>> mcm.predict([0.0, 0.5, 0.0])
+    0
+    >>> mcm.predict([0.8, 0.0, 0.0])
+    1
+    >>> mcm.predict([0.0, 0.0, 0.3])
+    2
+
+    .. versionadded:: 0.9.0
+    """
+    def __init__(self, weights, intercept, numFeatures, numClasses):
+        super(LogisticRegressionModel, self).__init__(weights, intercept)
+        self._numFeatures = int(numFeatures)
+        self._numClasses = int(numClasses)
+        self._threshold = 0.5
+        if self._numClasses == 2:
+            self._dataWithBiasSize = None
+            self._weightsMatrix = None
+        else:
+            self._dataWithBiasSize = self._coeff.size // (self._numClasses - 1)
+            self._weightsMatrix = self._coeff.toArray().reshape(self._numClasses - 1,
+                                                                self._dataWithBiasSize)
+
+    @property
+    @since('1.4.0')
+    def numFeatures(self):
+        """
+        Dimension of the features.
+        """
+        return self._numFeatures
+
+    @property
+    @since('1.4.0')
+    def numClasses(self):
+        """
+        Number of possible outcomes in a k-class classification problem
+        for Multinomial Logistic Regression.
+        """
+        return self._numClasses
+
+    @since('0.9.0')
+    def predict(self, x):
+        """
+        Predict values for a single data point or an RDD of points
+        using the model trained.
+        """
+        if isinstance(x, RDD):
+            return x.map(lambda v: self.predict(v))
+
+        x = _convert_to_vector(x)
+        if self.numClasses == 2:
+            # Binary case: apply the logistic (sigmoid) function to the margin.
+            # The two branches below are equivalent but avoid overflowing exp()
+            # for margins of large magnitude.
+            margin = self.weights.dot(x) + self._intercept
+            if margin > 0:
+                prob = 1 / (1 + exp(-margin))
+            else:
+                exp_margin = exp(margin)
+                prob = exp_margin / (1 + exp_margin)
+            if self._threshold is None:
+                return prob
+            else:
+                return 1 if prob > self._threshold else 0
+        else:
+            # Multinomial case: class 0 is the pivot with an implicit margin of
+            # 0.0; return the class whose margin against the pivot is largest.
+            best_class = 0
+            max_margin = 0.0
+            if x.size + 1 == self._dataWithBiasSize:
+                # The weight matrix rows carry a trailing intercept (bias) term.
+                for i in range(0, self._numClasses - 1):
+                    margin = x.dot(self._weightsMatrix[i][0:x.size]) + \
+                        self._weightsMatrix[i][x.size]
+                    if margin > max_margin:
+                        max_margin = margin
+                        best_class = i + 1
+            else:
+                for i in range(0, self._numClasses - 1):
+                    margin = x.dot(self._weightsMatrix[i])
+                    if margin > max_margin:
+                        max_margin = margin
+                        best_class = i + 1
+            return best_class
+
+    @since('1.4.0')
+    def save(self, sc, path):
+        """
+        Save this model to the given path.
+        """
+        java_model = sc._jvm.org.apache.spark.mllib.classification.LogisticRegressionModel(
+            _py2java(sc, self._coeff), self.intercept, self.numFeatures, self.numClasses)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since('1.4.0')
+    def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
+        java_model = sc._jvm.org.apache.spark.mllib.classification.LogisticRegressionModel.load(
+            sc._jsc.sc(), path)
+        weights = _java2py(sc, java_model.weights())
+        intercept = java_model.intercept()
+        numFeatures = java_model.numFeatures()
+        numClasses = java_model.numClasses()
+        threshold = java_model.getThreshold().get()
+        model = LogisticRegressionModel(weights, intercept, numFeatures, numClasses)
+        model.setThreshold(threshold)
+        return model
+
+    def __repr__(self):
+        return self._call_java("toString")
+
+
+class LogisticRegressionWithSGD(object):
+    """
+    .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.classification.LogisticRegression or
+            LogisticRegressionWithLBFGS.
+    """
+    @classmethod
+    @since('0.9.0')
+    def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
+              initialWeights=None, regParam=0.01, regType="l2", intercept=False,
+              validateData=True, convergenceTol=0.001):
+        """
+        Train a logistic regression model on the given data.
+
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param regType:
+          The type of regularizer used for training our model.
+          Supported values:
+
+            - "l1" for using L1 regularization
+            - "l2" for using L2 regularization (default)
+            - None for no regularization
+        :param intercept:
+          Boolean parameter which indicates whether to use the augmented
+          representation for training data (i.e., whether bias features
+          are activated).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          The convergence tolerance used to decide when to terminate
+          the iterations.
+          (default: 0.001)
+        """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or "
+            "LogisticRegressionWithLBFGS.", DeprecationWarning)
+
+        def train(rdd, i):
+            return callMLlibFunc("trainLogisticRegressionModelWithSGD", rdd, int(iterations),
+                                 float(step), float(miniBatchFraction), i, float(regParam), regType,
+                                 bool(intercept), bool(validateData), float(convergenceTol))
+
+        return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
+
+
+class LogisticRegressionWithLBFGS(object):
+    """
+    .. versionadded:: 1.2.0
+    """
+    @classmethod
+    @since('1.2.0')
+    def train(cls, data, iterations=100, initialWeights=None, regParam=0.0, regType="l2",
+              intercept=False, corrections=10, tolerance=1e-6, validateData=True, numClasses=2):
+        """
+        Train a logistic regression model on the given data.
+
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.0)
+        :param regType:
+          The type of regularizer used for training our model.
+          Supported values:
+
+            - "l1" for using L1 regularization
+            - "l2" for using L2 regularization (default)
+            - None for no regularization
+        :param intercept:
+          Boolean parameter which indicates whether to use the augmented
+          representation for training data (i.e., whether bias features
+          are activated).
+          (default: False)
+        :param corrections:
+          The number of corrections used in the LBFGS update.
+          If a known updater is used for binary classification,
+          it calls the ml implementation and this parameter will
+          have no effect. (default: 10)
+        :param tolerance:
+          The convergence tolerance of iterations for L-BFGS.
+          (default: 1e-6)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param numClasses:
+          The number of classes (i.e., outcomes) a label can take in
+          Multinomial Logistic Regression.
+          (default: 2)
+
+        >>> data = [
+        ...     LabeledPoint(0.0, [0.0, 1.0]),
+        ...     LabeledPoint(1.0, [1.0, 0.0]),
+        ... ]
+        >>> lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)
+        >>> lrm.predict([1.0, 0.0])
+        1
+        >>> lrm.predict([0.0, 1.0])
+        0
+        """
+        def train(rdd, i):
+            return callMLlibFunc("trainLogisticRegressionModelWithLBFGS", rdd, int(iterations), i,
+                                 float(regParam), regType, bool(intercept), int(corrections),
+                                 float(tolerance), bool(validateData), int(numClasses))
+
+        if initialWeights is None:
+            if numClasses == 2:
+                initialWeights = [0.0] * len(data.first().features)
+            else:
+                if intercept:
+                    initialWeights = [0.0] * (len(data.first().features) + 1) * (numClasses - 1)
+                else:
+                    initialWeights = [0.0] * len(data.first().features) * (numClasses - 1)
+        return _regression_train_wrapper(train, LogisticRegressionModel, data, initialWeights)
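+
+# Worked sizing example (illustrative) for the initial-weight vector built
+# above: the flattened multinomial weights hold one row per non-pivot class.
+#
+#   >>> numFeatures, numClasses = 3, 3
+#   >>> (numFeatures + 1) * (numClasses - 1)   # multinomial, with intercept
+#   8
+#   >>> numFeatures * (numClasses - 1)         # multinomial, no intercept
+#   6
+#   >>> numFeatures                            # binary case
+#   3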
+
+
+class SVMModel(LinearClassificationModel):
+
+    """
+    Model for Support Vector Machines (SVMs).
+
+    :param weights:
+      Weights computed for every feature.
+    :param intercept:
+      Intercept computed for this model.
+
+    >>> data = [
+    ...     LabeledPoint(0.0, [0.0]),
+    ...     LabeledPoint(1.0, [1.0]),
+    ...     LabeledPoint(1.0, [2.0]),
+    ...     LabeledPoint(1.0, [3.0])
+    ... ]
+    >>> svm = SVMWithSGD.train(sc.parallelize(data), iterations=10)
+    >>> svm.predict([1.0])
+    1
+    >>> svm.predict(sc.parallelize([[1.0]])).collect()
+    [1]
+    >>> svm.clearThreshold()
+    >>> svm.predict(array([1.0]))
+    1.44...
+
+    >>> sparse_data = [
+    ...     LabeledPoint(0.0, SparseVector(2, {0: -1.0})),
+    ...     LabeledPoint(1.0, SparseVector(2, {1: 1.0})),
+    ...     LabeledPoint(0.0, SparseVector(2, {0: 0.0})),
+    ...     LabeledPoint(1.0, SparseVector(2, {1: 2.0}))
+    ... ]
+    >>> svm = SVMWithSGD.train(sc.parallelize(sparse_data), iterations=10)
+    >>> svm.predict(SparseVector(2, {1: 1.0}))
+    1
+    >>> svm.predict(SparseVector(2, {0: -1.0}))
+    0
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> svm.save(sc, path)
+    >>> sameModel = SVMModel.load(sc, path)
+    >>> sameModel.predict(SparseVector(2, {1: 1.0}))
+    1
+    >>> sameModel.predict(SparseVector(2, {0: -1.0}))
+    0
+    >>> from shutil import rmtree
+    >>> try:
+    ...    rmtree(path)
+    ... except:
+    ...    pass
+
+    .. versionadded:: 0.9.0
+    """
+    def __init__(self, weights, intercept):
+        super(SVMModel, self).__init__(weights, intercept)
+        self._threshold = 0.0
+
+    @since('0.9.0')
+    def predict(self, x):
+        """
+        Predict values for a single data point or an RDD of points
+        using the model trained.
+        """
+        if isinstance(x, RDD):
+            return x.map(lambda v: self.predict(v))
+
+        x = _convert_to_vector(x)
+        margin = self.weights.dot(x) + self.intercept
+        if self._threshold is None:
+            return margin
+        else:
+            return 1 if margin > self._threshold else 0
+
+    @since('1.4.0')
+    def save(self, sc, path):
+        """
+        Save this model to the given path.
+        """
+        java_model = sc._jvm.org.apache.spark.mllib.classification.SVMModel(
+            _py2java(sc, self._coeff), self.intercept)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since('1.4.0')
+    def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
+        java_model = sc._jvm.org.apache.spark.mllib.classification.SVMModel.load(
+            sc._jsc.sc(), path)
+        weights = _java2py(sc, java_model.weights())
+        intercept = java_model.intercept()
+        threshold = java_model.getThreshold().get()
+        model = SVMModel(weights, intercept)
+        model.setThreshold(threshold)
+        return model
+
+
+class SVMWithSGD(object):
+    """
+    .. versionadded:: 0.9.0
+    """
+
+    @classmethod
+    @since('0.9.0')
+    def train(cls, data, iterations=100, step=1.0, regParam=0.01,
+              miniBatchFraction=1.0, initialWeights=None, regType="l2",
+              intercept=False, validateData=True, convergenceTol=0.001):
+        """
+        Train a support vector machine on the given data.
+
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param regType:
+          The type of regularizer used for training our model.
+          Allowed values:
+
+            - "l1" for using L1 regularization
+            - "l2" for using L2 regularization (default)
+            - None for no regularization
+        :param intercept:
+          Boolean parameter which indicates whether to use the augmented
+          representation for training data (i.e., whether bias features
+          are activated).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          The convergence tolerance used to decide when to terminate
+          the iterations.
+          (default: 0.001)
+        """
+        def train(rdd, i):
+            return callMLlibFunc("trainSVMModelWithSGD", rdd, int(iterations), float(step),
+                                 float(regParam), float(miniBatchFraction), i, regType,
+                                 bool(intercept), bool(validateData), float(convergenceTol))
+
+        return _regression_train_wrapper(train, SVMModel, data, initialWeights)
+
+
+@inherit_doc
+class NaiveBayesModel(Saveable, Loader):
+
+    """
+    Model for Naive Bayes classifiers.
+
+    :param labels:
+      List of labels.
+    :param pi:
+      Log of class priors, whose dimension is C, number of labels.
+    :param theta:
+      Log of class conditional probabilities, whose dimension is C-by-D,
+      where D is number of features.
+
+    >>> data = [
+    ...     LabeledPoint(0.0, [0.0, 0.0]),
+    ...     LabeledPoint(0.0, [0.0, 1.0]),
+    ...     LabeledPoint(1.0, [1.0, 0.0]),
+    ... ]
+    >>> model = NaiveBayes.train(sc.parallelize(data))
+    >>> model.predict(array([0.0, 1.0]))
+    0.0
+    >>> model.predict(array([1.0, 0.0]))
+    1.0
+    >>> model.predict(sc.parallelize([[1.0, 0.0]])).collect()
+    [1.0]
+    >>> sparse_data = [
+    ...     LabeledPoint(0.0, SparseVector(2, {1: 0.0})),
+    ...     LabeledPoint(0.0, SparseVector(2, {1: 1.0})),
+    ...     LabeledPoint(1.0, SparseVector(2, {0: 1.0}))
+    ... ]
+    >>> model = NaiveBayes.train(sc.parallelize(sparse_data))
+    >>> model.predict(SparseVector(2, {1: 1.0}))
+    0.0
+    >>> model.predict(SparseVector(2, {0: 1.0}))
+    1.0
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> model.save(sc, path)
+    >>> sameModel = NaiveBayesModel.load(sc, path)
+    >>> sameModel.predict(SparseVector(2, {0: 1.0})) == model.predict(SparseVector(2, {0: 1.0}))
+    True
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    .. versionadded:: 0.9.0
+    """
+    def __init__(self, labels, pi, theta):
+        self.labels = labels
+        self.pi = pi
+        self.theta = theta
+
+    @since('0.9.0')
+    def predict(self, x):
+        """
+        Return the most likely class for a data vector
+        or an RDD of vectors
+        """
+        if isinstance(x, RDD):
+            return x.map(lambda v: self.predict(v))
+        x = _convert_to_vector(x)
+        # Choose the label with the largest (unnormalized) log-posterior:
+        # log class prior plus the dot product of the feature vector with the
+        # log conditional probabilities.
+        return self.labels[numpy.argmax(self.pi + x.dot(self.theta.transpose()))]
+
+    def save(self, sc, path):
+        """
+        Save this model to the given path.
+        """
+        java_labels = _py2java(sc, self.labels.tolist())
+        java_pi = _py2java(sc, self.pi.tolist())
+        java_theta = _py2java(sc, self.theta.tolist())
+        java_model = sc._jvm.org.apache.spark.mllib.classification.NaiveBayesModel(
+            java_labels, java_pi, java_theta)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since('1.4.0')
+    def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
+        java_model = sc._jvm.org.apache.spark.mllib.classification.NaiveBayesModel.load(
+            sc._jsc.sc(), path)
+        # Can not unpickle array.array from Pyrolite in Python3 with "bytes"
+        py_labels = _java2py(sc, java_model.labels(), "latin1")
+        py_pi = _java2py(sc, java_model.pi(), "latin1")
+        py_theta = _java2py(sc, java_model.theta(), "latin1")
+        return NaiveBayesModel(py_labels, py_pi, numpy.array(py_theta))
+
+
+class NaiveBayes(object):
+    """
+    .. versionadded:: 0.9.0
+    """
+
+    @classmethod
+    @since('0.9.0')
+    def train(cls, data, lambda_=1.0):
+        """
+        Train a Naive Bayes model given an RDD of (label, features)
+        vectors.
+
+        This is the Multinomial NB (U{http://tinyurl.com/lsdw6p}) which
+        can handle all kinds of discrete data.  For example, by
+        converting documents into TF-IDF vectors, it can be used for
+        document classification. By making every vector a 0-1 vector,
+        it can also be used as Bernoulli NB (U{http://tinyurl.com/p7c96j6}).
+        The input feature values must be nonnegative.
+
+        :param data:
+          RDD of LabeledPoint.
+        :param lambda_:
+          The smoothing parameter.
+          (default: 1.0)
+        """
+        first = data.first()
+        if not isinstance(first, LabeledPoint):
+            raise ValueError("`data` should be an RDD of LabeledPoint")
+        labels, pi, theta = callMLlibFunc("trainNaiveBayesModel", data, lambda_)
+        return NaiveBayesModel(labels.toArray(), pi.toArray(), numpy.array(theta))
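+
+# Illustrative sketch: using the same trainer in Bernoulli style by making
+# every feature a 0/1 indicator, as noted in the docstring above. Assumes an
+# active SparkContext named `sc`.
+#
+#   >>> binary_data = [
+#   ...     LabeledPoint(0.0, [1.0, 0.0, 1.0]),
+#   ...     LabeledPoint(1.0, [0.0, 1.0, 0.0]),
+#   ... ]
+#   >>> model = NaiveBayes.train(sc.parallelize(binary_data), lambda_=1.0)
+#   >>> model.predict([0.0, 1.0, 0.0])
+#   1.0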
+
+
+@inherit_doc
+class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm):
+    """
+    Train or predict a logistic regression model on streaming data.
+    Training uses Stochastic Gradient Descent to update the model based on
+    each new batch of incoming data from a DStream.
+
+    Each batch of data is assumed to be an RDD of LabeledPoints.
+    The number of data points per batch can vary, but the number
+    of features must be constant. An initial weight
+    vector must be provided.
+
+    :param stepSize:
+      Step size for each iteration of gradient descent.
+      (default: 0.1)
+    :param numIterations:
+      Number of iterations run for each batch of data.
+      (default: 50)
+    :param miniBatchFraction:
+      Fraction of each batch of data to use for updates.
+      (default: 1.0)
+    :param regParam:
+      L2 Regularization parameter.
+      (default: 0.0)
+    :param convergenceTol:
+      Value used to determine when to terminate iterations.
+      (default: 0.001)
+
+    .. versionadded:: 1.5.0
+    """
+    def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, regParam=0.0,
+                 convergenceTol=0.001):
+        self.stepSize = stepSize
+        self.numIterations = numIterations
+        self.regParam = regParam
+        self.miniBatchFraction = miniBatchFraction
+        self.convergenceTol = convergenceTol
+        self._model = None
+        super(StreamingLogisticRegressionWithSGD, self).__init__(
+            model=self._model)
+
+    @since('1.5.0')
+    def setInitialWeights(self, initialWeights):
+        """
+        Set the initial value of weights.
+
+        This must be set before running trainOn and predictOn.
+        """
+        initialWeights = _convert_to_vector(initialWeights)
+
+        # LogisticRegressionWithSGD only supports binary classification.
+        self._model = LogisticRegressionModel(
+            initialWeights, 0, initialWeights.size, 2)
+        return self
+
+    @since('1.5.0')
+    def trainOn(self, dstream):
+        """Train the model on the incoming dstream."""
+        self._validate(dstream)
+
+        def update(rdd):
+            # LogisticRegressionWithSGD.train raises an error for an empty RDD.
+            if not rdd.isEmpty():
+                self._model = LogisticRegressionWithSGD.train(
+                    rdd, self.numIterations, self.stepSize,
+                    self.miniBatchFraction, self._model.weights,
+                    regParam=self.regParam, convergenceTol=self.convergenceTol)
+
+        dstream.foreachRDD(update)
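+
+# Illustrative usage sketch (the stream names are placeholders; assumes an
+# active StreamingContext `ssc` and DStreams of LabeledPoints with two
+# features):
+#
+#   slr = StreamingLogisticRegressionWithSGD(stepSize=0.1, numIterations=25)
+#   slr.setInitialWeights([0.0, 0.0])
+#   slr.trainOn(trainingStream)
+#   predictions = slr.predictOnValues(
+#       testStream.map(lambda lp: (lp.label, lp.features)))
+#   ssc.start()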
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    import pyspark.mllib.classification
+    globs = pyspark.mllib.classification.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.classification tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/clustering.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/clustering.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1a8af6bcc0942763d5a121fb3b14b15caaede3d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/clustering.py
@@ -0,0 +1,1061 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import array as pyarray
+import warnings
+
+if sys.version > '3':
+    xrange = range
+    basestring = str
+
+from math import exp, log
+
+from numpy import array, random, tile
+
+from collections import namedtuple
+
+from pyspark import SparkContext, since
+from pyspark.rdd import RDD, ignore_unicode_prefix
+from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, callJavaFunc, _py2java, _java2py
+from pyspark.mllib.linalg import SparseVector, _convert_to_vector, DenseVector
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.stat.distribution import MultivariateGaussian
+from pyspark.mllib.util import Saveable, Loader, inherit_doc, JavaLoader, JavaSaveable
+from pyspark.streaming import DStream
+
+__all__ = ['BisectingKMeansModel', 'BisectingKMeans', 'KMeansModel', 'KMeans',
+           'GaussianMixtureModel', 'GaussianMixture', 'PowerIterationClusteringModel',
+           'PowerIterationClustering', 'StreamingKMeans', 'StreamingKMeansModel',
+           'LDA', 'LDAModel']
+
+
+@inherit_doc
+class BisectingKMeansModel(JavaModelWrapper):
+    """
+    A clustering model derived from the bisecting k-means method.
+
+    >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2)
+    >>> bskm = BisectingKMeans()
+    >>> model = bskm.train(sc.parallelize(data, 2), k=4)
+    >>> p = array([0.0, 0.0])
+    >>> model.predict(p)
+    0
+    >>> model.k
+    4
+    >>> model.computeCost(p)
+    0.0
+
+    .. versionadded:: 2.0.0
+    """
+
+    def __init__(self, java_model):
+        super(BisectingKMeansModel, self).__init__(java_model)
+        self.centers = [c.toArray() for c in self.call("clusterCenters")]
+
+    @property
+    @since('2.0.0')
+    def clusterCenters(self):
+        """Get the cluster centers, represented as a list of NumPy
+        arrays."""
+        return self.centers
+
+    @property
+    @since('2.0.0')
+    def k(self):
+        """Get the number of clusters"""
+        return self.call("k")
+
+    @since('2.0.0')
+    def predict(self, x):
+        """
+        Find the cluster that each of the points belongs to in this
+        model.
+
+        :param x:
+          A data point (or RDD of points) to determine cluster index.
+        :return:
+          Predicted cluster index or an RDD of predicted cluster indices
+          if the input is an RDD.
+        """
+        if isinstance(x, RDD):
+            vecs = x.map(_convert_to_vector)
+            return self.call("predict", vecs)
+
+        x = _convert_to_vector(x)
+        return self.call("predict", x)
+
+    @since('2.0.0')
+    def computeCost(self, x):
+        """
+        Return the Bisecting K-means cost (sum of squared distances of
+        points to their nearest center) for this model on the given
+        data. If provided with an RDD of points returns the sum.
+
+        :param x:
+          A data point (or RDD of points) to compute the cost(s) for.
+        """
+        if isinstance(x, RDD):
+            vecs = x.map(_convert_to_vector)
+            return self.call("computeCost", vecs)
+
+        return self.call("computeCost", _convert_to_vector(x))
+
+
+class BisectingKMeans(object):
+    """
+    A bisecting k-means algorithm based on the paper "A comparison of
+    document clustering techniques" by Steinbach, Karypis, and Kumar,
+    with modification to fit Spark.
+    The algorithm starts from a single cluster that contains all points.
+    Iteratively it finds divisible clusters on the bottom level and
+    bisects each of them using k-means, until there are `k` leaf
+    clusters in total or no leaf clusters are divisible.
+    The bisecting steps of clusters on the same level are grouped
+    together to increase parallelism. If bisecting all divisible
+    clusters on the bottom level would result in more than `k` leaf
+    clusters, larger clusters get higher priority.
+
+    Based on
+    U{http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf}
+    Steinbach, Karypis, and Kumar, A comparison of document clustering
+    techniques, KDD Workshop on Text Mining, 2000.
+
+    .. versionadded:: 2.0.0
+    """
+
+    @classmethod
+    @since('2.0.0')
+    def train(cls, rdd, k=4, maxIterations=20, minDivisibleClusterSize=1.0, seed=-1888008604):
+        """
+        Runs the bisecting k-means algorithm and returns the model.
+
+        :param rdd:
+          Training points as an `RDD` of `Vector` or convertible
+          sequence types.
+        :param k:
+          The desired number of leaf clusters. The actual number could
+          be smaller if there are no divisible leaf clusters.
+          (default: 4)
+        :param maxIterations:
+          Maximum number of iterations allowed to split clusters.
+          (default: 20)
+        :param minDivisibleClusterSize:
+          Minimum number of points (if >= 1.0) or the minimum proportion
+          of points (if < 1.0) of a divisible cluster.
+          (default: 1.0)
+        :param seed:
+          Random seed value for cluster initialization.
+          (default: -1888008604 from classOf[BisectingKMeans].getName.##)
+        """
+        java_model = callMLlibFunc(
+            "trainBisectingKMeans", rdd.map(_convert_to_vector),
+            k, maxIterations, minDivisibleClusterSize, seed)
+        return BisectingKMeansModel(java_model)
+
+
+@inherit_doc
+class KMeansModel(Saveable, Loader):
+
+    """A clustering model derived from the k-means method.
+
+    >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2)
+    >>> model = KMeans.train(
+    ...     sc.parallelize(data), 2, maxIterations=10, initializationMode="random",
+    ...                    seed=50, initializationSteps=5, epsilon=1e-4)
+    >>> model.predict(array([0.0, 0.0])) == model.predict(array([1.0, 1.0]))
+    True
+    >>> model.predict(array([8.0, 9.0])) == model.predict(array([9.0, 8.0]))
+    True
+    >>> model.k
+    2
+    >>> model.computeCost(sc.parallelize(data))
+    2.0000000000000004
+    >>> model = KMeans.train(sc.parallelize(data), 2)
+    >>> sparse_data = [
+    ...     SparseVector(3, {1: 1.0}),
+    ...     SparseVector(3, {1: 1.1}),
+    ...     SparseVector(3, {2: 1.0}),
+    ...     SparseVector(3, {2: 1.1})
+    ... ]
+    >>> model = KMeans.train(sc.parallelize(sparse_data), 2, initializationMode="k-means||",
+    ...                                     seed=50, initializationSteps=5, epsilon=1e-4)
+    >>> model.predict(array([0., 1., 0.])) == model.predict(array([0, 1.1, 0.]))
+    True
+    >>> model.predict(array([0., 0., 1.])) == model.predict(array([0, 0, 1.1]))
+    True
+    >>> model.predict(sparse_data[0]) == model.predict(sparse_data[1])
+    True
+    >>> model.predict(sparse_data[2]) == model.predict(sparse_data[3])
+    True
+    >>> isinstance(model.clusterCenters, list)
+    True
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> model.save(sc, path)
+    >>> sameModel = KMeansModel.load(sc, path)
+    >>> sameModel.predict(sparse_data[0]) == model.predict(sparse_data[0])
+    True
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    >>> data = array([-383.1,-382.9, 28.7,31.2, 366.2,367.3]).reshape(3, 2)
+    >>> model = KMeans.train(sc.parallelize(data), 3, maxIterations=0,
+    ...     initialModel = KMeansModel([(-1000.0,-1000.0),(5.0,5.0),(1000.0,1000.0)]))
+    >>> model.clusterCenters
+    [array([-1000., -1000.]), array([ 5.,  5.]), array([ 1000.,  1000.])]
+
+    .. versionadded:: 0.9.0
+    """
+
+    def __init__(self, centers):
+        self.centers = centers
+
+    @property
+    @since('1.0.0')
+    def clusterCenters(self):
+        """Get the cluster centers, represented as a list of NumPy arrays."""
+        return self.centers
+
+    @property
+    @since('1.4.0')
+    def k(self):
+        """Total number of clusters."""
+        return len(self.centers)
+
+    @since('0.9.0')
+    def predict(self, x):
+        """
+        Find the cluster that each of the points belongs to in this
+        model.
+
+        :param x:
+          A data point (or RDD of points) to determine cluster index.
+        :return:
+          Predicted cluster index or an RDD of predicted cluster indices
+          if the input is an RDD.
+        """
+        best = 0
+        best_distance = float("inf")
+        if isinstance(x, RDD):
+            return x.map(self.predict)
+
+        x = _convert_to_vector(x)
+        for i in xrange(len(self.centers)):
+            distance = x.squared_distance(self.centers[i])
+            if distance < best_distance:
+                best = i
+                best_distance = distance
+        return best
+
+    @since('1.4.0')
+    def computeCost(self, rdd):
+        """
+        Return the K-means cost (sum of squared distances of points to
+        their nearest center) for this model on the given
+        data.
+
+        :param rdd:
+          The RDD of points to compute the cost on.
+        """
+        cost = callMLlibFunc("computeCostKmeansModel", rdd.map(_convert_to_vector),
+                             [_convert_to_vector(c) for c in self.centers])
+        return cost
+
+    @since('1.4.0')
+    def save(self, sc, path):
+        """
+        Save this model to the given path.
+        """
+        java_centers = _py2java(sc, [_convert_to_vector(c) for c in self.centers])
+        java_model = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel(java_centers)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since('1.4.0')
+    def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
+        java_model = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel.load(sc._jsc.sc(), path)
+        return KMeansModel(_java2py(sc, java_model.clusterCenters()))
+
+
+class KMeans(object):
+    """
+    .. versionadded:: 0.9.0
+    """
+
+    @classmethod
+    @since('0.9.0')
+    def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||",
+              seed=None, initializationSteps=2, epsilon=1e-4, initialModel=None):
+        """
+        Train a k-means clustering model.
+
+        :param rdd:
+          Training points as an `RDD` of `Vector` or convertible
+          sequence types.
+        :param k:
+          Number of clusters to create.
+        :param maxIterations:
+          Maximum number of iterations allowed.
+          (default: 100)
+        :param runs:
+          This param has no effect since Spark 2.0.0.
+        :param initializationMode:
+          The initialization algorithm. This can be either "random" or
+          "k-means||".
+          (default: "k-means||")
+        :param seed:
+          Random seed value for cluster initialization. Set as None to
+          generate seed based on system time.
+          (default: None)
+        :param initializationSteps:
+          Number of steps for the k-means|| initialization mode.
+          This is an advanced setting -- the default of 2 is almost
+          always enough.
+          (default: 2)
+        :param epsilon:
+          Distance threshold within which a center will be considered to
+          have converged. If all centers move less than this Euclidean
+          distance, iterations are stopped.
+          (default: 1e-4)
+        :param initialModel:
+          Initial cluster centers can be provided as a KMeansModel object
+          rather than using the random or k-means|| initializationMode.
+          (default: None)
+        """
+        if runs != 1:
+            warnings.warn("The param `runs` has no effect since Spark 2.0.0.")
+        clusterInitialModel = []
+        if initialModel is not None:
+            if not isinstance(initialModel, KMeansModel):
+                raise Exception("initialModel is of "+str(type(initialModel))+". It needs "
+                                "to be of <type 'KMeansModel'>")
+            clusterInitialModel = [_convert_to_vector(c) for c in initialModel.clusterCenters]
+        model = callMLlibFunc("trainKMeansModel", rdd.map(_convert_to_vector), k, maxIterations,
+                              runs, initializationMode, seed, initializationSteps, epsilon,
+                              clusterInitialModel)
+        centers = callJavaFunc(rdd.context, model.clusterCenters)
+        return KMeansModel([c.toArray() for c in centers])
+
+
+@inherit_doc
+class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader):
+
+    """
+    A clustering model derived from the Gaussian Mixture Model method.
+
+    >>> from pyspark.mllib.linalg import Vectors, DenseMatrix
+    >>> from numpy.testing import assert_equal
+    >>> from shutil import rmtree
+    >>> import os, tempfile
+
+    >>> clusterdata_1 =  sc.parallelize(array([-0.1,-0.05,-0.01,-0.1,
+    ...                                         0.9,0.8,0.75,0.935,
+    ...                                        -0.83,-0.68,-0.91,-0.76 ]).reshape(6, 2), 2)
+    >>> model = GaussianMixture.train(clusterdata_1, 3, convergenceTol=0.0001,
+    ...                                 maxIterations=50, seed=10)
+    >>> labels = model.predict(clusterdata_1).collect()
+    >>> labels[0]==labels[1]
+    False
+    >>> labels[1]==labels[2]
+    False
+    >>> labels[4]==labels[5]
+    True
+    >>> model.predict([-0.1,-0.05])
+    0
+    >>> softPredicted = model.predictSoft([-0.1,-0.05])
+    >>> abs(softPredicted[0] - 1.0) < 0.001
+    True
+    >>> abs(softPredicted[1] - 0.0) < 0.001
+    True
+    >>> abs(softPredicted[2] - 0.0) < 0.001
+    True
+
+    >>> path = tempfile.mkdtemp()
+    >>> model.save(sc, path)
+    >>> sameModel = GaussianMixtureModel.load(sc, path)
+    >>> assert_equal(model.weights, sameModel.weights)
+    >>> mus, sigmas = list(
+    ...     zip(*[(g.mu, g.sigma) for g in model.gaussians]))
+    >>> sameMus, sameSigmas = list(
+    ...     zip(*[(g.mu, g.sigma) for g in sameModel.gaussians]))
+    >>> mus == sameMus
+    True
+    >>> sigmas == sameSigmas
+    True
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    >>> data =  array([-5.1971, -2.5359, -3.8220,
+    ...                -5.2211, -5.0602,  4.7118,
+    ...                 6.8989, 3.4592,  4.6322,
+    ...                 5.7048,  4.6567, 5.5026,
+    ...                 4.5605,  5.2043,  6.2734])
+    >>> clusterdata_2 = sc.parallelize(data.reshape(5,3))
+    >>> model = GaussianMixture.train(clusterdata_2, 2, convergenceTol=0.0001,
+    ...                               maxIterations=150, seed=4)
+    >>> labels = model.predict(clusterdata_2).collect()
+    >>> labels[0]==labels[1]
+    True
+    >>> labels[2]==labels[3]==labels[4]
+    True
+
+    .. versionadded:: 1.3.0
+    """
+
+    @property
+    @since('1.4.0')
+    def weights(self):
+        """
+        Weights for each Gaussian distribution in the mixture, where weights[i] is
+        the weight for Gaussian i, and weights.sum == 1.
+        """
+        return array(self.call("weights"))
+
+    @property
+    @since('1.4.0')
+    def gaussians(self):
+        """
+        Array of MultivariateGaussian where gaussians[i] represents
+        the Multivariate Gaussian (Normal) Distribution for Gaussian i.
+        """
+        return [
+            MultivariateGaussian(gaussian[0], gaussian[1])
+            for gaussian in self.call("gaussians")]
+
+    @property
+    @since('1.4.0')
+    def k(self):
+        """Number of gaussians in mixture."""
+        return len(self.weights)
+
+    @since('1.3.0')
+    def predict(self, x):
+        """
+        Find the cluster to which the point 'x' or each point in RDD 'x'
+        has maximum membership in this model.
+
+        :param x:
+          A feature vector or an RDD of vectors representing data points.
+        :return:
+          Predicted cluster label or an RDD of predicted cluster labels
+          if the input is an RDD.
+        """
+        if isinstance(x, RDD):
+            cluster_labels = self.predictSoft(x).map(lambda z: z.index(max(z)))
+            return cluster_labels
+        else:
+            z = self.predictSoft(x)
+            return z.argmax()
+
+    @since('1.3.0')
+    def predictSoft(self, x):
+        """
+        Find the membership of point 'x' or each point in RDD 'x' to all mixture components.
+
+        :param x:
+          A feature vector or an RDD of vectors representing data points.
+        :return:
+          The membership value to all mixture components for vector 'x'
+          or each vector in RDD 'x'.
+        """
+        if isinstance(x, RDD):
+            means, sigmas = zip(*[(g.mu, g.sigma) for g in self.gaussians])
+            membership_matrix = callMLlibFunc("predictSoftGMM", x.map(_convert_to_vector),
+                                              _convert_to_vector(self.weights), means, sigmas)
+            return membership_matrix.map(lambda x: pyarray.array('d', x))
+        else:
+            return self.call("predictSoft", _convert_to_vector(x)).toArray()
+
+    @classmethod
+    @since('1.5.0')
+    def load(cls, sc, path):
+        """Load the GaussianMixtureModel from disk.
+
+        :param sc:
+          SparkContext.
+        :param path:
+          Path to where the model is stored.
+        """
+        model = cls._load_java(sc, path)
+        wrapper = sc._jvm.org.apache.spark.mllib.api.python.GaussianMixtureModelWrapper(model)
+        return cls(wrapper)
+
+
+class GaussianMixture(object):
+    """
+    Learning algorithm for Gaussian Mixtures using the expectation-maximization algorithm.
+
+    .. versionadded:: 1.3.0
+    """
+    @classmethod
+    @since('1.3.0')
+    def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None):
+        """
+        Train a Gaussian Mixture clustering model.
+
+        :param rdd:
+          Training points as an `RDD` of `Vector` or convertible
+          sequence types.
+        :param k:
+          Number of independent Gaussians in the mixture model.
+        :param convergenceTol:
+          Maximum change in log-likelihood at which convergence is
+          considered to have occurred.
+          (default: 1e-3)
+        :param maxIterations:
+          Maximum number of iterations allowed.
+          (default: 100)
+        :param seed:
+          Random seed for initial Gaussian distribution. Set as None to
+          generate seed based on system time.
+          (default: None)
+        :param initialModel:
+          Initial GMM starting point, bypassing the random
+          initialization.
+          (default: None)
+        """
+        initialModelWeights = None
+        initialModelMu = None
+        initialModelSigma = None
+        if initialModel is not None:
+            if initialModel.k != k:
+                raise Exception("Mismatched cluster count, initialModel.k = %s, however k = %s"
+                                % (initialModel.k, k))
+            initialModelWeights = list(initialModel.weights)
+            initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)]
+            initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)]
+        java_model = callMLlibFunc("trainGaussianMixtureModel", rdd.map(_convert_to_vector),
+                                   k, convergenceTol, maxIterations, seed,
+                                   initialModelWeights, initialModelMu, initialModelSigma)
+        return GaussianMixtureModel(java_model)
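+
+# Illustrative sketch: warm-starting training from an existing model via
+# `initialModel` (assumes `rdd` is an RDD of vectors and an earlier model
+# `gmm0` was trained with the same k).
+#
+#   gmm0 = GaussianMixture.train(rdd, k=2, seed=10)
+#   gmm = GaussianMixture.train(rdd, k=2, maxIterations=20, initialModel=gmm0)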
+
+
+class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader):
+
+    """
+    Model produced by [[PowerIterationClustering]].
+
+    >>> import math
+    >>> def genCircle(r, n):
+    ...     points = []
+    ...     for i in range(0, n):
+    ...         theta = 2.0 * math.pi * i / n
+    ...         points.append((r * math.cos(theta), r * math.sin(theta)))
+    ...     return points
+    >>> def sim(x, y):
+    ...     dist2 = (x[0] - y[0]) * (x[0] - y[0]) + (x[1] - y[1]) * (x[1] - y[1])
+    ...     return math.exp(-dist2 / 2.0)
+    >>> r1 = 1.0
+    >>> n1 = 10
+    >>> r2 = 4.0
+    >>> n2 = 40
+    >>> n = n1 + n2
+    >>> points = genCircle(r1, n1) + genCircle(r2, n2)
+    >>> similarities = [(i, j, sim(points[i], points[j])) for i in range(1, n) for j in range(0, i)]
+    >>> rdd = sc.parallelize(similarities, 2)
+    >>> model = PowerIterationClustering.train(rdd, 2, 40)
+    >>> model.k
+    2
+    >>> result = sorted(model.assignments().collect(), key=lambda x: x.id)
+    >>> result[0].cluster == result[1].cluster == result[2].cluster == result[3].cluster
+    True
+    >>> result[4].cluster == result[5].cluster == result[6].cluster == result[7].cluster
+    True
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> model.save(sc, path)
+    >>> sameModel = PowerIterationClusteringModel.load(sc, path)
+    >>> sameModel.k
+    2
+    >>> result = sorted(model.assignments().collect(), key=lambda x: x.id)
+    >>> result[0].cluster == result[1].cluster == result[2].cluster == result[3].cluster
+    True
+    >>> result[4].cluster == result[5].cluster == result[6].cluster == result[7].cluster
+    True
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    .. versionadded:: 1.5.0
+    """
+
+    @property
+    @since('1.5.0')
+    def k(self):
+        """
+        Returns the number of clusters.
+        """
+        return self.call("k")
+
+    @since('1.5.0')
+    def assignments(self):
+        """
+        Returns the cluster assignments of this model.
+        """
+        return self.call("getAssignments").map(
+            lambda x: (PowerIterationClustering.Assignment(*x)))
+
+    @classmethod
+    @since('1.5.0')
+    def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
+        model = cls._load_java(sc, path)
+        wrapper =\
+            sc._jvm.org.apache.spark.mllib.api.python.PowerIterationClusteringModelWrapper(model)
+        return PowerIterationClusteringModel(wrapper)
+
+
+class PowerIterationClustering(object):
+    """
+    Power Iteration Clustering (PIC), a scalable graph clustering algorithm
+    developed by [[http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf Lin and Cohen]].
+    From the abstract: PIC finds a very low-dimensional embedding of a
+    dataset using truncated power iteration on a normalized pair-wise
+    similarity matrix of the data.
+
+    .. versionadded:: 1.5.0
+    """
+
+    @classmethod
+    @since('1.5.0')
+    def train(cls, rdd, k, maxIterations=100, initMode="random"):
+        r"""
+        :param rdd:
+          An RDD of (i, j, s\ :sub:`ij`\) tuples representing the
+          affinity matrix, which is the matrix A in the PIC paper.  The
+          similarity s\ :sub:`ij`\ must be nonnegative.  This is a symmetric
+          matrix and hence s\ :sub:`ij`\ = s\ :sub:`ji`\. For any (i, j) with
+          nonzero similarity, there should be either (i, j, s\ :sub:`ij`\) or
+          (j, i, s\ :sub:`ji`\) in the input.  Tuples with i = j are ignored,
+          because it is assumed s\ :sub:`ij`\ = 0.0.
+        :param k:
+          Number of clusters.
+        :param maxIterations:
+          Maximum number of iterations of the PIC algorithm.
+          (default: 100)
+        :param initMode:
+          Initialization mode. This can be either "random" to use
+          a random vector as vertex properties, or "degree" to use
+          normalized sum similarities.
+          (default: "random")
+        """
+        model = callMLlibFunc("trainPowerIterationClusteringModel",
+                              rdd.map(_convert_to_vector), int(k), int(maxIterations), initMode)
+        return PowerIterationClusteringModel(model)
+
+    class Assignment(namedtuple("Assignment", ["id", "cluster"])):
+        """
+        Represents an (id, cluster) tuple.
+
+        .. versionadded:: 1.5.0
+        """
+
+
+class StreamingKMeansModel(KMeansModel):
+    """
+    Clustering model which can perform an online update of the centroids.
+
+    The update formula for each centroid is given by
+
+    * c_t+1 = ((c_t * n_t * a) + (x_t * m_t)) / (n_t * a + m_t)
+    * n_t+1 = n_t * a + m_t
+
+    where
+
+    * c_t: Centroid at the t-th iteration.
+    * n_t: Number of samples (or weights) associated with the centroid
+           at the t-th iteration.
+    * x_t: Centroid of the new data closest to c_t.
+    * m_t: Number of samples (or weights) of the new data closest to c_t.
+    * c_t+1: New centroid.
+    * n_t+1: New number of weights.
+    * a: Decay Factor, which gives the forgetfulness.
+
+    .. note:: If a is set to 1, it is the weighted mean of the previous
+        and new data. If it is set to zero, the old centroids are completely
+        forgotten.
+
+    :param clusterCenters:
+      Initial cluster centers.
+    :param clusterWeights:
+      List of weights assigned to each cluster.
+
+    >>> initCenters = [[0.0, 0.0], [1.0, 1.0]]
+    >>> initWeights = [1.0, 1.0]
+    >>> stkm = StreamingKMeansModel(initCenters, initWeights)
+    >>> data = sc.parallelize([[-0.1, -0.1], [0.1, 0.1],
+    ...                        [0.9, 0.9], [1.1, 1.1]])
+    >>> stkm = stkm.update(data, 1.0, u"batches")
+    >>> stkm.centers
+    array([[ 0.,  0.],
+           [ 1.,  1.]])
+    >>> stkm.predict([-0.1, -0.1])
+    0
+    >>> stkm.predict([0.9, 0.9])
+    1
+    >>> stkm.clusterWeights
+    [3.0, 3.0]
+    >>> decayFactor = 0.0
+    >>> data = sc.parallelize([DenseVector([1.5, 1.5]), DenseVector([0.2, 0.2])])
+    >>> stkm = stkm.update(data, 0.0, u"batches")
+    >>> stkm.centers
+    array([[ 0.2,  0.2],
+           [ 1.5,  1.5]])
+    >>> stkm.clusterWeights
+    [1.0, 1.0]
+    >>> stkm.predict([0.2, 0.2])
+    0
+    >>> stkm.predict([1.5, 1.5])
+    1
+
+    .. versionadded:: 1.5.0
+    """
+    def __init__(self, clusterCenters, clusterWeights):
+        super(StreamingKMeansModel, self).__init__(centers=clusterCenters)
+        self._clusterWeights = list(clusterWeights)
+
+    @property
+    @since('1.5.0')
+    def clusterWeights(self):
+        """Return the cluster weights."""
+        return self._clusterWeights
+
+    @ignore_unicode_prefix
+    @since('1.5.0')
+    def update(self, data, decayFactor, timeUnit):
+        """Update the centroids, according to data
+
+        :param data:
+          RDD with new data for the model update.
+        :param decayFactor:
+          Forgetfulness of the previous centroids.
+        :param timeUnit:
+          Can be "batches" or "points". If "points", the decay factor is
+          raised to the power of the number of new points; if "batches",
+          the decay factor is used as is.
+        """
+        if not isinstance(data, RDD):
+            raise TypeError("Data should be of an RDD, got %s." % type(data))
+        data = data.map(_convert_to_vector)
+        decayFactor = float(decayFactor)
+        if timeUnit not in ["batches", "points"]:
+            raise ValueError(
+                "timeUnit should be 'batches' or 'points', got %s." % timeUnit)
+        vectorCenters = [_convert_to_vector(center) for center in self.centers]
+        updatedModel = callMLlibFunc(
+            "updateStreamingKMeansModel", vectorCenters, self._clusterWeights,
+            data, decayFactor, timeUnit)
+        self.centers = array(updatedModel[0])
+        self._clusterWeights = list(updatedModel[1])
+        return self
+
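+# A quick numeric check of the update rule documented above (illustrative note,
+# not part of the upstream module).  Using the doctest values for the first
+# cluster: c_t = [0, 0], n_t = 1.0, the closest new points are [-0.1, -0.1] and
+# [0.1, 0.1], so x_t = [0, 0], m_t = 2.0, and with a = 1.0:
+#
+#     c_t+1 = (([0, 0] * 1.0 * 1.0) + ([0, 0] * 2.0)) / (1.0 * 1.0 + 2.0) = [0, 0]
+#     n_t+1 = 1.0 * 1.0 + 2.0 = 3.0
+#
+# which matches the centers and clusterWeights shown in the doctest.  With
+# a = 0.0 the old-weight term vanishes, so each centroid jumps to the mean of
+# the new data assigned to it, as the second update in the doctest shows.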
+
+class StreamingKMeans(object):
+    """
+    Provides methods to set k, decayFactor, timeUnit to configure the
+    KMeans algorithm for fitting and predicting on incoming dstreams.
+    More details on how the centroids are updated are provided under the
+    docs of StreamingKMeansModel.
+
+    :param k:
+      Number of clusters.
+      (default: 2)
+    :param decayFactor:
+      Forgetfulness of the previous centroids.
+      (default: 1.0)
+    :param timeUnit:
+      Can be "batches" or "points". If points, then the decay factor is
+      raised to the power of number of new points and if batches, then
+      decay factor will be used as is.
+      (default: "batches")
+
+    .. versionadded:: 1.5.0
+    """
+    def __init__(self, k=2, decayFactor=1.0, timeUnit="batches"):
+        self._k = k
+        self._decayFactor = decayFactor
+        if timeUnit not in ["batches", "points"]:
+            raise ValueError(
+                "timeUnit should be 'batches' or 'points', got %s." % timeUnit)
+        self._timeUnit = timeUnit
+        self._model = None
+
+    @since('1.5.0')
+    def latestModel(self):
+        """Return the latest model"""
+        return self._model
+
+    def _validate(self, dstream):
+        if self._model is None:
+            raise ValueError(
+                "Initial centers should be set either by setInitialCenters "
+                "or setRandomCenters.")
+        if not isinstance(dstream, DStream):
+            raise TypeError(
+                "Expected dstream to be of type DStream, "
+                "got type %s" % type(dstream))
+
+    @since('1.5.0')
+    def setK(self, k):
+        """Set number of clusters."""
+        self._k = k
+        return self
+
+    @since('1.5.0')
+    def setDecayFactor(self, decayFactor):
+        """Set decay factor."""
+        self._decayFactor = decayFactor
+        return self
+
+    @since('1.5.0')
+    def setHalfLife(self, halfLife, timeUnit):
+        """
+        Set the number of batches after which the centroids of that
+        particular batch have half their original weight.
+        """
+        self._timeUnit = timeUnit
+        self._decayFactor = exp(log(0.5) / halfLife)
+        return self
+
+    @since('1.5.0')
+    def setInitialCenters(self, centers, weights):
+        """
+        Set initial centers. Should be set before calling trainOn.
+        """
+        self._model = StreamingKMeansModel(centers, weights)
+        return self
+
+    @since('1.5.0')
+    def setRandomCenters(self, dim, weight, seed):
+        """
+        Set the initial centers to be random samples from
+        a Gaussian population with constant weights.
+        """
+        rng = random.RandomState(seed)
+        clusterCenters = rng.randn(self._k, dim)
+        clusterWeights = tile(weight, self._k)
+        self._model = StreamingKMeansModel(clusterCenters, clusterWeights)
+        return self
+
+    @since('1.5.0')
+    def trainOn(self, dstream):
+        """Train the model on the incoming dstream."""
+        self._validate(dstream)
+
+        def update(rdd):
+            self._model.update(rdd, self._decayFactor, self._timeUnit)
+
+        dstream.foreachRDD(update)
+
+    @since('1.5.0')
+    def predictOn(self, dstream):
+        """
+        Make predictions on a dstream.
+        Returns a transformed dstream object
+        """
+        self._validate(dstream)
+        return dstream.map(lambda x: self._model.predict(x))
+
+    @since('1.5.0')
+    def predictOnValues(self, dstream):
+        """
+        Make predictions on a keyed dstream.
+        Returns a transformed dstream object.
+        """
+        self._validate(dstream)
+        return dstream.mapValues(lambda x: self._model.predict(x))
+
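+# Typical wiring of StreamingKMeans (illustrative sketch only, assuming a
+# StreamingContext ``ssc``, a DStream ``trainingStream`` of feature vectors and
+# a DStream ``testStream`` of LabeledPoints already exist):
+#
+#     model = StreamingKMeans(k=2, decayFactor=1.0) \
+#         .setRandomCenters(dim=3, weight=0.0, seed=42)
+#     model.trainOn(trainingStream)            # update centers on every batch
+#     model.predictOnValues(
+#         testStream.map(lambda lp: (lp.label, lp.features))).pprint()
+#     ssc.start()
+#     ssc.awaitTermination()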
+
+class LDAModel(JavaModelWrapper, JavaSaveable, Loader):
+
+    """ A clustering model derived from the LDA method.
+
+    Latent Dirichlet Allocation (LDA) is a topic model designed for text documents.
+
+    Terminology:
+
+    - "word" = "term": an element of the vocabulary
+    - "token": instance of a term appearing in a document
+    - "topic": multinomial distribution over words representing some concept
+
+    References:
+
+    - Original LDA paper (journal version):
+      Blei, Ng, and Jordan.  "Latent Dirichlet Allocation."  JMLR, 2003.
+
+    >>> from pyspark.mllib.linalg import Vectors
+    >>> from numpy.testing import assert_almost_equal, assert_equal
+    >>> data = [
+    ...     [1, Vectors.dense([0.0, 1.0])],
+    ...     [2, SparseVector(2, {0: 1.0})],
+    ... ]
+    >>> rdd = sc.parallelize(data)
+    >>> model = LDA.train(rdd, k=2, seed=1)
+    >>> model.vocabSize()
+    2
+    >>> model.describeTopics()
+    [([1, 0], [0.5..., 0.49...]), ([0, 1], [0.5..., 0.49...])]
+    >>> model.describeTopics(1)
+    [([1], [0.5...]), ([0], [0.5...])]
+
+    >>> topics = model.topicsMatrix()
+    >>> topics_expect = array([[0.5,  0.5], [0.5, 0.5]])
+    >>> assert_almost_equal(topics, topics_expect, 1)
+
+    >>> import os, tempfile
+    >>> from shutil import rmtree
+    >>> path = tempfile.mkdtemp()
+    >>> model.save(sc, path)
+    >>> sameModel = LDAModel.load(sc, path)
+    >>> assert_equal(sameModel.topicsMatrix(), model.topicsMatrix())
+    >>> sameModel.vocabSize() == model.vocabSize()
+    True
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    .. versionadded:: 1.5.0
+    """
+
+    @since('1.5.0')
+    def topicsMatrix(self):
+        """Inferred topics, where each topic is represented by a distribution over terms."""
+        return self.call("topicsMatrix").toArray()
+
+    @since('1.5.0')
+    def vocabSize(self):
+        """Vocabulary size (number of terms or terms in the vocabulary)"""
+        return self.call("vocabSize")
+
+    @since('1.6.0')
+    def describeTopics(self, maxTermsPerTopic=None):
+        """Return the topics described by weighted terms.
+
+        WARNING: If vocabSize and k are large, this can return a large object!
+
+        :param maxTermsPerTopic:
+          Maximum number of terms to collect for each topic.
+          (default: vocabulary size)
+        :return:
+          Array over topics. Each topic is represented as a pair of
+          matching arrays: (term indices, term weights in topic).
+          Each topic's terms are sorted in order of decreasing weight.
+        """
+        if maxTermsPerTopic is None:
+            topics = self.call("describeTopics")
+        else:
+            topics = self.call("describeTopics", maxTermsPerTopic)
+        return topics
+
+    @classmethod
+    @since('1.5.0')
+    def load(cls, sc, path):
+        """Load the LDAModel from disk.
+
+        :param sc:
+          SparkContext.
+        :param path:
+          Path to where the model is stored.
+        """
+        if not isinstance(sc, SparkContext):
+            raise TypeError("sc should be a SparkContext, got type %s" % type(sc))
+        if not isinstance(path, basestring):
+            raise TypeError("path should be a basestring, got type %s" % type(path))
+        model = callMLlibFunc("loadLDAModel", sc, path)
+        return LDAModel(model)
+
+
+class LDA(object):
+    """
+    .. versionadded:: 1.5.0
+    """
+
+    @classmethod
+    @since('1.5.0')
+    def train(cls, rdd, k=10, maxIterations=20, docConcentration=-1.0,
+              topicConcentration=-1.0, seed=None, checkpointInterval=10, optimizer="em"):
+        """Train a LDA model.
+
+        :param rdd:
+          RDD of documents, which are tuples of document IDs and term
+          (word) count vectors. The term count vectors are "bags of
+          words" with a fixed-size vocabulary (where the vocabulary size
+          is the length of the vector). Document IDs must be unique
+          and >= 0.
+        :param k:
+          Number of topics to infer, i.e., the number of soft cluster
+          centers.
+          (default: 10)
+        :param maxIterations:
+          Maximum number of iterations allowed.
+          (default: 20)
+        :param docConcentration:
+          Concentration parameter (commonly named "alpha") for the prior
+          placed on documents' distributions over topics ("theta").
+          (default: -1.0)
+        :param topicConcentration:
+          Concentration parameter (commonly named "beta" or "eta") for
+          the prior placed on topics' distributions over terms.
+          (default: -1.0)
+        :param seed:
+          Random seed for cluster initialization. Set as None to generate
+          seed based on system time.
+          (default: None)
+        :param checkpointInterval:
+          Period (in iterations) between checkpoints.
+          (default: 10)
+        :param optimizer:
+          LDAOptimizer used to perform the actual calculation. Currently
+          "em", "online" are supported.
+          (default: "em")
+        """
+        model = callMLlibFunc("trainLDAModel", rdd, k, maxIterations,
+                              docConcentration, topicConcentration, seed,
+                              checkpointInterval, optimizer)
+        return LDAModel(model)
+
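+# Illustrative note (not part of the upstream module): describeTopics returns
+# term *indices*, so mapping topics back to words requires the vocabulary that
+# was used to build the term-count vectors.  With a hypothetical list ``vocab``:
+#
+#     topics = model.describeTopics(maxTermsPerTopic=3)
+#     for i, (term_indices, term_weights) in enumerate(topics):
+#         print(i, [(vocab[t], w) for t, w in zip(term_indices, term_weights)])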
+
+def _test():
+    import doctest
+    import numpy
+    import pyspark.mllib.clustering
+    try:
+        # Numpy 1.14+ changed its string format.
+        numpy.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+    globs = pyspark.mllib.clustering.__dict__.copy()
+    globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/common.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..bac8f350563ecbc6908c5ad3c183b72187003756
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/common.py
@@ -0,0 +1,163 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+if sys.version >= '3':
+    long = int
+    unicode = str
+
+import py4j.protocol
+from py4j.protocol import Py4JJavaError
+from py4j.java_gateway import JavaObject
+from py4j.java_collections import JavaArray, JavaList
+
+from pyspark import RDD, SparkContext
+from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
+from pyspark.sql import DataFrame, SQLContext
+
+# Hack to support float('inf') in Py4j
+_old_smart_decode = py4j.protocol.smart_decode
+
+_float_str_mapping = {
+    'nan': 'NaN',
+    'inf': 'Infinity',
+    '-inf': '-Infinity',
+}
+
+
+def _new_smart_decode(obj):
+    if isinstance(obj, float):
+        s = str(obj)
+        return _float_str_mapping.get(s, s)
+    return _old_smart_decode(obj)
+
+py4j.protocol.smart_decode = _new_smart_decode
+
+
+_picklable_classes = [
+    'LinkedList',
+    'SparseVector',
+    'DenseVector',
+    'DenseMatrix',
+    'Rating',
+    'LabeledPoint',
+]
+
+
+# this will call the MLlib version of pythonToJava()
+def _to_java_object_rdd(rdd):
+    """ Return a JavaRDD of Object by unpickling
+
+    It will convert each Python object into a Java object by Pyrolite, whether the
+    RDD is serialized in batch or not.
+    """
+    rdd = rdd._reserialize(AutoBatchedSerializer(PickleSerializer()))
+    return rdd.ctx._jvm.org.apache.spark.mllib.api.python.SerDe.pythonToJava(rdd._jrdd, True)
+
+
+def _py2java(sc, obj):
+    """ Convert Python object into Java """
+    if isinstance(obj, RDD):
+        obj = _to_java_object_rdd(obj)
+    elif isinstance(obj, DataFrame):
+        obj = obj._jdf
+    elif isinstance(obj, SparkContext):
+        obj = obj._jsc
+    elif isinstance(obj, list):
+        obj = [_py2java(sc, x) for x in obj]
+    elif isinstance(obj, JavaObject):
+        pass
+    elif isinstance(obj, (int, long, float, bool, bytes, unicode)):
+        pass
+    else:
+        data = bytearray(PickleSerializer().dumps(obj))
+        obj = sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(data)
+    return obj
+
+
+def _java2py(sc, r, encoding="bytes"):
+    if isinstance(r, JavaObject):
+        clsName = r.getClass().getSimpleName()
+        # convert RDD into JavaRDD
+        if clsName != 'JavaRDD' and clsName.endswith("RDD"):
+            r = r.toJavaRDD()
+            clsName = 'JavaRDD'
+
+        if clsName == 'JavaRDD':
+            jrdd = sc._jvm.org.apache.spark.mllib.api.python.SerDe.javaToPython(r)
+            return RDD(jrdd, sc)
+
+        if clsName == 'Dataset':
+            return DataFrame(r, SQLContext.getOrCreate(sc))
+
+        if clsName in _picklable_classes:
+            r = sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(r)
+        elif isinstance(r, (JavaArray, JavaList)):
+            try:
+                r = sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(r)
+            except Py4JJavaError:
+                pass  # not picklable
+
+    if isinstance(r, (bytearray, bytes)):
+        r = PickleSerializer().loads(bytes(r), encoding=encoding)
+    return r
+
+
+def callJavaFunc(sc, func, *args):
+    """ Call Java Function """
+    args = [_py2java(sc, a) for a in args]
+    return _java2py(sc, func(*args))
+
+
+def callMLlibFunc(name, *args):
+    """ Call API in PythonMLLibAPI """
+    sc = SparkContext.getOrCreate()
+    api = getattr(sc._jvm.PythonMLLibAPI(), name)
+    return callJavaFunc(sc, api, *args)
+
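+# How the helpers above fit together (illustrative note): callMLlibFunc looks up
+# a method on the JVM-side PythonMLLibAPI, _py2java converts each Python argument
+# (RDDs through Pyrolite, DataFrames via _jdf, primitives as-is), and _java2py
+# converts the result back (JavaRDDs become RDDs, Datasets become DataFrames,
+# pickled byte arrays are unpickled).  For example, the feature module's call
+#
+#     callMLlibFunc("normalizeVector", 2.0, _convert_to_vector([3.0, 4.0]))
+#
+# would come back through this round trip as DenseVector([0.6, 0.8]).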
+
+class JavaModelWrapper(object):
+    """
+    Wrapper for the model in the JVM.
+    """
+    def __init__(self, java_model):
+        self._sc = SparkContext.getOrCreate()
+        self._java_model = java_model
+
+    def __del__(self):
+        self._sc._gateway.detach(self._java_model)
+
+    def call(self, name, *a):
+        """Call method of java_model"""
+        return callJavaFunc(self._sc, getattr(self._java_model, name), *a)
+
+
+def inherit_doc(cls):
+    """
+    A decorator that makes a class inherit documentation from its parents.
+    """
+    for name, func in vars(cls).items():
+        # only inherit docstring for public functions
+        if name.startswith("_"):
+            continue
+        if not func.__doc__:
+            for parent in cls.__bases__:
+                parent_func = getattr(parent, name, None)
+                if parent_func and getattr(parent_func, "__doc__", None):
+                    func.__doc__ = parent_func.__doc__
+                    break
+    return cls
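+# Example use of the decorator above (illustrative): an overriding method that
+# has no docstring of its own picks up the parent's docstring.
+#
+#     class Base(object):
+#         def call(self):
+#             """Original documentation."""
+#
+#     @inherit_doc
+#     class Child(Base):
+#         def call(self):
+#             pass   # Child.call.__doc__ is now "Original documentation."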
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/evaluation.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..0bb0ca37c1ab6fc4db931d0d21c94384e52f77a9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/evaluation.py
@@ -0,0 +1,556 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import warnings
+
+from pyspark import since
+from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc
+from pyspark.sql import SQLContext
+from pyspark.sql.types import StructField, StructType, DoubleType, IntegerType, ArrayType
+
+__all__ = ['BinaryClassificationMetrics', 'RegressionMetrics',
+           'MulticlassMetrics', 'RankingMetrics']
+
+
+class BinaryClassificationMetrics(JavaModelWrapper):
+    """
+    Evaluator for binary classification.
+
+    :param scoreAndLabels: an RDD of (score, label) pairs
+
+    >>> scoreAndLabels = sc.parallelize([
+    ...     (0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)], 2)
+    >>> metrics = BinaryClassificationMetrics(scoreAndLabels)
+    >>> metrics.areaUnderROC
+    0.70...
+    >>> metrics.areaUnderPR
+    0.83...
+    >>> metrics.unpersist()
+
+    .. versionadded:: 1.4.0
+    """
+
+    def __init__(self, scoreAndLabels):
+        sc = scoreAndLabels.ctx
+        sql_ctx = SQLContext.getOrCreate(sc)
+        df = sql_ctx.createDataFrame(scoreAndLabels, schema=StructType([
+            StructField("score", DoubleType(), nullable=False),
+            StructField("label", DoubleType(), nullable=False)]))
+        java_class = sc._jvm.org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
+        java_model = java_class(df._jdf)
+        super(BinaryClassificationMetrics, self).__init__(java_model)
+
+    @property
+    @since('1.4.0')
+    def areaUnderROC(self):
+        """
+        Computes the area under the receiver operating characteristic
+        (ROC) curve.
+        """
+        return self.call("areaUnderROC")
+
+    @property
+    @since('1.4.0')
+    def areaUnderPR(self):
+        """
+        Computes the area under the precision-recall curve.
+        """
+        return self.call("areaUnderPR")
+
+    @since('1.4.0')
+    def unpersist(self):
+        """
+        Unpersists intermediate RDDs used in the computation.
+        """
+        self.call("unpersist")
+
+
+class RegressionMetrics(JavaModelWrapper):
+    """
+    Evaluator for regression.
+
+    :param predictionAndObservations: an RDD of (prediction,
+                                      observation) pairs.
+
+    >>> predictionAndObservations = sc.parallelize([
+    ...     (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)])
+    >>> metrics = RegressionMetrics(predictionAndObservations)
+    >>> metrics.explainedVariance
+    8.859...
+    >>> metrics.meanAbsoluteError
+    0.5...
+    >>> metrics.meanSquaredError
+    0.37...
+    >>> metrics.rootMeanSquaredError
+    0.61...
+    >>> metrics.r2
+    0.94...
+
+    .. versionadded:: 1.4.0
+    """
+
+    def __init__(self, predictionAndObservations):
+        sc = predictionAndObservations.ctx
+        sql_ctx = SQLContext.getOrCreate(sc)
+        df = sql_ctx.createDataFrame(predictionAndObservations, schema=StructType([
+            StructField("prediction", DoubleType(), nullable=False),
+            StructField("observation", DoubleType(), nullable=False)]))
+        java_class = sc._jvm.org.apache.spark.mllib.evaluation.RegressionMetrics
+        java_model = java_class(df._jdf)
+        super(RegressionMetrics, self).__init__(java_model)
+
+    @property
+    @since('1.4.0')
+    def explainedVariance(self):
+        r"""
+        Returns the explained variance regression score.
+        explainedVariance = :math:`1 - \frac{variance(y - \hat{y})}{variance(y)}`
+        """
+        return self.call("explainedVariance")
+
+    @property
+    @since('1.4.0')
+    def meanAbsoluteError(self):
+        """
+        Returns the mean absolute error, which is a risk function corresponding to the
+        expected value of the absolute error loss or l1-norm loss.
+        """
+        return self.call("meanAbsoluteError")
+
+    @property
+    @since('1.4.0')
+    def meanSquaredError(self):
+        """
+        Returns the mean squared error, which is a risk function corresponding to the
+        expected value of the squared error loss or quadratic loss.
+        """
+        return self.call("meanSquaredError")
+
+    @property
+    @since('1.4.0')
+    def rootMeanSquaredError(self):
+        """
+        Returns the root mean squared error, which is defined as the square root of
+        the mean squared error.
+        """
+        return self.call("rootMeanSquaredError")
+
+    @property
+    @since('1.4.0')
+    def r2(self):
+        """
+        Returns R^2, the coefficient of determination.
+        """
+        return self.call("r2")
+
+
+class MulticlassMetrics(JavaModelWrapper):
+    """
+    Evaluator for multiclass classification.
+
+    :param predictionAndLabels: an RDD of (prediction, label) pairs.
+
+    >>> predictionAndLabels = sc.parallelize([(0.0, 0.0), (0.0, 1.0), (0.0, 0.0),
+    ...     (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)])
+    >>> metrics = MulticlassMetrics(predictionAndLabels)
+    >>> metrics.confusionMatrix().toArray()
+    array([[ 2.,  1.,  1.],
+           [ 1.,  3.,  0.],
+           [ 0.,  0.,  1.]])
+    >>> metrics.falsePositiveRate(0.0)
+    0.2...
+    >>> metrics.precision(1.0)
+    0.75...
+    >>> metrics.recall(2.0)
+    1.0...
+    >>> metrics.fMeasure(0.0, 2.0)
+    0.52...
+    >>> metrics.accuracy
+    0.66...
+    >>> metrics.weightedFalsePositiveRate
+    0.19...
+    >>> metrics.weightedPrecision
+    0.68...
+    >>> metrics.weightedRecall
+    0.66...
+    >>> metrics.weightedFMeasure()
+    0.66...
+    >>> metrics.weightedFMeasure(2.0)
+    0.65...
+
+    .. versionadded:: 1.4.0
+    """
+
+    def __init__(self, predictionAndLabels):
+        sc = predictionAndLabels.ctx
+        sql_ctx = SQLContext.getOrCreate(sc)
+        df = sql_ctx.createDataFrame(predictionAndLabels, schema=StructType([
+            StructField("prediction", DoubleType(), nullable=False),
+            StructField("label", DoubleType(), nullable=False)]))
+        java_class = sc._jvm.org.apache.spark.mllib.evaluation.MulticlassMetrics
+        java_model = java_class(df._jdf)
+        super(MulticlassMetrics, self).__init__(java_model)
+
+    @since('1.4.0')
+    def confusionMatrix(self):
+        """
+        Returns the confusion matrix: predicted classes are in columns,
+        ordered by class label ascending, as in "labels".
+        """
+        return self.call("confusionMatrix")
+
+    @since('1.4.0')
+    def truePositiveRate(self, label):
+        """
+        Returns true positive rate for a given label (category).
+        """
+        return self.call("truePositiveRate", label)
+
+    @since('1.4.0')
+    def falsePositiveRate(self, label):
+        """
+        Returns false positive rate for a given label (category).
+        """
+        return self.call("falsePositiveRate", label)
+
+    @since('1.4.0')
+    def precision(self, label=None):
+        """
+        Returns precision or precision for a given label (category) if specified.
+        """
+        if label is None:
+            # note:: Deprecated in 2.0.0. Use accuracy.
+            warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning)
+            return self.call("precision")
+        else:
+            return self.call("precision", float(label))
+
+    @since('1.4.0')
+    def recall(self, label=None):
+        """
+        Returns recall or recall for a given label (category) if specified.
+        """
+        if label is None:
+            # note:: Deprecated in 2.0.0. Use accuracy.
+            warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning)
+            return self.call("recall")
+        else:
+            return self.call("recall", float(label))
+
+    @since('1.4.0')
+    def fMeasure(self, label=None, beta=None):
+        """
+        Returns f-measure or f-measure for a given label (category) if specified.
+        """
+        if beta is None:
+            if label is None:
+                # note:: Deprecated in 2.0.0. Use accuracy.
+                warnings.warn("Deprecated in 2.0.0. Use accuracy.", DeprecationWarning)
+                return self.call("fMeasure")
+            else:
+                return self.call("fMeasure", label)
+        else:
+            if label is None:
+                raise Exception("If the beta parameter is specified, label can not be none")
+            else:
+                return self.call("fMeasure", label, beta)
+
+    @property
+    @since('2.0.0')
+    def accuracy(self):
+        """
+        Returns accuracy (the fraction of correctly classified instances
+        out of the total number of instances).
+        """
+        return self.call("accuracy")
+
+    @property
+    @since('1.4.0')
+    def weightedTruePositiveRate(self):
+        """
+        Returns weighted true positive rate
+        (equivalent to weighted recall).
+        """
+        return self.call("weightedTruePositiveRate")
+
+    @property
+    @since('1.4.0')
+    def weightedFalsePositiveRate(self):
+        """
+        Returns weighted false positive rate.
+        """
+        return self.call("weightedFalsePositiveRate")
+
+    @property
+    @since('1.4.0')
+    def weightedRecall(self):
+        """
+        Returns weighted averaged recall
+        (equivalent to weighted true positive rate).
+        """
+        return self.call("weightedRecall")
+
+    @property
+    @since('1.4.0')
+    def weightedPrecision(self):
+        """
+        Returns weighted averaged precision.
+        """
+        return self.call("weightedPrecision")
+
+    @since('1.4.0')
+    def weightedFMeasure(self, beta=None):
+        """
+        Returns weighted averaged f-measure.
+        """
+        if beta is None:
+            return self.call("weightedFMeasure")
+        else:
+            return self.call("weightedFMeasure", beta)
+
+
+class RankingMetrics(JavaModelWrapper):
+    """
+    Evaluator for ranking algorithms.
+
+    :param predictionAndLabels: an RDD of (predicted ranking,
+                                ground truth set) pairs.
+
+    >>> predictionAndLabels = sc.parallelize([
+    ...     ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]),
+    ...     ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]),
+    ...     ([1, 2, 3, 4, 5], [])])
+    >>> metrics = RankingMetrics(predictionAndLabels)
+    >>> metrics.precisionAt(1)
+    0.33...
+    >>> metrics.precisionAt(5)
+    0.26...
+    >>> metrics.precisionAt(15)
+    0.17...
+    >>> metrics.meanAveragePrecision
+    0.35...
+    >>> metrics.ndcgAt(3)
+    0.33...
+    >>> metrics.ndcgAt(10)
+    0.48...
+
+    .. versionadded:: 1.4.0
+    """
+
+    def __init__(self, predictionAndLabels):
+        sc = predictionAndLabels.ctx
+        sql_ctx = SQLContext.getOrCreate(sc)
+        df = sql_ctx.createDataFrame(predictionAndLabels,
+                                     schema=sql_ctx._inferSchema(predictionAndLabels))
+        java_model = callMLlibFunc("newRankingMetrics", df._jdf)
+        super(RankingMetrics, self).__init__(java_model)
+
+    @since('1.4.0')
+    def precisionAt(self, k):
+        """
+        Compute the average precision of all the queries, truncated at ranking position k.
+
+        If for a query, the ranking algorithm returns n (n < k) results, the precision value
+        will be computed as #(relevant items retrieved) / k. This formula also applies when
+        the size of the ground truth set is less than k.
+
+        If a query has an empty ground truth set, zero will be used as precision together
+        with a log warning.
+        """
+        return self.call("precisionAt", int(k))
+
+    @property
+    @since('1.4.0')
+    def meanAveragePrecision(self):
+        """
+        Returns the mean average precision (MAP) of all the queries.
+        If a query has an empty ground truth set, the average precision will be zero and
+        a log warning is generated.
+        """
+        return self.call("meanAveragePrecision")
+
+    @since('1.4.0')
+    def ndcgAt(self, k):
+        """
+        Compute the average NDCG value of all the queries, truncated at ranking position k.
+        The discounted cumulative gain at position k is computed as:
+        sum_{i=1}^{k} (2^{relevance of the i-th item} - 1) / log(i + 1),
+        and the NDCG is obtained by dividing it by the DCG of the ideal
+        ranking computed on the ground truth set.
+        In the current implementation, the relevance value is binary.
+        If a query has an empty ground truth set, zero will be used as NDCG together with
+        a log warning.
+        """
+        return self.call("ndcgAt", int(k))
+
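+# Worked example for the ndcgAt(3) value in the doctest above (illustrative):
+# for the first query the top-3 predictions [1, 6, 2] hit the ground truth at
+# ranks 1 and 3, so with binary relevance and natural logarithms
+#
+#     DCG@3  = 1/log(2) + 0 + 1/log(4)        ~ 2.164
+#     IDCG@3 = 1/log(2) + 1/log(3) + 1/log(4) ~ 3.074
+#     NDCG@3 ~ 0.704
+#
+# The second query gives ~0.296, the query with an empty ground truth set gives
+# 0, and the mean over the three queries is ~0.33, matching metrics.ndcgAt(3).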
+
+class MultilabelMetrics(JavaModelWrapper):
+    """
+    Evaluator for multilabel classification.
+
+    :param predictionAndLabels: an RDD of (predictions, labels) pairs,
+                                where both are non-null arrays, each with
+                                unique elements.
+
+    >>> predictionAndLabels = sc.parallelize([([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]),
+    ...     ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]),
+    ...     ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])])
+    >>> metrics = MultilabelMetrics(predictionAndLabels)
+    >>> metrics.precision(0.0)
+    1.0
+    >>> metrics.recall(1.0)
+    0.66...
+    >>> metrics.f1Measure(2.0)
+    0.5
+    >>> metrics.precision()
+    0.66...
+    >>> metrics.recall()
+    0.64...
+    >>> metrics.f1Measure()
+    0.63...
+    >>> metrics.microPrecision
+    0.72...
+    >>> metrics.microRecall
+    0.66...
+    >>> metrics.microF1Measure
+    0.69...
+    >>> metrics.hammingLoss
+    0.33...
+    >>> metrics.subsetAccuracy
+    0.28...
+    >>> metrics.accuracy
+    0.54...
+
+    .. versionadded:: 1.4.0
+    """
+
+    def __init__(self, predictionAndLabels):
+        sc = predictionAndLabels.ctx
+        sql_ctx = SQLContext.getOrCreate(sc)
+        df = sql_ctx.createDataFrame(predictionAndLabels,
+                                     schema=sql_ctx._inferSchema(predictionAndLabels))
+        java_class = sc._jvm.org.apache.spark.mllib.evaluation.MultilabelMetrics
+        java_model = java_class(df._jdf)
+        super(MultilabelMetrics, self).__init__(java_model)
+
+    @since('1.4.0')
+    def precision(self, label=None):
+        """
+        Returns precision or precision for a given label (category) if specified.
+        """
+        if label is None:
+            return self.call("precision")
+        else:
+            return self.call("precision", float(label))
+
+    @since('1.4.0')
+    def recall(self, label=None):
+        """
+        Returns recall or recall for a given label (category) if specified.
+        """
+        if label is None:
+            return self.call("recall")
+        else:
+            return self.call("recall", float(label))
+
+    @since('1.4.0')
+    def f1Measure(self, label=None):
+        """
+        Returns f1Measure or f1Measure for a given label (category) if specified.
+        """
+        if label is None:
+            return self.call("f1Measure")
+        else:
+            return self.call("f1Measure", float(label))
+
+    @property
+    @since('1.4.0')
+    def microPrecision(self):
+        """
+        Returns micro-averaged label-based precision.
+        (equal to micro-averaged document-based precision)
+        """
+        return self.call("microPrecision")
+
+    @property
+    @since('1.4.0')
+    def microRecall(self):
+        """
+        Returns micro-averaged label-based recall.
+        (equal to micro-averaged document-based recall)
+        """
+        return self.call("microRecall")
+
+    @property
+    @since('1.4.0')
+    def microF1Measure(self):
+        """
+        Returns micro-averaged label-based f1-measure.
+        (equal to micro-averaged document-based f1-measure)
+        """
+        return self.call("microF1Measure")
+
+    @property
+    @since('1.4.0')
+    def hammingLoss(self):
+        """
+        Returns Hamming-loss.
+        """
+        return self.call("hammingLoss")
+
+    @property
+    @since('1.4.0')
+    def subsetAccuracy(self):
+        """
+        Returns subset accuracy, the fraction of examples whose predicted
+        label set exactly matches the true label set.
+        """
+        return self.call("subsetAccuracy")
+
+    @property
+    @since('1.4.0')
+    def accuracy(self):
+        """
+        Returns accuracy.
+        """
+        return self.call("accuracy")
+
+
+def _test():
+    import doctest
+    import numpy
+    from pyspark.sql import SparkSession
+    import pyspark.mllib.evaluation
+    try:
+        # Numpy 1.14+ changed its string format.
+        numpy.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+    globs = pyspark.mllib.evaluation.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.evaluation tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/feature.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/feature.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d7d4d61db043aeebb9ffcd5ddb973814444be97
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/feature.py
@@ -0,0 +1,826 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Python package for feature in MLlib.
+"""
+from __future__ import absolute_import
+
+import sys
+import warnings
+import random
+import binascii
+if sys.version >= '3':
+    basestring = str
+    unicode = str
+
+from py4j.protocol import Py4JJavaError
+
+from pyspark import since
+from pyspark.rdd import RDD, ignore_unicode_prefix
+from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
+from pyspark.mllib.linalg import (
+    Vector, Vectors, DenseVector, SparseVector, _convert_to_vector)
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.util import JavaLoader, JavaSaveable
+
+__all__ = ['Normalizer', 'StandardScalerModel', 'StandardScaler',
+           'HashingTF', 'IDFModel', 'IDF', 'Word2Vec', 'Word2VecModel',
+           'ChiSqSelector', 'ChiSqSelectorModel', 'ElementwiseProduct']
+
+
+class VectorTransformer(object):
+    """
+    .. note:: DeveloperApi
+
+    Base class for transformation of a vector or an RDD of vectors.
+    """
+    def transform(self, vector):
+        """
+        Applies transformation on a vector.
+
+        :param vector: vector to be transformed.
+        """
+        raise NotImplementedError
+
+
+class Normalizer(VectorTransformer):
+    r"""
+    Normalizes samples individually to unit L\ :sup:`p`\  norm
+
+    For any 1 <= `p` < float('inf'), normalizes samples using
+    sum(abs(vector) :sup:`p`) :sup:`(1/p)` as norm.
+
+    For `p` = float('inf'), max(abs(vector)) will be used as norm for
+    normalization.
+
+    :param p: Normalization in L^p space, p = 2 by default.
+
+    >>> v = Vectors.dense(range(3))
+    >>> nor = Normalizer(1)
+    >>> nor.transform(v)
+    DenseVector([0.0, 0.3333, 0.6667])
+
+    >>> rdd = sc.parallelize([v])
+    >>> nor.transform(rdd).collect()
+    [DenseVector([0.0, 0.3333, 0.6667])]
+
+    >>> nor2 = Normalizer(float("inf"))
+    >>> nor2.transform(v)
+    DenseVector([0.0, 0.5, 1.0])
+
+    .. versionadded:: 1.2.0
+    """
+    def __init__(self, p=2.0):
+        assert p >= 1.0, "p should be greater than or equal to 1.0"
+        self.p = float(p)
+
+    @since('1.2.0')
+    def transform(self, vector):
+        """
+        Applies unit length normalization on a vector.
+
+        :param vector: vector or RDD of vector to be normalized.
+        :return: normalized vector. If the norm of the input is zero, it
+                 will return the input vector.
+        """
+        if isinstance(vector, RDD):
+            vector = vector.map(_convert_to_vector)
+        else:
+            vector = _convert_to_vector(vector)
+        return callMLlibFunc("normalizeVector", self.p, vector)
+
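+# Worked check of the Normalizer doctest above (illustrative): for v = [0, 1, 2]
+# the L1 norm is 0 + 1 + 2 = 3, so the result is [0/3, 1/3, 2/3], i.e.
+# DenseVector([0.0, 0.3333, 0.6667]); with p = float("inf") the norm is
+# max(|v|) = 2, giving DenseVector([0.0, 0.5, 1.0]).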
+
+class JavaVectorTransformer(JavaModelWrapper, VectorTransformer):
+    """
+    Wrapper for the model in the JVM.
+    """
+
+    def transform(self, vector):
+        """
+        Applies transformation on a vector or an RDD[Vector].
+
+        .. note:: In Python, transform cannot currently be used within
+            an RDD transformation or action.
+            Call transform directly on the RDD instead.
+
+        :param vector: Vector or RDD of Vector to be transformed.
+        """
+        if isinstance(vector, RDD):
+            vector = vector.map(_convert_to_vector)
+        else:
+            vector = _convert_to_vector(vector)
+        return self.call("transform", vector)
+
+
+class StandardScalerModel(JavaVectorTransformer):
+    """
+    Represents a StandardScaler model that can transform vectors.
+
+    .. versionadded:: 1.2.0
+    """
+
+    @since('1.2.0')
+    def transform(self, vector):
+        """
+        Applies standardization transformation on a vector.
+
+        .. note:: In Python, transform cannot currently be used within
+            an RDD transformation or action.
+            Call transform directly on the RDD instead.
+
+        :param vector: Vector or RDD of Vector to be standardized.
+        :return: Standardized vector. If the variance of a column is
+                 zero, it will return default `0.0` for the column with
+                 zero variance.
+        """
+        return JavaVectorTransformer.transform(self, vector)
+
+    @since('1.4.0')
+    def setWithMean(self, withMean):
+        """
+        Setter of the boolean which decides
+        whether the data is centered with the mean or not.
+        """
+        self.call("setWithMean", withMean)
+        return self
+
+    @since('1.4.0')
+    def setWithStd(self, withStd):
+        """
+        Setter of the boolean which decides
+        whether the data is scaled to unit standard deviation or not.
+        """
+        self.call("setWithStd", withStd)
+        return self
+
+    @property
+    @since('2.0.0')
+    def withStd(self):
+        """
+        Returns if the model scales the data to unit standard deviation.
+        """
+        return self.call("withStd")
+
+    @property
+    @since('2.0.0')
+    def withMean(self):
+        """
+        Returns if the model centers the data before scaling.
+        """
+        return self.call("withMean")
+
+    @property
+    @since('2.0.0')
+    def std(self):
+        """
+        Return the column standard deviation values.
+        """
+        return self.call("std")
+
+    @property
+    @since('2.0.0')
+    def mean(self):
+        """
+        Return the column mean values.
+        """
+        return self.call("mean")
+
+
+class StandardScaler(object):
+    """
+    Standardizes features by removing the mean and scaling to unit
+    variance using column summary statistics on the samples in the
+    training set.
+
+    :param withMean: False by default. Centers the data with mean
+                     before scaling. It will build a dense output, so take
+                     care when applying to sparse input.
+    :param withStd: True by default. Scales the data to unit
+                    standard deviation.
+
+    >>> vs = [Vectors.dense([-2.0, 2.3, 0]), Vectors.dense([3.8, 0.0, 1.9])]
+    >>> dataset = sc.parallelize(vs)
+    >>> standardizer = StandardScaler(True, True)
+    >>> model = standardizer.fit(dataset)
+    >>> result = model.transform(dataset)
+    >>> for r in result.collect(): r
+    DenseVector([-0.7071, 0.7071, -0.7071])
+    DenseVector([0.7071, -0.7071, 0.7071])
+    >>> int(model.std[0])
+    4
+    >>> int(model.mean[0]*10)
+    9
+    >>> model.withStd
+    True
+    >>> model.withMean
+    True
+
+    .. versionadded:: 1.2.0
+    """
+    def __init__(self, withMean=False, withStd=True):
+        if not (withMean or withStd):
+            warnings.warn("Both withMean and withStd are false. The model does nothing.")
+        self.withMean = withMean
+        self.withStd = withStd
+
+    @since('1.2.0')
+    def fit(self, dataset):
+        """
+        Computes the mean and variance and stores as a model to be used
+        for later scaling.
+
+        :param dataset: The data used to compute the mean and variance
+                        to build the transformation model.
+        :return: a StandardScalerModel
+        """
+        dataset = dataset.map(_convert_to_vector)
+        jmodel = callMLlibFunc("fitStandardScaler", self.withMean, self.withStd, dataset)
+        return StandardScalerModel(jmodel)
+
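+# Worked check of the StandardScaler doctest above (illustrative): for the first
+# column the two samples are -2.0 and 3.8, so mean = 0.9 and the sample standard
+# deviation is sqrt(((-2.9)**2 + (2.9)**2) / (2 - 1)) ~ 4.10; hence
+# int(model.std[0]) == 4, int(model.mean[0] * 10) == 9, and the first
+# standardized value is (-2.0 - 0.9) / 4.10 ~ -0.7071, matching the transformed
+# vectors shown.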
+
+class ChiSqSelectorModel(JavaVectorTransformer):
+    """
+    Represents a Chi Squared selector model.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @since('1.4.0')
+    def transform(self, vector):
+        """
+        Applies transformation on a vector.
+
+        :param vector: Vector or RDD of Vector to be transformed.
+        :return: transformed vector.
+        """
+        return JavaVectorTransformer.transform(self, vector)
+
+
+class ChiSqSelector(object):
+    """
+    Creates a ChiSquared feature selector.
+    The selector supports different selection methods: `numTopFeatures`, `percentile`, `fpr`,
+    `fdr`, `fwe`.
+
+     * `numTopFeatures` chooses a fixed number of top features according to a chi-squared test.
+
+     * `percentile` is similar but chooses a fraction of all features
+       instead of a fixed number.
+
+     * `fpr` chooses all features whose p-values are below a threshold,
+       thus controlling the false positive rate of selection.
+
+     * `fdr` uses the `Benjamini-Hochberg procedure <https://en.wikipedia.org/wiki/
+       False_discovery_rate#Benjamini.E2.80.93Hochberg_procedure>`_
+       to choose all features whose false discovery rate is below a threshold.
+
+     * `fwe` chooses all features whose p-values are below a threshold. The threshold is scaled by
+       1/numFeatures, thus controlling the family-wise error rate of selection.
+
+    By default, the selection method is `numTopFeatures`, with the default number of top features
+    set to 50.
+
+    >>> data = sc.parallelize([
+    ...     LabeledPoint(0.0, SparseVector(3, {0: 8.0, 1: 7.0})),
+    ...     LabeledPoint(1.0, SparseVector(3, {1: 9.0, 2: 6.0})),
+    ...     LabeledPoint(1.0, [0.0, 9.0, 8.0]),
+    ...     LabeledPoint(2.0, [7.0, 9.0, 5.0]),
+    ...     LabeledPoint(2.0, [8.0, 7.0, 3.0])
+    ... ])
+    >>> model = ChiSqSelector(numTopFeatures=1).fit(data)
+    >>> model.transform(SparseVector(3, {1: 9.0, 2: 6.0}))
+    SparseVector(1, {})
+    >>> model.transform(DenseVector([7.0, 9.0, 5.0]))
+    DenseVector([7.0])
+    >>> model = ChiSqSelector(selectorType="fpr", fpr=0.2).fit(data)
+    >>> model.transform(SparseVector(3, {1: 9.0, 2: 6.0}))
+    SparseVector(1, {})
+    >>> model.transform(DenseVector([7.0, 9.0, 5.0]))
+    DenseVector([7.0])
+    >>> model = ChiSqSelector(selectorType="percentile", percentile=0.34).fit(data)
+    >>> model.transform(DenseVector([7.0, 9.0, 5.0]))
+    DenseVector([7.0])
+
+    .. versionadded:: 1.4.0
+    """
+    def __init__(self, numTopFeatures=50, selectorType="numTopFeatures", percentile=0.1, fpr=0.05,
+                 fdr=0.05, fwe=0.05):
+        self.numTopFeatures = numTopFeatures
+        self.selectorType = selectorType
+        self.percentile = percentile
+        self.fpr = fpr
+        self.fdr = fdr
+        self.fwe = fwe
+
+    @since('2.1.0')
+    def setNumTopFeatures(self, numTopFeatures):
+        """
+        set numTopFeatures for feature selection by number of top features.
+        Only applicable when selectorType = "numTopFeatures".
+        """
+        self.numTopFeatures = int(numTopFeatures)
+        return self
+
+    @since('2.1.0')
+    def setPercentile(self, percentile):
+        """
+        set percentile [0.0, 1.0] for feature selection by percentile.
+        Only applicable when selectorType = "percentile".
+        """
+        self.percentile = float(percentile)
+        return self
+
+    @since('2.1.0')
+    def setFpr(self, fpr):
+        """
+        set FPR [0.0, 1.0] for feature selection by FPR.
+        Only applicable when selectorType = "fpr".
+        """
+        self.fpr = float(fpr)
+        return self
+
+    @since('2.2.0')
+    def setFdr(self, fdr):
+        """
+        set FDR [0.0, 1.0] for feature selection by FDR.
+        Only applicable when selectorType = "fdr".
+        """
+        self.fdr = float(fdr)
+        return self
+
+    @since('2.2.0')
+    def setFwe(self, fwe):
+        """
+        set FWE [0.0, 1.0] for feature selection by FWE.
+        Only applicable when selectorType = "fwe".
+        """
+        self.fwe = float(fwe)
+        return self
+
+    @since('2.1.0')
+    def setSelectorType(self, selectorType):
+        """
+        set the selector type of the ChiSqSelector.
+        Supported options: "numTopFeatures" (default), "percentile", "fpr", "fdr", "fwe".
+        """
+        self.selectorType = str(selectorType)
+        return self
+
+    @since('1.4.0')
+    def fit(self, data):
+        """
+        Returns a ChiSquared feature selector.
+
+        :param data: an `RDD[LabeledPoint]` containing the labeled dataset
+                     with categorical features. Real-valued features will be
+                     treated as categorical for each distinct value.
+                     Apply feature discretizer before using this function.
+        """
+        jmodel = callMLlibFunc("fitChiSqSelector", self.selectorType, self.numTopFeatures,
+                               self.percentile, self.fpr, self.fdr, self.fwe, data)
+        return ChiSqSelectorModel(jmodel)
+
+
+class PCAModel(JavaVectorTransformer):
+    """
+    Model fitted by PCA that can project vectors to a low-dimensional space using PCA.
+
+    .. versionadded:: 1.5.0
+    """
+
+
+class PCA(object):
+    """
+    A feature transformer that projects vectors to a low-dimensional space using PCA.
+
+    >>> data = [Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),
+    ...     Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),
+    ...     Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0])]
+    >>> model = PCA(2).fit(sc.parallelize(data))
+    >>> pcArray = model.transform(Vectors.sparse(5, [(1, 1.0), (3, 7.0)])).toArray()
+    >>> pcArray[0]
+    1.648...
+    >>> pcArray[1]
+    -4.013...
+
+    .. versionadded:: 1.5.0
+    """
+    def __init__(self, k):
+        """
+        :param k: number of principal components.
+        """
+        self.k = int(k)
+
+    @since('1.5.0')
+    def fit(self, data):
+        """
+        Computes a PCAModel that contains the principal components of the input vectors.
+
+        :param data: source vectors
+        """
+        jmodel = callMLlibFunc("fitPCA", self.k, data)
+        return PCAModel(jmodel)
+
+
+class HashingTF(object):
+    """
+    Maps a sequence of terms to their term frequencies using the hashing
+    trick.
+
+    .. note:: The terms must be hashable (cannot be dict/set/list...).
+
+    :param numFeatures: number of features (default: 2^20)
+
+    >>> htf = HashingTF(100)
+    >>> doc = "a a b b c d".split(" ")
+    >>> htf.transform(doc)
+    SparseVector(100, {...})
+
+    .. versionadded:: 1.2.0
+    """
+    def __init__(self, numFeatures=1 << 20):
+        self.numFeatures = numFeatures
+        self.binary = False
+
+    @since("2.0.0")
+    def setBinary(self, value):
+        """
+        If True, the term frequency vector will be binary such that
+        non-zero term counts are set to 1.
+        (default: False)
+        """
+        self.binary = value
+        return self
+
+    @since('1.2.0')
+    def indexOf(self, term):
+        """ Returns the index of the input term. """
+        return hash(term) % self.numFeatures
+
+    @since('1.2.0')
+    def transform(self, document):
+        """
+        Transforms the input document (list of terms) to a term frequency
+        vector, or transforms an RDD of documents to an RDD of term
+        frequency vectors.
+        """
+        if isinstance(document, RDD):
+            return document.map(self.transform)
+
+        freq = {}
+        for term in document:
+            i = self.indexOf(term)
+            freq[i] = 1.0 if self.binary else freq.get(i, 0) + 1.0
+        return Vectors.sparse(self.numFeatures, freq.items())
+
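+# Note on the hashing trick above (illustrative): indexOf maps a term to
+# hash(term) % numFeatures, so two distinct terms can collide, in which case
+# their counts are summed into the same index.  For the doctest's
+# HashingTF(100) and document "a a b b c d", the resulting SparseVector has at
+# most four non-zero entries (fewer if any of the four terms collide).  Also
+# note that Python 3 salts str hashes per process unless PYTHONHASHSEED is
+# fixed, so the exact indices may differ between runs.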
+
+class IDFModel(JavaVectorTransformer):
+    """
+    Represents an IDF model that can transform term frequency vectors.
+
+    .. versionadded:: 1.2.0
+    """
+    @since('1.2.0')
+    def transform(self, x):
+        """
+        Transforms term frequency (TF) vectors to TF-IDF vectors.
+
+        If `minDocFreq` was set for the IDF calculation,
+        the terms which occur in fewer than `minDocFreq`
+        documents will have an entry of 0.
+
+        .. note:: In Python, transform cannot currently be used within
+            an RDD transformation or action.
+            Call transform directly on the RDD instead.
+
+        :param x: an RDD of term frequency vectors or a term frequency
+                  vector
+        :return: an RDD of TF-IDF vectors or a TF-IDF vector
+        """
+        return JavaVectorTransformer.transform(self, x)
+
+    @since('1.4.0')
+    def idf(self):
+        """
+        Returns the current IDF vector.
+        """
+        return self.call('idf')
+
+
+class IDF(object):
+    """
+    Inverse document frequency (IDF).
+
+    The standard formulation is used: `idf = log((m + 1) / (d(t) + 1))`,
+    where `m` is the total number of documents and `d(t)` is the number
+    of documents that contain term `t`.
+
+    This implementation supports filtering out terms which do not appear
+    in a minimum number of documents (controlled by the variable
+    `minDocFreq`). For terms that are not in at least `minDocFreq`
+    documents, the IDF is found as 0, resulting in TF-IDFs of 0.
+
+    :param minDocFreq: minimum number of documents in which a term
+                       should appear for filtering
+
+    >>> n = 4
+    >>> freqs = [Vectors.sparse(n, (1, 3), (1.0, 2.0)),
+    ...          Vectors.dense([0.0, 1.0, 2.0, 3.0]),
+    ...          Vectors.sparse(n, [1], [1.0])]
+    >>> data = sc.parallelize(freqs)
+    >>> idf = IDF()
+    >>> model = idf.fit(data)
+    >>> tfidf = model.transform(data)
+    >>> for r in tfidf.collect(): r
+    SparseVector(4, {1: 0.0, 3: 0.5754})
+    DenseVector([0.0, 0.0, 1.3863, 0.863])
+    SparseVector(4, {1: 0.0})
+    >>> model.transform(Vectors.dense([0.0, 1.0, 2.0, 3.0]))
+    DenseVector([0.0, 0.0, 1.3863, 0.863])
+    >>> model.transform([0.0, 1.0, 2.0, 3.0])
+    DenseVector([0.0, 0.0, 1.3863, 0.863])
+    >>> model.transform(Vectors.sparse(n, (1, 3), (1.0, 2.0)))
+    SparseVector(4, {1: 0.0, 3: 0.5754})
+
+    .. versionadded:: 1.2.0
+    """
+    def __init__(self, minDocFreq=0):
+        self.minDocFreq = minDocFreq
+
+    @since('1.2.0')
+    def fit(self, dataset):
+        """
+        Computes the inverse document frequency.
+
+        :param dataset: an RDD of term frequency vectors
+        """
+        if not isinstance(dataset, RDD):
+            raise TypeError("dataset should be an RDD of term frequency vectors")
+        jmodel = callMLlibFunc("fitIDF", self.minDocFreq, dataset.map(_convert_to_vector))
+        return IDFModel(jmodel)
+
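+# Worked check of the IDF doctest above (illustrative): with m = 3 documents,
+# term 1 appears in all three, so idf(1) = log((3 + 1) / (3 + 1)) = 0, while
+# term 3 appears in two, so idf(3) = log(4 / 3) ~ 0.2877.  The first document
+# has tf(3) = 2.0, giving the tf-idf entry 2.0 * 0.2877 ~ 0.5754 shown above.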
+
+class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader):
+    """
+    Class for the Word2Vec model.
+
+    .. versionadded:: 1.2.0
+    """
+    @since('1.2.0')
+    def transform(self, word):
+        """
+        Transforms a word to its vector representation
+
+        .. note:: Local use only
+
+        :param word: a word
+        :return: vector representation of word(s)
+        """
+        try:
+            return self.call("transform", word)
+        except Py4JJavaError:
+            raise ValueError("%s not found" % word)
+
+    @since('1.2.0')
+    def findSynonyms(self, word, num):
+        """
+        Find synonyms of a word
+
+        :param word: a word or a vector representation of word
+        :param num: number of synonyms to find
+        :return: array of (word, cosineSimilarity)
+
+        .. note:: Local use only
+        """
+        if not isinstance(word, basestring):
+            word = _convert_to_vector(word)
+        words, similarity = self.call("findSynonyms", word, num)
+        return zip(words, similarity)
+
+    @since('1.4.0')
+    def getVectors(self):
+        """
+        Returns a map of words to their vector representations.
+        """
+        return self.call("getVectors")
+
+    @classmethod
+    @since('1.5.0')
+    def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
+        jmodel = sc._jvm.org.apache.spark.mllib.feature \
+            .Word2VecModel.load(sc._jsc.sc(), path)
+        model = sc._jvm.org.apache.spark.mllib.api.python.Word2VecModelWrapper(jmodel)
+        return Word2VecModel(model)
+
+
+@ignore_unicode_prefix
+class Word2Vec(object):
+    """Word2Vec creates vector representation of words in a text corpus.
+    The algorithm first constructs a vocabulary from the corpus
+    and then learns vector representation of words in the vocabulary.
+    The vector representation can be used as features in
+    natural language processing and machine learning algorithms.
+
+    We use the skip-gram model in our implementation and the hierarchical
+    softmax method to train the model. The variable names in the
+    implementation match the original C implementation.
+
+    For original C implementation,
+    see https://code.google.com/p/word2vec/
+    For research papers, see
+    Efficient Estimation of Word Representations in Vector Space
+    and Distributed Representations of Words and Phrases and their
+    Compositionality.
+
+    >>> sentence = "a b " * 100 + "a c " * 10
+    >>> localDoc = [sentence, sentence]
+    >>> doc = sc.parallelize(localDoc).map(lambda line: line.split(" "))
+    >>> model = Word2Vec().setVectorSize(10).setSeed(42).fit(doc)
+
+    Querying for synonyms of a word will not return that word:
+
+    >>> syms = model.findSynonyms("a", 2)
+    >>> [s[0] for s in syms]
+    [u'b', u'c']
+
+    But querying for synonyms of a vector may return the word whose
+    representation is that vector:
+
+    >>> vec = model.transform("a")
+    >>> syms = model.findSynonyms(vec, 2)
+    >>> [s[0] for s in syms]
+    [u'a', u'b']
+
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> model.save(sc, path)
+    >>> sameModel = Word2VecModel.load(sc, path)
+    >>> model.transform("a") == sameModel.transform("a")
+    True
+    >>> syms = sameModel.findSynonyms("a", 2)
+    >>> [s[0] for s in syms]
+    [u'b', u'c']
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    .. versionadded:: 1.2.0
+
+    """
+    def __init__(self):
+        """
+        Construct Word2Vec instance
+        """
+        self.vectorSize = 100
+        self.learningRate = 0.025
+        self.numPartitions = 1
+        self.numIterations = 1
+        self.seed = None
+        self.minCount = 5
+        self.windowSize = 5
+
+    @since('1.2.0')
+    def setVectorSize(self, vectorSize):
+        """
+        Sets vector size (default: 100).
+        """
+        self.vectorSize = vectorSize
+        return self
+
+    @since('1.2.0')
+    def setLearningRate(self, learningRate):
+        """
+        Sets initial learning rate (default: 0.025).
+        """
+        self.learningRate = learningRate
+        return self
+
+    @since('1.2.0')
+    def setNumPartitions(self, numPartitions):
+        """
+        Sets number of partitions (default: 1). Use a small number for
+        accuracy.
+        """
+        self.numPartitions = numPartitions
+        return self
+
+    @since('1.2.0')
+    def setNumIterations(self, numIterations):
+        """
+        Sets number of iterations (default: 1), which should be smaller
+        than or equal to number of partitions.
+        """
+        self.numIterations = numIterations
+        return self
+
+    @since('1.2.0')
+    def setSeed(self, seed):
+        """
+        Sets random seed.
+        """
+        self.seed = seed
+        return self
+
+    @since('1.4.0')
+    def setMinCount(self, minCount):
+        """
+        Sets minCount, the minimum number of times a token must appear
+        to be included in the word2vec model's vocabulary (default: 5).
+        """
+        self.minCount = minCount
+        return self
+
+    @since('2.0.0')
+    def setWindowSize(self, windowSize):
+        """
+        Sets window size (default: 5).
+        """
+        self.windowSize = windowSize
+        return self
+
+    @since('1.2.0')
+    def fit(self, data):
+        """
+        Computes the vector representation of each word in the vocabulary.
+
+        :param data: training data, an RDD of lists of strings
+        :return: Word2VecModel instance
+        """
+        if not isinstance(data, RDD):
+            raise TypeError("data should be an RDD of list of string")
+        jmodel = callMLlibFunc("trainWord2VecModel", data, int(self.vectorSize),
+                               float(self.learningRate), int(self.numPartitions),
+                               int(self.numIterations), self.seed,
+                               int(self.minCount), int(self.windowSize))
+        return Word2VecModel(jmodel)
+
+
+class ElementwiseProduct(VectorTransformer):
+    """
+    Scales each element of the vector by the corresponding element of a
+    supplied weight vector, i.e. computes the elementwise (Hadamard) product.
+
+    >>> weight = Vectors.dense([1.0, 2.0, 3.0])
+    >>> eprod = ElementwiseProduct(weight)
+    >>> a = Vectors.dense([2.0, 1.0, 3.0])
+    >>> eprod.transform(a)
+    DenseVector([2.0, 2.0, 9.0])
+    >>> b = Vectors.dense([9.0, 3.0, 4.0])
+    >>> rdd = sc.parallelize([a, b])
+    >>> eprod.transform(rdd).collect()
+    [DenseVector([2.0, 2.0, 9.0]), DenseVector([9.0, 6.0, 12.0])]
+
+    .. versionadded:: 1.5.0
+    """
+    def __init__(self, scalingVector):
+        self.scalingVector = _convert_to_vector(scalingVector)
+
+    @since('1.5.0')
+    def transform(self, vector):
+        """
+        Computes the Hadamard product of the vector and the scaling vector.
+        """
+        if isinstance(vector, RDD):
+            vector = vector.map(_convert_to_vector)
+        else:
+            vector = _convert_to_vector(vector)
+        return callMLlibFunc("elementwiseProductVector", self.scalingVector, vector)
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    globs = globals().copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.feature tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    sys.path.pop(0)
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/fpm.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/fpm.py
new file mode 100644
index 0000000000000000000000000000000000000000..de18dad1f675d7790aea902f9c0a570a5201de2f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/fpm.py
@@ -0,0 +1,206 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+import numpy
+from numpy import array
+from collections import namedtuple
+
+from pyspark import SparkContext, since
+from pyspark.rdd import ignore_unicode_prefix
+from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc
+from pyspark.mllib.util import JavaSaveable, JavaLoader, inherit_doc
+
+__all__ = ['FPGrowth', 'FPGrowthModel', 'PrefixSpan', 'PrefixSpanModel']
+
+
+@inherit_doc
+@ignore_unicode_prefix
+class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader):
+    """
+    An FP-Growth model for mining frequent itemsets
+    using the Parallel FP-Growth algorithm.
+
+    >>> data = [["a", "b", "c"], ["a", "b", "d", "e"], ["a", "c", "e"], ["a", "c", "f"]]
+    >>> rdd = sc.parallelize(data, 2)
+    >>> model = FPGrowth.train(rdd, 0.6, 2)
+    >>> sorted(model.freqItemsets().collect())
+    [FreqItemset(items=[u'a'], freq=4), FreqItemset(items=[u'c'], freq=3), ...
+    >>> model_path = temp_path + "/fpm"
+    >>> model.save(sc, model_path)
+    >>> sameModel = FPGrowthModel.load(sc, model_path)
+    >>> sorted(model.freqItemsets().collect()) == sorted(sameModel.freqItemsets().collect())
+    True
+
+    .. versionadded:: 1.4.0
+    """
+
+    @since("1.4.0")
+    def freqItemsets(self):
+        """
+        Returns the frequent itemsets of this model.
+        """
+        return self.call("getFreqItemsets").map(lambda x: (FPGrowth.FreqItemset(x[0], x[1])))
+
+    @classmethod
+    @since("2.0.0")
+    def load(cls, sc, path):
+        """
+        Load a model from the given path.
+        """
+        model = cls._load_java(sc, path)
+        wrapper = sc._jvm.org.apache.spark.mllib.api.python.FPGrowthModelWrapper(model)
+        return FPGrowthModel(wrapper)
+
+
+class FPGrowth(object):
+    """
+    A Parallel FP-growth algorithm to mine frequent itemsets.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @classmethod
+    @since("1.4.0")
+    def train(cls, data, minSupport=0.3, numPartitions=-1):
+        """
+        Computes an FP-Growth model that contains frequent itemsets.
+
+        :param data:
+          The input data set, where each element contains a transaction.
+        :param minSupport:
+          The minimal support level.
+          (default: 0.3)
+        :param numPartitions:
+          The number of partitions used by parallel FP-growth. A value
+          of -1 uses the same number of partitions as the input data.
+          (default: -1)
+        """
+        model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions))
+        return FPGrowthModel(model)
+
+    class FreqItemset(namedtuple("FreqItemset", ["items", "freq"])):
+        """
+        Represents an (items, freq) tuple.
+
+        .. versionadded:: 1.4.0
+        """
+
+
+@inherit_doc
+@ignore_unicode_prefix
+class PrefixSpanModel(JavaModelWrapper):
+    """
+    Model fitted by PrefixSpan
+
+    >>> data = [
+    ...    [["a", "b"], ["c"]],
+    ...    [["a"], ["c", "b"], ["a", "b"]],
+    ...    [["a", "b"], ["e"]],
+    ...    [["f"]]]
+    >>> rdd = sc.parallelize(data, 2)
+    >>> model = PrefixSpan.train(rdd)
+    >>> sorted(model.freqSequences().collect())
+    [FreqSequence(sequence=[[u'a']], freq=3), FreqSequence(sequence=[[u'a'], [u'a']], freq=1), ...
+
+    .. versionadded:: 1.6.0
+    """
+
+    @since("1.6.0")
+    def freqSequences(self):
+        """Gets frequent sequences"""
+        return self.call("getFreqSequences").map(lambda x: PrefixSpan.FreqSequence(x[0], x[1]))
+
+
+class PrefixSpan(object):
+    """
+    A parallel PrefixSpan algorithm to mine frequent sequential patterns.
+    The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan:
+    Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth
+    (http://doi.org/10.1109/ICDE.2001.914830).
+
+    .. versionadded:: 1.6.0
+    """
+
+    @classmethod
+    @since("1.6.0")
+    def train(cls, data, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000):
+        """
+        Finds the complete set of frequent sequential patterns in the
+        input sequences of itemsets.
+
+        :param data:
+          The input data set, where each element contains a sequence of
+          itemsets.
+        :param minSupport:
+          The minimal support level of the sequential pattern; any
+          pattern that appears more than (minSupport *
+          size-of-the-dataset) times will be output.
+          (default: 0.1)
+        :param maxPatternLength:
+          The maximal length of the sequential pattern; any frequent
+          pattern no longer than maxPatternLength will be output.
+          (default: 10)
+        :param maxLocalProjDBSize:
+          The maximum number of items (including delimiters used in the
+          internal storage format) allowed in a projected database before
+          local processing. If a projected database exceeds this size,
+          another iteration of distributed prefix growth is run.
+          (default: 32000000)
+        """
+        model = callMLlibFunc("trainPrefixSpanModel",
+                              data, minSupport, maxPatternLength, maxLocalProjDBSize)
+        return PrefixSpanModel(model)
+
+    class FreqSequence(namedtuple("FreqSequence", ["sequence", "freq"])):
+        """
+        Represents a (sequence, freq) tuple.
+
+        .. versionadded:: 1.6.0
+        """
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    import pyspark.mllib.fpm
+    globs = pyspark.mllib.fpm.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.fpm tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    import tempfile
+
+    temp_path = tempfile.mkdtemp()
+    globs['temp_path'] = temp_path
+    try:
+        (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+        spark.stop()
+    finally:
+        from shutil import rmtree
+        try:
+            rmtree(temp_path)
+        except OSError:
+            pass
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/linalg/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/linalg/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4afd6666400b0c4c27f9f666efdfd11d1ed16f13
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/linalg/__init__.py
@@ -0,0 +1,1382 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+MLlib utilities for linear algebra. For dense vectors, MLlib
+uses the NumPy C{array} type, so you can simply pass NumPy arrays
+around. For sparse vectors, users can construct a L{SparseVector}
+object from MLlib or pass SciPy C{scipy.sparse} column vectors if
+SciPy is available in their environment.
+"""
+
+import sys
+import array
+import struct
+
+if sys.version >= '3':
+    basestring = str
+    xrange = range
+    import copyreg as copy_reg
+    long = int
+else:
+    from itertools import izip as zip
+    import copy_reg
+
+import numpy as np
+
+from pyspark import since
+from pyspark.ml import linalg as newlinalg
+from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \
+    IntegerType, ByteType, BooleanType
+
+
+__all__ = ['Vector', 'DenseVector', 'SparseVector', 'Vectors',
+           'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices',
+           'QRDecomposition']
+
+
+if sys.version_info[:2] == (2, 7):
+    # speed up pickling array in Python 2.7
+    def fast_pickle_array(ar):
+        return array.array, (ar.typecode, ar.tostring())
+    copy_reg.pickle(array.array, fast_pickle_array)
+
+
+# Check whether we have SciPy. MLlib works without it too, but if we have it, some methods,
+# such as _dot and _serialize_double_vector, start to support scipy.sparse matrices.
+
+try:
+    import scipy.sparse
+    _have_scipy = True
+except ImportError:
+    # No SciPy in environment, but that's okay
+    _have_scipy = False
+
+
+def _convert_to_vector(l):
+    if isinstance(l, Vector):
+        return l
+    elif type(l) in (array.array, np.array, np.ndarray, list, tuple, xrange):
+        return DenseVector(l)
+    elif _have_scipy and scipy.sparse.issparse(l):
+        assert l.shape[1] == 1, "Expected column vector"
+        # Make sure the converted csc_matrix has sorted indices.
+        csc = l.tocsc()
+        if not csc.has_sorted_indices:
+            csc.sort_indices()
+        return SparseVector(l.shape[0], csc.indices, csc.data)
+    else:
+        raise TypeError("Cannot convert type %s into Vector" % type(l))
+
+
+def _vector_size(v):
+    """
+    Returns the size of the vector.
+
+    >>> _vector_size([1., 2., 3.])
+    3
+    >>> _vector_size((1., 2., 3.))
+    3
+    >>> _vector_size(array.array('d', [1., 2., 3.]))
+    3
+    >>> _vector_size(np.zeros(3))
+    3
+    >>> _vector_size(np.zeros((3, 1)))
+    3
+    >>> _vector_size(np.zeros((1, 3)))
+    Traceback (most recent call last):
+        ...
+    ValueError: Cannot treat an ndarray of shape (1, 3) as a vector
+    """
+    if isinstance(v, Vector):
+        return len(v)
+    elif type(v) in (array.array, list, tuple, xrange):
+        return len(v)
+    elif type(v) == np.ndarray:
+        if v.ndim == 1 or (v.ndim == 2 and v.shape[1] == 1):
+            return len(v)
+        else:
+            raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape))
+    elif _have_scipy and scipy.sparse.issparse(v):
+        assert v.shape[1] == 1, "Expected column vector"
+        return v.shape[0]
+    else:
+        raise TypeError("Cannot treat type %s as a vector" % type(v))
+
+
+def _format_float(f, digits=4):
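+    # Round to at most `digits` decimal places and truncate the string form;
+    # used by the __repr__/__str__ helpers below.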
+    s = str(round(f, digits))
+    if '.' in s:
+        s = s[:s.index('.') + 1 + digits]
+    return s
+
+
+def _format_float_list(l):
+    return [_format_float(x) for x in l]
+
+
+def _double_to_long_bits(value):
+    if np.isnan(value):
+        value = float('nan')
+    # pack double into 64 bits, then unpack as long int
+    return struct.unpack('Q', struct.pack('d', value))[0]
+
+
+class VectorUDT(UserDefinedType):
+    """
+    SQL user-defined type (UDT) for Vector.
+    """
+
+    @classmethod
+    def sqlType(cls):
+        return StructType([
+            StructField("type", ByteType(), False),
+            StructField("size", IntegerType(), True),
+            StructField("indices", ArrayType(IntegerType(), False), True),
+            StructField("values", ArrayType(DoubleType(), False), True)])
+
+    @classmethod
+    def module(cls):
+        return "pyspark.mllib.linalg"
+
+    @classmethod
+    def scalaUDT(cls):
+        return "org.apache.spark.mllib.linalg.VectorUDT"
+
+    def serialize(self, obj):
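+        # Vectors are serialized as a (type, size, indices, values) row:
+        # type 0 encodes a SparseVector, type 1 a DenseVector (for which
+        # size and indices are left as None).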
+        if isinstance(obj, SparseVector):
+            indices = [int(i) for i in obj.indices]
+            values = [float(v) for v in obj.values]
+            return (0, obj.size, indices, values)
+        elif isinstance(obj, DenseVector):
+            values = [float(v) for v in obj]
+            return (1, None, None, values)
+        else:
+            raise TypeError("cannot serialize %r of type %r" % (obj, type(obj)))
+
+    def deserialize(self, datum):
+        assert len(datum) == 4, \
+            "VectorUDT.deserialize given row with length %d but requires 4" % len(datum)
+        tpe = datum[0]
+        if tpe == 0:
+            return SparseVector(datum[1], datum[2], datum[3])
+        elif tpe == 1:
+            return DenseVector(datum[3])
+        else:
+            raise ValueError("do not recognize type %r" % tpe)
+
+    def simpleString(self):
+        return "vector"
+
+
+class MatrixUDT(UserDefinedType):
+    """
+    SQL user-defined type (UDT) for Matrix.
+    """
+
+    @classmethod
+    def sqlType(cls):
+        return StructType([
+            StructField("type", ByteType(), False),
+            StructField("numRows", IntegerType(), False),
+            StructField("numCols", IntegerType(), False),
+            StructField("colPtrs", ArrayType(IntegerType(), False), True),
+            StructField("rowIndices", ArrayType(IntegerType(), False), True),
+            StructField("values", ArrayType(DoubleType(), False), True),
+            StructField("isTransposed", BooleanType(), False)])
+
+    @classmethod
+    def module(cls):
+        return "pyspark.mllib.linalg"
+
+    @classmethod
+    def scalaUDT(cls):
+        return "org.apache.spark.mllib.linalg.MatrixUDT"
+
+    def serialize(self, obj):
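+        # Matrices are serialized as a (type, numRows, numCols, colPtrs,
+        # rowIndices, values, isTransposed) row: type 0 encodes a SparseMatrix,
+        # type 1 a DenseMatrix (for which colPtrs and rowIndices are None).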
+        if isinstance(obj, SparseMatrix):
+            colPtrs = [int(i) for i in obj.colPtrs]
+            rowIndices = [int(i) for i in obj.rowIndices]
+            values = [float(v) for v in obj.values]
+            return (0, obj.numRows, obj.numCols, colPtrs,
+                    rowIndices, values, bool(obj.isTransposed))
+        elif isinstance(obj, DenseMatrix):
+            values = [float(v) for v in obj.values]
+            return (1, obj.numRows, obj.numCols, None, None, values,
+                    bool(obj.isTransposed))
+        else:
+            raise TypeError("cannot serialize type %r" % (type(obj)))
+
+    def deserialize(self, datum):
+        assert len(datum) == 7, \
+            "MatrixUDT.deserialize given row with length %d but requires 7" % len(datum)
+        tpe = datum[0]
+        if tpe == 0:
+            return SparseMatrix(*datum[1:])
+        elif tpe == 1:
+            return DenseMatrix(datum[1], datum[2], datum[5], datum[6])
+        else:
+            raise ValueError("do not recognize type %r" % tpe)
+
+    def simpleString(self):
+        return "matrix"
+
+
+class Vector(object):
+
+    __UDT__ = VectorUDT()
+
+    """
+    Abstract class for DenseVector and SparseVector
+    """
+    def toArray(self):
+        """
+        Convert the vector into a numpy.ndarray
+
+        :return: numpy.ndarray
+        """
+        raise NotImplementedError
+
+    def asML(self):
+        """
+        Convert this vector to the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+
+        :return: :py:class:`pyspark.ml.linalg.Vector`
+        """
+        raise NotImplementedError
+
+
+class DenseVector(Vector):
+    """
+    A dense vector represented by a value array. We use a NumPy array
+    for storage, and arithmetic operations are delegated to the
+    underlying NumPy array.
+
+    >>> v = Vectors.dense([1.0, 2.0])
+    >>> u = Vectors.dense([3.0, 4.0])
+    >>> v + u
+    DenseVector([4.0, 6.0])
+    >>> 2 - v
+    DenseVector([1.0, 0.0])
+    >>> v / 2
+    DenseVector([0.5, 1.0])
+    >>> v * u
+    DenseVector([3.0, 8.0])
+    >>> u / v
+    DenseVector([3.0, 2.0])
+    >>> u % 2
+    DenseVector([1.0, 0.0])
+    """
+    def __init__(self, ar):
+        if isinstance(ar, bytes):
+            ar = np.frombuffer(ar, dtype=np.float64)
+        elif not isinstance(ar, np.ndarray):
+            ar = np.array(ar, dtype=np.float64)
+        if ar.dtype != np.float64:
+            ar = ar.astype(np.float64)
+        self.array = ar
+
+    @staticmethod
+    def parse(s):
+        """
+        Parse string representation back into the DenseVector.
+
+        >>> DenseVector.parse(' [ 0.0,1.0,2.0,  3.0]')
+        DenseVector([0.0, 1.0, 2.0, 3.0])
+        """
+        start = s.find('[')
+        if start == -1:
+            raise ValueError("Array should start with '['.")
+        end = s.find(']')
+        if end == -1:
+            raise ValueError("Array should end with ']'.")
+        s = s[start + 1: end]
+
+        try:
+            values = [float(val) for val in s.split(',') if val]
+        except ValueError:
+            raise ValueError("Unable to parse values from %s" % s)
+        return DenseVector(values)
+
+    def __reduce__(self):
+        return DenseVector, (self.array.tostring(),)
+
+    def numNonzeros(self):
+        """
+        Number of nonzero elements. This scans all active values and counts nonzeros.
+        """
+        return np.count_nonzero(self.array)
+
+    def norm(self, p):
+        """
+        Calculates the norm of a DenseVector.
+
+        >>> a = DenseVector([0, -1, 2, -3])
+        >>> a.norm(2)
+        3.7...
+        >>> a.norm(1)
+        6.0
+        """
+        return np.linalg.norm(self.array, p)
+
+    def dot(self, other):
+        """
+        Compute the dot product of two Vectors. We support NumPy arrays,
+        lists, SparseVectors, and SciPy sparse vectors as the other operand,
+        as well as a target NumPy array that is either 1- or 2-dimensional.
+        Equivalent to calling numpy.dot on the two vectors.
+
+        >>> dense = DenseVector(array.array('d', [1., 2.]))
+        >>> dense.dot(dense)
+        5.0
+        >>> dense.dot(SparseVector(2, [0, 1], [2., 1.]))
+        4.0
+        >>> dense.dot(range(1, 3))
+        5.0
+        >>> dense.dot(np.array(range(1, 3)))
+        5.0
+        >>> dense.dot([1.,])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> dense.dot(np.reshape([1., 2., 3., 4.], (2, 2), order='F'))
+        array([  5.,  11.])
+        >>> dense.dot(np.reshape([1., 2., 3.], (3, 1), order='F'))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+        if type(other) == np.ndarray:
+            if other.ndim > 1:
+                assert len(self) == other.shape[0], "dimension mismatch"
+            return np.dot(self.array, other)
+        elif _have_scipy and scipy.sparse.issparse(other):
+            assert len(self) == other.shape[0], "dimension mismatch"
+            return other.transpose().dot(self.toArray())
+        else:
+            assert len(self) == _vector_size(other), "dimension mismatch"
+            if isinstance(other, SparseVector):
+                return other.dot(self)
+            elif isinstance(other, Vector):
+                return np.dot(self.toArray(), other.toArray())
+            else:
+                return np.dot(self.toArray(), other)
+
+    def squared_distance(self, other):
+        """
+        Squared distance between two Vectors.
+
+        >>> dense1 = DenseVector(array.array('d', [1., 2.]))
+        >>> dense1.squared_distance(dense1)
+        0.0
+        >>> dense2 = np.array([2., 1.])
+        >>> dense1.squared_distance(dense2)
+        2.0
+        >>> dense3 = [2., 1.]
+        >>> dense1.squared_distance(dense3)
+        2.0
+        >>> sparse1 = SparseVector(2, [0, 1], [2., 1.])
+        >>> dense1.squared_distance(sparse1)
+        2.0
+        >>> dense1.squared_distance([1.,])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> dense1.squared_distance(SparseVector(1, [0,], [1.,]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+        assert len(self) == _vector_size(other), "dimension mismatch"
+        if isinstance(other, SparseVector):
+            return other.squared_distance(self)
+        elif _have_scipy and scipy.sparse.issparse(other):
+            return _convert_to_vector(other).squared_distance(self)
+
+        if isinstance(other, Vector):
+            other = other.toArray()
+        elif not isinstance(other, np.ndarray):
+            other = np.array(other)
+        diff = self.toArray() - other
+        return np.dot(diff, diff)
+
+    def toArray(self):
+        """
+        Returns a numpy.ndarray
+        """
+        return self.array
+
+    def asML(self):
+        """
+        Convert this vector to the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+
+        :return: :py:class:`pyspark.ml.linalg.DenseVector`
+
+        .. versionadded:: 2.0.0
+        """
+        return newlinalg.DenseVector(self.array)
+
+    @property
+    def values(self):
+        """
+        Returns the underlying numpy.ndarray of values.
+        """
+        return self.array
+
+    def __getitem__(self, item):
+        return self.array[item]
+
+    def __len__(self):
+        return len(self.array)
+
+    def __str__(self):
+        return "[" + ",".join([str(v) for v in self.array]) + "]"
+
+    def __repr__(self):
+        return "DenseVector([%s])" % (', '.join(_format_float(i) for i in self.array))
+
+    def __eq__(self, other):
+        if isinstance(other, DenseVector):
+            return np.array_equal(self.array, other.array)
+        elif isinstance(other, SparseVector):
+            if len(self) != other.size:
+                return False
+            return Vectors._equals(list(xrange(len(self))), self.array, other.indices, other.values)
+        return False
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
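+        # 31-based running hash over the (index, value-bit-pattern) pairs of
+        # the first 128 nonzero entries, consistent with SparseVector.__hash__
+        # so that equal dense and sparse vectors hash to the same value.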
+        size = len(self)
+        result = 31 + size
+        nnz = 0
+        i = 0
+        while i < size and nnz < 128:
+            if self.array[i] != 0:
+                result = 31 * result + i
+                bits = _double_to_long_bits(self.array[i])
+                result = 31 * result + (bits ^ (bits >> 32))
+                nnz += 1
+            i += 1
+        return result
+
+    def __getattr__(self, item):
+        return getattr(self.array, item)
+
+    def _delegate(op):
+        def func(self, other):
+            if isinstance(other, DenseVector):
+                other = other.array
+            return DenseVector(getattr(self.array, op)(other))
+        return func
+
+    __neg__ = _delegate("__neg__")
+    __add__ = _delegate("__add__")
+    __sub__ = _delegate("__sub__")
+    __mul__ = _delegate("__mul__")
+    __div__ = _delegate("__div__")
+    __truediv__ = _delegate("__truediv__")
+    __mod__ = _delegate("__mod__")
+    __radd__ = _delegate("__radd__")
+    __rsub__ = _delegate("__rsub__")
+    __rmul__ = _delegate("__rmul__")
+    __rdiv__ = _delegate("__rdiv__")
+    __rtruediv__ = _delegate("__rtruediv__")
+    __rmod__ = _delegate("__rmod__")
+
+
+class SparseVector(Vector):
+    """
+    A simple sparse vector class for passing data to MLlib. Users may
+    alternatively pass SciPy's C{scipy.sparse} data types.
+    """
+    def __init__(self, size, *args):
+        """
+        Create a sparse vector, using either a dictionary, a list of
+        (index, value) pairs, or two separate arrays of indices and
+        values (sorted by index).
+
+        :param size: Size of the vector.
+        :param args: Active entries, as a dictionary {index: value, ...},
+          a list of tuples [(index, value), ...], or a list of strictly
+          increasing indices and a list of corresponding values [index, ...],
+          [value, ...]. Inactive entries are treated as zeros.
+
+        >>> SparseVector(4, {1: 1.0, 3: 5.5})
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> SparseVector(4, [(1, 1.0), (3, 5.5)])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> SparseVector(4, [1, 3], [1.0, 5.5])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        """
+        self.size = int(size)
+        """ Size of the vector. """
+        assert 1 <= len(args) <= 2, "must pass either 2 or 3 arguments"
+        if len(args) == 1:
+            pairs = args[0]
+            if type(pairs) == dict:
+                pairs = pairs.items()
+            pairs = sorted(pairs)
+            self.indices = np.array([p[0] for p in pairs], dtype=np.int32)
+            """ A list of indices corresponding to active entries. """
+            self.values = np.array([p[1] for p in pairs], dtype=np.float64)
+            """ A list of values corresponding to active entries. """
+        else:
+            if isinstance(args[0], bytes):
+                assert isinstance(args[1], bytes), "values should be string too"
+                if args[0]:
+                    self.indices = np.frombuffer(args[0], np.int32)
+                    self.values = np.frombuffer(args[1], np.float64)
+                else:
+                    # np.frombuffer() doesn't work well with an empty string in older versions
+                    self.indices = np.array([], dtype=np.int32)
+                    self.values = np.array([], dtype=np.float64)
+            else:
+                self.indices = np.array(args[0], dtype=np.int32)
+                self.values = np.array(args[1], dtype=np.float64)
+            assert len(self.indices) == len(self.values), "index and value arrays not same length"
+            for i in xrange(len(self.indices) - 1):
+                if self.indices[i] >= self.indices[i + 1]:
+                    raise TypeError(
+                        "Indices %s and %s are not strictly increasing"
+                        % (self.indices[i], self.indices[i + 1]))
+
+    def numNonzeros(self):
+        """
+        Number of nonzero elements. This scans all active values and counts nonzeros.
+        """
+        return np.count_nonzero(self.values)
+
+    def norm(self, p):
+        """
+        Calculates the norm of a SparseVector.
+
+        >>> a = SparseVector(4, [0, 1], [3., -4.])
+        >>> a.norm(1)
+        7.0
+        >>> a.norm(2)
+        5.0
+        """
+        return np.linalg.norm(self.values, p)
+
+    def __reduce__(self):
+        return (
+            SparseVector,
+            (self.size, self.indices.tostring(), self.values.tostring()))
+
+    @staticmethod
+    def parse(s):
+        """
+        Parse string representation back into the SparseVector.
+
+        >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )')
+        SparseVector(4, {0: 4.0, 1: 5.0})
+        """
+        start = s.find('(')
+        if start == -1:
+            raise ValueError("Tuple should start with '('")
+        end = s.find(')')
+        if end == -1:
+            raise ValueError("Tuple should end with ')'")
+        s = s[start + 1: end].strip()
+
+        size = s[: s.find(',')]
+        try:
+            size = int(size)
+        except ValueError:
+            raise ValueError("Cannot parse size %s." % size)
+
+        ind_start = s.find('[')
+        if ind_start == -1:
+            raise ValueError("Indices array should start with '['.")
+        ind_end = s.find(']')
+        if ind_end == -1:
+            raise ValueError("Indices array should end with ']'")
+        new_s = s[ind_start + 1: ind_end]
+        ind_list = new_s.split(',')
+        try:
+            indices = [int(ind) for ind in ind_list if ind]
+        except ValueError:
+            raise ValueError("Unable to parse indices from %s." % new_s)
+        s = s[ind_end + 1:].strip()
+
+        val_start = s.find('[')
+        if val_start == -1:
+            raise ValueError("Values array should start with '['.")
+        val_end = s.find(']')
+        if val_end == -1:
+            raise ValueError("Values array should end with ']'.")
+        val_list = s[val_start + 1: val_end].split(',')
+        try:
+            values = [float(val) for val in val_list if val]
+        except ValueError:
+            raise ValueError("Unable to parse values from %s." % s)
+        return SparseVector(size, indices, values)
+
+    def dot(self, other):
+        """
+        Dot product with a SparseVector or 1- or 2-dimensional Numpy array.
+
+        >>> a = SparseVector(4, [1, 3], [3.0, 4.0])
+        >>> a.dot(a)
+        25.0
+        >>> a.dot(array.array('d', [1., 2., 3., 4.]))
+        22.0
+        >>> b = SparseVector(4, [2], [1.0])
+        >>> a.dot(b)
+        0.0
+        >>> a.dot(np.array([[1, 1], [2, 2], [3, 3], [4, 4]]))
+        array([ 22.,  22.])
+        >>> a.dot([1., 2., 3.])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> a.dot(np.array([1., 2.]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> a.dot(DenseVector([1., 2.]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> a.dot(np.zeros((3, 2)))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+
+        if isinstance(other, np.ndarray):
+            if other.ndim not in [2, 1]:
+                raise ValueError("Cannot call dot with %d-dimensional array" % other.ndim)
+            assert len(self) == other.shape[0], "dimension mismatch"
+            return np.dot(self.values, other[self.indices])
+
+        assert len(self) == _vector_size(other), "dimension mismatch"
+
+        if isinstance(other, DenseVector):
+            return np.dot(other.array[self.indices], self.values)
+
+        elif isinstance(other, SparseVector):
+            # Find out common indices.
+            self_cmind = np.in1d(self.indices, other.indices, assume_unique=True)
+            self_values = self.values[self_cmind]
+            if self_values.size == 0:
+                return 0.0
+            else:
+                other_cmind = np.in1d(other.indices, self.indices, assume_unique=True)
+                return np.dot(self_values, other.values[other_cmind])
+
+        else:
+            return self.dot(_convert_to_vector(other))
+
+    def squared_distance(self, other):
+        """
+        Squared distance from a SparseVector or 1-dimensional NumPy array.
+
+        >>> a = SparseVector(4, [1, 3], [3.0, 4.0])
+        >>> a.squared_distance(a)
+        0.0
+        >>> a.squared_distance(array.array('d', [1., 2., 3., 4.]))
+        11.0
+        >>> a.squared_distance(np.array([1., 2., 3., 4.]))
+        11.0
+        >>> b = SparseVector(4, [2], [1.0])
+        >>> a.squared_distance(b)
+        26.0
+        >>> b.squared_distance(a)
+        26.0
+        >>> b.squared_distance([1., 2.])
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        >>> b.squared_distance(SparseVector(3, [1,], [1.0,]))
+        Traceback (most recent call last):
+            ...
+        AssertionError: dimension mismatch
+        """
+        assert len(self) == _vector_size(other), "dimension mismatch"
+
+        if isinstance(other, np.ndarray) or isinstance(other, DenseVector):
+            if isinstance(other, np.ndarray) and other.ndim != 1:
+                raise Exception("Cannot call squared_distance with %d-dimensional array" %
+                                other.ndim)
+            if isinstance(other, DenseVector):
+                other = other.array
+            sparse_ind = np.zeros(other.size, dtype=bool)
+            sparse_ind[self.indices] = True
+            dist = other[sparse_ind] - self.values
+            result = np.dot(dist, dist)
+
+            other_ind = other[~sparse_ind]
+            result += np.dot(other_ind, other_ind)
+            return result
+
+        elif isinstance(other, SparseVector):
+            result = 0.0
+            i, j = 0, 0
+            while i < len(self.indices) and j < len(other.indices):
+                if self.indices[i] == other.indices[j]:
+                    diff = self.values[i] - other.values[j]
+                    result += diff * diff
+                    i += 1
+                    j += 1
+                elif self.indices[i] < other.indices[j]:
+                    result += self.values[i] * self.values[i]
+                    i += 1
+                else:
+                    result += other.values[j] * other.values[j]
+                    j += 1
+            while i < len(self.indices):
+                result += self.values[i] * self.values[i]
+                i += 1
+            while j < len(other.indices):
+                result += other.values[j] * other.values[j]
+                j += 1
+            return result
+        else:
+            return self.squared_distance(_convert_to_vector(other))
+
+    def toArray(self):
+        """
+        Returns a copy of this SparseVector as a 1-dimensional NumPy array.
+        """
+        arr = np.zeros((self.size,), dtype=np.float64)
+        arr[self.indices] = self.values
+        return arr
+
+    def asML(self):
+        """
+        Convert this vector to the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+
+        :return: :py:class:`pyspark.ml.linalg.SparseVector`
+
+        .. versionadded:: 2.0.0
+        """
+        return newlinalg.SparseVector(self.size, self.indices, self.values)
+
+    def __len__(self):
+        return self.size
+
+    def __str__(self):
+        inds = "[" + ",".join([str(i) for i in self.indices]) + "]"
+        vals = "[" + ",".join([str(v) for v in self.values]) + "]"
+        return "(" + ",".join((str(self.size), inds, vals)) + ")"
+
+    def __repr__(self):
+        inds = self.indices
+        vals = self.values
+        entries = ", ".join(["{0}: {1}".format(inds[i], _format_float(vals[i]))
+                             for i in xrange(len(inds))])
+        return "SparseVector({0}, {{{1}}})".format(self.size, entries)
+
+    def __eq__(self, other):
+        if isinstance(other, SparseVector):
+            return other.size == self.size and np.array_equal(other.indices, self.indices) \
+                and np.array_equal(other.values, self.values)
+        elif isinstance(other, DenseVector):
+            if self.size != len(other):
+                return False
+            return Vectors._equals(self.indices, self.values, list(xrange(len(other))), other.array)
+        return False
+
+    def __getitem__(self, index):
+        inds = self.indices
+        vals = self.values
+        if not isinstance(index, int):
+            raise TypeError(
+                "Indices must be of type integer, got type %s" % type(index))
+
+        if index >= self.size or index < -self.size:
+            raise IndexError("Index %d out of bounds." % index)
+        if index < 0:
+            index += self.size
+
+        if (inds.size == 0) or (index > inds.item(-1)):
+            return 0.
+
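+        # Binary-search the sorted indices; return the stored value when the
+        # requested index is active, otherwise the implicit zero.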
+        insert_index = np.searchsorted(inds, index)
+        row_ind = inds[insert_index]
+        if row_ind == index:
+            return vals[insert_index]
+        return 0.
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        result = 31 + self.size
+        nnz = 0
+        i = 0
+        while i < len(self.values) and nnz < 128:
+            if self.values[i] != 0:
+                result = 31 * result + int(self.indices[i])
+                bits = _double_to_long_bits(self.values[i])
+                result = 31 * result + (bits ^ (bits >> 32))
+                nnz += 1
+            i += 1
+        return result
+
+
+class Vectors(object):
+
+    """
+    Factory methods for working with vectors.
+
+    .. note:: Dense vectors are simply represented as NumPy array objects,
+        so there is no need to convert them for use in MLlib. For sparse vectors,
+        the factory methods in this class create an MLlib-compatible type, or users
+        can pass in SciPy's C{scipy.sparse} column vectors.
+    """
+
+    @staticmethod
+    def sparse(size, *args):
+        """
+        Create a sparse vector, using either a dictionary, a list of
+        (index, value) pairs, or two separate arrays of indices and
+        values (sorted by index).
+
+        :param size: Size of the vector.
+        :param args: Non-zero entries, as a dictionary, list of tuples,
+                     or two sorted lists containing indices and values.
+
+        >>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        >>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
+        SparseVector(4, {1: 1.0, 3: 5.5})
+        """
+        return SparseVector(size, *args)
+
+    @staticmethod
+    def dense(*elements):
+        """
+        Create a dense vector of 64-bit floats from a Python list or from individual numbers.
+
+        >>> Vectors.dense([1, 2, 3])
+        DenseVector([1.0, 2.0, 3.0])
+        >>> Vectors.dense(1.0, 2.0)
+        DenseVector([1.0, 2.0])
+        """
+        if len(elements) == 1 and not isinstance(elements[0], (float, int, long)):
+            # it's list, numpy.array or other iterable object.
+            elements = elements[0]
+        return DenseVector(elements)
+
+    @staticmethod
+    def fromML(vec):
+        """
+        Convert a vector from the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+
+        :param vec: a :py:class:`pyspark.ml.linalg.Vector`
+        :return: a :py:class:`pyspark.mllib.linalg.Vector`
+
+        .. versionadded:: 2.0.0
+        """
+        if isinstance(vec, newlinalg.DenseVector):
+            return DenseVector(vec.array)
+        elif isinstance(vec, newlinalg.SparseVector):
+            return SparseVector(vec.size, vec.indices, vec.values)
+        else:
+            raise TypeError("Unsupported vector type %s" % type(vec))
+
+    @staticmethod
+    def stringify(vector):
+        """
+        Converts a vector into a string, which can be recognized by
+        Vectors.parse().
+
+        >>> Vectors.stringify(Vectors.sparse(2, [1], [1.0]))
+        '(2,[1],[1.0])'
+        >>> Vectors.stringify(Vectors.dense([0.0, 1.0]))
+        '[0.0,1.0]'
+        """
+        return str(vector)
+
+    @staticmethod
+    def squared_distance(v1, v2):
+        """
+        Squared distance between two vectors.
+        v1 and v2 can be of type SparseVector, DenseVector, np.ndarray
+        or array.array.
+
+        >>> a = Vectors.sparse(4, [(0, 1), (3, 4)])
+        >>> b = Vectors.dense([2, 5, 4, 1])
+        >>> Vectors.squared_distance(a, b)
+        51.0
+        """
+        v1, v2 = _convert_to_vector(v1), _convert_to_vector(v2)
+        return v1.squared_distance(v2)
+
+    @staticmethod
+    def norm(vector, p):
+        """
+        Find the p-norm of the given vector.
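+
+        For example, the Euclidean (p=2) and L1 (p=1) norms:
+
+        >>> Vectors.norm(Vectors.dense([3.0, -4.0]), 2)
+        5.0
+        >>> Vectors.norm(Vectors.dense([3.0, -4.0]), 1)
+        7.0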
+        """
+        return _convert_to_vector(vector).norm(p)
+
+    @staticmethod
+    def parse(s):
+        """Parse a string representation back into the Vector.
+
+        >>> Vectors.parse('[2,1,2 ]')
+        DenseVector([2.0, 1.0, 2.0])
+        >>> Vectors.parse(' ( 100,  [0],  [2])')
+        SparseVector(100, {0: 2.0})
+        """
+        if s.find('(') == -1 and s.find('[') != -1:
+            return DenseVector.parse(s)
+        elif s.find('(') != -1:
+            return SparseVector.parse(s)
+        else:
+            raise ValueError(
+                "Cannot find tokens '[' or '(' from the input string.")
+
+    @staticmethod
+    def zeros(size):
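+        """
+        Create a dense vector of all zeros.
+
+        >>> Vectors.zeros(3)
+        DenseVector([0.0, 0.0, 0.0])
+        """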
+        return DenseVector(np.zeros(size))
+
+    @staticmethod
+    def _equals(v1_indices, v1_values, v2_indices, v2_values):
+        """
+        Check equality between sparse/dense vectors;
+        v1_indices and v2_indices are assumed to be strictly increasing.
+        """
+        v1_size = len(v1_values)
+        v2_size = len(v2_values)
+        k1 = 0
+        k2 = 0
+        all_equal = True
+        while all_equal:
+            while k1 < v1_size and v1_values[k1] == 0:
+                k1 += 1
+            while k2 < v2_size and v2_values[k2] == 0:
+                k2 += 1
+
+            if k1 >= v1_size or k2 >= v2_size:
+                return k1 >= v1_size and k2 >= v2_size
+
+            all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
+            k1 += 1
+            k2 += 1
+        return all_equal
+
+
+class Matrix(object):
+
+    __UDT__ = MatrixUDT()
+
+    """
+    Represents a local matrix.
+    """
+    def __init__(self, numRows, numCols, isTransposed=False):
+        self.numRows = numRows
+        self.numCols = numCols
+        self.isTransposed = isTransposed
+
+    def toArray(self):
+        """
+        Returns its elements in a NumPy ndarray.
+        """
+        raise NotImplementedError
+
+    def asML(self):
+        """
+        Convert this matrix to the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def _convert_to_array(array_like, dtype):
+        """
+        Convert Matrix attributes which are array-like or buffer to array.
+        """
+        if isinstance(array_like, bytes):
+            return np.frombuffer(array_like, dtype=dtype)
+        return np.asarray(array_like, dtype=dtype)
+
+
+class DenseMatrix(Matrix):
+    """
+    Column-major dense matrix.
+    """
+    def __init__(self, numRows, numCols, values, isTransposed=False):
+        Matrix.__init__(self, numRows, numCols, isTransposed)
+        values = self._convert_to_array(values, np.float64)
+        assert len(values) == numRows * numCols
+        self.values = values
+
+    def __reduce__(self):
+        return DenseMatrix, (
+            self.numRows, self.numCols, self.values.tostring(),
+            int(self.isTransposed))
+
+    def __str__(self):
+        """
+        Pretty printing of a DenseMatrix
+
+        >>> dm = DenseMatrix(2, 2, range(4))
+        >>> print(dm)
+        DenseMatrix([[ 0.,  2.],
+                     [ 1.,  3.]])
+        >>> dm = DenseMatrix(2, 2, range(4), isTransposed=True)
+        >>> print(dm)
+        DenseMatrix([[ 0.,  1.],
+                     [ 2.,  3.]])
+        """
+        # Inspired by __repr__ in scipy matrices.
+        array_lines = repr(self.toArray()).splitlines()
+
+        # We need to prepend six spaces, the difference in length
+        # between "DenseMatrix" and "array".
+        x = '\n'.join([(" " * 6 + line) for line in array_lines[1:]])
+        return array_lines[0].replace("array", "DenseMatrix") + "\n" + x
+
+    def __repr__(self):
+        """
+        Representation of a DenseMatrix
+
+        >>> dm = DenseMatrix(2, 2, range(4))
+        >>> dm
+        DenseMatrix(2, 2, [0.0, 1.0, 2.0, 3.0], False)
+        """
+        # If there are fewer than seventeen values, show them all.
+        # Otherwise show the first eight and the last eight values.
+        if len(self.values) < 17:
+            entries = _format_float_list(self.values)
+        else:
+            entries = (
+                _format_float_list(self.values[:8]) +
+                ["..."] +
+                _format_float_list(self.values[-8:])
+            )
+
+        entries = ", ".join(entries)
+        return "DenseMatrix({0}, {1}, [{2}], {3})".format(
+            self.numRows, self.numCols, entries, self.isTransposed)
+
+    def toArray(self):
+        """
+        Return a numpy.ndarray
+
+        >>> m = DenseMatrix(2, 2, range(4))
+        >>> m.toArray()
+        array([[ 0.,  2.],
+               [ 1.,  3.]])
+        """
+        if self.isTransposed:
+            return np.asfortranarray(
+                self.values.reshape((self.numRows, self.numCols)))
+        else:
+            return self.values.reshape((self.numRows, self.numCols), order='F')
+
+    def toSparse(self):
+        """Convert to SparseMatrix"""
+        if self.isTransposed:
+            values = np.ravel(self.toArray(), order='F')
+        else:
+            values = self.values
+        indices = np.nonzero(values)[0]
+        colCounts = np.bincount(indices // self.numRows)
+        colPtrs = np.cumsum(np.hstack(
+            (0, colCounts, np.zeros(self.numCols - colCounts.size))))
+        values = values[indices]
+        rowIndices = indices % self.numRows
+
+        return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values)
+
+    def asML(self):
+        """
+        Convert this matrix to the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+
+        :return: :py:class:`pyspark.ml.linalg.DenseMatrix`
+
+        .. versionadded:: 2.0.0
+        """
+        return newlinalg.DenseMatrix(self.numRows, self.numCols, self.values, self.isTransposed)
+
+    def __getitem__(self, indices):
+        i, j = indices
+        if i < 0 or i >= self.numRows:
+            raise IndexError("Row index %d is out of range [0, %d)"
+                             % (i, self.numRows))
+        if j >= self.numCols or j < 0:
+            raise IndexError("Column index %d is out of range [0, %d)"
+                             % (j, self.numCols))
+
+        if self.isTransposed:
+            return self.values[i * self.numCols + j]
+        else:
+            return self.values[i + j * self.numRows]
+
+    def __eq__(self, other):
+        if (not isinstance(other, DenseMatrix) or
+                self.numRows != other.numRows or
+                self.numCols != other.numCols):
+            return False
+
+        self_values = np.ravel(self.toArray(), order='F')
+        other_values = np.ravel(other.toArray(), order='F')
+        return all(self_values == other_values)
+
+
+class SparseMatrix(Matrix):
+    """Sparse Matrix stored in CSC format."""
+    def __init__(self, numRows, numCols, colPtrs, rowIndices, values,
+                 isTransposed=False):
+        Matrix.__init__(self, numRows, numCols, isTransposed)
+        self.colPtrs = self._convert_to_array(colPtrs, np.int32)
+        self.rowIndices = self._convert_to_array(rowIndices, np.int32)
+        self.values = self._convert_to_array(values, np.float64)
+
+        if self.isTransposed:
+            if self.colPtrs.size != numRows + 1:
+                raise ValueError("Expected colPtrs of size %d, got %d."
+                                 % (numRows + 1, self.colPtrs.size))
+        else:
+            if self.colPtrs.size != numCols + 1:
+                raise ValueError("Expected colPtrs of size %d, got %d."
+                                 % (numCols + 1, self.colPtrs.size))
+        if self.rowIndices.size != self.values.size:
+            raise ValueError("Expected rowIndices of length %d, got %d."
+                             % (self.values.size, self.rowIndices.size))
+
+    def __str__(self):
+        """
+        Pretty printing of a SparseMatrix
+
+        >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        >>> print(sm1)
+        2 X 2 CSCMatrix
+        (0,0) 2.0
+        (1,0) 3.0
+        (1,1) 4.0
+        >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
+        >>> print(sm1)
+        2 X 2 CSRMatrix
+        (0,0) 2.0
+        (0,1) 3.0
+        (1,1) 4.0
+        """
+        spstr = "{0} X {1} ".format(self.numRows, self.numCols)
+        if self.isTransposed:
+            spstr += "CSRMatrix\n"
+        else:
+            spstr += "CSCMatrix\n"
+
+        cur_col = 0
+        smlist = []
+
+        # Display first 16 values.
+        if len(self.values) <= 16:
+            zipindval = zip(self.rowIndices, self.values)
+        else:
+            zipindval = zip(self.rowIndices[:16], self.values[:16])
+        for i, (rowInd, value) in enumerate(zipindval):
+            if self.colPtrs[cur_col + 1] <= i:
+                cur_col += 1
+            if self.isTransposed:
+                smlist.append('({0},{1}) {2}'.format(
+                    cur_col, rowInd, _format_float(value)))
+            else:
+                smlist.append('({0},{1}) {2}'.format(
+                    rowInd, cur_col, _format_float(value)))
+        spstr += "\n".join(smlist)
+
+        if len(self.values) > 16:
+            spstr += "\n.." * 2
+        return spstr
+
+    def __repr__(self):
+        """
+        Representation of a SparseMatrix
+
+        >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        >>> sm1
+        SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2.0, 3.0, 4.0], False)
+        """
+        rowIndices = list(self.rowIndices)
+        colPtrs = list(self.colPtrs)
+
+        if len(self.values) <= 16:
+            values = _format_float_list(self.values)
+        else:
+            values = (
+                _format_float_list(self.values[:8]) +
+                ["..."] +
+                _format_float_list(self.values[-8:])
+            )
+            rowIndices = rowIndices[:8] + ["..."] + rowIndices[-8:]
+
+        if len(self.colPtrs) > 16:
+            colPtrs = colPtrs[:8] + ["..."] + colPtrs[-8:]
+
+        values = ", ".join(values)
+        rowIndices = ", ".join([str(ind) for ind in rowIndices])
+        colPtrs = ", ".join([str(ptr) for ptr in colPtrs])
+        return "SparseMatrix({0}, {1}, [{2}], [{3}], [{4}], {5})".format(
+            self.numRows, self.numCols, colPtrs, rowIndices,
+            values, self.isTransposed)
+
+    def __reduce__(self):
+        return SparseMatrix, (
+            self.numRows, self.numCols, self.colPtrs.tostring(),
+            self.rowIndices.tostring(), self.values.tostring(),
+            int(self.isTransposed))
+
+    def __getitem__(self, indices):
+        i, j = indices
+        if i < 0 or i >= self.numRows:
+            raise IndexError("Row index %d is out of range [0, %d)"
+                             % (i, self.numRows))
+        if j < 0 or j >= self.numCols:
+            raise IndexError("Column index %d is out of range [0, %d)"
+                             % (j, self.numCols))
+
+        # If a CSR matrix is given, then the row index should be searched
+        # for in ColPtrs, and the column index should be searched for in the
+        # corresponding slice obtained from rowIndices.
+        if self.isTransposed:
+            j, i = i, j
+
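+        # Binary-search row i among the active row indices of column j
+        # (the slice colPtrs[j]:colPtrs[j + 1]); missing entries are zeros.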
+        colStart = self.colPtrs[j]
+        colEnd = self.colPtrs[j + 1]
+        nz = self.rowIndices[colStart: colEnd]
+        ind = np.searchsorted(nz, i) + colStart
+        if ind < colEnd and self.rowIndices[ind] == i:
+            return self.values[ind]
+        else:
+            return 0.0
+
+    def toArray(self):
+        """
+        Return a numpy.ndarray
+        """
+        A = np.zeros((self.numRows, self.numCols), dtype=np.float64, order='F')
+        for k in xrange(self.colPtrs.size - 1):
+            startptr = self.colPtrs[k]
+            endptr = self.colPtrs[k + 1]
+            if self.isTransposed:
+                A[k, self.rowIndices[startptr:endptr]] = self.values[startptr:endptr]
+            else:
+                A[self.rowIndices[startptr:endptr], k] = self.values[startptr:endptr]
+        return A
+
+    def toDense(self):
+        densevals = np.ravel(self.toArray(), order='F')
+        return DenseMatrix(self.numRows, self.numCols, densevals)
+
+    def asML(self):
+        """
+        Convert this matrix to the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+
+        :return: :py:class:`pyspark.ml.linalg.SparseMatrix`
+
+        .. versionadded:: 2.0.0
+        """
+        return newlinalg.SparseMatrix(self.numRows, self.numCols, self.colPtrs, self.rowIndices,
+                                      self.values, self.isTransposed)
+
+    # TODO: More efficient implementation:
+    def __eq__(self, other):
+        return np.all(self.toArray() == other.toArray())
+
+
+class Matrices(object):
+    @staticmethod
+    def dense(numRows, numCols, values):
+        """
+        Create a DenseMatrix
+        """
+        return DenseMatrix(numRows, numCols, values)
+
+    @staticmethod
+    def sparse(numRows, numCols, colPtrs, rowIndices, values):
+        """
+        Create a SparseMatrix
+        """
+        return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)
+
+    @staticmethod
+    def fromML(mat):
+        """
+        Convert a matrix from the new mllib-local representation.
+        This does NOT copy the data; it copies references.
+
+        :param mat: a :py:class:`pyspark.ml.linalg.Matrix`
+        :return: a :py:class:`pyspark.mllib.linalg.Matrix`
+
+        .. versionadded:: 2.0.0
+        """
+        if isinstance(mat, newlinalg.DenseMatrix):
+            return DenseMatrix(mat.numRows, mat.numCols, mat.values, mat.isTransposed)
+        elif isinstance(mat, newlinalg.SparseMatrix):
+            return SparseMatrix(mat.numRows, mat.numCols, mat.colPtrs, mat.rowIndices,
+                                mat.values, mat.isTransposed)
+        else:
+            raise TypeError("Unsupported matrix type %s" % type(mat))
+
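+# Illustrative sketch only (not part of the module API above): `asML` and
+# `Matrices.fromML` convert between the legacy mllib matrix types and the
+# newer pyspark.ml.linalg ones without copying the underlying data, so a
+# round trip is cheap. The variable names below are hypothetical.
+#
+#     old = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+#     new = old.asML()             # pyspark.ml.linalg.SparseMatrix
+#     back = Matrices.fromML(new)  # back to pyspark.mllib.linalg.SparseMatrix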
+
+class QRDecomposition(object):
+    """
+    Represents QR factors.
+    """
+    def __init__(self, Q, R):
+        self._Q = Q
+        self._R = R
+
+    @property
+    @since('2.0.0')
+    def Q(self):
+        """
+        An orthogonal matrix Q in a QR decomposition.
+        May be None if not computed.
+        """
+        return self._Q
+
+    @property
+    @since('2.0.0')
+    def R(self):
+        """
+        An upper triangular matrix R in a QR decomposition.
+        """
+        return self._R
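+
+    # Hedged usage sketch: QRDecomposition objects are normally produced by
+    # pyspark.mllib.linalg.distributed.RowMatrix.tallSkinnyQR rather than
+    # constructed by hand. Assuming a RowMatrix `mat`:
+    #
+    #     decomp = mat.tallSkinnyQR(computeQ=True)
+    #     decomp.R        # local upper triangular Matrix
+    #     decomp.Q        # RowMatrix, or None when computeQ=False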
+
+
+def _test():
+    import doctest
+    import numpy
+    try:
+        # Numpy 1.14+ changed its string format.
+        numpy.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+    (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/linalg/distributed.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/linalg/distributed.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e8b15056cabe4ee3cde4ec8db33b08a32880381
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/linalg/distributed.py
@@ -0,0 +1,1389 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Package for distributed linear algebra.
+"""
+
+import sys
+
+if sys.version >= '3':
+    long = int
+
+from py4j.java_gateway import JavaObject
+
+from pyspark import RDD, since
+from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
+from pyspark.mllib.linalg import _convert_to_vector, DenseMatrix, Matrix, QRDecomposition
+from pyspark.mllib.stat import MultivariateStatisticalSummary
+from pyspark.storagelevel import StorageLevel
+
+
+__all__ = ['BlockMatrix', 'CoordinateMatrix', 'DistributedMatrix', 'IndexedRow',
+           'IndexedRowMatrix', 'MatrixEntry', 'RowMatrix', 'SingularValueDecomposition']
+
+
+class DistributedMatrix(object):
+    """
+    Represents a distributively stored matrix backed by one or
+    more RDDs.
+
+    """
+    def numRows(self):
+        """Get or compute the number of rows."""
+        raise NotImplementedError
+
+    def numCols(self):
+        """Get or compute the number of cols."""
+        raise NotImplementedError
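+
+    # The concrete implementations below (RowMatrix, IndexedRowMatrix,
+    # CoordinateMatrix, BlockMatrix) provide these, and most can be converted
+    # into one another via their to*Matrix() conversion methods.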
+
+
+class RowMatrix(DistributedMatrix):
+    """
+    Represents a row-oriented distributed Matrix with no meaningful
+    row indices.
+
+    :param rows: An RDD of vectors.
+    :param numRows: Number of rows in the matrix. A non-positive
+                    value means unknown, at which point the number
+                    of rows will be determined by the number of
+                    records in the `rows` RDD.
+    :param numCols: Number of columns in the matrix. A non-positive
+                    value means unknown, at which point the number
+                    of columns will be determined by the size of
+                    the first row.
+    """
+    def __init__(self, rows, numRows=0, numCols=0):
+        """
+        Note: This docstring is not shown publicly.
+
+        Create a wrapper over a Java RowMatrix.
+
+        Publicly, we require that `rows` be an RDD.  However, for
+        internal usage, `rows` can also be a Java RowMatrix
+        object, in which case we can wrap it directly.  This
+        assists in clean matrix conversions.
+
+        >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
+        >>> mat = RowMatrix(rows)
+
+        >>> mat_diff = RowMatrix(rows)
+        >>> (mat_diff._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        False
+
+        >>> mat_same = RowMatrix(mat._java_matrix_wrapper._java_model)
+        >>> (mat_same._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        True
+        """
+        if isinstance(rows, RDD):
+            rows = rows.map(_convert_to_vector)
+            java_matrix = callMLlibFunc("createRowMatrix", rows, long(numRows), int(numCols))
+        elif (isinstance(rows, JavaObject)
+              and rows.getClass().getSimpleName() == "RowMatrix"):
+            java_matrix = rows
+        else:
+            raise TypeError("rows should be an RDD of vectors, got %s" % type(rows))
+
+        self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
+
+    @property
+    def rows(self):
+        """
+        Rows of the RowMatrix stored as an RDD of vectors.
+
+        >>> mat = RowMatrix(sc.parallelize([[1, 2, 3], [4, 5, 6]]))
+        >>> rows = mat.rows
+        >>> rows.first()
+        DenseVector([1.0, 2.0, 3.0])
+        """
+        return self._java_matrix_wrapper.call("rows")
+
+    def numRows(self):
+        """
+        Get or compute the number of rows.
+
+        >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],
+        ...                        [7, 8, 9], [10, 11, 12]])
+
+        >>> mat = RowMatrix(rows)
+        >>> print(mat.numRows())
+        4
+
+        >>> mat = RowMatrix(rows, 7, 6)
+        >>> print(mat.numRows())
+        7
+        """
+        return self._java_matrix_wrapper.call("numRows")
+
+    def numCols(self):
+        """
+        Get or compute the number of cols.
+
+        >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],
+        ...                        [7, 8, 9], [10, 11, 12]])
+
+        >>> mat = RowMatrix(rows)
+        >>> print(mat.numCols())
+        3
+
+        >>> mat = RowMatrix(rows, 7, 6)
+        >>> print(mat.numCols())
+        6
+        """
+        return self._java_matrix_wrapper.call("numCols")
+
+    @since('2.0.0')
+    def computeColumnSummaryStatistics(self):
+        """
+        Computes column-wise summary statistics.
+
+        :return: :class:`MultivariateStatisticalSummary` object
+                 containing column-wise summary statistics.
+
+        >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
+        >>> mat = RowMatrix(rows)
+
+        >>> colStats = mat.computeColumnSummaryStatistics()
+        >>> colStats.mean()
+        array([ 2.5,  3.5,  4.5])
+        """
+        java_col_stats = self._java_matrix_wrapper.call("computeColumnSummaryStatistics")
+        return MultivariateStatisticalSummary(java_col_stats)
+
+    @since('2.0.0')
+    def computeCovariance(self):
+        """
+        Computes the covariance matrix, treating each row as an
+        observation.
+
+        .. note:: This cannot be computed on matrices with more than 65535 columns.
+
+        >>> rows = sc.parallelize([[1, 2], [2, 1]])
+        >>> mat = RowMatrix(rows)
+
+        >>> mat.computeCovariance()
+        DenseMatrix(2, 2, [0.5, -0.5, -0.5, 0.5], 0)
+        """
+        return self._java_matrix_wrapper.call("computeCovariance")
+
+    @since('2.0.0')
+    def computeGramianMatrix(self):
+        """
+        Computes the Gramian matrix `A^T A`.
+
+        .. note:: This cannot be computed on matrices with more than 65535 columns.
+
+        >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
+        >>> mat = RowMatrix(rows)
+
+        >>> mat.computeGramianMatrix()
+        DenseMatrix(3, 3, [17.0, 22.0, 27.0, 22.0, 29.0, 36.0, 27.0, 36.0, 45.0], 0)
+        """
+        return self._java_matrix_wrapper.call("computeGramianMatrix")
+
+    @since('2.0.0')
+    def columnSimilarities(self, threshold=0.0):
+        """
+        Compute similarities between columns of this matrix.
+
+        The threshold parameter is a trade-off knob between estimate
+        quality and computational cost.
+
+        The default threshold setting of 0 guarantees deterministically
+        correct results, but uses the brute-force approach of computing
+        normalized dot products.
+
+        Setting the threshold to positive values uses a sampling
+        approach and incurs strictly less computational cost than the
+        brute-force approach. However, the similarities computed will
+        be estimates.
+
+        The sampling guarantees relative-error correctness for those
+        pairs of columns that have similarity greater than the given
+        similarity threshold.
+
+        To describe the guarantee, we set some notation:
+            * Let A be the smallest in magnitude non-zero element of
+              this matrix.
+            * Let B be the largest in magnitude non-zero element of
+              this matrix.
+            * Let L be the maximum number of non-zeros per row.
+
+        For example, for {0,1} matrices: A=B=1.
+        Another example, for the Netflix matrix: A=1, B=5.
+
+        For those column pairs that are above the threshold, the
+        computed similarity is correct to within 20% relative error
+        with probability at least 1 - (0.981)^(10/B).
+
+        The shuffle size is bounded by the *smaller* of the following
+        two expressions:
+
+            * O(n log(n) L / (threshold * A))
+            * O(m L^2)
+
+        The latter is the cost of the brute-force approach, so for
+        non-zero thresholds, the cost is always cheaper than the
+        brute-force approach.
+
+        :param threshold: Set to 0 for deterministic guaranteed
+                           correctness. Similarities above this
+                           threshold are estimated with the cost vs
+                           estimate quality trade-off described above.
+        :return: An n x n sparse upper-triangular CoordinateMatrix of
+                 cosine similarities between columns of this matrix.
+
+        >>> rows = sc.parallelize([[1, 2], [1, 5]])
+        >>> mat = RowMatrix(rows)
+
+        >>> sims = mat.columnSimilarities()
+        >>> sims.entries.first().value
+        0.91914503...
+        """
+        java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", float(threshold))
+        return CoordinateMatrix(java_sims_mat)
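+
+    # Hedged sketch of the sampling mode described in the docstring above;
+    # the threshold value is arbitrary and the resulting similarities are
+    # estimates, so no exact output is shown. `mat` is the RowMatrix from the
+    # doctest above:
+    #
+    #     approx = mat.columnSimilarities(threshold=0.1)
+    #     approx.entries.first()   # MatrixEntry holding an estimated cosine similarity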
+
+    @since('2.0.0')
+    def tallSkinnyQR(self, computeQ=False):
+        """
+        Compute the QR decomposition of this RowMatrix.
+
+        The implementation is designed to optimize the QR decomposition
+        (factorization) for the RowMatrix of a tall and skinny shape.
+
+        Reference:
+         Paul G. Constantine, David F. Gleich. "Tall and skinny QR
+         factorizations in MapReduce architectures"
+         ([[http://dx.doi.org/10.1145/1996092.1996103]])
+
+        :param computeQ: whether to compute Q
+        :return: QRDecomposition(Q: RowMatrix, R: Matrix), where
+                 Q = None if computeQ is False.
+
+        >>> rows = sc.parallelize([[3, -6], [4, -8], [0, 1]])
+        >>> mat = RowMatrix(rows)
+        >>> decomp = mat.tallSkinnyQR(True)
+        >>> Q = decomp.Q
+        >>> R = decomp.R
+
+        >>> # Test with absolute values
+        >>> absQRows = Q.rows.map(lambda row: abs(row.toArray()).tolist())
+        >>> absQRows.collect()
+        [[0.6..., 0.0], [0.8..., 0.0], [0.0, 1.0]]
+
+        >>> # Test with absolute values
+        >>> abs(R.toArray()).tolist()
+        [[5.0, 10.0], [0.0, 1.0]]
+        """
+        decomp = JavaModelWrapper(self._java_matrix_wrapper.call("tallSkinnyQR", computeQ))
+        if computeQ:
+            java_Q = decomp.call("Q")
+            Q = RowMatrix(java_Q)
+        else:
+            Q = None
+        R = decomp.call("R")
+        return QRDecomposition(Q, R)
+
+    @since('2.2.0')
+    def computeSVD(self, k, computeU=False, rCond=1e-9):
+        """
+        Computes the singular value decomposition of the RowMatrix.
+
+        The given row matrix A of dimension (m X n) is decomposed into
+        U * s * V' where
+
+        * U: (m X k) (left singular vectors) is a RowMatrix whose
+             columns are the eigenvectors of (A X A')
+        * s: DenseVector consisting of the square roots of the eigenvalues
+             (singular values) in descending order.
+        * V: (n X k) (right singular vectors) is a Matrix whose columns
+             are the eigenvectors of (A' X A)
+
+        For more specific details on the implementation, please refer to
+        the Scala documentation.
+
+        :param k: Number of leading singular values to keep (`0 < k <= n`).
+                  It might return fewer than k values if there are numerically zero
+                  singular values, or if not enough Ritz values converge before the
+                  maximum number of Arnoldi update iterations is reached (in case
+                  matrix A is ill-conditioned).
+        :param computeU: Whether or not to compute U. If set to
+                         True, then U is computed by A * V * s^-1.
+        :param rCond: Reciprocal condition number. All singular values
+                      smaller than rCond * s[0] are treated as zero
+                      where s[0] is the largest singular value.
+        :returns: :py:class:`SingularValueDecomposition`
+
+        >>> rows = sc.parallelize([[3, 1, 1], [-1, 3, 1]])
+        >>> rm = RowMatrix(rows)
+
+        >>> svd_model = rm.computeSVD(2, True)
+        >>> svd_model.U.rows.collect()
+        [DenseVector([-0.7071, 0.7071]), DenseVector([-0.7071, -0.7071])]
+        >>> svd_model.s
+        DenseVector([3.4641, 3.1623])
+        >>> svd_model.V
+        DenseMatrix(3, 2, [-0.4082, -0.8165, -0.4082, 0.8944, -0.4472, 0.0], 0)
+        """
+        j_model = self._java_matrix_wrapper.call(
+            "computeSVD", int(k), bool(computeU), float(rCond))
+        return SingularValueDecomposition(j_model)
+
+    @since('2.2.0')
+    def computePrincipalComponents(self, k):
+        """
+        Computes the k principal components of the given row matrix.
+
+        .. note:: This cannot be computed on matrices with more than 65535 columns.
+
+        :param k: Number of principal components to keep.
+        :returns: :py:class:`pyspark.mllib.linalg.DenseMatrix`
+
+        >>> rows = sc.parallelize([[1, 2, 3], [2, 4, 5], [3, 6, 1]])
+        >>> rm = RowMatrix(rows)
+
+        >>> # Returns the two principal components of rm
+        >>> pca = rm.computePrincipalComponents(2)
+        >>> pca
+        DenseMatrix(3, 2, [-0.349, -0.6981, 0.6252, -0.2796, -0.5592, -0.7805], 0)
+
+        >>> # Transform into new dimensions with the greatest variance.
+        >>> rm.multiply(pca).rows.collect() # doctest: +NORMALIZE_WHITESPACE
+        [DenseVector([0.1305, -3.7394]), DenseVector([-0.3642, -6.6983]), \
+        DenseVector([-4.6102, -4.9745])]
+        """
+        return self._java_matrix_wrapper.call("computePrincipalComponents", k)
+
+    @since('2.2.0')
+    def multiply(self, matrix):
+        """
+        Multiply this matrix by a local dense matrix on the right.
+
+        :param matrix: a local dense matrix whose number of rows must match the number of columns
+                       of this matrix
+        :returns: :py:class:`RowMatrix`
+
+        >>> rm = RowMatrix(sc.parallelize([[0, 1], [2, 3]]))
+        >>> rm.multiply(DenseMatrix(2, 2, [0, 2, 1, 3])).rows.collect()
+        [DenseVector([2.0, 3.0]), DenseVector([6.0, 11.0])]
+        """
+        if not isinstance(matrix, DenseMatrix):
+            raise ValueError("Only multiplication with DenseMatrix "
+                             "is supported.")
+        j_model = self._java_matrix_wrapper.call("multiply", matrix)
+        return RowMatrix(j_model)
+
+
+class SingularValueDecomposition(JavaModelWrapper):
+    """
+    Represents singular value decomposition (SVD) factors.
+
+    .. versionadded:: 2.2.0
+    """
+
+    @property
+    @since('2.2.0')
+    def U(self):
+        """
+        Returns a distributed matrix whose columns are the left
+        singular vectors of the SingularValueDecomposition if computeU was set to True.
+        """
+        u = self.call("U")
+        if u is not None:
+            mat_name = u.getClass().getSimpleName()
+            if mat_name == "RowMatrix":
+                return RowMatrix(u)
+            elif mat_name == "IndexedRowMatrix":
+                return IndexedRowMatrix(u)
+            else:
+                raise TypeError("Expected RowMatrix/IndexedRowMatrix got %s" % mat_name)
+
+    @property
+    @since('2.2.0')
+    def s(self):
+        """
+        Returns a DenseVector with singular values in descending order.
+        """
+        return self.call("s")
+
+    @property
+    @since('2.2.0')
+    def V(self):
+        """
+        Returns a DenseMatrix whose columns are the right singular
+        vectors of the SingularValueDecomposition.
+        """
+        return self.call("V")
+
+
+class IndexedRow(object):
+    """
+    Represents a row of an IndexedRowMatrix.
+
+    Just a wrapper over a (long, vector) tuple.
+
+    :param index: The index for the given row.
+    :param vector: The row in the matrix at the given index.
+    """
+    def __init__(self, index, vector):
+        self.index = long(index)
+        self.vector = _convert_to_vector(vector)
+
+    def __repr__(self):
+        return "IndexedRow(%s, %s)" % (self.index, self.vector)
+
+
+def _convert_to_indexed_row(row):
+    if isinstance(row, IndexedRow):
+        return row
+    elif isinstance(row, tuple) and len(row) == 2:
+        return IndexedRow(*row)
+    else:
+        raise TypeError("Cannot convert type %s into IndexedRow" % type(row))
+
+
+class IndexedRowMatrix(DistributedMatrix):
+    """
+    Represents a row-oriented distributed Matrix with indexed rows.
+
+    :param rows: An RDD of IndexedRows or (long, vector) tuples.
+    :param numRows: Number of rows in the matrix. A non-positive
+                    value means unknown, at which point the number
+                    of rows will be determined by the max row
+                    index plus one.
+    :param numCols: Number of columns in the matrix. A non-positive
+                    value means unknown, at which point the number
+                    of columns will be determined by the size of
+                    the first row.
+    """
+    def __init__(self, rows, numRows=0, numCols=0):
+        """
+        Note: This docstring is not shown publicly.
+
+        Create a wrapper over a Java IndexedRowMatrix.
+
+        Publicly, we require that `rows` be an RDD.  However, for
+        internal usage, `rows` can also be a Java IndexedRowMatrix
+        object, in which case we can wrap it directly.  This
+        assists in clean matrix conversions.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(1, [4, 5, 6])])
+        >>> mat = IndexedRowMatrix(rows)
+
+        >>> mat_diff = IndexedRowMatrix(rows)
+        >>> (mat_diff._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        False
+
+        >>> mat_same = IndexedRowMatrix(mat._java_matrix_wrapper._java_model)
+        >>> (mat_same._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        True
+        """
+        if isinstance(rows, RDD):
+            rows = rows.map(_convert_to_indexed_row)
+            # We use DataFrames for serialization of IndexedRows from
+            # Python, so first convert the RDD to a DataFrame on this
+            # side. This will convert each IndexedRow to a Row
+            # containing the 'index' and 'vector' values, which can
+            # both be easily serialized.  We will convert back to
+            # IndexedRows on the Scala side.
+            java_matrix = callMLlibFunc("createIndexedRowMatrix", rows.toDF(),
+                                        long(numRows), int(numCols))
+        elif (isinstance(rows, JavaObject)
+              and rows.getClass().getSimpleName() == "IndexedRowMatrix"):
+            java_matrix = rows
+        else:
+            raise TypeError("rows should be an RDD of IndexedRows or (long, vector) tuples, "
+                            "got %s" % type(rows))
+
+        self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
+
+    @property
+    def rows(self):
+        """
+        Rows of the IndexedRowMatrix stored as an RDD of IndexedRows.
+
+        >>> mat = IndexedRowMatrix(sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                                        IndexedRow(1, [4, 5, 6])]))
+        >>> rows = mat.rows
+        >>> rows.first()
+        IndexedRow(0, [1.0,2.0,3.0])
+        """
+        # We use DataFrames for serialization of IndexedRows from
+        # Java, so we first convert the RDD of rows to a DataFrame
+        # on the Scala/Java side. Then we map each Row in the
+        # DataFrame back to an IndexedRow on this side.
+        rows_df = callMLlibFunc("getIndexedRows", self._java_matrix_wrapper._java_model)
+        rows = rows_df.rdd.map(lambda row: IndexedRow(row[0], row[1]))
+        return rows
+
+    def numRows(self):
+        """
+        Get or compute the number of rows.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(1, [4, 5, 6]),
+        ...                        IndexedRow(2, [7, 8, 9]),
+        ...                        IndexedRow(3, [10, 11, 12])])
+
+        >>> mat = IndexedRowMatrix(rows)
+        >>> print(mat.numRows())
+        4
+
+        >>> mat = IndexedRowMatrix(rows, 7, 6)
+        >>> print(mat.numRows())
+        7
+        """
+        return self._java_matrix_wrapper.call("numRows")
+
+    def numCols(self):
+        """
+        Get or compute the number of cols.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(1, [4, 5, 6]),
+        ...                        IndexedRow(2, [7, 8, 9]),
+        ...                        IndexedRow(3, [10, 11, 12])])
+
+        >>> mat = IndexedRowMatrix(rows)
+        >>> print(mat.numCols())
+        3
+
+        >>> mat = IndexedRowMatrix(rows, 7, 6)
+        >>> print(mat.numCols())
+        6
+        """
+        return self._java_matrix_wrapper.call("numCols")
+
+    def columnSimilarities(self):
+        """
+        Compute all cosine similarities between columns.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(6, [4, 5, 6])])
+        >>> mat = IndexedRowMatrix(rows)
+        >>> cs = mat.columnSimilarities()
+        >>> print(cs.numCols())
+        3
+        """
+        java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities")
+        return CoordinateMatrix(java_coordinate_matrix)
+
+    @since('2.0.0')
+    def computeGramianMatrix(self):
+        """
+        Computes the Gramian matrix `A^T A`.
+
+        .. note:: This cannot be computed on matrices with more than 65535 columns.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(1, [4, 5, 6])])
+        >>> mat = IndexedRowMatrix(rows)
+
+        >>> mat.computeGramianMatrix()
+        DenseMatrix(3, 3, [17.0, 22.0, 27.0, 22.0, 29.0, 36.0, 27.0, 36.0, 45.0], 0)
+        """
+        return self._java_matrix_wrapper.call("computeGramianMatrix")
+
+    def toRowMatrix(self):
+        """
+        Convert this matrix to a RowMatrix.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(6, [4, 5, 6])])
+        >>> mat = IndexedRowMatrix(rows).toRowMatrix()
+        >>> mat.rows.collect()
+        [DenseVector([1.0, 2.0, 3.0]), DenseVector([4.0, 5.0, 6.0])]
+        """
+        java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix")
+        return RowMatrix(java_row_matrix)
+
+    def toCoordinateMatrix(self):
+        """
+        Convert this matrix to a CoordinateMatrix.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 0]),
+        ...                        IndexedRow(6, [0, 5])])
+        >>> mat = IndexedRowMatrix(rows).toCoordinateMatrix()
+        >>> mat.entries.take(3)
+        [MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 0.0), MatrixEntry(6, 0, 0.0)]
+        """
+        java_coordinate_matrix = self._java_matrix_wrapper.call("toCoordinateMatrix")
+        return CoordinateMatrix(java_coordinate_matrix)
+
+    def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024):
+        """
+        Convert this matrix to a BlockMatrix.
+
+        :param rowsPerBlock: Number of rows that make up each block.
+                             The blocks forming the final rows are not
+                             required to have the given number of rows.
+        :param colsPerBlock: Number of columns that make up each block.
+                             The blocks forming the final columns are not
+                             required to have the given number of columns.
+
+        >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
+        ...                        IndexedRow(6, [4, 5, 6])])
+        >>> mat = IndexedRowMatrix(rows).toBlockMatrix()
+
+        >>> # This IndexedRowMatrix will have 7 effective rows, due to
+        >>> # the highest row index being 6, and the ensuing
+        >>> # BlockMatrix will have 7 rows as well.
+        >>> print(mat.numRows())
+        7
+
+        >>> print(mat.numCols())
+        3
+        """
+        java_block_matrix = self._java_matrix_wrapper.call("toBlockMatrix",
+                                                           rowsPerBlock,
+                                                           colsPerBlock)
+        return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock)
+
+    @since('2.2.0')
+    def computeSVD(self, k, computeU=False, rCond=1e-9):
+        """
+        Computes the singular value decomposition of the IndexedRowMatrix.
+
+        The given row matrix A of dimension (m X n) is decomposed into
+        U * s * V' where
+
+        * U: (m X k) (left singular vectors) is an IndexedRowMatrix
+             whose columns are the eigenvectors of (A X A')
+        * s: DenseVector consisting of the square roots of the eigenvalues
+             (singular values) in descending order.
+        * V: (n X k) (right singular vectors) is a Matrix whose columns
+             are the eigenvectors of (A' X A)
+
+        For more specific details on the implementation, please refer to
+        the Scala documentation.
+
+        :param k: Number of leading singular values to keep (`0 < k <= n`).
+                  It might return fewer than k values if there are numerically zero
+                  singular values, or if not enough Ritz values converge before the
+                  maximum number of Arnoldi update iterations is reached (in case
+                  matrix A is ill-conditioned).
+        :param computeU: Whether or not to compute U. If set to
+                         True, then U is computed by A * V * s^-1.
+        :param rCond: Reciprocal condition number. All singular values
+                      smaller than rCond * s[0] are treated as zero
+                      where s[0] is the largest singular value.
+        :returns: SingularValueDecomposition object
+
+        >>> rows = [(0, (3, 1, 1)), (1, (-1, 3, 1))]
+        >>> irm = IndexedRowMatrix(sc.parallelize(rows))
+        >>> svd_model = irm.computeSVD(2, True)
+        >>> svd_model.U.rows.collect() # doctest: +NORMALIZE_WHITESPACE
+        [IndexedRow(0, [-0.707106781187,0.707106781187]),\
+        IndexedRow(1, [-0.707106781187,-0.707106781187])]
+        >>> svd_model.s
+        DenseVector([3.4641, 3.1623])
+        >>> svd_model.V
+        DenseMatrix(3, 2, [-0.4082, -0.8165, -0.4082, 0.8944, -0.4472, 0.0], 0)
+        """
+        j_model = self._java_matrix_wrapper.call(
+            "computeSVD", int(k), bool(computeU), float(rCond))
+        return SingularValueDecomposition(j_model)
+
+    @since('2.2.0')
+    def multiply(self, matrix):
+        """
+        Multiply this matrix by a local dense matrix on the right.
+
+        :param matrix: a local dense matrix whose number of rows must match the number of columns
+                       of this matrix
+        :returns: :py:class:`IndexedRowMatrix`
+
+        >>> mat = IndexedRowMatrix(sc.parallelize([(0, (0, 1)), (1, (2, 3))]))
+        >>> mat.multiply(DenseMatrix(2, 2, [0, 2, 1, 3])).rows.collect()
+        [IndexedRow(0, [2.0,3.0]), IndexedRow(1, [6.0,11.0])]
+        """
+        if not isinstance(matrix, DenseMatrix):
+            raise ValueError("Only multiplication with DenseMatrix "
+                             "is supported.")
+        return IndexedRowMatrix(self._java_matrix_wrapper.call("multiply", matrix))
+
+
+class MatrixEntry(object):
+    """
+    Represents an entry of a CoordinateMatrix.
+
+    Just a wrapper over a (long, long, float) tuple.
+
+    :param i: The row index of the matrix.
+    :param j: The column index of the matrix.
+    :param value: The (i, j)th entry of the matrix, as a float.
+    """
+    def __init__(self, i, j, value):
+        self.i = long(i)
+        self.j = long(j)
+        self.value = float(value)
+
+    def __repr__(self):
+        return "MatrixEntry(%s, %s, %s)" % (self.i, self.j, self.value)
+
+
+def _convert_to_matrix_entry(entry):
+    if isinstance(entry, MatrixEntry):
+        return entry
+    elif isinstance(entry, tuple) and len(entry) == 3:
+        return MatrixEntry(*entry)
+    else:
+        raise TypeError("Cannot convert type %s into MatrixEntry" % type(entry))
+
+
+class CoordinateMatrix(DistributedMatrix):
+    """
+    Represents a matrix in coordinate format.
+
+    :param entries: An RDD of MatrixEntry inputs or
+                    (long, long, float) tuples.
+    :param numRows: Number of rows in the matrix. A non-positive
+                    value means unknown, at which point the number
+                    of rows will be determined by the max row
+                    index plus one.
+    :param numCols: Number of columns in the matrix. A non-positive
+                    value means unknown, at which point the number
+                    of columns will be determined by the max column
+                    index plus one.
+    """
+    def __init__(self, entries, numRows=0, numCols=0):
+        """
+        Note: This docstring is not shown publicly.
+
+        Create a wrapper over a Java CoordinateMatrix.
+
+        Publicly, we require that `entries` be an RDD.  However, for
+        internal usage, `entries` can also be a Java CoordinateMatrix
+        object, in which case we can wrap it directly.  This
+        assists in clean matrix conversions.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(6, 4, 2.1)])
+        >>> mat = CoordinateMatrix(entries)
+
+        >>> mat_diff = CoordinateMatrix(entries)
+        >>> (mat_diff._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        False
+
+        >>> mat_same = CoordinateMatrix(mat._java_matrix_wrapper._java_model)
+        >>> (mat_same._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        True
+        """
+        if isinstance(entries, RDD):
+            entries = entries.map(_convert_to_matrix_entry)
+            # We use DataFrames for serialization of MatrixEntry entries
+            # from Python, so first convert the RDD to a DataFrame on
+            # this side. This will convert each MatrixEntry to a Row
+            # containing the 'i', 'j', and 'value' values, which can
+            # each be easily serialized. We will convert back to
+            # MatrixEntry inputs on the Scala side.
+            java_matrix = callMLlibFunc("createCoordinateMatrix", entries.toDF(),
+                                        long(numRows), long(numCols))
+        elif (isinstance(entries, JavaObject)
+              and entries.getClass().getSimpleName() == "CoordinateMatrix"):
+            java_matrix = entries
+        else:
+            raise TypeError("entries should be an RDD of MatrixEntry entries or "
+                            "(long, long, float) tuples, got %s" % type(entries))
+
+        self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
+
+    @property
+    def entries(self):
+        """
+        Entries of the CoordinateMatrix stored as an RDD of
+        MatrixEntries.
+
+        >>> mat = CoordinateMatrix(sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                                        MatrixEntry(6, 4, 2.1)]))
+        >>> entries = mat.entries
+        >>> entries.first()
+        MatrixEntry(0, 0, 1.2)
+        """
+        # We use DataFrames for serialization of MatrixEntry entries
+        # from Java, so we first convert the RDD of entries to a
+        # DataFrame on the Scala/Java side. Then we map each Row in
+        # the DataFrame back to a MatrixEntry on this side.
+        entries_df = callMLlibFunc("getMatrixEntries", self._java_matrix_wrapper._java_model)
+        entries = entries_df.rdd.map(lambda row: MatrixEntry(row[0], row[1], row[2]))
+        return entries
+
+    def numRows(self):
+        """
+        Get or compute the number of rows.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(1, 0, 2),
+        ...                           MatrixEntry(2, 1, 3.7)])
+
+        >>> mat = CoordinateMatrix(entries)
+        >>> print(mat.numRows())
+        3
+
+        >>> mat = CoordinateMatrix(entries, 7, 6)
+        >>> print(mat.numRows())
+        7
+        """
+        return self._java_matrix_wrapper.call("numRows")
+
+    def numCols(self):
+        """
+        Get or compute the number of cols.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(1, 0, 2),
+        ...                           MatrixEntry(2, 1, 3.7)])
+
+        >>> mat = CoordinateMatrix(entries)
+        >>> print(mat.numCols())
+        2
+
+        >>> mat = CoordinateMatrix(entries, 7, 6)
+        >>> print(mat.numCols())
+        6
+        """
+        return self._java_matrix_wrapper.call("numCols")
+
+    @since('2.0.0')
+    def transpose(self):
+        """
+        Transpose this CoordinateMatrix.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(1, 0, 2),
+        ...                           MatrixEntry(2, 1, 3.7)])
+        >>> mat = CoordinateMatrix(entries)
+        >>> mat_transposed = mat.transpose()
+
+        >>> print(mat_transposed.numRows())
+        2
+
+        >>> print(mat_transposed.numCols())
+        3
+        """
+        java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
+        return CoordinateMatrix(java_transposed_matrix)
+
+    def toRowMatrix(self):
+        """
+        Convert this matrix to a RowMatrix.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(6, 4, 2.1)])
+        >>> mat = CoordinateMatrix(entries).toRowMatrix()
+
+        >>> # This CoordinateMatrix will have 7 effective rows, due to
+        >>> # the highest row index being 6, but the ensuing RowMatrix
+        >>> # will only have 2 rows since there are only entries on 2
+        >>> # unique rows.
+        >>> print(mat.numRows())
+        2
+
+        >>> # This CoordinateMatrix will have 5 columns, due to the
+        >>> # highest column index being 4, and the ensuing RowMatrix
+        >>> # will have 5 columns as well.
+        >>> print(mat.numCols())
+        5
+        """
+        java_row_matrix = self._java_matrix_wrapper.call("toRowMatrix")
+        return RowMatrix(java_row_matrix)
+
+    def toIndexedRowMatrix(self):
+        """
+        Convert this matrix to an IndexedRowMatrix.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(6, 4, 2.1)])
+        >>> mat = CoordinateMatrix(entries).toIndexedRowMatrix()
+
+        >>> # This CoordinateMatrix will have 7 effective rows, due to
+        >>> # the highest row index being 6, and the ensuing
+        >>> # IndexedRowMatrix will have 7 rows as well.
+        >>> print(mat.numRows())
+        7
+
+        >>> # This CoordinateMatrix will have 5 columns, due to the
+        >>> # highest column index being 4, and the ensuing
+        >>> # IndexedRowMatrix will have 5 columns as well.
+        >>> print(mat.numCols())
+        5
+        """
+        java_indexed_row_matrix = self._java_matrix_wrapper.call("toIndexedRowMatrix")
+        return IndexedRowMatrix(java_indexed_row_matrix)
+
+    def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024):
+        """
+        Convert this matrix to a BlockMatrix.
+
+        :param rowsPerBlock: Number of rows that make up each block.
+                             The blocks forming the final rows are not
+                             required to have the given number of rows.
+        :param colsPerBlock: Number of columns that make up each block.
+                             The blocks forming the final columns are not
+                             required to have the given number of columns.
+
+        >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
+        ...                           MatrixEntry(6, 4, 2.1)])
+        >>> mat = CoordinateMatrix(entries).toBlockMatrix()
+
+        >>> # This CoordinateMatrix will have 7 effective rows, due to
+        >>> # the highest row index being 6, and the ensuing
+        >>> # BlockMatrix will have 7 rows as well.
+        >>> print(mat.numRows())
+        7
+
+        >>> # This CoordinateMatrix will have 5 columns, due to the
+        >>> # highest column index being 4, and the ensuing
+        >>> # BlockMatrix will have 5 columns as well.
+        >>> print(mat.numCols())
+        5
+        """
+        java_block_matrix = self._java_matrix_wrapper.call("toBlockMatrix",
+                                                           rowsPerBlock,
+                                                           colsPerBlock)
+        return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock)
+
+
+def _convert_to_matrix_block_tuple(block):
+    if (isinstance(block, tuple) and len(block) == 2
+            and isinstance(block[0], tuple) and len(block[0]) == 2
+            and isinstance(block[1], Matrix)):
+        blockRowIndex = int(block[0][0])
+        blockColIndex = int(block[0][1])
+        subMatrix = block[1]
+        return ((blockRowIndex, blockColIndex), subMatrix)
+    else:
+        raise TypeError("Cannot convert type %s into a sub-matrix block tuple" % type(block))
+
+
+class BlockMatrix(DistributedMatrix):
+    """
+    Represents a distributed matrix in blocks of local matrices.
+
+    :param blocks: An RDD of sub-matrix blocks
+                   ((blockRowIndex, blockColIndex), sub-matrix) that
+                   form this distributed matrix. If multiple blocks
+                   with the same index exist, the results for
+                   operations like add and multiply will be
+                   unpredictable.
+    :param rowsPerBlock: Number of rows that make up each block.
+                         The blocks forming the final rows are not
+                         required to have the given number of rows.
+    :param colsPerBlock: Number of columns that make up each block.
+                         The blocks forming the final columns are not
+                         required to have the given number of columns.
+    :param numRows: Number of rows of this matrix. If the supplied
+                    value is less than or equal to zero, the number
+                    of rows will be calculated when `numRows` is
+                    invoked.
+    :param numCols: Number of columns of this matrix. If the supplied
+                    value is less than or equal to zero, the number
+                    of columns will be calculated when `numCols` is
+                    invoked.
+    """
+    def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0):
+        """
+        Note: This docstring is not shown publicly.
+
+        Create a wrapper over a Java BlockMatrix.
+
+        Publicly, we require that `blocks` be an RDD.  However, for
+        internal usage, `blocks` can also be a Java BlockMatrix
+        object, in which case we can wrap it directly.  This
+        assists in clean matrix conversions.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2)
+
+        >>> mat_diff = BlockMatrix(blocks, 3, 2)
+        >>> (mat_diff._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        False
+
+        >>> mat_same = BlockMatrix(mat._java_matrix_wrapper._java_model, 3, 2)
+        >>> (mat_same._java_matrix_wrapper._java_model ==
+        ...  mat._java_matrix_wrapper._java_model)
+        True
+        """
+        if isinstance(blocks, RDD):
+            blocks = blocks.map(_convert_to_matrix_block_tuple)
+            # We use DataFrames for serialization of sub-matrix blocks
+            # from Python, so first convert the RDD to a DataFrame on
+            # this side. This will convert each sub-matrix block
+            # tuple to a Row containing the 'blockRowIndex',
+            # 'blockColIndex', and 'subMatrix' values, which can
+            # each be easily serialized.  We will convert back to
+            # ((blockRowIndex, blockColIndex), sub-matrix) tuples on
+            # the Scala side.
+            java_matrix = callMLlibFunc("createBlockMatrix", blocks.toDF(),
+                                        int(rowsPerBlock), int(colsPerBlock),
+                                        long(numRows), long(numCols))
+        elif (isinstance(blocks, JavaObject)
+              and blocks.getClass().getSimpleName() == "BlockMatrix"):
+            java_matrix = blocks
+        else:
+            raise TypeError("blocks should be an RDD of sub-matrix blocks as "
+                            "((int, int), matrix) tuples, got %s" % type(blocks))
+
+        self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
+
+    @property
+    def blocks(self):
+        """
+        The RDD of sub-matrix blocks
+        ((blockRowIndex, blockColIndex), sub-matrix) that form this
+        distributed matrix.
+
+        >>> mat = BlockMatrix(
+        ...     sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                     ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]), 3, 2)
+        >>> blocks = mat.blocks
+        >>> blocks.first()
+        ((0, 0), DenseMatrix(3, 2, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 0))
+
+        """
+        # We use DataFrames for serialization of sub-matrix blocks
+        # from Java, so we first convert the RDD of blocks to a
+        # DataFrame on the Scala/Java side. Then we map each Row in
+        # the DataFrame back to a sub-matrix block on this side.
+        blocks_df = callMLlibFunc("getMatrixBlocks", self._java_matrix_wrapper._java_model)
+        blocks = blocks_df.rdd.map(lambda row: ((row[0][0], row[0][1]), row[1]))
+        return blocks
+
+    @property
+    def rowsPerBlock(self):
+        """
+        Number of rows that make up each block.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2)
+        >>> mat.rowsPerBlock
+        3
+        """
+        return self._java_matrix_wrapper.call("rowsPerBlock")
+
+    @property
+    def colsPerBlock(self):
+        """
+        Number of columns that make up each block.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2)
+        >>> mat.colsPerBlock
+        2
+        """
+        return self._java_matrix_wrapper.call("colsPerBlock")
+
+    @property
+    def numRowBlocks(self):
+        """
+        Number of rows of blocks in the BlockMatrix.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2)
+        >>> mat.numRowBlocks
+        2
+        """
+        return self._java_matrix_wrapper.call("numRowBlocks")
+
+    @property
+    def numColBlocks(self):
+        """
+        Number of columns of blocks in the BlockMatrix.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2)
+        >>> mat.numColBlocks
+        1
+        """
+        return self._java_matrix_wrapper.call("numColBlocks")
+
+    def numRows(self):
+        """
+        Get or compute the number of rows.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+
+        >>> mat = BlockMatrix(blocks, 3, 2)
+        >>> print(mat.numRows())
+        6
+
+        >>> mat = BlockMatrix(blocks, 3, 2, 7, 6)
+        >>> print(mat.numRows())
+        7
+        """
+        return self._java_matrix_wrapper.call("numRows")
+
+    def numCols(self):
+        """
+        Get or compute the number of cols.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+
+        >>> mat = BlockMatrix(blocks, 3, 2)
+        >>> print(mat.numCols())
+        2
+
+        >>> mat = BlockMatrix(blocks, 3, 2, 7, 6)
+        >>> print(mat.numCols())
+        6
+        """
+        return self._java_matrix_wrapper.call("numCols")
+
+    @since('2.0.0')
+    def cache(self):
+        """
+        Caches the underlying RDD.
+        """
+        self._java_matrix_wrapper.call("cache")
+        return self
+
+    @since('2.0.0')
+    def persist(self, storageLevel):
+        """
+        Persists the underlying RDD with the specified storage level.
+        """
+        if not isinstance(storageLevel, StorageLevel):
+            raise TypeError("`storageLevel` should be a StorageLevel, got %s" % type(storageLevel))
+        javaStorageLevel = self._java_matrix_wrapper._sc._getJavaStorageLevel(storageLevel)
+        self._java_matrix_wrapper.call("persist", javaStorageLevel)
+        return self
+
+    @since('2.0.0')
+    def validate(self):
+        """
+        Validates the block matrix info against the matrix data (`blocks`)
+        and throws an exception if any error is found.
+        """
+        self._java_matrix_wrapper.call("validate")
+
+    def add(self, other):
+        """
+        Adds two block matrices together. The matrices must have the
+        same size and matching `rowsPerBlock` and `colsPerBlock` values.
+        If one of the sub matrix blocks that are being added is a
+        SparseMatrix, the resulting sub matrix block will also be a
+        SparseMatrix, even if it is being added to a DenseMatrix. If
+        two dense sub matrix blocks are added, the output block will
+        also be a DenseMatrix.
+
+        >>> dm1 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])
+        >>> dm2 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12])
+        >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [7, 11, 12])
+        >>> blocks1 = sc.parallelize([((0, 0), dm1), ((1, 0), dm2)])
+        >>> blocks2 = sc.parallelize([((0, 0), dm1), ((1, 0), dm2)])
+        >>> blocks3 = sc.parallelize([((0, 0), sm), ((1, 0), dm2)])
+        >>> mat1 = BlockMatrix(blocks1, 3, 2)
+        >>> mat2 = BlockMatrix(blocks2, 3, 2)
+        >>> mat3 = BlockMatrix(blocks3, 3, 2)
+
+        >>> mat1.add(mat2).toLocalMatrix()
+        DenseMatrix(6, 2, [2.0, 4.0, 6.0, 14.0, 16.0, 18.0, 8.0, 10.0, 12.0, 20.0, 22.0, 24.0], 0)
+
+        >>> mat1.add(mat3).toLocalMatrix()
+        DenseMatrix(6, 2, [8.0, 2.0, 3.0, 14.0, 16.0, 18.0, 4.0, 16.0, 18.0, 20.0, 22.0, 24.0], 0)
+        """
+        if not isinstance(other, BlockMatrix):
+            raise TypeError("Other should be a BlockMatrix, got %s" % type(other))
+
+        other_java_block_matrix = other._java_matrix_wrapper._java_model
+        java_block_matrix = self._java_matrix_wrapper.call("add", other_java_block_matrix)
+        return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock)
+
+    @since('2.0.0')
+    def subtract(self, other):
+        """
+        Subtracts the given block matrix `other` from this block matrix:
+        `this - other`. The matrices must have the same size and
+        matching `rowsPerBlock` and `colsPerBlock` values.  If one of
+        the sub matrix blocks that are being subtracted is a
+        SparseMatrix, the resulting sub matrix block will also be a
+        SparseMatrix, even if it is being subtracted from a DenseMatrix.
+        If two dense sub matrix blocks are subtracted, the output block
+        will also be a DenseMatrix.
+
+        >>> dm1 = Matrices.dense(3, 2, [3, 1, 5, 4, 6, 2])
+        >>> dm2 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12])
+        >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [1, 2, 3])
+        >>> blocks1 = sc.parallelize([((0, 0), dm1), ((1, 0), dm2)])
+        >>> blocks2 = sc.parallelize([((0, 0), dm2), ((1, 0), dm1)])
+        >>> blocks3 = sc.parallelize([((0, 0), sm), ((1, 0), dm2)])
+        >>> mat1 = BlockMatrix(blocks1, 3, 2)
+        >>> mat2 = BlockMatrix(blocks2, 3, 2)
+        >>> mat3 = BlockMatrix(blocks3, 3, 2)
+
+        >>> mat1.subtract(mat2).toLocalMatrix()
+        DenseMatrix(6, 2, [-4.0, -7.0, -4.0, 4.0, 7.0, 4.0, -6.0, -5.0, -10.0, 6.0, 5.0, 10.0], 0)
+
+        >>> mat2.subtract(mat3).toLocalMatrix()
+        DenseMatrix(6, 2, [6.0, 8.0, 9.0, -4.0, -7.0, -4.0, 10.0, 9.0, 9.0, -6.0, -5.0, -10.0], 0)
+        """
+        if not isinstance(other, BlockMatrix):
+            raise TypeError("Other should be a BlockMatrix, got %s" % type(other))
+
+        other_java_block_matrix = other._java_matrix_wrapper._java_model
+        java_block_matrix = self._java_matrix_wrapper.call("subtract", other_java_block_matrix)
+        return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock)
+
+    def multiply(self, other):
+        """
+        Multiplies this BlockMatrix by `other`, another BlockMatrix, on
+        the right (i.e. computes `this * other`). The `colsPerBlock` of
+        this matrix must equal the `rowsPerBlock` of `other`. If `other`
+        contains any SparseMatrix blocks, they will have to be converted
+        to DenseMatrix blocks.
+        The output BlockMatrix will only consist of DenseMatrix blocks.
+        This may cause some performance issues until support for
+        multiplying two sparse matrices is added.
+
+        >>> dm1 = Matrices.dense(2, 3, [1, 2, 3, 4, 5, 6])
+        >>> dm2 = Matrices.dense(2, 3, [7, 8, 9, 10, 11, 12])
+        >>> dm3 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])
+        >>> dm4 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12])
+        >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [7, 11, 12])
+        >>> blocks1 = sc.parallelize([((0, 0), dm1), ((0, 1), dm2)])
+        >>> blocks2 = sc.parallelize([((0, 0), dm3), ((1, 0), dm4)])
+        >>> blocks3 = sc.parallelize([((0, 0), sm), ((1, 0), dm4)])
+        >>> mat1 = BlockMatrix(blocks1, 2, 3)
+        >>> mat2 = BlockMatrix(blocks2, 3, 2)
+        >>> mat3 = BlockMatrix(blocks3, 3, 2)
+
+        >>> mat1.multiply(mat2).toLocalMatrix()
+        DenseMatrix(2, 2, [242.0, 272.0, 350.0, 398.0], 0)
+
+        >>> mat1.multiply(mat3).toLocalMatrix()
+        DenseMatrix(2, 2, [227.0, 258.0, 394.0, 450.0], 0)
+        """
+        if not isinstance(other, BlockMatrix):
+            raise TypeError("Other should be a BlockMatrix, got %s" % type(other))
+
+        other_java_block_matrix = other._java_matrix_wrapper._java_model
+        java_block_matrix = self._java_matrix_wrapper.call("multiply", other_java_block_matrix)
+        return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock)
+
+    @since('2.0.0')
+    def transpose(self):
+        """
+        Transpose this BlockMatrix. Returns a new BlockMatrix
+        instance sharing the same underlying data. This is a lazy operation.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2)
+
+        >>> mat_transposed = mat.transpose()
+        >>> mat_transposed.toLocalMatrix()
+        DenseMatrix(2, 6, [1.0, 4.0, 2.0, 5.0, 3.0, 6.0, 7.0, 10.0, 8.0, 11.0, 9.0, 12.0], 0)
+        """
+        java_transposed_matrix = self._java_matrix_wrapper.call("transpose")
+        return BlockMatrix(java_transposed_matrix, self.colsPerBlock, self.rowsPerBlock)
+
+    def toLocalMatrix(self):
+        """
+        Collect the distributed matrix on the driver as a DenseMatrix.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2).toLocalMatrix()
+
+        >>> # This BlockMatrix will have 6 effective rows, due to
+        >>> # having two sub-matrix blocks stacked, each with 3 rows.
+        >>> # The ensuing DenseMatrix will also have 6 rows.
+        >>> print(mat.numRows)
+        6
+
+        >>> # This BlockMatrix will have 2 effective columns, due to
+        >>> # having two sub-matrix blocks stacked, each with 2
+        >>> # columns. The ensuing DenseMatrix will also have 2 columns.
+        >>> print(mat.numCols)
+        2
+        """
+        return self._java_matrix_wrapper.call("toLocalMatrix")
+
+    def toIndexedRowMatrix(self):
+        """
+        Convert this matrix to an IndexedRowMatrix.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
+        ...                          ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
+        >>> mat = BlockMatrix(blocks, 3, 2).toIndexedRowMatrix()
+
+        >>> # This BlockMatrix will have 6 effective rows, due to
+        >>> # having two sub-matrix blocks stacked, each with 3 rows.
+        >>> # The ensuing IndexedRowMatrix will also have 6 rows.
+        >>> print(mat.numRows())
+        6
+
+        >>> # This BlockMatrix will have 2 effective columns, due to
+        >>> # having two sub-matrix blocks stacked, each with 2 columns.
+        >>> # The ensuing IndexedRowMatrix will also have 2 columns.
+        >>> print(mat.numCols())
+        2
+        """
+        java_indexed_row_matrix = self._java_matrix_wrapper.call("toIndexedRowMatrix")
+        return IndexedRowMatrix(java_indexed_row_matrix)
+
+    def toCoordinateMatrix(self):
+        """
+        Convert this matrix to a CoordinateMatrix.
+
+        >>> blocks = sc.parallelize([((0, 0), Matrices.dense(1, 2, [1, 2])),
+        ...                          ((1, 0), Matrices.dense(1, 2, [7, 8]))])
+        >>> mat = BlockMatrix(blocks, 1, 2).toCoordinateMatrix()
+        >>> mat.entries.take(3)
+        [MatrixEntry(0, 0, 1.0), MatrixEntry(0, 1, 2.0), MatrixEntry(1, 0, 7.0)]
+        """
+        java_coordinate_matrix = self._java_matrix_wrapper.call("toCoordinateMatrix")
+        return CoordinateMatrix(java_coordinate_matrix)
+
+
+def _test():
+    import doctest
+    import numpy
+    from pyspark.sql import SparkSession
+    from pyspark.mllib.linalg import Matrices
+    import pyspark.mllib.linalg.distributed
+    try:
+        # NumPy 1.14+ changed its string format.
+        numpy.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+    globs = pyspark.mllib.linalg.distributed.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.linalg.distributed tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    globs['Matrices'] = Matrices
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/random.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/random.py
new file mode 100644
index 0000000000000000000000000000000000000000..a8833cb4469233bec13d622d8fdfb2f6564b49b7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/random.py
@@ -0,0 +1,429 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Python package for random data generation.
+"""
+
+import sys
+from functools import wraps
+
+from pyspark import since
+from pyspark.mllib.common import callMLlibFunc
+
+
+__all__ = ['RandomRDDs', ]
+
+
+def toArray(f):
+    # Decorator that wraps an RDD-returning factory so that the resulting RDD
+    # contains numpy arrays instead of MLlib Vectors.
+    @wraps(f)
+    def func(sc, *a, **kw):
+        rdd = f(sc, *a, **kw)
+        return rdd.map(lambda vec: vec.toArray())
+    return func
+
+
+class RandomRDDs(object):
+    """
+    Generator methods for creating RDDs comprised of i.i.d. samples from
+    some distribution.
+
+    .. versionadded:: 1.1.0
+    """
+
+    @staticmethod
+    @since("1.1.0")
+    def uniformRDD(sc, size, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of i.i.d. samples from the
+        uniform distribution U(0.0, 1.0).
+
+        To transform the distribution in the generated RDD from U(0.0, 1.0)
+        to U(a, b), use
+        C{RandomRDDs.uniformRDD(sc, n, p, seed)\
+          .map(lambda v: a + (b - a) * v)}
+
+        :param sc: SparkContext used to create the RDD.
+        :param size: Size of the RDD.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of float comprised of i.i.d. samples ~ `U(0.0, 1.0)`.
+
+        >>> x = RandomRDDs.uniformRDD(sc, 100).collect()
+        >>> len(x)
+        100
+        >>> max(x) <= 1.0 and min(x) >= 0.0
+        True
+        >>> RandomRDDs.uniformRDD(sc, 100, 4).getNumPartitions()
+        4
+        >>> parts = RandomRDDs.uniformRDD(sc, 100, seed=4).getNumPartitions()
+        >>> parts == sc.defaultParallelism
+        True
+        """
+        return callMLlibFunc("uniformRDD", sc._jsc, size, numPartitions, seed)
+
+    @staticmethod
+    @since("1.1.0")
+    def normalRDD(sc, size, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of i.i.d. samples from the standard normal
+        distribution.
+
+        To transform the distribution in the generated RDD from standard normal
+        to some other normal N(mean, sigma^2), use
+        C{RandomRDDs.normal(sc, n, p, seed)\
+          .map(lambda v: mean + sigma * v)}
+
+        :param sc: SparkContext used to create the RDD.
+        :param size: Size of the RDD.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of float comprised of i.i.d. samples ~ N(0.0, 1.0).
+
+        >>> x = RandomRDDs.normalRDD(sc, 1000, seed=1)
+        >>> stats = x.stats()
+        >>> stats.count()
+        1000
+        >>> abs(stats.mean() - 0.0) < 0.1
+        True
+        >>> abs(stats.stdev() - 1.0) < 0.1
+        True
+        """
+        return callMLlibFunc("normalRDD", sc._jsc, size, numPartitions, seed)
+
+    @staticmethod
+    @since("1.3.0")
+    def logNormalRDD(sc, mean, std, size, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of i.i.d. samples from the log normal
+        distribution with the input mean and standard deviation.
+
+        :param sc: SparkContext used to create the RDD.
+        :param mean: mean for the log Normal distribution
+        :param std: std for the log Normal distribution
+        :param size: Size of the RDD.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of float comprised of i.i.d. samples ~ log N(mean, std).
+
+        >>> from math import sqrt, exp
+        >>> mean = 0.0
+        >>> std = 1.0
+        >>> expMean = exp(mean + 0.5 * std * std)
+        >>> expStd = sqrt((exp(std * std) - 1.0) * exp(2.0 * mean + std * std))
+        >>> x = RandomRDDs.logNormalRDD(sc, mean, std, 1000, seed=2)
+        >>> stats = x.stats()
+        >>> stats.count()
+        1000
+        >>> abs(stats.mean() - expMean) < 0.5
+        True
+        >>> from math import sqrt
+        >>> abs(stats.stdev() - expStd) < 0.5
+        True
+        """
+        return callMLlibFunc("logNormalRDD", sc._jsc, float(mean), float(std),
+                             size, numPartitions, seed)
+
+    @staticmethod
+    @since("1.1.0")
+    def poissonRDD(sc, mean, size, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of i.i.d. samples from the Poisson
+        distribution with the input mean.
+
+        :param sc: SparkContext used to create the RDD.
+        :param mean: Mean, or lambda, for the Poisson distribution.
+        :param size: Size of the RDD.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of float comprised of i.i.d. samples ~ Pois(mean).
+
+        >>> mean = 100.0
+        >>> x = RandomRDDs.poissonRDD(sc, mean, 1000, seed=2)
+        >>> stats = x.stats()
+        >>> stats.count()
+        1000
+        >>> abs(stats.mean() - mean) < 0.5
+        True
+        >>> from math import sqrt
+        >>> abs(stats.stdev() - sqrt(mean)) < 0.5
+        True
+        """
+        return callMLlibFunc("poissonRDD", sc._jsc, float(mean), size, numPartitions, seed)
+
+    @staticmethod
+    @since("1.3.0")
+    def exponentialRDD(sc, mean, size, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of i.i.d. samples from the Exponential
+        distribution with the input mean.
+
+        :param sc: SparkContext used to create the RDD.
+        :param mean: Mean, or 1 / lambda, for the Exponential distribution.
+        :param size: Size of the RDD.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of float comprised of i.i.d. samples ~ Exp(mean).
+
+        >>> mean = 2.0
+        >>> x = RandomRDDs.exponentialRDD(sc, mean, 1000, seed=2)
+        >>> stats = x.stats()
+        >>> stats.count()
+        1000
+        >>> abs(stats.mean() - mean) < 0.5
+        True
+        >>> from math import sqrt
+        >>> abs(stats.stdev() - sqrt(mean)) < 0.5
+        True
+        """
+        return callMLlibFunc("exponentialRDD", sc._jsc, float(mean), size, numPartitions, seed)
+
+    @staticmethod
+    @since("1.3.0")
+    def gammaRDD(sc, shape, scale, size, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of i.i.d. samples from the Gamma
+        distribution with the input shape and scale.
+
+        :param sc: SparkContext used to create the RDD.
+        :param shape: shape (> 0) parameter for the Gamma distribution
+        :param scale: scale (> 0) parameter for the Gamma distribution
+        :param size: Size of the RDD.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of float comprised of i.i.d. samples ~ Gamma(shape, scale).
+
+        >>> from math import sqrt
+        >>> shape = 1.0
+        >>> scale = 2.0
+        >>> expMean = shape * scale
+        >>> expStd = sqrt(shape * scale * scale)
+        >>> x = RandomRDDs.gammaRDD(sc, shape, scale, 1000, seed=2)
+        >>> stats = x.stats()
+        >>> stats.count()
+        1000
+        >>> abs(stats.mean() - expMean) < 0.5
+        True
+        >>> abs(stats.stdev() - expStd) < 0.5
+        True
+        """
+        return callMLlibFunc("gammaRDD", sc._jsc, float(shape),
+                             float(scale), size, numPartitions, seed)
+
+    @staticmethod
+    @toArray
+    @since("1.1.0")
+    def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
+        from the uniform distribution U(0.0, 1.0).
+
+        :param sc: SparkContext used to create the RDD.
+        :param numRows: Number of Vectors in the RDD.
+        :param numCols: Number of elements in each Vector.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Seed for the RNG that generates the seed for the generator in each partition.
+        :return: RDD of Vector with vectors containing i.i.d samples ~ `U(0.0, 1.0)`.
+
+        >>> import numpy as np
+        >>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect())
+        >>> mat.shape
+        (10, 10)
+        >>> mat.max() <= 1.0 and mat.min() >= 0.0
+        True
+        >>> RandomRDDs.uniformVectorRDD(sc, 10, 10, 4).getNumPartitions()
+        4
+        """
+        return callMLlibFunc("uniformVectorRDD", sc._jsc, numRows, numCols, numPartitions, seed)
+
+    @staticmethod
+    @toArray
+    @since("1.1.0")
+    def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
+        from the standard normal distribution.
+
+        :param sc: SparkContext used to create the RDD.
+        :param numRows: Number of Vectors in the RDD.
+        :param numCols: Number of elements in each Vector.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of Vector with vectors containing i.i.d. samples ~ `N(0.0, 1.0)`.
+
+        >>> import numpy as np
+        >>> mat = np.matrix(RandomRDDs.normalVectorRDD(sc, 100, 100, seed=1).collect())
+        >>> mat.shape
+        (100, 100)
+        >>> abs(mat.mean() - 0.0) < 0.1
+        True
+        >>> abs(mat.std() - 1.0) < 0.1
+        True
+        """
+        return callMLlibFunc("normalVectorRDD", sc._jsc, numRows, numCols, numPartitions, seed)
+
+    @staticmethod
+    @toArray
+    @since("1.3.0")
+    def logNormalVectorRDD(sc, mean, std, numRows, numCols, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
+        from the log normal distribution.
+
+        :param sc: SparkContext used to create the RDD.
+        :param mean: Mean of the log normal distribution
+        :param std: Standard Deviation of the log normal distribution
+        :param numRows: Number of Vectors in the RDD.
+        :param numCols: Number of elements in each Vector.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of Vector with vectors containing i.i.d. samples ~ log `N(mean, std)`.
+
+        >>> import numpy as np
+        >>> from math import sqrt, exp
+        >>> mean = 0.0
+        >>> std = 1.0
+        >>> expMean = exp(mean + 0.5 * std * std)
+        >>> expStd = sqrt((exp(std * std) - 1.0) * exp(2.0 * mean + std * std))
+        >>> m = RandomRDDs.logNormalVectorRDD(sc, mean, std, 100, 100, seed=1).collect()
+        >>> mat = np.matrix(m)
+        >>> mat.shape
+        (100, 100)
+        >>> abs(mat.mean() - expMean) < 0.1
+        True
+        >>> abs(mat.std() - expStd) < 0.1
+        True
+        """
+        return callMLlibFunc("logNormalVectorRDD", sc._jsc, float(mean), float(std),
+                             numRows, numCols, numPartitions, seed)
+
+    @staticmethod
+    @toArray
+    @since("1.1.0")
+    def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
+        from the Poisson distribution with the input mean.
+
+        :param sc: SparkContext used to create the RDD.
+        :param mean: Mean, or lambda, for the Poisson distribution.
+        :param numRows: Number of Vectors in the RDD.
+        :param numCols: Number of elements in each Vector.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of Vector with vectors containing i.i.d. samples ~ Pois(mean).
+
+        >>> import numpy as np
+        >>> mean = 100.0
+        >>> rdd = RandomRDDs.poissonVectorRDD(sc, mean, 100, 100, seed=1)
+        >>> mat = np.mat(rdd.collect())
+        >>> mat.shape
+        (100, 100)
+        >>> abs(mat.mean() - mean) < 0.5
+        True
+        >>> from math import sqrt
+        >>> abs(mat.std() - sqrt(mean)) < 0.5
+        True
+        """
+        return callMLlibFunc("poissonVectorRDD", sc._jsc, float(mean), numRows, numCols,
+                             numPartitions, seed)
+
+    @staticmethod
+    @toArray
+    @since("1.3.0")
+    def exponentialVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
+        from the Exponential distribution with the input mean.
+
+        :param sc: SparkContext used to create the RDD.
+        :param mean: Mean, or 1 / lambda, for the Exponential distribution.
+        :param numRows: Number of Vectors in the RDD.
+        :param numCols: Number of elements in each Vector.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of Vector with vectors containing i.i.d. samples ~ Exp(mean).
+
+        >>> import numpy as np
+        >>> mean = 0.5
+        >>> rdd = RandomRDDs.exponentialVectorRDD(sc, mean, 100, 100, seed=1)
+        >>> mat = np.mat(rdd.collect())
+        >>> mat.shape
+        (100, 100)
+        >>> abs(mat.mean() - mean) < 0.5
+        True
+        >>> from math import sqrt
+        >>> abs(mat.std() - sqrt(mean)) < 0.5
+        True
+        """
+        return callMLlibFunc("exponentialVectorRDD", sc._jsc, float(mean), numRows, numCols,
+                             numPartitions, seed)
+
+    @staticmethod
+    @toArray
+    @since("1.3.0")
+    def gammaVectorRDD(sc, shape, scale, numRows, numCols, numPartitions=None, seed=None):
+        """
+        Generates an RDD comprised of vectors containing i.i.d. samples drawn
+        from the Gamma distribution.
+
+        :param sc: SparkContext used to create the RDD.
+        :param shape: Shape (> 0) of the Gamma distribution
+        :param scale: Scale (> 0) of the Gamma distribution
+        :param numRows: Number of Vectors in the RDD.
+        :param numCols: Number of elements in each Vector.
+        :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`).
+        :param seed: Random seed (default: a random long integer).
+        :return: RDD of Vector with vectors containing i.i.d. samples ~ Gamma(shape, scale).
+
+        >>> import numpy as np
+        >>> from math import sqrt
+        >>> shape = 1.0
+        >>> scale = 2.0
+        >>> expMean = shape * scale
+        >>> expStd = sqrt(shape * scale * scale)
+        >>> mat = np.matrix(RandomRDDs.gammaVectorRDD(sc, shape, scale, 100, 100, seed=1).collect())
+        >>> mat.shape
+        (100, 100)
+        >>> abs(mat.mean() - expMean) < 0.1
+        True
+        >>> abs(mat.std() - expStd) < 0.1
+        True
+        """
+        return callMLlibFunc("gammaVectorRDD", sc._jsc, float(shape), float(scale),
+                             numRows, numCols, numPartitions, seed)
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    globs = globals().copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.random tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/recommendation.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/recommendation.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d4eae85132bbb6f2129b008ad73ab7db21572f7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/recommendation.py
@@ -0,0 +1,334 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import array
+import sys
+from collections import namedtuple
+
+from pyspark import SparkContext, since
+from pyspark.rdd import RDD
+from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, inherit_doc
+from pyspark.mllib.util import JavaLoader, JavaSaveable
+from pyspark.sql import DataFrame
+
+__all__ = ['MatrixFactorizationModel', 'ALS', 'Rating']
+
+
+class Rating(namedtuple("Rating", ["user", "product", "rating"])):
+    """
+    Represents a (user, product, rating) tuple.
+
+    >>> r = Rating(1, 2, 5.0)
+    >>> (r.user, r.product, r.rating)
+    (1, 2, 5.0)
+    >>> (r[0], r[1], r[2])
+    (1, 2, 5.0)
+
+    .. versionadded:: 1.2.0
+    """
+
+    def __reduce__(self):
+        return Rating, (int(self.user), int(self.product), float(self.rating))
+
+
+@inherit_doc
+class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
+
+    """A matrix factorisation model trained by regularized alternating
+    least-squares.
+
+    >>> r1 = (1, 1, 1.0)
+    >>> r2 = (1, 2, 2.0)
+    >>> r3 = (2, 1, 2.0)
+    >>> ratings = sc.parallelize([r1, r2, r3])
+    >>> model = ALS.trainImplicit(ratings, 1, seed=10)
+    >>> model.predict(2, 2)
+    0.4...
+
+    >>> testset = sc.parallelize([(1, 2), (1, 1)])
+    >>> model = ALS.train(ratings, 2, seed=0)
+    >>> model.predictAll(testset).collect()
+    [Rating(user=1, product=1, rating=1.0...), Rating(user=1, product=2, rating=1.9...)]
+
+    >>> model = ALS.train(ratings, 4, seed=10)
+    >>> model.userFeatures().collect()
+    [(1, array('d', [...])), (2, array('d', [...]))]
+
+    >>> model.recommendUsers(1, 2)
+    [Rating(user=2, product=1, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
+    >>> model.recommendProducts(1, 2)
+    [Rating(user=1, product=2, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
+    >>> model.rank
+    4
+
+    >>> first_user = model.userFeatures().take(1)[0]
+    >>> latents = first_user[1]
+    >>> len(latents)
+    4
+
+    >>> model.productFeatures().collect()
+    [(1, array('d', [...])), (2, array('d', [...]))]
+
+    >>> first_product = model.productFeatures().take(1)[0]
+    >>> latents = first_product[1]
+    >>> len(latents)
+    4
+
+    >>> products_for_users = model.recommendProductsForUsers(1).collect()
+    >>> len(products_for_users)
+    2
+    >>> products_for_users[0]
+    (1, (Rating(user=1, product=2, rating=...),))
+
+    >>> users_for_products = model.recommendUsersForProducts(1).collect()
+    >>> len(users_for_products)
+    2
+    >>> users_for_products[0]
+    (1, (Rating(user=2, product=1, rating=...),))
+
+    >>> model = ALS.train(ratings, 1, nonnegative=True, seed=10)
+    >>> model.predict(2, 2)
+    3.73...
+
+    >>> df = sqlContext.createDataFrame([Rating(1, 1, 1.0), Rating(1, 2, 2.0), Rating(2, 1, 2.0)])
+    >>> model = ALS.train(df, 1, nonnegative=True, seed=10)
+    >>> model.predict(2, 2)
+    3.73...
+
+    >>> model = ALS.trainImplicit(ratings, 1, nonnegative=True, seed=10)
+    >>> model.predict(2, 2)
+    0.4...
+
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> model.save(sc, path)
+    >>> sameModel = MatrixFactorizationModel.load(sc, path)
+    >>> sameModel.predict(2, 2)
+    0.4...
+    >>> sameModel.predictAll(testset).collect()
+    [Rating(...
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    .. versionadded:: 0.9.0
+    """
+    @since("0.9.0")
+    def predict(self, user, product):
+        """
+        Predicts rating for the given user and product.
+        """
+        return self._java_model.predict(int(user), int(product))
+
+    @since("0.9.0")
+    def predictAll(self, user_product):
+        """
+        Returns an RDD of predicted ratings for the input user and product
+        pairs.
+        """
+        assert isinstance(user_product, RDD), "user_product should be RDD of (user, product)"
+        first = user_product.first()
+        assert len(first) == 2, "user_product should be RDD of (user, product)"
+        user_product = user_product.map(lambda u_p: (int(u_p[0]), int(u_p[1])))
+        return self.call("predict", user_product)
+
+    @since("1.2.0")
+    def userFeatures(self):
+        """
+        Returns a paired RDD, where the first element is the user and the
+        second is an array of features corresponding to that user.
+        """
+        return self.call("getUserFeatures").mapValues(lambda v: array.array('d', v))
+
+    @since("1.2.0")
+    def productFeatures(self):
+        """
+        Returns a paired RDD, where the first element is the product and the
+        second is an array of features corresponding to that product.
+        """
+        return self.call("getProductFeatures").mapValues(lambda v: array.array('d', v))
+
+    @since("1.4.0")
+    def recommendUsers(self, product, num):
+        """
+        Recommends the top "num" number of users for a given product and
+        returns a list of Rating objects sorted by the predicted rating in
+        descending order.
+        """
+        return list(self.call("recommendUsers", product, num))
+
+    @since("1.4.0")
+    def recommendProducts(self, user, num):
+        """
+        Recommends the top "num" number of products for a given user and
+        returns a list of Rating objects sorted by the predicted rating in
+        descending order.
+        """
+        return list(self.call("recommendProducts", user, num))
+
+    def recommendProductsForUsers(self, num):
+        """
+        Recommends the top "num" number of products for all users. The
+        number of recommendations returned per user may be less than "num".
+        """
+        return self.call("wrappedRecommendProductsForUsers", num)
+
+    def recommendUsersForProducts(self, num):
+        """
+        Recommends the top "num" number of users for all products. The
+        number of recommendations returned per product may be less than
+        "num".
+        """
+        return self.call("wrappedRecommendUsersForProducts", num)
+
+    @property
+    @since("1.4.0")
+    def rank(self):
+        """Rank for the features in this model"""
+        return self.call("rank")
+
+    @classmethod
+    @since("1.3.1")
+    def load(cls, sc, path):
+        """Load a model from the given path"""
+        model = cls._load_java(sc, path)
+        wrapper = sc._jvm.org.apache.spark.mllib.api.python.MatrixFactorizationModelWrapper(model)
+        return MatrixFactorizationModel(wrapper)
+
+
+class ALS(object):
+    """Alternating Least Squares matrix factorization
+
+    .. versionadded:: 0.9.0
+    """
+
+    @classmethod
+    def _prepare(cls, ratings):
+        if isinstance(ratings, RDD):
+            pass
+        elif isinstance(ratings, DataFrame):
+            ratings = ratings.rdd
+        else:
+            raise TypeError("Ratings should be represented by either an RDD or a DataFrame, "
+                            "but got %s." % type(ratings))
+        first = ratings.first()
+        if isinstance(first, Rating):
+            pass
+        elif isinstance(first, (tuple, list)):
+            ratings = ratings.map(lambda x: Rating(*x))
+        else:
+            raise TypeError("Expect a Rating or a tuple/list, but got %s." % type(first))
+        return ratings
+
+    @classmethod
+    @since("0.9.0")
+    def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False,
+              seed=None):
+        """
+        Train a matrix factorization model given an RDD of ratings by users
+        for a subset of products. The ratings matrix is approximated as the
+        product of two lower-rank matrices of a given rank (number of
+        features). To solve for these features, ALS is run iteratively with
+        a configurable level of parallelism.
+
+        :param ratings:
+          RDD of `Rating` or (userID, productID, rating) tuple.
+        :param rank:
+          Number of features to use (also referred to as the number of latent factors).
+        :param iterations:
+          Number of iterations of ALS.
+          (default: 5)
+        :param lambda_:
+          Regularization parameter.
+          (default: 0.01)
+        :param blocks:
+          Number of blocks used to parallelize the computation. A value
+          of -1 will use an auto-configured number of blocks.
+          (default: -1)
+        :param nonnegative:
+          A value of True will solve least-squares with nonnegativity
+          constraints.
+          (default: False)
+        :param seed:
+          Random seed for initial matrix factorization model. A value
+          of None will use system time as the seed.
+          (default: None)
+        """
+        model = callMLlibFunc("trainALSModel", cls._prepare(ratings), rank, iterations,
+                              lambda_, blocks, nonnegative, seed)
+        return MatrixFactorizationModel(model)
+
+    @classmethod
+    @since("0.9.0")
+    def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01,
+                      nonnegative=False, seed=None):
+        """
+        Train a matrix factorization model given an RDD of 'implicit
+        preferences' of users for a subset of products. The ratings matrix
+        is approximated as the product of two lower-rank matrices of a
+        given rank (number of features). To solve for these features, ALS
+        is run iteratively with a configurable level of parallelism.
+
+        :param ratings:
+          RDD of `Rating` or (userID, productID, rating) tuple.
+        :param rank:
+          Number of features to use (also referred to as the number of latent factors).
+        :param iterations:
+          Number of iterations of ALS.
+          (default: 5)
+        :param lambda_:
+          Regularization parameter.
+          (default: 0.01)
+        :param blocks:
+          Number of blocks used to parallelize the computation. A value
+          of -1 will use an auto-configured number of blocks.
+          (default: -1)
+        :param alpha:
+          A constant used in computing confidence.
+          (default: 0.01)
+        :param nonnegative:
+          A value of True will solve least-squares with nonnegativity
+          constraints.
+          (default: False)
+        :param seed:
+          Random seed for initial matrix factorization model. A value
+          of None will use system time as the seed.
+          (default: None)
+        """
+        model = callMLlibFunc("trainImplicitALSModel", cls._prepare(ratings), rank,
+                              iterations, lambda_, blocks, alpha, nonnegative, seed)
+        return MatrixFactorizationModel(model)
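+
+    # A minimal usage sketch contrasting the two entry points above; the
+    # `ratings` RDD is as in the MatrixFactorizationModel docstring, and the
+    # rank/seed/alpha values below are illustrative, not defaults:
+    #
+    #     explicit_model = ALS.train(ratings, rank=10, iterations=5, seed=42)
+    #     implicit_model = ALS.trainImplicit(ratings, rank=10, alpha=0.01, seed=42)
+    #     explicit_model.predict(1, 2)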
+
+
+def _test():
+    import doctest
+    import pyspark.mllib.recommendation
+    from pyspark.sql import SQLContext
+    globs = pyspark.mllib.recommendation.__dict__.copy()
+    sc = SparkContext('local[4]', 'PythonTest')
+    globs['sc'] = sc
+    globs['sqlContext'] = SQLContext(sc)
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/regression.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/regression.py
new file mode 100644
index 0000000000000000000000000000000000000000..6be45f51862c91485eaed81ef5844899876c1211
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/regression.py
@@ -0,0 +1,845 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import warnings
+
+import numpy as np
+from numpy import array
+
+from pyspark import RDD, since
+from pyspark.streaming.dstream import DStream
+from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc
+from pyspark.mllib.linalg import SparseVector, Vectors, _convert_to_vector
+from pyspark.mllib.util import Saveable, Loader
+
+__all__ = ['LabeledPoint', 'LinearModel',
+           'LinearRegressionModel', 'LinearRegressionWithSGD',
+           'RidgeRegressionModel', 'RidgeRegressionWithSGD',
+           'LassoModel', 'LassoWithSGD', 'IsotonicRegressionModel',
+           'IsotonicRegression', 'StreamingLinearAlgorithm',
+           'StreamingLinearRegressionWithSGD']
+
+
+class LabeledPoint(object):
+
+    """
+    Class that represents the features and label of a data point.
+
+    :param label:
+      Label for this data point.
+    :param features:
+      Vector of features for this point (NumPy array, list,
+      pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix).
+
+    .. note:: 'label' and 'features' are accessible as instance attributes.
+
+    .. versionadded:: 1.0.0
+    """
+
+    def __init__(self, label, features):
+        self.label = float(label)
+        self.features = _convert_to_vector(features)
+
+    def __reduce__(self):
+        return (LabeledPoint, (self.label, self.features))
+
+    def __str__(self):
+        return "(" + ",".join((str(self.label), str(self.features))) + ")"
+
+    def __repr__(self):
+        return "LabeledPoint(%s, %s)" % (self.label, self.features)
+
+
+class LinearModel(object):
+
+    """
+    A linear model that has a vector of coefficients and an intercept.
+
+    :param weights:
+      Weights computed for every feature.
+    :param intercept:
+      Intercept computed for this model.
+
+    .. versionadded:: 0.9.0
+    """
+
+    def __init__(self, weights, intercept):
+        self._coeff = _convert_to_vector(weights)
+        self._intercept = float(intercept)
+
+    @property
+    @since("1.0.0")
+    def weights(self):
+        """Weights computed for every feature."""
+        return self._coeff
+
+    @property
+    @since("1.0.0")
+    def intercept(self):
+        """Intercept computed for this model."""
+        return self._intercept
+
+    def __repr__(self):
+        return "(weights=%s, intercept=%r)" % (self._coeff, self._intercept)
+
+
+@inherit_doc
+class LinearRegressionModelBase(LinearModel):
+
+    """A linear regression model.
+
+    >>> lrmb = LinearRegressionModelBase(np.array([1.0, 2.0]), 0.1)
+    >>> abs(lrmb.predict(np.array([-1.03, 7.777])) - 14.624) < 1e-6
+    True
+    >>> abs(lrmb.predict(SparseVector(2, {0: -1.03, 1: 7.777})) - 14.624) < 1e-6
+    True
+
+    .. versionadded:: 0.9.0
+    """
+
+    @since("0.9.0")
+    def predict(self, x):
+        """
+        Predict the value of the dependent variable given a vector or
+        an RDD of vectors containing values for the independent variables.
+        """
+        if isinstance(x, RDD):
+            return x.map(self.predict)
+        x = _convert_to_vector(x)
+        return self.weights.dot(x) + self.intercept
+
+
+@inherit_doc
+class LinearRegressionModel(LinearRegressionModelBase):
+
+    """A linear regression model derived from a least-squares fit.
+
+    >>> from pyspark.mllib.regression import LabeledPoint
+    >>> data = [
+    ...     LabeledPoint(0.0, [0.0]),
+    ...     LabeledPoint(1.0, [1.0]),
+    ...     LabeledPoint(3.0, [2.0]),
+    ...     LabeledPoint(2.0, [3.0])
+    ... ]
+    >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
+    ...     initialWeights=np.array([1.0]))
+    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(np.array([1.0])) - 1) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5
+    True
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> lrm.save(sc, path)
+    >>> sameModel = LinearRegressionModel.load(sc, path)
+    >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5
+    True
+    >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except:
+    ...     pass
+    >>> data = [
+    ...     LabeledPoint(0.0, SparseVector(1, {0: 0.0})),
+    ...     LabeledPoint(1.0, SparseVector(1, {0: 1.0})),
+    ...     LabeledPoint(3.0, SparseVector(1, {0: 2.0})),
+    ...     LabeledPoint(2.0, SparseVector(1, {0: 3.0}))
+    ... ]
+    >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
+    ...     initialWeights=array([1.0]))
+    >>> abs(lrm.predict(array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,
+    ...    miniBatchFraction=1.0, initialWeights=array([1.0]), regParam=0.1, regType="l2",
+    ...    intercept=True, validateData=True)
+    >>> abs(lrm.predict(array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+
+    .. versionadded:: 0.9.0
+    """
+    @since("1.4.0")
+    def save(self, sc, path):
+        """Save a LinearRegressionModel."""
+        java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel(
+            _py2java(sc, self._coeff), self.intercept)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since("1.4.0")
+    def load(cls, sc, path):
+        """Load a LinearRegressionModel."""
+        java_model = sc._jvm.org.apache.spark.mllib.regression.LinearRegressionModel.load(
+            sc._jsc.sc(), path)
+        weights = _java2py(sc, java_model.weights())
+        intercept = java_model.intercept()
+        model = LinearRegressionModel(weights, intercept)
+        return model
+
+
+# train_func should take two parameters, namely data and initial_weights, and
+# return the result of a call to the appropriate JVM stub.
+# _regression_train_wrapper is responsible for setup and error checking.
+def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
+    from pyspark.mllib.classification import LogisticRegressionModel
+    first = data.first()
+    if not isinstance(first, LabeledPoint):
+        raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
+    if initial_weights is None:
+        initial_weights = [0.0] * len(data.first().features)
+    if (modelClass == LogisticRegressionModel):
+        weights, intercept, numFeatures, numClasses = train_func(
+            data, _convert_to_vector(initial_weights))
+        return modelClass(weights, intercept, numFeatures, numClasses)
+    else:
+        weights, intercept = train_func(data, _convert_to_vector(initial_weights))
+        return modelClass(weights, intercept)
+
+
+class LinearRegressionWithSGD(object):
+    """
+    .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression.
+    """
+    @classmethod
+    @since("0.9.0")
+    def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0,
+              initialWeights=None, regParam=0.0, regType=None, intercept=False,
+              validateData=True, convergenceTol=0.001):
+        """
+        Train a linear regression model using Stochastic Gradient
+        Descent (SGD). This solves the least squares regression
+        formulation
+
+            f(weights) = 1/(2n) ||A weights - y||^2
+
+        which is the mean squared error. Here the data matrix has n rows,
+        and the input RDD holds the set of rows of A, each with its
+        corresponding right hand side label y.
+        See also the documentation for the precise formulation.
+
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.0)
+        :param regType:
+          The type of regularizer used for training our model.
+          Supported values:
+
+            - "l1" for using L1 regularization
+            - "l2" for using L2 regularization
+            - None for no regularization (default)
+        :param intercept:
+          Boolean parameter which indicates whether to use the augmented
+          representation of the training data (i.e., whether bias features
+          are activated).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
+        """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression.", DeprecationWarning)
+
+        def train(rdd, i):
+            return callMLlibFunc("trainLinearRegressionModelWithSGD", rdd, int(iterations),
+                                 float(step), float(miniBatchFraction), i, float(regParam),
+                                 regType, bool(intercept), bool(validateData),
+                                 float(convergenceTol))
+
+        return _regression_train_wrapper(train, LinearRegressionModel, data, initialWeights)
+
+
+@inherit_doc
+class LassoModel(LinearRegressionModelBase):
+
+    """A linear regression model derived from a least-squares fit with
+    an l_1 penalty term.
+
+    >>> from pyspark.mllib.regression import LabeledPoint
+    >>> data = [
+    ...     LabeledPoint(0.0, [0.0]),
+    ...     LabeledPoint(1.0, [1.0]),
+    ...     LabeledPoint(3.0, [2.0]),
+    ...     LabeledPoint(2.0, [3.0])
+    ... ]
+    >>> lrm = LassoWithSGD.train(sc.parallelize(data), iterations=10, initialWeights=array([1.0]))
+    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(np.array([1.0])) - 1) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5
+    True
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> lrm.save(sc, path)
+    >>> sameModel = LassoModel.load(sc, path)
+    >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5
+    True
+    >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> from shutil import rmtree
+    >>> try:
+    ...    rmtree(path)
+    ... except:
+    ...    pass
+    >>> data = [
+    ...     LabeledPoint(0.0, SparseVector(1, {0: 0.0})),
+    ...     LabeledPoint(1.0, SparseVector(1, {0: 1.0})),
+    ...     LabeledPoint(3.0, SparseVector(1, {0: 2.0})),
+    ...     LabeledPoint(2.0, SparseVector(1, {0: 3.0}))
+    ... ]
+    >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
+    ...     initialWeights=array([1.0]))
+    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> lrm = LassoWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,
+    ...     regParam=0.01, miniBatchFraction=1.0, initialWeights=array([1.0]), intercept=True,
+    ...     validateData=True)
+    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+
+    .. versionadded:: 0.9.0
+    """
+    @since("1.4.0")
+    def save(self, sc, path):
+        """Save a LassoModel."""
+        java_model = sc._jvm.org.apache.spark.mllib.regression.LassoModel(
+            _py2java(sc, self._coeff), self.intercept)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since("1.4.0")
+    def load(cls, sc, path):
+        """Load a LassoModel."""
+        java_model = sc._jvm.org.apache.spark.mllib.regression.LassoModel.load(
+            sc._jsc.sc(), path)
+        weights = _java2py(sc, java_model.weights())
+        intercept = java_model.intercept()
+        model = LassoModel(weights, intercept)
+        return model
+
+
+class LassoWithSGD(object):
+    """
+    .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0.
+            Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.
+    """
+    @classmethod
+    @since("0.9.0")
+    def train(cls, data, iterations=100, step=1.0, regParam=0.01,
+              miniBatchFraction=1.0, initialWeights=None, intercept=False,
+              validateData=True, convergenceTol=0.001):
+        """
+        Train a regression model with L1-regularization using Stochastic
+        Gradient Descent. This solves the l1-regularized least squares
+        regression formulation
+
+            f(weights) = 1/(2n) ||A weights - y||^2  + regParam ||weights||_1
+
+        Here the data matrix has n rows, and the input RDD holds the set
+        of rows of A, each with its corresponding right hand side label y.
+        See also the documentation for the precise formulation.
+
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates whether to use the augmented
+          representation of the training data (i.e., whether bias features
+          are activated).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
+        """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. "
+            "Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.",
+            DeprecationWarning)
+
+        def train(rdd, i):
+            return callMLlibFunc("trainLassoModelWithSGD", rdd, int(iterations), float(step),
+                                 float(regParam), float(miniBatchFraction), i, bool(intercept),
+                                 bool(validateData), float(convergenceTol))
+
+        return _regression_train_wrapper(train, LassoModel, data, initialWeights)
+
+
+@inherit_doc
+class RidgeRegressionModel(LinearRegressionModelBase):
+
+    """A linear regression model derived from a least-squares fit with
+    an l_2 penalty term.
+
+    >>> from pyspark.mllib.regression import LabeledPoint
+    >>> data = [
+    ...     LabeledPoint(0.0, [0.0]),
+    ...     LabeledPoint(1.0, [1.0]),
+    ...     LabeledPoint(3.0, [2.0]),
+    ...     LabeledPoint(2.0, [3.0])
+    ... ]
+    >>> lrm = RidgeRegressionWithSGD.train(sc.parallelize(data), iterations=10,
+    ...     initialWeights=array([1.0]))
+    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(np.array([1.0])) - 1) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) < 0.5
+    True
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> lrm.save(sc, path)
+    >>> sameModel = RidgeRegressionModel.load(sc, path)
+    >>> abs(sameModel.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(sameModel.predict(np.array([1.0])) - 1) < 0.5
+    True
+    >>> abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> from shutil import rmtree
+    >>> try:
+    ...    rmtree(path)
+    ... except:
+    ...    pass
+    >>> data = [
+    ...     LabeledPoint(0.0, SparseVector(1, {0: 0.0})),
+    ...     LabeledPoint(1.0, SparseVector(1, {0: 1.0})),
+    ...     LabeledPoint(3.0, SparseVector(1, {0: 2.0})),
+    ...     LabeledPoint(2.0, SparseVector(1, {0: 3.0}))
+    ... ]
+    >>> lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,
+    ...     initialWeights=array([1.0]))
+    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+    >>> lrm = RidgeRegressionWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,
+    ...     regParam=0.01, miniBatchFraction=1.0, initialWeights=array([1.0]), intercept=True,
+    ...     validateData=True)
+    >>> abs(lrm.predict(np.array([0.0])) - 0) < 0.5
+    True
+    >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5
+    True
+
+    .. versionadded:: 0.9.0
+    """
+    @since("1.4.0")
+    def save(self, sc, path):
+        """Save a RidgeRegressionMode."""
+        java_model = sc._jvm.org.apache.spark.mllib.regression.RidgeRegressionModel(
+            _py2java(sc, self._coeff), self.intercept)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since("1.4.0")
+    def load(cls, sc, path):
+        """Load a RidgeRegressionMode."""
+        java_model = sc._jvm.org.apache.spark.mllib.regression.RidgeRegressionModel.load(
+            sc._jsc.sc(), path)
+        weights = _java2py(sc, java_model.weights())
+        intercept = java_model.intercept()
+        model = RidgeRegressionModel(weights, intercept)
+        return model
+
+
+class RidgeRegressionWithSGD(object):
+    """
+    .. versionadded:: 0.9.0
+    .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0.
+            Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for
+            LinearRegression.
+    """
+    @classmethod
+    @since("0.9.0")
+    def train(cls, data, iterations=100, step=1.0, regParam=0.01,
+              miniBatchFraction=1.0, initialWeights=None, intercept=False,
+              validateData=True, convergenceTol=0.001):
+        """
+        Train a regression model with L2-regularization using Stochastic
+        Gradient Descent. This solves the l2-regularized least squares
+        regression formulation
+
+            f(weights) = 1/(2n) ||A weights - y||^2 + regParam/2 ||weights||^2
+
+        Here the data matrix has n rows, and the input RDD holds the set
+        of rows of A, each with its corresponding right hand side label y.
+        See also the documentation for the precise formulation.
+
+        :param data:
+          The training data, an RDD of LabeledPoint.
+        :param iterations:
+          The number of iterations.
+          (default: 100)
+        :param step:
+          The step parameter used in SGD.
+          (default: 1.0)
+        :param regParam:
+          The regularizer parameter.
+          (default: 0.01)
+        :param miniBatchFraction:
+          Fraction of data to be used for each SGD iteration.
+          (default: 1.0)
+        :param initialWeights:
+          The initial weights.
+          (default: None)
+        :param intercept:
+          Boolean parameter which indicates whether to use the augmented
+          representation of the training data (i.e., whether bias features
+          are activated).
+          (default: False)
+        :param validateData:
+          Boolean parameter which indicates if the algorithm should
+          validate data before training.
+          (default: True)
+        :param convergenceTol:
+          A condition which decides iteration termination.
+          (default: 0.001)
+        """
+        warnings.warn(
+            "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. "
+            "Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for "
+            "LinearRegression.", DeprecationWarning)
+
+        def train(rdd, i):
+            return callMLlibFunc("trainRidgeModelWithSGD", rdd, int(iterations), float(step),
+                                 float(regParam), float(miniBatchFraction), i, bool(intercept),
+                                 bool(validateData), float(convergenceTol))
+
+        return _regression_train_wrapper(train, RidgeRegressionModel, data, initialWeights)
+
+
+class IsotonicRegressionModel(Saveable, Loader):
+
+    """
+    Regression model for isotonic regression.
+
+    :param boundaries:
+      Array of boundaries for which predictions are known. Boundaries
+      must be sorted in increasing order.
+    :param predictions:
+      Array of predictions associated with the boundaries at the same
+      index. These are the results of isotonic regression and are
+      therefore monotone.
+    :param isotonic:
+      Indicates whether this is isotonic or antitonic.
+
+    >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)]
+    >>> irm = IsotonicRegression.train(sc.parallelize(data))
+    >>> irm.predict(3)
+    2.0
+    >>> irm.predict(5)
+    16.5
+    >>> irm.predict(sc.parallelize([3, 5])).collect()
+    [2.0, 16.5]
+    >>> import os, tempfile
+    >>> path = tempfile.mkdtemp()
+    >>> irm.save(sc, path)
+    >>> sameModel = IsotonicRegressionModel.load(sc, path)
+    >>> sameModel.predict(3)
+    2.0
+    >>> sameModel.predict(5)
+    16.5
+    >>> from shutil import rmtree
+    >>> try:
+    ...     rmtree(path)
+    ... except OSError:
+    ...     pass
+
+    .. versionadded:: 1.4.0
+    """
+
+    def __init__(self, boundaries, predictions, isotonic):
+        self.boundaries = boundaries
+        self.predictions = predictions
+        self.isotonic = isotonic
+
+    @since("1.4.0")
+    def predict(self, x):
+        """
+        Predict labels for the provided features using a piecewise
+        linear function.
+        1) If x exactly matches a boundary, the associated prediction
+        is returned. If multiple predictions share that boundary, one
+        of them is returned; which one is undefined (as with
+        java.util.Arrays.binarySearch).
+        2) If x is lower (or higher) than all boundaries, the first
+        (or last) prediction is returned. If multiple predictions
+        share that boundary, the lowest (or highest) one is returned.
+        3) If x falls between two values in the boundary array, the
+        prediction is linearly interpolated between the two
+        associated predictions. If multiple predictions share a
+        boundary, the rules from 2) apply.
+
+        :param x:
+          Feature or RDD of Features to be labeled.
+        """
+        if isinstance(x, RDD):
+            return x.map(lambda v: self.predict(v))
+        return np.interp(x, self.boundaries, self.predictions)
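+
+    # A small sketch of rule 3) above, with made-up boundaries and predictions
+    # (not taken from the docstring example):
+    #
+    #     np.interp(2.0, [1.0, 3.0], [0.0, 2.0])   # -> 1.0, linear interpolation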
+
+    @since("1.4.0")
+    def save(self, sc, path):
+        """Save an IsotonicRegressionModel."""
+        java_boundaries = _py2java(sc, self.boundaries.tolist())
+        java_predictions = _py2java(sc, self.predictions.tolist())
+        java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel(
+            java_boundaries, java_predictions, self.isotonic)
+        java_model.save(sc._jsc.sc(), path)
+
+    @classmethod
+    @since("1.4.0")
+    def load(cls, sc, path):
+        """Load an IsotonicRegressionModel."""
+        java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel.load(
+            sc._jsc.sc(), path)
+        py_boundaries = _java2py(sc, java_model.boundaryVector()).toArray()
+        py_predictions = _java2py(sc, java_model.predictionVector()).toArray()
+        return IsotonicRegressionModel(py_boundaries, py_predictions, java_model.isotonic)
+
+
+class IsotonicRegression(object):
+    """
+    Isotonic regression.
+    Currently implemented using parallelized pool adjacent violators
+    algorithm. Only univariate (single feature) algorithm supported.
+
+    Sequential PAV implementation based on:
+
+      Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani.
+      "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61.
+      Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf
+
+    Sequential PAV parallelization based on:
+
+        Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset.
+        "An approach to parallelizing isotonic regression."
+        Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147.
+        Available from http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf
+
+    See `Isotonic regression (Wikipedia) <http://en.wikipedia.org/wiki/Isotonic_regression>`_.
+
+    .. versionadded:: 1.4.0
+    """
+
+    @classmethod
+    @since("1.4.0")
+    def train(cls, data, isotonic=True):
+        """
+        Train an isotonic regression model on the given data.
+
+        :param data:
+          RDD of (label, feature, weight) tuples.
+        :param isotonic:
+          Whether this is isotonic (which is default) or antitonic.
+          (default: True)
+        """
+        boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel",
+                                                data.map(_convert_to_vector), bool(isotonic))
+        return IsotonicRegressionModel(boundaries.toArray(), predictions.toArray(), isotonic)
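+
+
+# Illustrative sketch, not part of the pyspark.mllib API: a hypothetical helper
+# showing how IsotonicRegression.train and the three prediction rules described
+# in IsotonicRegressionModel.predict fit together. The toy data mirrors the
+# doctest above; the function name is made up.
+def _isotonic_regression_example(sc):
+    data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)]
+    model = IsotonicRegression.train(sc.parallelize(data))
+    on_boundary = model.predict(3)                     # rule 1: exact boundary match
+    clamped = (model.predict(-1), model.predict(100))  # rule 2: outside the boundary range
+    interpolated = model.predict(3.5)                  # rule 3: linear interpolation
+    return on_boundary, clamped, interpolated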
+
+
+class StreamingLinearAlgorithm(object):
+    """
+    Base class for streaming linear algorithms.
+
+    Provides the predictOn and predictOnValues methods so that
+    subclasses do not need to reimplement them.
+
+    .. versionadded:: 1.5.0
+    """
+    def __init__(self, model):
+        self._model = model
+
+    @since("1.5.0")
+    def latestModel(self):
+        """
+        Returns the latest model.
+        """
+        return self._model
+
+    def _validate(self, dstream):
+        if not isinstance(dstream, DStream):
+            raise TypeError(
+                "dstream should be a DStream object, got %s" % type(dstream))
+        if not self._model:
+            raise ValueError(
+                "Model must be initialized using setInitialWeights")
+
+    @since("1.5.0")
+    def predictOn(self, dstream):
+        """
+        Use the model to make predictions on batches of data from a
+        DStream.
+
+        :return:
+          DStream containing predictions.
+        """
+        self._validate(dstream)
+        return dstream.map(lambda x: self._model.predict(x))
+
+    @since("1.5.0")
+    def predictOnValues(self, dstream):
+        """
+        Use the model to make predictions on the values of a DStream and
+        carry over its keys.
+
+        :return:
+          DStream containing the input keys and the predictions as values.
+        """
+        self._validate(dstream)
+        return dstream.mapValues(lambda x: self._model.predict(x))
+
+
+@inherit_doc
+class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm):
+    """
+    Train or predict a linear regression model on streaming data.
+    Training uses Stochastic Gradient Descent to update the model
+    based on each new batch of incoming data from a DStream
+    (see `LinearRegressionWithSGD` for model equation).
+
+    Each batch of data is assumed to be an RDD of LabeledPoints.
+    The number of data points per batch can vary, but the number
+    of features must be constant. An initial weight vector must
+    be provided.
+
+    :param stepSize:
+      Step size for each iteration of gradient descent.
+      (default: 0.1)
+    :param numIterations:
+      Number of iterations run for each batch of data.
+      (default: 50)
+    :param miniBatchFraction:
+      Fraction of each batch of data to use for updates.
+      (default: 1.0)
+    :param convergenceTol:
+      Value used to determine when to terminate iterations.
+      (default: 0.001)
+
+    .. versionadded:: 1.5.0
+    """
+    def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, convergenceTol=0.001):
+        self.stepSize = stepSize
+        self.numIterations = numIterations
+        self.miniBatchFraction = miniBatchFraction
+        self.convergenceTol = convergenceTol
+        self._model = None
+        super(StreamingLinearRegressionWithSGD, self).__init__(
+            model=self._model)
+
+    @since("1.5.0")
+    def setInitialWeights(self, initialWeights):
+        """
+        Set the initial value of weights.
+
+        This must be set before running trainOn and predictOn.
+        """
+        initialWeights = _convert_to_vector(initialWeights)
+        self._model = LinearRegressionModel(initialWeights, 0)
+        return self
+
+    @since("1.5.0")
+    def trainOn(self, dstream):
+        """Train the model on the incoming dstream."""
+        self._validate(dstream)
+
+        def update(rdd):
+            # LinearRegressionWithSGD.train raises an error for an empty RDD.
+            if not rdd.isEmpty():
+                self._model = LinearRegressionWithSGD.train(
+                    rdd, self.numIterations, self.stepSize,
+                    self.miniBatchFraction, self._model.weights,
+                    intercept=self._model.intercept, convergenceTol=self.convergenceTol)
+
+        dstream.foreachRDD(update)
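+
+
+# Illustrative sketch, not part of the pyspark.mllib API: a hypothetical helper
+# showing one way to wire StreamingLinearRegressionWithSGD to queue-backed
+# DStreams. The batch contents are made up; the caller is expected to create,
+# start and eventually stop the StreamingContext passed in as `ssc`.
+def _streaming_linear_regression_example(ssc):
+    training = ssc.queueStream([[LabeledPoint(1.0, [1.0]), LabeledPoint(2.0, [2.0])]])
+    test = ssc.queueStream([[LabeledPoint(1.5, [1.5])]])
+    model = StreamingLinearRegressionWithSGD(stepSize=0.1, numIterations=25)
+    model.setInitialWeights([0.0])    # required before trainOn/predictOnValues
+    model.trainOn(training)
+    predictions = model.predictOnValues(test.map(lambda lp: (lp.label, lp.features)))
+    predictions.pprint()
+    return model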
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    import pyspark.mllib.regression
+    globs = pyspark.mllib.regression.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.regression tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/KernelDensity.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/KernelDensity.py
new file mode 100644
index 0000000000000000000000000000000000000000..7250eab6705a7434d69afd37e9536e4c96e33c9a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/KernelDensity.py
@@ -0,0 +1,59 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+if sys.version > '3':
+    xrange = range
+
+import numpy as np
+
+from pyspark.mllib.common import callMLlibFunc
+from pyspark.rdd import RDD
+
+
+class KernelDensity(object):
+    """
+    Estimate probability density at required points given an RDD of samples
+    from the population.
+
+    >>> kd = KernelDensity()
+    >>> sample = sc.parallelize([0.0, 1.0])
+    >>> kd.setSample(sample)
+    >>> kd.setBandwidth(3.0)
+    >>> kd.estimate([0.0, 1.0])
+    array([ 0.12938758,  0.12938758])
+    """
+    def __init__(self):
+        self._bandwidth = 1.0
+        self._sample = None
+
+    def setBandwidth(self, bandwidth):
+        """Set the bandwidth of each sample. Defaults to 1.0."""
+        self._bandwidth = bandwidth
+
+    def setSample(self, sample):
+        """Set sample points from the population. Should be an RDD."""
+        if not isinstance(sample, RDD):
+            raise TypeError("sample should be an RDD, received %s" % type(sample))
+        self._sample = sample
+
+    def estimate(self, points):
+        """Estimate the probability density at points"""
+        points = list(points)
+        densities = callMLlibFunc(
+            "estimateKernelDensity", self._sample, self._bandwidth, points)
+        return np.asarray(densities)
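+
+
+# Illustrative sketch, not part of the pyspark.mllib API: a hypothetical helper
+# showing how the bandwidth setting changes the estimate for the same points.
+def _kernel_density_example(sc):
+    kd = KernelDensity()
+    kd.setSample(sc.parallelize([0.0, 1.0, 2.0, 5.0]))
+    narrow = kd.estimate([0.0, 2.5, 5.0])   # default bandwidth of 1.0
+    kd.setBandwidth(3.0)                    # a wider kernel gives a smoother estimate
+    wide = kd.estimate([0.0, 2.5, 5.0])
+    return narrow, wide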
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8a721d3fe41c6302975c74372f75790372e7619
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/__init__.py
@@ -0,0 +1,28 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Python package for statistical functions in MLlib.
+"""
+
+from pyspark.mllib.stat._statistics import *
+from pyspark.mllib.stat.distribution import MultivariateGaussian
+from pyspark.mllib.stat.test import ChiSqTestResult
+from pyspark.mllib.stat.KernelDensity import KernelDensity
+
+__all__ = ["Statistics", "MultivariateStatisticalSummary", "ChiSqTestResult",
+           "MultivariateGaussian", "KernelDensity"]
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/_statistics.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/_statistics.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e89bfd691d16e15220bc4676b168ebc2742c685
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/_statistics.py
@@ -0,0 +1,326 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+if sys.version >= '3':
+    basestring = str
+
+from pyspark.rdd import RDD, ignore_unicode_prefix
+from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper
+from pyspark.mllib.linalg import Matrix, _convert_to_vector
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult
+
+
+__all__ = ['MultivariateStatisticalSummary', 'Statistics']
+
+
+class MultivariateStatisticalSummary(JavaModelWrapper):
+
+    """
+    Wrapper for the multivariate statistical summary of a data matrix.
+    """
+
+    def mean(self):
+        return self.call("mean").toArray()
+
+    def variance(self):
+        return self.call("variance").toArray()
+
+    def count(self):
+        return int(self.call("count"))
+
+    def numNonzeros(self):
+        return self.call("numNonzeros").toArray()
+
+    def max(self):
+        return self.call("max").toArray()
+
+    def min(self):
+        return self.call("min").toArray()
+
+    def normL1(self):
+        return self.call("normL1").toArray()
+
+    def normL2(self):
+        return self.call("normL2").toArray()
+
+
+class Statistics(object):
+
+    @staticmethod
+    def colStats(rdd):
+        """
+        Computes column-wise summary statistics for the input RDD[Vector].
+
+        :param rdd: an RDD[Vector] for which column-wise summary statistics
+                    are to be computed.
+        :return: :class:`MultivariateStatisticalSummary` object containing
+                 column-wise summary statistics.
+
+        >>> from pyspark.mllib.linalg import Vectors
+        >>> rdd = sc.parallelize([Vectors.dense([2, 0, 0, -2]),
+        ...                       Vectors.dense([4, 5, 0,  3]),
+        ...                       Vectors.dense([6, 7, 0,  8])])
+        >>> cStats = Statistics.colStats(rdd)
+        >>> cStats.mean()
+        array([ 4.,  4.,  0.,  3.])
+        >>> cStats.variance()
+        array([  4.,  13.,   0.,  25.])
+        >>> cStats.count()
+        3
+        >>> cStats.numNonzeros()
+        array([ 3.,  2.,  0.,  3.])
+        >>> cStats.max()
+        array([ 6.,  7.,  0.,  8.])
+        >>> cStats.min()
+        array([ 2.,  0.,  0., -2.])
+        """
+        cStats = callMLlibFunc("colStats", rdd.map(_convert_to_vector))
+        return MultivariateStatisticalSummary(cStats)
+
+    @staticmethod
+    def corr(x, y=None, method=None):
+        """
+        Compute the correlation (matrix) for the input RDD(s) using the
+        specified method.
+        Methods currently supported: `pearson` (default), `spearman`.
+
+        If a single RDD of Vectors is passed in, a correlation matrix
+        comparing the columns in the input RDD is returned. Use `method=`
+        to specify the method to be used for single-RDD input.
+        If two RDDs of floats are passed in, a single float is returned.
+
+        :param x: an RDD of Vectors for which the correlation matrix is to be computed,
+                  or an RDD of floats of the same cardinality as y when y is specified.
+        :param y: an RDD of floats of the same cardinality as x.
+        :param method: String specifying the method to use for computing correlation.
+                       Supported: `pearson` (default), `spearman`.
+        :return: Correlation matrix comparing columns in x.
+
+        >>> x = sc.parallelize([1.0, 0.0, -2.0], 2)
+        >>> y = sc.parallelize([4.0, 5.0, 3.0], 2)
+        >>> zeros = sc.parallelize([0.0, 0.0, 0.0], 2)
+        >>> abs(Statistics.corr(x, y) - 0.6546537) < 1e-7
+        True
+        >>> Statistics.corr(x, y) == Statistics.corr(x, y, "pearson")
+        True
+        >>> Statistics.corr(x, y, "spearman")
+        0.5
+        >>> from math import isnan
+        >>> isnan(Statistics.corr(x, zeros))
+        True
+        >>> from pyspark.mllib.linalg import Vectors
+        >>> rdd = sc.parallelize([Vectors.dense([1, 0, 0, -2]), Vectors.dense([4, 5, 0, 3]),
+        ...                       Vectors.dense([6, 7, 0,  8]), Vectors.dense([9, 0, 0, 1])])
+        >>> pearsonCorr = Statistics.corr(rdd)
+        >>> print(str(pearsonCorr).replace('nan', 'NaN'))
+        [[ 1.          0.05564149         NaN  0.40047142]
+         [ 0.05564149  1.                 NaN  0.91359586]
+         [        NaN         NaN  1.                 NaN]
+         [ 0.40047142  0.91359586         NaN  1.        ]]
+        >>> spearmanCorr = Statistics.corr(rdd, method="spearman")
+        >>> print(str(spearmanCorr).replace('nan', 'NaN'))
+        [[ 1.          0.10540926         NaN  0.4       ]
+         [ 0.10540926  1.                 NaN  0.9486833 ]
+         [        NaN         NaN  1.                 NaN]
+         [ 0.4         0.9486833          NaN  1.        ]]
+        >>> try:
+        ...     Statistics.corr(rdd, "spearman")
+        ...     print("Method name as second argument without 'method=' shouldn't be allowed.")
+        ... except TypeError:
+        ...     pass
+        """
+        # Check inputs to determine whether a single value or a matrix is needed for output.
+        # Since it's legal for users to use the method name as the second argument, we need to
+        # check if y is used to specify the method name instead.
+        if type(y) == str:
+            raise TypeError("Use 'method=' to specify method name.")
+
+        if not y:
+            return callMLlibFunc("corr", x.map(_convert_to_vector), method).toArray()
+        else:
+            return callMLlibFunc("corr", x.map(float), y.map(float), method)
+
+    @staticmethod
+    @ignore_unicode_prefix
+    def chiSqTest(observed, expected=None):
+        """
+        If `observed` is a Vector, conduct Pearson's chi-squared goodness
+        of fit test of the observed data against the expected distribution,
+        or against the uniform distribution (by default), with each category
+        having an expected frequency of `1 / len(observed)`.
+
+        If `observed` is a matrix, conduct Pearson's independence test on the
+        input contingency matrix, which cannot contain negative entries or
+        columns or rows that sum up to 0.
+
+        If `observed` is an RDD of LabeledPoint, conduct Pearson's independence
+        test for every feature against the label across the input RDD.
+        For each feature, the (feature, label) pairs are converted into a
+        contingency matrix for which the chi-squared statistic is computed.
+        All label and feature values must be categorical.
+
+        .. note:: `observed` cannot contain negative values
+
+        :param observed: it could be a vector containing the observed categorical
+                         counts/relative frequencies, or the contingency matrix
+                         (containing either counts or relative frequencies),
+                         or an RDD of LabeledPoint containing the labeled dataset
+                         with categorical features. Real-valued features will be
+                         treated as categorical for each distinct value.
+        :param expected: Vector containing the expected categorical counts/relative
+                         frequencies. `expected` is rescaled if the `expected` sum
+                         differs from the `observed` sum.
+        :return: ChiSqTestResult object containing the test statistic, degrees
+                 of freedom, p-value, the method used, and the null hypothesis.
+
+        >>> from pyspark.mllib.linalg import Vectors, Matrices
+        >>> observed = Vectors.dense([4, 6, 5])
+        >>> pearson = Statistics.chiSqTest(observed)
+        >>> print(pearson.statistic)
+        0.4
+        >>> pearson.degreesOfFreedom
+        2
+        >>> print(round(pearson.pValue, 4))
+        0.8187
+        >>> pearson.method
+        u'pearson'
+        >>> pearson.nullHypothesis
+        u'observed follows the same distribution as expected.'
+
+        >>> observed = Vectors.dense([21, 38, 43, 80])
+        >>> expected = Vectors.dense([3, 5, 7, 20])
+        >>> pearson = Statistics.chiSqTest(observed, expected)
+        >>> print(round(pearson.pValue, 4))
+        0.0027
+
+        >>> data = [40.0, 24.0, 29.0, 56.0, 32.0, 42.0, 31.0, 10.0, 0.0, 30.0, 15.0, 12.0]
+        >>> chi = Statistics.chiSqTest(Matrices.dense(3, 4, data))
+        >>> print(round(chi.statistic, 4))
+        21.9958
+
+        >>> data = [LabeledPoint(0.0, Vectors.dense([0.5, 10.0])),
+        ...         LabeledPoint(0.0, Vectors.dense([1.5, 20.0])),
+        ...         LabeledPoint(1.0, Vectors.dense([1.5, 30.0])),
+        ...         LabeledPoint(0.0, Vectors.dense([3.5, 30.0])),
+        ...         LabeledPoint(0.0, Vectors.dense([3.5, 40.0])),
+        ...         LabeledPoint(1.0, Vectors.dense([3.5, 40.0])),]
+        >>> rdd = sc.parallelize(data, 4)
+        >>> chi = Statistics.chiSqTest(rdd)
+        >>> print(chi[0].statistic)
+        0.75
+        >>> print(chi[1].statistic)
+        1.5
+        """
+        if isinstance(observed, RDD):
+            if not isinstance(observed.first(), LabeledPoint):
+                raise ValueError("observed should be an RDD of LabeledPoint")
+            jmodels = callMLlibFunc("chiSqTest", observed)
+            return [ChiSqTestResult(m) for m in jmodels]
+
+        if isinstance(observed, Matrix):
+            jmodel = callMLlibFunc("chiSqTest", observed)
+        else:
+            if expected and len(expected) != len(observed):
+                raise ValueError("`expected` should have the same length as `observed`")
+            jmodel = callMLlibFunc("chiSqTest", _convert_to_vector(observed), expected)
+        return ChiSqTestResult(jmodel)
+
+    @staticmethod
+    @ignore_unicode_prefix
+    def kolmogorovSmirnovTest(data, distName="norm", *params):
+        """
+        Performs the Kolmogorov-Smirnov (KS) test for data sampled from
+        a continuous distribution. It tests the null hypothesis that
+        the data is generated from a particular distribution.
+
+        The given data is sorted and the Empirical Cumulative
+        Distribution Function (ECDF) is calculated; for a given point,
+        the ECDF is the number of points with a value less than that
+        point, divided by the total number of points.
+
+        Since the data is sorted, this is a step function
+        that rises by (1 / length of data) for every ordered point.
+
+        The KS statistic gives us the maximum distance between the
+        ECDF and the CDF. Intuitively if this statistic is large, the
+        probability that the null hypothesis is true becomes small.
+        For specific details of the implementation, please have a look
+        at the Scala documentation.
+
+        :param data: RDD, samples from the data
+        :param distName: string, the name of the theoretical distribution
+                         against which the data is tested. Currently only
+                         "norm" (the normal distribution) is supported.
+        :param params: additional values which need to be provided for
+                       a certain distribution.
+                       If not provided, the default values are used.
+        :return: KolmogorovSmirnovTestResult object containing the test
+                 statistic, degrees of freedom, p-value,
+                 the method used, and the null hypothesis.
+
+        >>> kstest = Statistics.kolmogorovSmirnovTest
+        >>> data = sc.parallelize([-1.0, 0.0, 1.0])
+        >>> ksmodel = kstest(data, "norm")
+        >>> print(round(ksmodel.pValue, 3))
+        1.0
+        >>> print(round(ksmodel.statistic, 3))
+        0.175
+        >>> ksmodel.nullHypothesis
+        u'Sample follows theoretical distribution'
+
+        >>> data = sc.parallelize([2.0, 3.0, 4.0])
+        >>> ksmodel = kstest(data, "norm", 3.0, 1.0)
+        >>> print(round(ksmodel.pValue, 3))
+        1.0
+        >>> print(round(ksmodel.statistic, 3))
+        0.175
+        """
+        if not isinstance(data, RDD):
+            raise TypeError("data should be an RDD, got %s." % type(data))
+        if not isinstance(distName, basestring):
+            raise TypeError("distName should be a string, got %s." % type(distName))
+
+        params = [float(param) for param in params]
+        return KolmogorovSmirnovTestResult(
+            callMLlibFunc("kolmogorovSmirnovTest", data, distName, params))
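+
+
+# Illustrative sketch, not part of the pyspark.mllib API: a hypothetical helper
+# exercising the Statistics entry points defined above on toy data. The
+# correlation method is passed via the `method` keyword so it is not mistaken
+# for a second RDD.
+def _statistics_example(sc):
+    from pyspark.mllib.linalg import Vectors
+
+    x = sc.parallelize([1.0, 0.0, -2.0])
+    y = sc.parallelize([4.0, 5.0, 3.0])
+    pearson = Statistics.corr(x, y)                      # a single float, Pearson by default
+    spearman = Statistics.corr(x, y, method="spearman")
+
+    goodness_of_fit = Statistics.chiSqTest(Vectors.dense([4, 6, 5]))
+
+    samples = sc.parallelize([-1.0, 0.0, 1.0])
+    ks = Statistics.kolmogorovSmirnovTest(samples, "norm", 0.0, 1.0)
+    return pearson, spearman, goodness_of_fit.pValue, ks.statistic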
+
+
+def _test():
+    import doctest
+    import numpy
+    from pyspark.sql import SparkSession
+    try:
+        # NumPy 1.14+ changed its string format.
+        numpy.set_printoptions(legacy='1.13')
+    except TypeError:
+        pass
+    globs = globals().copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.stat.statistics tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/distribution.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/distribution.py
new file mode 100644
index 0000000000000000000000000000000000000000..46f7a1d2f277a7fd3da7c8b272b15ee952392a1a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/distribution.py
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from collections import namedtuple
+
+__all__ = ['MultivariateGaussian']
+
+
+class MultivariateGaussian(namedtuple('MultivariateGaussian', ['mu', 'sigma'])):
+
+    """Represents a (mu, sigma) tuple
+
+    >>> m = MultivariateGaussian(Vectors.dense([11,12]),DenseMatrix(2, 2, (1.0, 3.0, 5.0, 2.0)))
+    >>> (m.mu, m.sigma.toArray())
+    (DenseVector([11.0, 12.0]), array([[ 1., 5.],[ 3., 2.]]))
+    >>> (m[0], m[1])
+    (DenseVector([11.0, 12.0]), array([[ 1., 5.],[ 3., 2.]]))
+    """
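+
+
+# Illustrative sketch, not part of the pyspark.mllib API: a hypothetical helper
+# spelling out the imports the doctest above assumes. DenseMatrix values are
+# column-major, so (1.0, 3.0, 5.0, 2.0) is the matrix [[1., 5.], [3., 2.]].
+def _multivariate_gaussian_example():
+    from pyspark.mllib.linalg import Vectors, DenseMatrix
+
+    m = MultivariateGaussian(Vectors.dense([11.0, 12.0]),
+                             DenseMatrix(2, 2, [1.0, 3.0, 5.0, 2.0]))
+    return m.mu, m.sigma.toArray()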
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/test.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/test.py
new file mode 100644
index 0000000000000000000000000000000000000000..0abe104049ff985ef84299f3d7cce62bb7d8d6c1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/stat/test.py
@@ -0,0 +1,82 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pyspark.mllib.common import inherit_doc, JavaModelWrapper
+
+
+__all__ = ["ChiSqTestResult", "KolmogorovSmirnovTestResult"]
+
+
+class TestResult(JavaModelWrapper):
+    """
+    Base class for all test results.
+    """
+
+    @property
+    def pValue(self):
+        """
+        The probability of obtaining a test statistic result at least as
+        extreme as the one that was actually observed, assuming that the
+        null hypothesis is true.
+        """
+        return self._java_model.pValue()
+
+    @property
+    def degreesOfFreedom(self):
+        """
+        Returns the degree(s) of freedom of the hypothesis test.
+        The return type should be a Number (e.g. Int, Double) or a tuple of Numbers.
+        """
+        return self._java_model.degreesOfFreedom()
+
+    @property
+    def statistic(self):
+        """
+        Test statistic.
+        """
+        return self._java_model.statistic()
+
+    @property
+    def nullHypothesis(self):
+        """
+        Null hypothesis of the test.
+        """
+        return self._java_model.nullHypothesis()
+
+    def __str__(self):
+        return self._java_model.toString()
+
+
+@inherit_doc
+class ChiSqTestResult(TestResult):
+    """
+    Contains test results for the chi-squared hypothesis test.
+    """
+
+    @property
+    def method(self):
+        """
+        Name of the test method
+        """
+        return self._java_model.method()
+
+
+@inherit_doc
+class KolmogorovSmirnovTestResult(TestResult):
+    """
+    Contains test results for the Kolmogorov-Smirnov test.
+    """
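+
+
+# Illustrative sketch, not part of the pyspark.mllib API: a hypothetical helper
+# showing how the common TestResult properties are read off a chi-squared
+# result. It assumes an active SparkContext; imports are done lazily to avoid
+# a circular import with pyspark.mllib.stat.
+def _test_result_example():
+    from pyspark.mllib.linalg import Vectors
+    from pyspark.mllib.stat import Statistics
+
+    result = Statistics.chiSqTest(Vectors.dense([4, 6, 5]))
+    return result.statistic, result.degreesOfFreedom, result.pValue, result.nullHypothesis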
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/tests.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c2ce137e331c636c5895e3210d6e731e9d07921
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/tests.py
@@ -0,0 +1,1787 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Fuller unit tests for Python MLlib.
+"""
+
+import os
+import sys
+import tempfile
+import array as pyarray
+from math import sqrt
+from time import time, sleep
+from shutil import rmtree
+
+from numpy import (
+    array, array_equal, zeros, inf, random, exp, dot, all, mean, abs, arange, tile, ones)
+from numpy import sum as array_sum
+
+from py4j.protocol import Py4JJavaError
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+if sys.version > '3':
+    basestring = str
+
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
+
+from pyspark import SparkContext
+import pyspark.ml.linalg as newlinalg
+from pyspark.mllib.common import _to_java_object_rdd
+from pyspark.mllib.clustering import StreamingKMeans, StreamingKMeansModel
+from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector,\
+    DenseMatrix, SparseMatrix, Vectors, Matrices, MatrixUDT
+from pyspark.mllib.linalg.distributed import RowMatrix
+from pyspark.mllib.classification import StreamingLogisticRegressionWithSGD
+from pyspark.mllib.fpm import FPGrowth
+from pyspark.mllib.recommendation import Rating
+from pyspark.mllib.regression import LabeledPoint, StreamingLinearRegressionWithSGD
+from pyspark.mllib.random import RandomRDDs
+from pyspark.mllib.stat import Statistics
+from pyspark.mllib.feature import HashingTF
+from pyspark.mllib.feature import Word2Vec
+from pyspark.mllib.feature import IDF
+from pyspark.mllib.feature import StandardScaler, ElementwiseProduct
+from pyspark.mllib.util import LinearDataGenerator
+from pyspark.mllib.util import MLUtils
+from pyspark.serializers import PickleSerializer
+from pyspark.streaming import StreamingContext
+from pyspark.sql import SparkSession
+from pyspark.sql.utils import IllegalArgumentException
+
+_have_scipy = False
+try:
+    import scipy.sparse
+    _have_scipy = True
+except ImportError:
+    # No SciPy, but that's okay, we'll skip those tests
+    pass
+
+ser = PickleSerializer()
+
+
+class MLlibTestCase(unittest.TestCase):
+    def setUp(self):
+        self.sc = SparkContext('local[4]', "MLlib tests")
+        self.spark = SparkSession(self.sc)
+
+    def tearDown(self):
+        self.spark.stop()
+
+
+class MLLibStreamingTestCase(unittest.TestCase):
+    def setUp(self):
+        self.sc = SparkContext('local[4]', "MLlib tests")
+        self.ssc = StreamingContext(self.sc, 1.0)
+
+    def tearDown(self):
+        self.ssc.stop(False)
+        self.sc.stop()
+
+    @staticmethod
+    def _eventually(condition, timeout=30.0, catch_assertions=False):
+        """
+        Wait a given amount of time for a condition to pass, else fail with an error.
+        This is a helper utility for streaming ML tests.
+        :param condition: Function that checks for termination conditions.
+                          condition() can return:
+                           - True: Conditions met. Return without error.
+                           - other value: Conditions not met yet. Continue. Upon timeout,
+                                          include last such value in error message.
+                          Note that this method may be called at any time during
+                          streaming execution (e.g., even before any results
+                          have been created).
+        :param timeout: Number of seconds to wait.  Default 30 seconds.
+        :param catch_assertions: If False (default), do not catch AssertionErrors.
+                                 If True, catch AssertionErrors; continue, but save
+                                 error to throw upon timeout.
+        """
+        start_time = time()
+        lastValue = None
+        while time() - start_time < timeout:
+            if catch_assertions:
+                try:
+                    lastValue = condition()
+                except AssertionError as e:
+                    lastValue = e
+            else:
+                lastValue = condition()
+            if lastValue is True:
+                return
+            sleep(0.01)
+        if isinstance(lastValue, AssertionError):
+            raise lastValue
+        else:
+            raise AssertionError(
+                "Test failed due to timeout after %g sec, with last condition returning: %s"
+                % (timeout, lastValue))
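+
+
+# Illustrative sketch of the polling pattern _eventually supports (the
+# `results` and `expected_batches` names below are hypothetical):
+#
+#     def condition():
+#         if len(results) >= expected_batches:
+#             return True
+#         return "only %d batches collected so far" % len(results)
+#     self._eventually(condition, timeout=60.0, catch_assertions=True)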
+
+
+def _squared_distance(a, b):
+    if isinstance(a, Vector):
+        return a.squared_distance(b)
+    else:
+        return b.squared_distance(a)
+
+
+class VectorTests(MLlibTestCase):
+
+    def _test_serialize(self, v):
+        self.assertEqual(v, ser.loads(ser.dumps(v)))
+        jvec = self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(bytearray(ser.dumps(v)))
+        nv = ser.loads(bytes(self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(jvec)))
+        self.assertEqual(v, nv)
+        vs = [v] * 100
+        jvecs = self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(bytearray(ser.dumps(vs)))
+        nvs = ser.loads(bytes(self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(jvecs)))
+        self.assertEqual(vs, nvs)
+
+    def test_serialize(self):
+        self._test_serialize(DenseVector(range(10)))
+        self._test_serialize(DenseVector(array([1., 2., 3., 4.])))
+        self._test_serialize(DenseVector(pyarray.array('d', range(10))))
+        self._test_serialize(SparseVector(4, {1: 1, 3: 2}))
+        self._test_serialize(SparseVector(3, {}))
+        self._test_serialize(DenseMatrix(2, 3, range(6)))
+        sm1 = SparseMatrix(
+            3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])
+        self._test_serialize(sm1)
+
+    def test_dot(self):
+        sv = SparseVector(4, {1: 1, 3: 2})
+        dv = DenseVector(array([1., 2., 3., 4.]))
+        lst = DenseVector([1, 2, 3, 4])
+        mat = array([[1., 2., 3., 4.],
+                     [1., 2., 3., 4.],
+                     [1., 2., 3., 4.],
+                     [1., 2., 3., 4.]])
+        arr = pyarray.array('d', [0, 1, 2, 3])
+        self.assertEqual(10.0, sv.dot(dv))
+        self.assertTrue(array_equal(array([3., 6., 9., 12.]), sv.dot(mat)))
+        self.assertEqual(30.0, dv.dot(dv))
+        self.assertTrue(array_equal(array([10., 20., 30., 40.]), dv.dot(mat)))
+        self.assertEqual(30.0, lst.dot(dv))
+        self.assertTrue(array_equal(array([10., 20., 30., 40.]), lst.dot(mat)))
+        self.assertEqual(7.0, sv.dot(arr))
+
+    def test_squared_distance(self):
+        sv = SparseVector(4, {1: 1, 3: 2})
+        dv = DenseVector(array([1., 2., 3., 4.]))
+        lst = DenseVector([4, 3, 2, 1])
+        lst1 = [4, 3, 2, 1]
+        arr = pyarray.array('d', [0, 2, 1, 3])
+        narr = array([0, 2, 1, 3])
+        self.assertEqual(15.0, _squared_distance(sv, dv))
+        self.assertEqual(25.0, _squared_distance(sv, lst))
+        self.assertEqual(20.0, _squared_distance(dv, lst))
+        self.assertEqual(15.0, _squared_distance(dv, sv))
+        self.assertEqual(25.0, _squared_distance(lst, sv))
+        self.assertEqual(20.0, _squared_distance(lst, dv))
+        self.assertEqual(0.0, _squared_distance(sv, sv))
+        self.assertEqual(0.0, _squared_distance(dv, dv))
+        self.assertEqual(0.0, _squared_distance(lst, lst))
+        self.assertEqual(25.0, _squared_distance(sv, lst1))
+        self.assertEqual(3.0, _squared_distance(sv, arr))
+        self.assertEqual(3.0, _squared_distance(sv, narr))
+
+    def test_hash(self):
+        v1 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v2 = SparseVector(4, [(1, 1.0), (3, 5.5)])
+        v3 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v4 = SparseVector(4, [(1, 1.0), (3, 2.5)])
+        self.assertEqual(hash(v1), hash(v2))
+        self.assertEqual(hash(v1), hash(v3))
+        self.assertEqual(hash(v2), hash(v3))
+        self.assertFalse(hash(v1) == hash(v4))
+        self.assertFalse(hash(v2) == hash(v4))
+
+    def test_eq(self):
+        v1 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v2 = SparseVector(4, [(1, 1.0), (3, 5.5)])
+        v3 = DenseVector([0.0, 1.0, 0.0, 5.5])
+        v4 = SparseVector(6, [(1, 1.0), (3, 5.5)])
+        v5 = DenseVector([0.0, 1.0, 0.0, 2.5])
+        v6 = SparseVector(4, [(1, 1.0), (3, 2.5)])
+        self.assertEqual(v1, v2)
+        self.assertEqual(v1, v3)
+        self.assertFalse(v2 == v4)
+        self.assertFalse(v1 == v5)
+        self.assertFalse(v1 == v6)
+
+    def test_equals(self):
+        indices = [1, 2, 4]
+        values = [1., 3., 2.]
+        self.assertTrue(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 0., 2.]))
+        self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 1., 0., 2.]))
+        self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 0., 2.]))
+        self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 2., 2.]))
+
+    def test_conversion(self):
+        # numpy arrays should be automatically upcast to float64
+        # tests for fix of [SPARK-5089]
+        v = array([1, 2, 3, 4], dtype='float64')
+        dv = DenseVector(v)
+        self.assertTrue(dv.array.dtype == 'float64')
+        v = array([1, 2, 3, 4], dtype='float32')
+        dv = DenseVector(v)
+        self.assertTrue(dv.array.dtype == 'float64')
+
+    def test_sparse_vector_indexing(self):
+        sv = SparseVector(5, {1: 1, 3: 2})
+        self.assertEqual(sv[0], 0.)
+        self.assertEqual(sv[3], 2.)
+        self.assertEqual(sv[1], 1.)
+        self.assertEqual(sv[2], 0.)
+        self.assertEqual(sv[4], 0.)
+        self.assertEqual(sv[-1], 0.)
+        self.assertEqual(sv[-2], 2.)
+        self.assertEqual(sv[-3], 0.)
+        self.assertEqual(sv[-5], 0.)
+        for ind in [5, -6]:
+            self.assertRaises(IndexError, sv.__getitem__, ind)
+        for ind in [7.8, '1']:
+            self.assertRaises(TypeError, sv.__getitem__, ind)
+
+        zeros = SparseVector(4, {})
+        self.assertEqual(zeros[0], 0.0)
+        self.assertEqual(zeros[3], 0.0)
+        for ind in [4, -5]:
+            self.assertRaises(IndexError, zeros.__getitem__, ind)
+
+        empty = SparseVector(0, {})
+        for ind in [-1, 0, 1]:
+            self.assertRaises(IndexError, empty.__getitem__, ind)
+
+    def test_sparse_vector_iteration(self):
+        self.assertListEqual(list(SparseVector(3, [], [])), [0.0, 0.0, 0.0])
+        self.assertListEqual(list(SparseVector(5, [0, 3], [1.0, 2.0])), [1.0, 0.0, 0.0, 2.0, 0.0])
+
+    def test_matrix_indexing(self):
+        mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
+        expected = [[0, 6], [1, 8], [4, 10]]
+        for i in range(3):
+            for j in range(2):
+                self.assertEqual(mat[i, j], expected[i][j])
+
+        for i, j in [(-1, 0), (4, 1), (3, 4)]:
+            self.assertRaises(IndexError, mat.__getitem__, (i, j))
+
+    def test_repr_dense_matrix(self):
+        mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10])
+        self.assertTrue(
+            repr(mat),
+            'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)')
+
+        mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10], True)
+        self.assertTrue(
+            repr(mat),
+            'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)')
+
+        mat = DenseMatrix(6, 3, zeros(18))
+        self.assertTrue(
+            repr(mat),
+            'DenseMatrix(6, 3, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..., \
+                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], False)')
+
+    def test_repr_sparse_matrix(self):
+        sm1t = SparseMatrix(
+            3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0],
+            isTransposed=True)
+        self.assertTrue(
+            repr(sm1t),
+            'SparseMatrix(3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], True)')
+
+        indices = tile(arange(6), 3)
+        values = ones(18)
+        sm = SparseMatrix(6, 3, [0, 6, 12, 18], indices, values)
+        self.assertTrue(
+            repr(sm), "SparseMatrix(6, 3, [0, 6, 12, 18], \
+                [0, 1, 2, 3, 4, 5, 0, 1, ..., 4, 5, 0, 1, 2, 3, 4, 5], \
+                [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..., \
+                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], False)")
+
+        self.assertTrue(
+            str(sm),
+            "6 X 3 CSCMatrix\n\
+            (0,0) 1.0\n(1,0) 1.0\n(2,0) 1.0\n(3,0) 1.0\n(4,0) 1.0\n(5,0) 1.0\n\
+            (0,1) 1.0\n(1,1) 1.0\n(2,1) 1.0\n(3,1) 1.0\n(4,1) 1.0\n(5,1) 1.0\n\
+            (0,2) 1.0\n(1,2) 1.0\n(2,2) 1.0\n(3,2) 1.0\n..\n..")
+
+        sm = SparseMatrix(1, 18, zeros(19), [], [])
+        self.assertTrue(
+            repr(sm),
+            'SparseMatrix(1, 18, \
+                [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0], [], [], False)')
+
+    def test_sparse_matrix(self):
+        # Test sparse matrix creation.
+        sm1 = SparseMatrix(
+            3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0])
+        self.assertEqual(sm1.numRows, 3)
+        self.assertEqual(sm1.numCols, 4)
+        self.assertEqual(sm1.colPtrs.tolist(), [0, 2, 2, 4, 4])
+        self.assertEqual(sm1.rowIndices.tolist(), [1, 2, 1, 2])
+        self.assertEqual(sm1.values.tolist(), [1.0, 2.0, 4.0, 5.0])
+        self.assertTrue(
+            repr(sm1),
+            'SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0], False)')
+
+        # Test indexing
+        expected = [
+            [0, 0, 0, 0],
+            [1, 0, 4, 0],
+            [2, 0, 5, 0]]
+
+        for i in range(3):
+            for j in range(4):
+                self.assertEqual(expected[i][j], sm1[i, j])
+        self.assertTrue(array_equal(sm1.toArray(), expected))
+
+        for i, j in [(-1, 1), (4, 3), (3, 5)]:
+            self.assertRaises(IndexError, sm1.__getitem__, (i, j))
+
+        # Test conversion to dense and sparse.
+        smnew = sm1.toDense().toSparse()
+        self.assertEqual(sm1.numRows, smnew.numRows)
+        self.assertEqual(sm1.numCols, smnew.numCols)
+        self.assertTrue(array_equal(sm1.colPtrs, smnew.colPtrs))
+        self.assertTrue(array_equal(sm1.rowIndices, smnew.rowIndices))
+        self.assertTrue(array_equal(sm1.values, smnew.values))
+
+        sm1t = SparseMatrix(
+            3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0],
+            isTransposed=True)
+        self.assertEqual(sm1t.numRows, 3)
+        self.assertEqual(sm1t.numCols, 4)
+        self.assertEqual(sm1t.colPtrs.tolist(), [0, 2, 3, 5])
+        self.assertEqual(sm1t.rowIndices.tolist(), [0, 1, 2, 0, 2])
+        self.assertEqual(sm1t.values.tolist(), [3.0, 2.0, 4.0, 9.0, 8.0])
+
+        expected = [
+            [3, 2, 0, 0],
+            [0, 0, 4, 0],
+            [9, 0, 8, 0]]
+
+        for i in range(3):
+            for j in range(4):
+                self.assertEqual(expected[i][j], sm1t[i, j])
+        self.assertTrue(array_equal(sm1t.toArray(), expected))
+
+    def test_dense_matrix_is_transposed(self):
+        mat1 = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True)
+        mat = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9])
+        self.assertEqual(mat1, mat)
+
+        expected = [[0, 4], [1, 6], [3, 9]]
+        for i in range(3):
+            for j in range(2):
+                self.assertEqual(mat1[i, j], expected[i][j])
+        self.assertTrue(array_equal(mat1.toArray(), expected))
+
+        sm = mat1.toSparse()
+        self.assertTrue(array_equal(sm.rowIndices, [1, 2, 0, 1, 2]))
+        self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5]))
+        self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9]))
+
+    def test_parse_vector(self):
+        a = DenseVector([])
+        self.assertEqual(str(a), '[]')
+        self.assertEqual(Vectors.parse(str(a)), a)
+        a = DenseVector([3, 4, 6, 7])
+        self.assertEqual(str(a), '[3.0,4.0,6.0,7.0]')
+        self.assertEqual(Vectors.parse(str(a)), a)
+        a = SparseVector(4, [], [])
+        self.assertEqual(str(a), '(4,[],[])')
+        self.assertEqual(SparseVector.parse(str(a)), a)
+        a = SparseVector(4, [0, 2], [3, 4])
+        self.assertEqual(str(a), '(4,[0,2],[3.0,4.0])')
+        self.assertEqual(Vectors.parse(str(a)), a)
+        a = SparseVector(10, [0, 1], [4, 5])
+        self.assertEqual(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a)
+
+    def test_norms(self):
+        a = DenseVector([0, 2, 3, -1])
+        self.assertAlmostEqual(a.norm(2), 3.742, 3)
+        self.assertTrue(a.norm(1), 6)
+        self.assertTrue(a.norm(inf), 3)
+        a = SparseVector(4, [0, 2], [3, -4])
+        self.assertAlmostEqual(a.norm(2), 5)
+        self.assertTrue(a.norm(1), 7)
+        self.assertTrue(a.norm(inf), 4)
+
+        tmp = SparseVector(4, [0, 2], [3, 0])
+        self.assertEqual(tmp.numNonzeros(), 1)
+
+    def test_ml_mllib_vector_conversion(self):
+        # to ml
+        # dense
+        mllibDV = Vectors.dense([1, 2, 3])
+        mlDV1 = newlinalg.Vectors.dense([1, 2, 3])
+        mlDV2 = mllibDV.asML()
+        self.assertEqual(mlDV2, mlDV1)
+        # sparse
+        mllibSV = Vectors.sparse(4, {1: 1.0, 3: 5.5})
+        mlSV1 = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
+        mlSV2 = mllibSV.asML()
+        self.assertEqual(mlSV2, mlSV1)
+        # from ml
+        # dense
+        mllibDV1 = Vectors.dense([1, 2, 3])
+        mlDV = newlinalg.Vectors.dense([1, 2, 3])
+        mllibDV2 = Vectors.fromML(mlDV)
+        self.assertEqual(mllibDV1, mllibDV2)
+        # sparse
+        mllibSV1 = Vectors.sparse(4, {1: 1.0, 3: 5.5})
+        mlSV = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
+        mllibSV2 = Vectors.fromML(mlSV)
+        self.assertEqual(mllibSV1, mllibSV2)
+
+    def test_ml_mllib_matrix_conversion(self):
+        # to ml
+        # dense
+        mllibDM = Matrices.dense(2, 2, [0, 1, 2, 3])
+        mlDM1 = newlinalg.Matrices.dense(2, 2, [0, 1, 2, 3])
+        mlDM2 = mllibDM.asML()
+        self.assertEqual(mlDM2, mlDM1)
+        # transposed
+        mllibDMt = DenseMatrix(2, 2, [0, 1, 2, 3], True)
+        mlDMt1 = newlinalg.DenseMatrix(2, 2, [0, 1, 2, 3], True)
+        mlDMt2 = mllibDMt.asML()
+        self.assertEqual(mlDMt2, mlDMt1)
+        # sparse
+        mllibSM = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        mlSM1 = newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        mlSM2 = mllibSM.asML()
+        self.assertEqual(mlSM2, mlSM1)
+        # transposed
+        mllibSMt = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
+        mlSMt1 = newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
+        mlSMt2 = mllibSMt.asML()
+        self.assertEqual(mlSMt2, mlSMt1)
+        # from ml
+        # dense
+        mllibDM1 = Matrices.dense(2, 2, [1, 2, 3, 4])
+        mlDM = newlinalg.Matrices.dense(2, 2, [1, 2, 3, 4])
+        mllibDM2 = Matrices.fromML(mlDM)
+        self.assertEqual(mllibDM1, mllibDM2)
+        # transposed
+        mllibDMt1 = DenseMatrix(2, 2, [1, 2, 3, 4], True)
+        mlDMt = newlinalg.DenseMatrix(2, 2, [1, 2, 3, 4], True)
+        mllibDMt2 = Matrices.fromML(mlDMt)
+        self.assertEqual(mllibDMt1, mllibDMt2)
+        # sparse
+        mllibSM1 = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        mlSM = newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
+        mllibSM2 = Matrices.fromML(mlSM)
+        self.assertEqual(mllibSM1, mllibSM2)
+        # transposed
+        mllibSMt1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
+        mlSMt = newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
+        mllibSMt2 = Matrices.fromML(mlSMt)
+        self.assertEqual(mllibSMt1, mllibSMt2)
+
+
+class ListTests(MLlibTestCase):
+
+    """
+    Test MLlib algorithms on plain lists, to make sure they're passed through
+    as NumPy arrays.
+    """
+
+    def test_bisecting_kmeans(self):
+        from pyspark.mllib.clustering import BisectingKMeans
+        data = array([0.0, 0.0, 1.0, 1.0, 9.0, 8.0, 8.0, 9.0]).reshape(4, 2)
+        bskm = BisectingKMeans()
+        model = bskm.train(self.sc.parallelize(data, 2), k=4)
+        p = array([0.0, 0.0])
+        rdd_p = self.sc.parallelize([p])
+        self.assertEqual(model.predict(p), model.predict(rdd_p).first())
+        self.assertEqual(model.computeCost(p), model.computeCost(rdd_p))
+        self.assertEqual(model.k, len(model.clusterCenters))
+
+    def test_kmeans(self):
+        from pyspark.mllib.clustering import KMeans
+        data = [
+            [0, 1.1],
+            [0, 1.2],
+            [1.1, 0],
+            [1.2, 0],
+        ]
+        clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||",
+                                initializationSteps=7, epsilon=1e-4)
+        self.assertEqual(clusters.predict(data[0]), clusters.predict(data[1]))
+        self.assertEqual(clusters.predict(data[2]), clusters.predict(data[3]))
+
+    def test_kmeans_deterministic(self):
+        from pyspark.mllib.clustering import KMeans
+        X = range(0, 100, 10)
+        Y = range(0, 100, 10)
+        data = [[x, y] for x, y in zip(X, Y)]
+        clusters1 = KMeans.train(self.sc.parallelize(data),
+                                 3, initializationMode="k-means||",
+                                 seed=42, initializationSteps=7, epsilon=1e-4)
+        clusters2 = KMeans.train(self.sc.parallelize(data),
+                                 3, initializationMode="k-means||",
+                                 seed=42, initializationSteps=7, epsilon=1e-4)
+        centers1 = clusters1.centers
+        centers2 = clusters2.centers
+        for c1, c2 in zip(centers1, centers2):
+            # TODO: Allow small numeric difference.
+            self.assertTrue(array_equal(c1, c2))
+
+    def test_gmm(self):
+        from pyspark.mllib.clustering import GaussianMixture
+        data = self.sc.parallelize([
+            [1, 2],
+            [8, 9],
+            [-4, -3],
+            [-6, -7],
+        ])
+        clusters = GaussianMixture.train(data, 2, convergenceTol=0.001,
+                                         maxIterations=10, seed=1)
+        labels = clusters.predict(data).collect()
+        self.assertEqual(labels[0], labels[1])
+        self.assertEqual(labels[2], labels[3])
+
+    def test_gmm_deterministic(self):
+        from pyspark.mllib.clustering import GaussianMixture
+        x = range(0, 100, 10)
+        y = range(0, 100, 10)
+        data = self.sc.parallelize([[a, b] for a, b in zip(x, y)])
+        clusters1 = GaussianMixture.train(data, 5, convergenceTol=0.001,
+                                          maxIterations=10, seed=63)
+        clusters2 = GaussianMixture.train(data, 5, convergenceTol=0.001,
+                                          maxIterations=10, seed=63)
+        for c1, c2 in zip(clusters1.weights, clusters2.weights):
+            self.assertEqual(round(c1, 7), round(c2, 7))
+
+    def test_gmm_with_initial_model(self):
+        from pyspark.mllib.clustering import GaussianMixture
+        data = self.sc.parallelize([
+            (-10, -5), (-9, -4), (10, 5), (9, 4)
+        ])
+
+        gmm1 = GaussianMixture.train(data, 2, convergenceTol=0.001,
+                                     maxIterations=10, seed=63)
+        gmm2 = GaussianMixture.train(data, 2, convergenceTol=0.001,
+                                     maxIterations=10, seed=63, initialModel=gmm1)
+        self.assertAlmostEqual((gmm1.weights - gmm2.weights).sum(), 0.0)
+
+    def test_classification(self):
+        from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
+        from pyspark.mllib.tree import DecisionTree, DecisionTreeModel, RandomForest,\
+            RandomForestModel, GradientBoostedTrees, GradientBoostedTreesModel
+        data = [
+            LabeledPoint(0.0, [1, 0, 0]),
+            LabeledPoint(1.0, [0, 1, 1]),
+            LabeledPoint(0.0, [2, 0, 0]),
+            LabeledPoint(1.0, [0, 2, 1])
+        ]
+        rdd = self.sc.parallelize(data)
+        features = [p.features.tolist() for p in data]
+
+        temp_dir = tempfile.mkdtemp()
+
+        lr_model = LogisticRegressionWithSGD.train(rdd, iterations=10)
+        self.assertTrue(lr_model.predict(features[0]) <= 0)
+        self.assertTrue(lr_model.predict(features[1]) > 0)
+        self.assertTrue(lr_model.predict(features[2]) <= 0)
+        self.assertTrue(lr_model.predict(features[3]) > 0)
+
+        svm_model = SVMWithSGD.train(rdd, iterations=10)
+        self.assertTrue(svm_model.predict(features[0]) <= 0)
+        self.assertTrue(svm_model.predict(features[1]) > 0)
+        self.assertTrue(svm_model.predict(features[2]) <= 0)
+        self.assertTrue(svm_model.predict(features[3]) > 0)
+
+        nb_model = NaiveBayes.train(rdd)
+        self.assertTrue(nb_model.predict(features[0]) <= 0)
+        self.assertTrue(nb_model.predict(features[1]) > 0)
+        self.assertTrue(nb_model.predict(features[2]) <= 0)
+        self.assertTrue(nb_model.predict(features[3]) > 0)
+
+        categoricalFeaturesInfo = {0: 3}  # feature 0 has 3 categories
+        dt_model = DecisionTree.trainClassifier(
+            rdd, numClasses=2, categoricalFeaturesInfo=categoricalFeaturesInfo, maxBins=4)
+        self.assertTrue(dt_model.predict(features[0]) <= 0)
+        self.assertTrue(dt_model.predict(features[1]) > 0)
+        self.assertTrue(dt_model.predict(features[2]) <= 0)
+        self.assertTrue(dt_model.predict(features[3]) > 0)
+
+        dt_model_dir = os.path.join(temp_dir, "dt")
+        dt_model.save(self.sc, dt_model_dir)
+        same_dt_model = DecisionTreeModel.load(self.sc, dt_model_dir)
+        self.assertEqual(same_dt_model.toDebugString(), dt_model.toDebugString())
+
+        rf_model = RandomForest.trainClassifier(
+            rdd, numClasses=2, categoricalFeaturesInfo=categoricalFeaturesInfo, numTrees=10,
+            maxBins=4, seed=1)
+        self.assertTrue(rf_model.predict(features[0]) <= 0)
+        self.assertTrue(rf_model.predict(features[1]) > 0)
+        self.assertTrue(rf_model.predict(features[2]) <= 0)
+        self.assertTrue(rf_model.predict(features[3]) > 0)
+
+        rf_model_dir = os.path.join(temp_dir, "rf")
+        rf_model.save(self.sc, rf_model_dir)
+        same_rf_model = RandomForestModel.load(self.sc, rf_model_dir)
+        self.assertEqual(same_rf_model.toDebugString(), rf_model.toDebugString())
+
+        gbt_model = GradientBoostedTrees.trainClassifier(
+            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numIterations=4)
+        self.assertTrue(gbt_model.predict(features[0]) <= 0)
+        self.assertTrue(gbt_model.predict(features[1]) > 0)
+        self.assertTrue(gbt_model.predict(features[2]) <= 0)
+        self.assertTrue(gbt_model.predict(features[3]) > 0)
+
+        gbt_model_dir = os.path.join(temp_dir, "gbt")
+        gbt_model.save(self.sc, gbt_model_dir)
+        same_gbt_model = GradientBoostedTreesModel.load(self.sc, gbt_model_dir)
+        self.assertEqual(same_gbt_model.toDebugString(), gbt_model.toDebugString())
+
+        try:
+            rmtree(temp_dir)
+        except OSError:
+            pass
+
+    def test_regression(self):
+        from pyspark.mllib.regression import LinearRegressionWithSGD, LassoWithSGD, \
+            RidgeRegressionWithSGD
+        from pyspark.mllib.tree import DecisionTree, RandomForest, GradientBoostedTrees
+        data = [
+            LabeledPoint(-1.0, [0, -1]),
+            LabeledPoint(1.0, [0, 1]),
+            LabeledPoint(-1.0, [0, -2]),
+            LabeledPoint(1.0, [0, 2])
+        ]
+        rdd = self.sc.parallelize(data)
+        features = [p.features.tolist() for p in data]
+
+        lr_model = LinearRegressionWithSGD.train(rdd, iterations=10)
+        self.assertTrue(lr_model.predict(features[0]) <= 0)
+        self.assertTrue(lr_model.predict(features[1]) > 0)
+        self.assertTrue(lr_model.predict(features[2]) <= 0)
+        self.assertTrue(lr_model.predict(features[3]) > 0)
+
+        lasso_model = LassoWithSGD.train(rdd, iterations=10)
+        self.assertTrue(lasso_model.predict(features[0]) <= 0)
+        self.assertTrue(lasso_model.predict(features[1]) > 0)
+        self.assertTrue(lasso_model.predict(features[2]) <= 0)
+        self.assertTrue(lasso_model.predict(features[3]) > 0)
+
+        rr_model = RidgeRegressionWithSGD.train(rdd, iterations=10)
+        self.assertTrue(rr_model.predict(features[0]) <= 0)
+        self.assertTrue(rr_model.predict(features[1]) > 0)
+        self.assertTrue(rr_model.predict(features[2]) <= 0)
+        self.assertTrue(rr_model.predict(features[3]) > 0)
+
+        categoricalFeaturesInfo = {0: 2}  # feature 0 has 2 categories
+        dt_model = DecisionTree.trainRegressor(
+            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, maxBins=4)
+        self.assertTrue(dt_model.predict(features[0]) <= 0)
+        self.assertTrue(dt_model.predict(features[1]) > 0)
+        self.assertTrue(dt_model.predict(features[2]) <= 0)
+        self.assertTrue(dt_model.predict(features[3]) > 0)
+
+        rf_model = RandomForest.trainRegressor(
+            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numTrees=10, maxBins=4, seed=1)
+        self.assertTrue(rf_model.predict(features[0]) <= 0)
+        self.assertTrue(rf_model.predict(features[1]) > 0)
+        self.assertTrue(rf_model.predict(features[2]) <= 0)
+        self.assertTrue(rf_model.predict(features[3]) > 0)
+
+        gbt_model = GradientBoostedTrees.trainRegressor(
+            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numIterations=4)
+        self.assertTrue(gbt_model.predict(features[0]) <= 0)
+        self.assertTrue(gbt_model.predict(features[1]) > 0)
+        self.assertTrue(gbt_model.predict(features[2]) <= 0)
+        self.assertTrue(gbt_model.predict(features[3]) > 0)
+
+        try:
+            LinearRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0]), iterations=10)
+            LassoWithSGD.train(rdd, initialWeights=array([1.0, 1.0]), iterations=10)
+            RidgeRegressionWithSGD.train(rdd, initialWeights=array([1.0, 1.0]), iterations=10)
+        except ValueError:
+            self.fail()
+
+        # Verify that maxBins is being passed through
+        GradientBoostedTrees.trainRegressor(
+            rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numIterations=4, maxBins=32)
+        with self.assertRaises(Exception) as cm:
+            GradientBoostedTrees.trainRegressor(
+                rdd, categoricalFeaturesInfo=categoricalFeaturesInfo, numIterations=4, maxBins=1)
+
+
+class StatTests(MLlibTestCase):
+    # SPARK-4023
+    def test_col_with_different_rdds(self):
+        # numpy
+        data = RandomRDDs.normalVectorRDD(self.sc, 1000, 10, 10)
+        summary = Statistics.colStats(data)
+        self.assertEqual(1000, summary.count())
+        # list of ranges
+        data = self.sc.parallelize([range(10)] * 10)
+        summary = Statistics.colStats(data)
+        self.assertEqual(10, summary.count())
+        # array.array
+        data = self.sc.parallelize([pyarray.array("d", range(10))] * 10)
+        summary = Statistics.colStats(data)
+        self.assertEqual(10, summary.count())
+
+    def test_col_norms(self):
+        data = RandomRDDs.normalVectorRDD(self.sc, 1000, 10, 10)
+        summary = Statistics.colStats(data)
+        self.assertEqual(10, len(summary.normL1()))
+        self.assertEqual(10, len(summary.normL2()))
+
+        data2 = self.sc.parallelize(range(10)).map(lambda x: Vectors.dense(x))
+        summary2 = Statistics.colStats(data2)
+        self.assertEqual(array([45.0]), summary2.normL1())
+        import math
+        expectedNormL2 = math.sqrt(sum(map(lambda x: x*x, range(10))))
+        self.assertTrue(math.fabs(summary2.normL2()[0] - expectedNormL2) < 1e-14)
+
+
+class VectorUDTTests(MLlibTestCase):
+
+    dv0 = DenseVector([])
+    dv1 = DenseVector([1.0, 2.0])
+    sv0 = SparseVector(2, [], [])
+    sv1 = SparseVector(2, [1], [2.0])
+    udt = VectorUDT()
+
+    def test_json_schema(self):
+        self.assertEqual(VectorUDT.fromJson(self.udt.jsonValue()), self.udt)
+
+    def test_serialization(self):
+        for v in [self.dv0, self.dv1, self.sv0, self.sv1]:
+            self.assertEqual(v, self.udt.deserialize(self.udt.serialize(v)))
+
+    def test_infer_schema(self):
+        rdd = self.sc.parallelize([LabeledPoint(1.0, self.dv1), LabeledPoint(0.0, self.sv1)])
+        df = rdd.toDF()
+        schema = df.schema
+        field = [f for f in schema.fields if f.name == "features"][0]
+        self.assertEqual(field.dataType, self.udt)
+        vectors = df.rdd.map(lambda p: p.features).collect()
+        self.assertEqual(len(vectors), 2)
+        for v in vectors:
+            if isinstance(v, SparseVector):
+                self.assertEqual(v, self.sv1)
+            elif isinstance(v, DenseVector):
+                self.assertEqual(v, self.dv1)
+            else:
+                raise TypeError("expecting a vector but got %r of type %r" % (v, type(v)))
+
+
+class MatrixUDTTests(MLlibTestCase):
+
+    dm1 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10])
+    dm2 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10], isTransposed=True)
+    sm1 = SparseMatrix(1, 1, [0, 1], [0], [2.0])
+    sm2 = SparseMatrix(2, 1, [0, 0, 1], [0], [5.0], isTransposed=True)
+    udt = MatrixUDT()
+
+    def test_json_schema(self):
+        self.assertEqual(MatrixUDT.fromJson(self.udt.jsonValue()), self.udt)
+
+    def test_serialization(self):
+        for m in [self.dm1, self.dm2, self.sm1, self.sm2]:
+            self.assertEqual(m, self.udt.deserialize(self.udt.serialize(m)))
+
+    def test_infer_schema(self):
+        rdd = self.sc.parallelize([("dense", self.dm1), ("sparse", self.sm1)])
+        df = rdd.toDF()
+        schema = df.schema
+        self.assertEqual(schema.fields[1].dataType, self.udt)
+        matrices = df.rdd.map(lambda x: x._2).collect()
+        self.assertEqual(len(matrices), 2)
+        for m in matrices:
+            if isinstance(m, DenseMatrix):
+                self.assertEqual(m, self.dm1)
+            elif isinstance(m, SparseMatrix):
+                self.assertEqual(m, self.sm1)
+            else:
+                raise ValueError("Expected a matrix but got type %r" % type(m))
+
+
+@unittest.skipIf(not _have_scipy, "SciPy not installed")
+class SciPyTests(MLlibTestCase):
+
+    """
+    Test both vector operations and MLlib algorithms with SciPy sparse matrices,
+    if SciPy is available.
+    """
+
+    def test_serialize(self):
+        from scipy.sparse import lil_matrix
+        lil = lil_matrix((4, 1))
+        lil[1, 0] = 1
+        lil[3, 0] = 2
+        sv = SparseVector(4, {1: 1, 3: 2})
+        self.assertEqual(sv, _convert_to_vector(lil))
+        self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
+        self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
+        self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
+        self.assertEqual(sv, _convert_to_vector(lil.todok()))
+
+        def serialize(l):
+            return ser.loads(ser.dumps(_convert_to_vector(l)))
+        self.assertEqual(sv, serialize(lil))
+        self.assertEqual(sv, serialize(lil.tocsc()))
+        self.assertEqual(sv, serialize(lil.tocsr()))
+        self.assertEqual(sv, serialize(lil.todok()))
+
+    def test_convert_to_vector(self):
+        from scipy.sparse import csc_matrix
+        # Create a CSC matrix with non-sorted indices
+        indptr = array([0, 2])
+        indices = array([3, 1])
+        data = array([2.0, 1.0])
+        csc = csc_matrix((data, indices, indptr))
+        self.assertFalse(csc.has_sorted_indices)
+        sv = SparseVector(4, {1: 1, 3: 2})
+        self.assertEqual(sv, _convert_to_vector(csc))
+
+    def test_dot(self):
+        from scipy.sparse import lil_matrix
+        lil = lil_matrix((4, 1))
+        lil[1, 0] = 1
+        lil[3, 0] = 2
+        dv = DenseVector(array([1., 2., 3., 4.]))
+        self.assertEqual(10.0, dv.dot(lil))
+
+    def test_squared_distance(self):
+        from scipy.sparse import lil_matrix
+        lil = lil_matrix((4, 1))
+        lil[1, 0] = 3
+        lil[3, 0] = 2
+        dv = DenseVector(array([1., 2., 3., 4.]))
+        sv = SparseVector(4, {0: 1, 1: 2, 2: 3, 3: 4})
+        self.assertEqual(15.0, dv.squared_distance(lil))
+        self.assertEqual(15.0, sv.squared_distance(lil))
+
+    def scipy_matrix(self, size, values):
+        """Create a column SciPy matrix from a dictionary of values"""
+        from scipy.sparse import lil_matrix
+        lil = lil_matrix((size, 1))
+        for key, value in values.items():
+            lil[key, 0] = value
+        return lil
+
+    def test_clustering(self):
+        from pyspark.mllib.clustering import KMeans
+        data = [
+            self.scipy_matrix(3, {1: 1.0}),
+            self.scipy_matrix(3, {1: 1.1}),
+            self.scipy_matrix(3, {2: 1.0}),
+            self.scipy_matrix(3, {2: 1.1})
+        ]
+        clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||")
+        self.assertEqual(clusters.predict(data[0]), clusters.predict(data[1]))
+        self.assertEqual(clusters.predict(data[2]), clusters.predict(data[3]))
+
+    def test_classification(self):
+        from pyspark.mllib.classification import LogisticRegressionWithSGD, SVMWithSGD, NaiveBayes
+        from pyspark.mllib.tree import DecisionTree
+        data = [
+            LabeledPoint(0.0, self.scipy_matrix(2, {0: 1.0})),
+            LabeledPoint(1.0, self.scipy_matrix(2, {1: 1.0})),
+            LabeledPoint(0.0, self.scipy_matrix(2, {0: 2.0})),
+            LabeledPoint(1.0, self.scipy_matrix(2, {1: 2.0}))
+        ]
+        rdd = self.sc.parallelize(data)
+        features = [p.features for p in data]
+
+        lr_model = LogisticRegressionWithSGD.train(rdd)
+        self.assertTrue(lr_model.predict(features[0]) <= 0)
+        self.assertTrue(lr_model.predict(features[1]) > 0)
+        self.assertTrue(lr_model.predict(features[2]) <= 0)
+        self.assertTrue(lr_model.predict(features[3]) > 0)
+
+        svm_model = SVMWithSGD.train(rdd)
+        self.assertTrue(svm_model.predict(features[0]) <= 0)
+        self.assertTrue(svm_model.predict(features[1]) > 0)
+        self.assertTrue(svm_model.predict(features[2]) <= 0)
+        self.assertTrue(svm_model.predict(features[3]) > 0)
+
+        nb_model = NaiveBayes.train(rdd)
+        self.assertTrue(nb_model.predict(features[0]) <= 0)
+        self.assertTrue(nb_model.predict(features[1]) > 0)
+        self.assertTrue(nb_model.predict(features[2]) <= 0)
+        self.assertTrue(nb_model.predict(features[3]) > 0)
+
+        categoricalFeaturesInfo = {0: 3}  # feature 0 has 3 categories
+        dt_model = DecisionTree.trainClassifier(rdd, numClasses=2,
+                                                categoricalFeaturesInfo=categoricalFeaturesInfo)
+        self.assertTrue(dt_model.predict(features[0]) <= 0)
+        self.assertTrue(dt_model.predict(features[1]) > 0)
+        self.assertTrue(dt_model.predict(features[2]) <= 0)
+        self.assertTrue(dt_model.predict(features[3]) > 0)
+
+    def test_regression(self):
+        from pyspark.mllib.regression import LinearRegressionWithSGD, LassoWithSGD, \
+            RidgeRegressionWithSGD
+        from pyspark.mllib.tree import DecisionTree
+        data = [
+            LabeledPoint(-1.0, self.scipy_matrix(2, {1: -1.0})),
+            LabeledPoint(1.0, self.scipy_matrix(2, {1: 1.0})),
+            LabeledPoint(-1.0, self.scipy_matrix(2, {1: -2.0})),
+            LabeledPoint(1.0, self.scipy_matrix(2, {1: 2.0}))
+        ]
+        rdd = self.sc.parallelize(data)
+        features = [p.features for p in data]
+
+        lr_model = LinearRegressionWithSGD.train(rdd)
+        self.assertTrue(lr_model.predict(features[0]) <= 0)
+        self.assertTrue(lr_model.predict(features[1]) > 0)
+        self.assertTrue(lr_model.predict(features[2]) <= 0)
+        self.assertTrue(lr_model.predict(features[3]) > 0)
+
+        lasso_model = LassoWithSGD.train(rdd)
+        self.assertTrue(lasso_model.predict(features[0]) <= 0)
+        self.assertTrue(lasso_model.predict(features[1]) > 0)
+        self.assertTrue(lasso_model.predict(features[2]) <= 0)
+        self.assertTrue(lasso_model.predict(features[3]) > 0)
+
+        rr_model = RidgeRegressionWithSGD.train(rdd)
+        self.assertTrue(rr_model.predict(features[0]) <= 0)
+        self.assertTrue(rr_model.predict(features[1]) > 0)
+        self.assertTrue(rr_model.predict(features[2]) <= 0)
+        self.assertTrue(rr_model.predict(features[3]) > 0)
+
+        categoricalFeaturesInfo = {0: 2}  # feature 0 has 2 categories
+        dt_model = DecisionTree.trainRegressor(rdd, categoricalFeaturesInfo=categoricalFeaturesInfo)
+        self.assertTrue(dt_model.predict(features[0]) <= 0)
+        self.assertTrue(dt_model.predict(features[1]) > 0)
+        self.assertTrue(dt_model.predict(features[2]) <= 0)
+        self.assertTrue(dt_model.predict(features[3]) > 0)
+
+
+class ChiSqTestTests(MLlibTestCase):
+    def test_goodness_of_fit(self):
+        from numpy import inf
+
+        observed = Vectors.dense([4, 6, 5])
+        pearson = Statistics.chiSqTest(observed)
+
+        # Validated against the R command `chisq.test(c(4, 6, 5), p=c(1/3, 1/3, 1/3))`
+        self.assertEqual(pearson.statistic, 0.4)
+        self.assertEqual(pearson.degreesOfFreedom, 2)
+        self.assertAlmostEqual(pearson.pValue, 0.8187, 4)
+
+        # Different expected and observed sum
+        observed1 = Vectors.dense([21, 38, 43, 80])
+        expected1 = Vectors.dense([3, 5, 7, 20])
+        pearson1 = Statistics.chiSqTest(observed1, expected1)
+
+        # Results validated against the R command
+        # `chisq.test(c(21, 38, 43, 80), p=c(3/35, 1/7, 1/5, 4/7))`
+        self.assertAlmostEqual(pearson1.statistic, 14.1429, 4)
+        self.assertEqual(pearson1.degreesOfFreedom, 3)
+        self.assertAlmostEqual(pearson1.pValue, 0.002717, 4)
+
+        # Vectors with different sizes
+        observed3 = Vectors.dense([1.0, 2.0, 3.0])
+        expected3 = Vectors.dense([1.0, 2.0, 3.0, 4.0])
+        self.assertRaises(ValueError, Statistics.chiSqTest, observed3, expected3)
+
+        # Negative counts in observed
+        neg_obs = Vectors.dense([1.0, 2.0, 3.0, -4.0])
+        self.assertRaises(IllegalArgumentException, Statistics.chiSqTest, neg_obs, expected1)
+
+        # Count = 0.0 in expected but not observed
+        zero_expected = Vectors.dense([1.0, 0.0, 3.0])
+        pearson_inf = Statistics.chiSqTest(observed, zero_expected)
+        self.assertEqual(pearson_inf.statistic, inf)
+        self.assertEqual(pearson_inf.degreesOfFreedom, 2)
+        self.assertEqual(pearson_inf.pValue, 0.0)
+
+        # 0.0 in expected and observed simultaneously
+        zero_observed = Vectors.dense([2.0, 0.0, 1.0])
+        self.assertRaises(
+            IllegalArgumentException, Statistics.chiSqTest, zero_observed, zero_expected)
+
+    def test_matrix_independence(self):
+        data = [40.0, 24.0, 29.0, 56.0, 32.0, 42.0, 31.0, 10.0, 0.0, 30.0, 15.0, 12.0]
+        chi = Statistics.chiSqTest(Matrices.dense(3, 4, data))
+
+        # Results validated against R command
+        # `chisq.test(rbind(c(40, 56, 31, 30),c(24, 32, 10, 15), c(29, 42, 0, 12)))`
+        self.assertAlmostEqual(chi.statistic, 21.9958, 4)
+        self.assertEqual(chi.degreesOfFreedom, 6)
+        self.assertAlmostEqual(chi.pValue, 0.001213, 4)
+
+        # Negative counts
+        neg_counts = Matrices.dense(2, 2, [4.0, 5.0, 3.0, -3.0])
+        self.assertRaises(IllegalArgumentException, Statistics.chiSqTest, neg_counts)
+
+        # Row sum = 0.0
+        row_zero = Matrices.dense(2, 2, [0.0, 1.0, 0.0, 2.0])
+        self.assertRaises(IllegalArgumentException, Statistics.chiSqTest, row_zero)
+
+        # Column sum = 0.0
+        col_zero = Matrices.dense(2, 2, [0.0, 0.0, 2.0, 2.0])
+        self.assertRaises(IllegalArgumentException, Statistics.chiSqTest, col_zero)
+
+    def test_chi_sq_pearson(self):
+        data = [
+            LabeledPoint(0.0, Vectors.dense([0.5, 10.0])),
+            LabeledPoint(0.0, Vectors.dense([1.5, 20.0])),
+            LabeledPoint(1.0, Vectors.dense([1.5, 30.0])),
+            LabeledPoint(0.0, Vectors.dense([3.5, 30.0])),
+            LabeledPoint(0.0, Vectors.dense([3.5, 40.0])),
+            LabeledPoint(1.0, Vectors.dense([3.5, 40.0]))
+        ]
+
+        for numParts in [2, 4, 6, 8]:
+            chi = Statistics.chiSqTest(self.sc.parallelize(data, numParts))
+            feature1 = chi[0]
+            self.assertEqual(feature1.statistic, 0.75)
+            self.assertEqual(feature1.degreesOfFreedom, 2)
+            self.assertAlmostEqual(feature1.pValue, 0.6873, 4)
+
+            feature2 = chi[1]
+            self.assertEqual(feature2.statistic, 1.5)
+            self.assertEqual(feature2.degreesOfFreedom, 3)
+            self.assertAlmostEqual(feature2.pValue, 0.6823, 4)
+
+    def test_right_number_of_results(self):
+        num_cols = 1001
+        sparse_data = [
+            LabeledPoint(0.0, Vectors.sparse(num_cols, [(100, 2.0)])),
+            LabeledPoint(0.1, Vectors.sparse(num_cols, [(200, 1.0)]))
+        ]
+        chi = Statistics.chiSqTest(self.sc.parallelize(sparse_data))
+        self.assertEqual(len(chi), num_cols)
+        self.assertIsNotNone(chi[1000])
+
+
+class KolmogorovSmirnovTest(MLlibTestCase):
+
+    def test_R_implementation_equivalence(self):
+        data = self.sc.parallelize([
+            1.1626852897838, -0.585924465893051, 1.78546500331661, -1.33259371048501,
+            -0.446566766553219, 0.569606122374976, -2.88971761441412, -0.869018343326555,
+            -0.461702683149641, -0.555540910137444, -0.0201353678515895, -0.150382224136063,
+            -0.628126755843964, 1.32322085193283, -1.52135057001199, -0.437427868856691,
+            0.970577579543399, 0.0282226444247749, -0.0857821886527593, 0.389214404984942
+        ])
+        model = Statistics.kolmogorovSmirnovTest(data, "norm")
+        self.assertAlmostEqual(model.statistic, 0.189, 3)
+        self.assertAlmostEqual(model.pValue, 0.422, 3)
+
+        model = Statistics.kolmogorovSmirnovTest(data, "norm", 0, 1)
+        self.assertAlmostEqual(model.statistic, 0.189, 3)
+        self.assertAlmostEqual(model.pValue, 0.422, 3)
+
+
+class SerDeTest(MLlibTestCase):
+    def test_to_java_object_rdd(self):  # SPARK-6660
+        data = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0)
+        self.assertEqual(_to_java_object_rdd(data).count(), 10)
+
+
+class FeatureTest(MLlibTestCase):
+    def test_idf_model(self):
+        data = [
+            Vectors.dense([1, 2, 6, 0, 2, 3, 1, 1, 0, 0, 3]),
+            Vectors.dense([1, 3, 0, 1, 3, 0, 0, 2, 0, 0, 1]),
+            Vectors.dense([1, 4, 1, 0, 0, 4, 9, 0, 1, 2, 0]),
+            Vectors.dense([2, 1, 0, 3, 0, 0, 5, 0, 2, 3, 9])
+        ]
+        model = IDF().fit(self.sc.parallelize(data, 2))
+        idf = model.idf()
+        self.assertEqual(len(idf), 11)
+
+
+class Word2VecTests(MLlibTestCase):
+    def test_word2vec_setters(self):
+        model = Word2Vec() \
+            .setVectorSize(2) \
+            .setLearningRate(0.01) \
+            .setNumPartitions(2) \
+            .setNumIterations(10) \
+            .setSeed(1024) \
+            .setMinCount(3) \
+            .setWindowSize(6)
+        self.assertEqual(model.vectorSize, 2)
+        self.assertTrue(model.learningRate < 0.02)
+        self.assertEqual(model.numPartitions, 2)
+        self.assertEqual(model.numIterations, 10)
+        self.assertEqual(model.seed, 1024)
+        self.assertEqual(model.minCount, 3)
+        self.assertEqual(model.windowSize, 6)
+
+    def test_word2vec_get_vectors(self):
+        data = [
+            ["a", "b", "c", "d", "e", "f", "g"],
+            ["a", "b", "c", "d", "e", "f"],
+            ["a", "b", "c", "d", "e"],
+            ["a", "b", "c", "d"],
+            ["a", "b", "c"],
+            ["a", "b"],
+            ["a"]
+        ]
+        model = Word2Vec().fit(self.sc.parallelize(data))
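+        # Only "a", "b" and "c" appear at least 5 times (the default minCount),
+        # so exactly 3 word vectors are expected.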
+        self.assertEqual(len(model.getVectors()), 3)
+
+
+class StandardScalerTests(MLlibTestCase):
+    def test_model_setters(self):
+        data = [
+            [1.0, 2.0, 3.0],
+            [2.0, 3.0, 4.0],
+            [3.0, 4.0, 5.0]
+        ]
+        model = StandardScaler().fit(self.sc.parallelize(data))
+        self.assertIsNotNone(model.setWithMean(True))
+        self.assertIsNotNone(model.setWithStd(True))
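+        # Column means are [2, 3, 4] and the per-column sample standard deviations
+        # are all 1.0, so standardizing [1, 2, 3] yields [-1, -1, -1].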
+        self.assertEqual(model.transform([1.0, 2.0, 3.0]), DenseVector([-1.0, -1.0, -1.0]))
+
+    def test_model_transform(self):
+        data = [
+            [1.0, 2.0, 3.0],
+            [2.0, 3.0, 4.0],
+            [3.0, 4.0, 5.0]
+        ]
+        model = StandardScaler().fit(self.sc.parallelize(data))
+        self.assertEqual(model.transform([1.0, 2.0, 3.0]), DenseVector([1.0, 2.0, 3.0]))
+
+
+class ElementwiseProductTests(MLlibTestCase):
+    def test_model_transform(self):
+        weight = Vectors.dense([3, 2, 1])
+
+        densevec = Vectors.dense([4, 5, 6])
+        sparsevec = Vectors.sparse(3, [0], [1])
+        eprod = ElementwiseProduct(weight)
+        self.assertEqual(eprod.transform(densevec), DenseVector([12, 10, 6]))
+        self.assertEqual(
+            eprod.transform(sparsevec), SparseVector(3, [0], [3]))
+
+
+class StreamingKMeansTest(MLLibStreamingTestCase):
+    def test_model_params(self):
+        """Test that the model params are set correctly"""
+        stkm = StreamingKMeans()
+        stkm.setK(5).setDecayFactor(0.0)
+        self.assertEqual(stkm._k, 5)
+        self.assertEqual(stkm._decayFactor, 0.0)
+
+        # Model not set yet.
+        self.assertIsNone(stkm.latestModel())
+        self.assertRaises(ValueError, stkm.trainOn, [0.0, 1.0])
+
+        stkm.setInitialCenters(
+            centers=[[0.0, 0.0], [1.0, 1.0]], weights=[1.0, 1.0])
+        self.assertEqual(
+            stkm.latestModel().centers, [[0.0, 0.0], [1.0, 1.0]])
+        self.assertEqual(stkm.latestModel().clusterWeights, [1.0, 1.0])
+
+    def test_accuracy_for_single_center(self):
+        """Test that parameters obtained are correct for a single center."""
+        centers, batches = self.streamingKMeansDataGenerator(
+            batches=5, numPoints=5, k=1, d=5, r=0.1, seed=0)
+        stkm = StreamingKMeans(1)
+        stkm.setInitialCenters([[0., 0., 0., 0., 0.]], [0.])
+        input_stream = self.ssc.queueStream(
+            [self.sc.parallelize(batch, 1) for batch in batches])
+        stkm.trainOn(input_stream)
+
+        self.ssc.start()
+
+        def condition():
+            self.assertEqual(stkm.latestModel().clusterWeights, [25.0])
+            return True
+        self._eventually(condition, catch_assertions=True)
+
+        realCenters = array_sum(array(centers), axis=0)
+        for i in range(5):
+            modelCenters = stkm.latestModel().centers[0][i]
+            self.assertAlmostEqual(centers[0][i], modelCenters, 1)
+            self.assertAlmostEqual(realCenters[i], modelCenters, 1)
+
+    def streamingKMeansDataGenerator(self, batches, numPoints,
+                                     k, d, r, seed, centers=None):
+        rng = random.RandomState(seed)
+
+        # Generate centers.
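+        # (A caller-supplied `centers` argument is ignored here; fresh centers
+        # are always drawn from the seeded RNG.)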
+        centers = [rng.randn(d) for i in range(k)]
+
+        return centers, [[Vectors.dense(centers[j % k] + r * rng.randn(d))
+                          for j in range(numPoints)]
+                         for i in range(batches)]
+
+    def test_trainOn_model(self):
+        """Test the model on toy data with four clusters."""
+        stkm = StreamingKMeans()
+        initCenters = [[1.0, 1.0], [-1.0, 1.0], [-1.0, -1.0], [1.0, -1.0]]
+        stkm.setInitialCenters(
+            centers=initCenters, weights=[1.0, 1.0, 1.0, 1.0])
+
+        # Create a toy dataset by setting a tiny offset for each point.
+        offsets = [[0, 0.1], [0, -0.1], [0.1, 0], [-0.1, 0]]
+        batches = []
+        for offset in offsets:
+            batches.append([[offset[0] + center[0], offset[1] + center[1]]
+                            for center in initCenters])
+
+        batches = [self.sc.parallelize(batch, 1) for batch in batches]
+        input_stream = self.ssc.queueStream(batches)
+        stkm.trainOn(input_stream)
+        self.ssc.start()
+
+        # Give enough time to train the model.
+        def condition():
+            finalModel = stkm.latestModel()
+            self.assertTrue(all(finalModel.centers == array(initCenters)))
+            self.assertEqual(finalModel.clusterWeights, [5.0, 5.0, 5.0, 5.0])
+            return True
+        self._eventually(condition, catch_assertions=True)
+
+    def test_predictOn_model(self):
+        """Test that the model predicts correctly on toy data."""
+        stkm = StreamingKMeans()
+        stkm._model = StreamingKMeansModel(
+            clusterCenters=[[1.0, 1.0], [-1.0, 1.0], [-1.0, -1.0], [1.0, -1.0]],
+            clusterWeights=[1.0, 1.0, 1.0, 1.0])
+
+        predict_data = [[[1.5, 1.5]], [[-1.5, 1.5]], [[-1.5, -1.5]], [[1.5, -1.5]]]
+        predict_data = [self.sc.parallelize(batch, 1) for batch in predict_data]
+        predict_stream = self.ssc.queueStream(predict_data)
+        predict_val = stkm.predictOn(predict_stream)
+
+        result = []
+
+        def update(rdd):
+            rdd_collect = rdd.collect()
+            if rdd_collect:
+                result.append(rdd_collect)
+
+        predict_val.foreachRDD(update)
+        self.ssc.start()
+
+        def condition():
+            self.assertEqual(result, [[0], [1], [2], [3]])
+            return True
+
+        self._eventually(condition, catch_assertions=True)
+
+    @unittest.skip("SPARK-10086: Flaky StreamingKMeans test in PySpark")
+    def test_trainOn_predictOn(self):
+        """Test that prediction happens on the updated model."""
+        stkm = StreamingKMeans(decayFactor=0.0, k=2)
+        stkm.setInitialCenters([[0.0], [1.0]], [1.0, 1.0])
+
+        # Since decay factor is set to zero, once the first batch
+        # is passed the clusterCenters are updated to [-0.5, 0.7]
+        # which causes 0.2 & 0.3 to be classified as 1, even though the
+        # classification based on the initial model would have been 0,
+        # proving that the model is updated.
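+        # Worked through: batch 1 assigns -0.5 to the first center (0.0) and
+        # 0.6, 0.8 to the second (1.0); with decayFactor=0.0 the new centers
+        # become the batch means, -0.5 and (0.6 + 0.8) / 2 = 0.7, so the
+        # second batch [0.2, -0.1, 0.3] is then classified as [1, 0, 1].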
+        batches = [[[-0.5], [0.6], [0.8]], [[0.2], [-0.1], [0.3]]]
+        batches = [self.sc.parallelize(batch) for batch in batches]
+        input_stream = self.ssc.queueStream(batches)
+        predict_results = []
+
+        def collect(rdd):
+            rdd_collect = rdd.collect()
+            if rdd_collect:
+                predict_results.append(rdd_collect)
+
+        stkm.trainOn(input_stream)
+        predict_stream = stkm.predictOn(input_stream)
+        predict_stream.foreachRDD(collect)
+
+        self.ssc.start()
+
+        def condition():
+            self.assertEqual(predict_results, [[0, 1, 1], [1, 0, 1]])
+            return True
+
+        self._eventually(condition, catch_assertions=True)
+
+
+class LinearDataGeneratorTests(MLlibTestCase):
+    def test_dim(self):
+        linear_data = LinearDataGenerator.generateLinearInput(
+            intercept=0.0, weights=[0.0, 0.0, 0.0],
+            xMean=[0.0, 0.0, 0.0], xVariance=[0.33, 0.33, 0.33],
+            nPoints=4, seed=0, eps=0.1)
+        self.assertEqual(len(linear_data), 4)
+        for point in linear_data:
+            self.assertEqual(len(point.features), 3)
+
+        linear_data = LinearDataGenerator.generateLinearRDD(
+            sc=self.sc, nexamples=6, nfeatures=2, eps=0.1,
+            nParts=2, intercept=0.0).collect()
+        self.assertEqual(len(linear_data), 6)
+        for point in linear_data:
+            self.assertEqual(len(point.features), 2)
+
+
+class StreamingLogisticRegressionWithSGDTests(MLLibStreamingTestCase):
+
+    @staticmethod
+    def generateLogisticInput(offset, scale, nPoints, seed):
+        """
+        Generate 1 / (1 + exp(-x * scale + offset))
+
+        where,
+        x is randomnly distributed and the threshold
+        and labels for each sample in x is obtained from a random uniform
+        distribution.
+        """
+        rng = random.RandomState(seed)
+        x = rng.randn(nPoints)
+        sigmoid = 1. / (1 + exp(-(dot(x, scale) + offset)))
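+        # Label each point 1.0 when a uniform draw falls at or below its sigmoid
+        # value, and 0.0 otherwise (i.e. labels ~ Bernoulli(sigmoid(x))).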
+        y_p = rng.rand(nPoints)
+        cut_off = y_p <= sigmoid
+        y_p[cut_off] = 1.0
+        y_p[~cut_off] = 0.0
+        return [
+            LabeledPoint(y_p[i], Vectors.dense([x[i]]))
+            for i in range(nPoints)]
+
+    def test_parameter_accuracy(self):
+        """
+        Test that the final value of weights is close to the desired value.
+        """
+        input_batches = [
+            self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
+            for i in range(20)]
+        input_stream = self.ssc.queueStream(input_batches)
+
+        slr = StreamingLogisticRegressionWithSGD(
+            stepSize=0.2, numIterations=25)
+        slr.setInitialWeights([0.0])
+        slr.trainOn(input_stream)
+
+        self.ssc.start()
+
+        def condition():
+            rel = (1.5 - slr.latestModel().weights.array[0]) / 1.5
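+            # assertAlmostEqual(rel, 0.1, 1) accepts any relative error within
+            # about 0.05 of 0.1, i.e. the learned weight should end up close to
+            # (but a bit below) the true value of 1.5.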
+            self.assertAlmostEqual(rel, 0.1, 1)
+            return True
+
+        self._eventually(condition, catch_assertions=True)
+
+    def test_convergence(self):
+        """
+        Test that weights converge to the required value on toy data.
+        """
+        input_batches = [
+            self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
+            for i in range(20)]
+        input_stream = self.ssc.queueStream(input_batches)
+        models = []
+
+        slr = StreamingLogisticRegressionWithSGD(
+            stepSize=0.2, numIterations=25)
+        slr.setInitialWeights([0.0])
+        slr.trainOn(input_stream)
+        input_stream.foreachRDD(
+            lambda x: models.append(slr.latestModel().weights[0]))
+
+        self.ssc.start()
+
+        def condition():
+            self.assertEqual(len(models), len(input_batches))
+            return True
+
+        # We want all batches to finish for this test.
+        self._eventually(condition, 60.0, catch_assertions=True)
+
+        t_models = array(models)
+        diff = t_models[1:] - t_models[:-1]
+        # Test that weights improve with a small tolerance
+        self.assertTrue(all(diff >= -0.1))
+        self.assertTrue(array_sum(diff > 0) > 1)
+
+    @staticmethod
+    def calculate_accuracy_error(true, predicted):
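+        # Mean absolute difference between labels and predictions; for 0/1
+        # labels this is just the fraction of misclassified points.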
+        return sum(abs(array(true) - array(predicted))) / len(true)
+
+    def test_predictions(self):
+        """Test predicted values on a toy model."""
+        input_batches = []
+        for i in range(20):
+            batch = self.sc.parallelize(
+                self.generateLogisticInput(0, 1.5, 100, 42 + i))
+            input_batches.append(batch.map(lambda x: (x.label, x.features)))
+        input_stream = self.ssc.queueStream(input_batches)
+
+        slr = StreamingLogisticRegressionWithSGD(
+            stepSize=0.2, numIterations=25)
+        slr.setInitialWeights([1.5])
+        predict_stream = slr.predictOnValues(input_stream)
+        true_predicted = []
+        predict_stream.foreachRDD(lambda x: true_predicted.append(x.collect()))
+        self.ssc.start()
+
+        def condition():
+            self.assertEqual(len(true_predicted), len(input_batches))
+            return True
+
+        self._eventually(condition, catch_assertions=True)
+
+        # Test that the accuracy error is no more than 0.4 on each batch.
+        for batch in true_predicted:
+            true, predicted = zip(*batch)
+            self.assertTrue(
+                self.calculate_accuracy_error(true, predicted) < 0.4)
+
+    def test_training_and_prediction(self):
+        """Test that the model improves on toy data with no. of batches"""
+        input_batches = [
+            self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
+            for i in range(20)]
+        predict_batches = [
+            b.map(lambda lp: (lp.label, lp.features)) for b in input_batches]
+
+        slr = StreamingLogisticRegressionWithSGD(
+            stepSize=0.01, numIterations=25)
+        slr.setInitialWeights([-0.1])
+        errors = []
+
+        def collect_errors(rdd):
+            true, predicted = zip(*rdd.collect())
+            errors.append(self.calculate_accuracy_error(true, predicted))
+
+        true_predicted = []
+        input_stream = self.ssc.queueStream(input_batches)
+        predict_stream = self.ssc.queueStream(predict_batches)
+        slr.trainOn(input_stream)
+        ps = slr.predictOnValues(predict_stream)
+        ps.foreachRDD(lambda x: collect_errors(x))
+
+        self.ssc.start()
+
+        def condition():
+            # Test that the improvement in error is > 0.3
+            if len(errors) == len(predict_batches):
+                self.assertGreater(errors[1] - errors[-1], 0.3)
+            if len(errors) >= 3 and errors[1] - errors[-1] > 0.3:
+                return True
+            return "Latest errors: " + ", ".join(map(lambda x: str(x), errors))
+
+        self._eventually(condition)
+
+
+class StreamingLinearRegressionWithTests(MLLibStreamingTestCase):
+
+    def assertArrayAlmostEqual(self, array1, array2, dec):
+        for i, j in zip(array1, array2):
+            self.assertAlmostEqual(i, j, dec)
+
+    def test_parameter_accuracy(self):
+        """Test that coefs are predicted accurately by fitting on toy data."""
+
+        # Test that fitting (10*X1 + 10*X2), (X1, X2) gives coefficients
+        # (10, 10)
+        slr = StreamingLinearRegressionWithSGD(stepSize=0.2, numIterations=25)
+        slr.setInitialWeights([0.0, 0.0])
+        xMean = [0.0, 0.0]
+        xVariance = [1.0 / 3.0, 1.0 / 3.0]
+
+        # Create ten batches with 100 sample points in each.
+        batches = []
+        for i in range(10):
+            batch = LinearDataGenerator.generateLinearInput(
+                0.0, [10.0, 10.0], xMean, xVariance, 100, 42 + i, 0.1)
+            batches.append(self.sc.parallelize(batch))
+
+        input_stream = self.ssc.queueStream(batches)
+        slr.trainOn(input_stream)
+        self.ssc.start()
+
+        def condition():
+            self.assertArrayAlmostEqual(
+                slr.latestModel().weights.array, [10., 10.], 1)
+            self.assertAlmostEqual(slr.latestModel().intercept, 0.0, 1)
+            return True
+
+        self._eventually(condition, catch_assertions=True)
+
+    def test_parameter_convergence(self):
+        """Test that the model parameters improve with streaming data."""
+        slr = StreamingLinearRegressionWithSGD(stepSize=0.2, numIterations=25)
+        slr.setInitialWeights([0.0])
+
+        # Create ten batches with 100 sample points in each.
+        batches = []
+        for i in range(10):
+            batch = LinearDataGenerator.generateLinearInput(
+                0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1)
+            batches.append(self.sc.parallelize(batch))
+
+        model_weights = []
+        input_stream = self.ssc.queueStream(batches)
+        input_stream.foreachRDD(
+            lambda x: model_weights.append(slr.latestModel().weights[0]))
+        slr.trainOn(input_stream)
+        self.ssc.start()
+
+        def condition():
+            self.assertEqual(len(model_weights), len(batches))
+            return True
+
+        # We want all batches to finish for this test.
+        self._eventually(condition, catch_assertions=True)
+
+        w = array(model_weights)
+        diff = w[1:] - w[:-1]
+        self.assertTrue(all(diff >= -0.1))
+
+    def test_prediction(self):
+        """Test prediction on a model with weights already set."""
+        # Create a model whose initial weights equal the true coefficients.
+        slr = StreamingLinearRegressionWithSGD(stepSize=0.2, numIterations=25)
+        slr.setInitialWeights([10.0, 10.0])
+
+        # Create ten batches with 100 sample points in each.
+        batches = []
+        for i in range(10):
+            batch = LinearDataGenerator.generateLinearInput(
+                0.0, [10.0, 10.0], [0.0, 0.0], [1.0 / 3.0, 1.0 / 3.0],
+                100, 42 + i, 0.1)
+            batches.append(
+                self.sc.parallelize(batch).map(lambda lp: (lp.label, lp.features)))
+
+        input_stream = self.ssc.queueStream(batches)
+        output_stream = slr.predictOnValues(input_stream)
+        samples = []
+        output_stream.foreachRDD(lambda x: samples.append(x.collect()))
+
+        self.ssc.start()
+
+        def condition():
+            self.assertEqual(len(samples), len(batches))
+            return True
+
+        # We want all batches to finish for this test.
+        self._eventually(condition, catch_assertions=True)
+
+        # Test that mean absolute error on each batch is less than 0.1
+        for batch in samples:
+            true, predicted = zip(*batch)
+            self.assertTrue(mean(abs(array(true) - array(predicted))) < 0.1)
+
+    def test_train_prediction(self):
+        """Test that error on test data improves as model is trained."""
+        slr = StreamingLinearRegressionWithSGD(stepSize=0.2, numIterations=25)
+        slr.setInitialWeights([0.0])
+
+        # Create ten batches with 100 sample points in each.
+        batches = []
+        for i in range(10):
+            batch = LinearDataGenerator.generateLinearInput(
+                0.0, [10.0], [0.0], [1.0 / 3.0], 100, 42 + i, 0.1)
+            batches.append(self.sc.parallelize(batch))
+
+        predict_batches = [
+            b.map(lambda lp: (lp.label, lp.features)) for b in batches]
+        errors = []
+
+        def func(rdd):
+            true, predicted = zip(*rdd.collect())
+            errors.append(mean(abs(true) - abs(predicted)))
+
+        input_stream = self.ssc.queueStream(batches)
+        output_stream = self.ssc.queueStream(predict_batches)
+        slr.trainOn(input_stream)
+        output_stream = slr.predictOnValues(output_stream)
+        output_stream.foreachRDD(func)
+        self.ssc.start()
+
+        def condition():
+            if len(errors) == len(predict_batches):
+                self.assertGreater(errors[1] - errors[-1], 2)
+            if len(errors) >= 3 and errors[1] - errors[-1] > 2:
+                return True
+            return "Latest errors: " + ", ".join(map(lambda x: str(x), errors))
+
+        self._eventually(condition)
+
+
+class MLUtilsTests(MLlibTestCase):
+    def test_append_bias(self):
+        data = [2.0, 2.0, 2.0]
+        ret = MLUtils.appendBias(data)
+        self.assertEqual(ret[3], 1.0)
+        self.assertEqual(type(ret), DenseVector)
+
+    def test_append_bias_with_vector(self):
+        data = Vectors.dense([2.0, 2.0, 2.0])
+        ret = MLUtils.appendBias(data)
+        self.assertEqual(ret[3], 1.0)
+        self.assertEqual(type(ret), DenseVector)
+
+    def test_append_bias_with_sp_vector(self):
+        data = Vectors.sparse(3, {0: 2.0, 2: 2.0})
+        expected = Vectors.sparse(4, {0: 2.0, 2: 2.0, 3: 1.0})
+        # Returned value must be SparseVector
+        ret = MLUtils.appendBias(data)
+        self.assertEqual(ret, expected)
+        self.assertEqual(type(ret), SparseVector)
+
+    def test_load_vectors(self):
+        import shutil
+        data = [
+            [1.0, 2.0, 3.0],
+            [1.0, 2.0, 3.0]
+        ]
+        temp_dir = tempfile.mkdtemp()
+        load_vectors_path = os.path.join(temp_dir, "test_load_vectors")
+        try:
+            self.sc.parallelize(data).saveAsTextFile(load_vectors_path)
+            ret_rdd = MLUtils.loadVectors(self.sc, load_vectors_path)
+            ret = ret_rdd.collect()
+            self.assertEqual(len(ret), 2)
+            self.assertEqual(ret[0], DenseVector([1.0, 2.0, 3.0]))
+            self.assertEqual(ret[1], DenseVector([1.0, 2.0, 3.0]))
+        except:
+            self.fail()
+        finally:
+            shutil.rmtree(load_vectors_path)
+
+
+class ALSTests(MLlibTestCase):
+
+    def test_als_ratings_serialize(self):
+        r = Rating(7, 1123, 3.14)
+        jr = self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads(bytearray(ser.dumps(r)))
+        nr = ser.loads(bytes(self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.dumps(jr)))
+        self.assertEqual(r.user, nr.user)
+        self.assertEqual(r.product, nr.product)
+        self.assertAlmostEqual(r.rating, nr.rating, 2)
+
+    def test_als_ratings_id_long_error(self):
+        r = Rating(1205640308657491975, 50233468418, 1.0)
+        # rating user id exceeds max int value, should fail when pickled
+        self.assertRaises(Py4JJavaError, self.sc._jvm.org.apache.spark.mllib.api.python.SerDe.loads,
+                          bytearray(ser.dumps(r)))
+
+
+class HashingTFTest(MLlibTestCase):
+
+    def test_binary_term_freqs(self):
+        hashingTF = HashingTF(100).setBinary(True)
+        doc = "a a b c c c".split(" ")
+        n = hashingTF.numFeatures
+        output = hashingTF.transform(doc).toArray()
+        expected = Vectors.sparse(n, {hashingTF.indexOf("a"): 1.0,
+                                      hashingTF.indexOf("b"): 1.0,
+                                      hashingTF.indexOf("c"): 1.0}).toArray()
+        for i in range(0, n):
+            self.assertAlmostEqual(output[i], expected[i], 14, "Error at " + str(i) +
+                                   ": expected " + str(expected[i]) + ", got " + str(output[i]))
+
+
+class DimensionalityReductionTests(MLlibTestCase):
+
+    denseData = [
+        Vectors.dense([0.0, 1.0, 2.0]),
+        Vectors.dense([3.0, 4.0, 5.0]),
+        Vectors.dense([6.0, 7.0, 8.0]),
+        Vectors.dense([9.0, 0.0, 1.0])
+    ]
+    sparseData = [
+        Vectors.sparse(3, [(1, 1.0), (2, 2.0)]),
+        Vectors.sparse(3, [(0, 3.0), (1, 4.0), (2, 5.0)]),
+        Vectors.sparse(3, [(0, 6.0), (1, 7.0), (2, 8.0)]),
+        Vectors.sparse(3, [(0, 9.0), (2, 1.0)])
+    ]
+
+    def assertEqualUpToSign(self, vecA, vecB):
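+        # Singular vectors / principal components are only determined up to sign,
+        # so accept either vecB or -vecB as a match for vecA.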
+        eq1 = vecA - vecB
+        eq2 = vecA + vecB
+        self.assertTrue(sum(abs(eq1)) < 1e-6 or sum(abs(eq2)) < 1e-6)
+
+    def test_svd(self):
+        denseMat = RowMatrix(self.sc.parallelize(self.denseData))
+        sparseMat = RowMatrix(self.sc.parallelize(self.sparseData))
+        m = 4
+        n = 3
+        for mat in [denseMat, sparseMat]:
+            for k in range(1, 4):
+                rm = mat.computeSVD(k, computeU=True)
+                self.assertEqual(rm.s.size, k)
+                self.assertEqual(rm.U.numRows(), m)
+                self.assertEqual(rm.U.numCols(), k)
+                self.assertEqual(rm.V.numRows, n)
+                self.assertEqual(rm.V.numCols, k)
+
+        # Test that U returned is None if computeU is set to False.
+        self.assertEqual(mat.computeSVD(1).U, None)
+
+        # Test that low rank matrices cannot have number of singular values
+        # greater than a limit.
+        rm = RowMatrix(self.sc.parallelize(tile([1, 2, 3], (3, 1))))
+        self.assertEqual(rm.computeSVD(3, False, 1e-6).s.size, 1)
+
+    def test_pca(self):
+        expected_pcs = array([
+            [0.0, 1.0, 0.0],
+            [sqrt(2.0) / 2.0, 0.0, sqrt(2.0) / 2.0],
+            [sqrt(2.0) / 2.0, 0.0, -sqrt(2.0) / 2.0]
+        ])
+        n = 3
+        denseMat = RowMatrix(self.sc.parallelize(self.denseData))
+        sparseMat = RowMatrix(self.sc.parallelize(self.sparseData))
+        for mat in [denseMat, sparseMat]:
+            for k in range(1, 4):
+                pcs = mat.computePrincipalComponents(k)
+                self.assertEqual(pcs.numRows, n)
+                self.assertEqual(pcs.numCols, k)
+
+                # We can just test the updated principal component for equality.
+                self.assertEqualUpToSign(pcs.toArray()[:, k - 1], expected_pcs[:, k - 1])
+
+
+class FPGrowthTest(MLlibTestCase):
+
+    def test_fpgrowth(self):
+        data = [["a", "b", "c"], ["a", "b", "d", "e"], ["a", "c", "e"], ["a", "c", "f"]]
+        rdd = self.sc.parallelize(data, 2)
+        model1 = FPGrowth.train(rdd, 0.6, 2)
+        # use default data partition number when numPartitions is not specified
+        model2 = FPGrowth.train(rdd, 0.6)
+        self.assertEqual(sorted(model1.freqItemsets().collect()),
+                         sorted(model2.freqItemsets().collect()))
+
+if __name__ == "__main__":
+    from pyspark.mllib.tests import *
+    if not _have_scipy:
+        print("NOTE: Skipping SciPy tests as it does not seem to be installed")
+    if xmlrunner:
+        unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    else:
+        unittest.main(verbosity=2)
+    if not _have_scipy:
+        print("NOTE: SciPy tests were skipped as it does not seem to be installed")
+    sc.stop()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/tree.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/tree.py
new file mode 100644
index 0000000000000000000000000000000000000000..b05734ce489d94a6feb45be2717ab68b0aed1c08
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/tree.py
@@ -0,0 +1,661 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import absolute_import
+
+import sys
+import random
+
+from pyspark import SparkContext, RDD, since
+from pyspark.mllib.common import callMLlibFunc, inherit_doc, JavaModelWrapper
+from pyspark.mllib.linalg import _convert_to_vector
+from pyspark.mllib.regression import LabeledPoint
+from pyspark.mllib.util import JavaLoader, JavaSaveable
+
+__all__ = ['DecisionTreeModel', 'DecisionTree', 'RandomForestModel',
+           'RandomForest', 'GradientBoostedTreesModel', 'GradientBoostedTrees']
+
+
+class TreeEnsembleModel(JavaModelWrapper, JavaSaveable):
+    """TreeEnsembleModel
+
+    .. versionadded:: 1.3.0
+    """
+    @since("1.3.0")
+    def predict(self, x):
+        """
+        Predict values for a single data point or an RDD of points using
+        the model trained.
+
+        .. note:: In Python, predict cannot currently be used within an RDD
+            transformation or action.
+            Call predict directly on the RDD instead.
+        """
+        if isinstance(x, RDD):
+            return self.call("predict", x.map(_convert_to_vector))
+
+        else:
+            return self.call("predict", _convert_to_vector(x))
+
+    @since("1.3.0")
+    def numTrees(self):
+        """
+        Get number of trees in ensemble.
+        """
+        return self.call("numTrees")
+
+    @since("1.3.0")
+    def totalNumNodes(self):
+        """
+        Get total number of nodes, summed over all trees in the ensemble.
+        """
+        return self.call("totalNumNodes")
+
+    def __repr__(self):
+        """ Summary of model """
+        return self._java_model.toString()
+
+    @since("1.3.0")
+    def toDebugString(self):
+        """ Full model """
+        return self._java_model.toDebugString()
+
+
+class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader):
+    """
+    A decision tree model for classification or regression.
+
+    .. versionadded:: 1.1.0
+    """
+    @since("1.1.0")
+    def predict(self, x):
+        """
+        Predict the label of one or more examples.
+
+        .. note:: In Python, predict cannot currently be used within an RDD
+            transformation or action.
+            Call predict directly on the RDD instead.
+
+        :param x:
+          Data point (feature vector), or an RDD of data points (feature
+          vectors).
+        """
+        if isinstance(x, RDD):
+            return self.call("predict", x.map(_convert_to_vector))
+
+        else:
+            return self.call("predict", _convert_to_vector(x))
+
+    @since("1.1.0")
+    def numNodes(self):
+        """Get number of nodes in tree, including leaf nodes."""
+        return self._java_model.numNodes()
+
+    @since("1.1.0")
+    def depth(self):
+        """
+        Get depth of tree (e.g. depth 0 means 1 leaf node, depth 1
+        means 1 internal node + 2 leaf nodes).
+        """
+        return self._java_model.depth()
+
+    def __repr__(self):
+        """ summary of model. """
+        return self._java_model.toString()
+
+    @since("1.2.0")
+    def toDebugString(self):
+        """ full model. """
+        return self._java_model.toDebugString()
+
+    @classmethod
+    def _java_loader_class(cls):
+        return "org.apache.spark.mllib.tree.model.DecisionTreeModel"
+
+
+class DecisionTree(object):
+    """
+    Learning algorithm for a decision tree model for classification or
+    regression.
+
+    .. versionadded:: 1.1.0
+    """
+
+    @classmethod
+    def _train(cls, data, type, numClasses, features, impurity="gini", maxDepth=5, maxBins=32,
+               minInstancesPerNode=1, minInfoGain=0.0):
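+        # Shared helper for trainClassifier/trainRegressor: checks that the RDD
+        # contains LabeledPoints and delegates to the JVM "trainDecisionTreeModel"
+        # entry point via callMLlibFunc.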
+        first = data.first()
+        assert isinstance(first, LabeledPoint), "the data should be RDD of LabeledPoint"
+        model = callMLlibFunc("trainDecisionTreeModel", data, type, numClasses, features,
+                              impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain)
+        return DecisionTreeModel(model)
+
+    @classmethod
+    @since("1.1.0")
+    def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo,
+                        impurity="gini", maxDepth=5, maxBins=32, minInstancesPerNode=1,
+                        minInfoGain=0.0):
+        """
+        Train a decision tree model for classification.
+
+        :param data:
+          Training data: RDD of LabeledPoint. Labels should take values
+          {0, 1, ..., numClasses-1}.
+        :param numClasses:
+          Number of classes for classification.
+        :param categoricalFeaturesInfo:
+          Map storing arity of categorical features. An entry (n -> k)
+          indicates that feature n is categorical with k categories
+          indexed from 0: {0, 1, ..., k-1}.
+        :param impurity:
+          Criterion used for information gain calculation.
+          Supported values: "gini" or "entropy".
+          (default: "gini")
+        :param maxDepth:
+          Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1
+          means 1 internal node + 2 leaf nodes).
+          (default: 5)
+        :param maxBins:
+          Number of bins used for finding splits at each node.
+          (default: 32)
+        :param minInstancesPerNode:
+          Minimum number of instances required at child nodes to create
+          the parent split.
+          (default: 1)
+        :param minInfoGain:
+          Minimum info gain required to create a split.
+          (default: 0.0)
+        :return:
+          DecisionTreeModel.
+
+        Example usage:
+
+        >>> from numpy import array
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> from pyspark.mllib.tree import DecisionTree
+        >>>
+        >>> data = [
+        ...     LabeledPoint(0.0, [0.0]),
+        ...     LabeledPoint(1.0, [1.0]),
+        ...     LabeledPoint(1.0, [2.0]),
+        ...     LabeledPoint(1.0, [3.0])
+        ... ]
+        >>> model = DecisionTree.trainClassifier(sc.parallelize(data), 2, {})
+        >>> print(model)
+        DecisionTreeModel classifier of depth 1 with 3 nodes
+
+        >>> print(model.toDebugString())
+        DecisionTreeModel classifier of depth 1 with 3 nodes
+          If (feature 0 <= 0.5)
+           Predict: 0.0
+          Else (feature 0 > 0.5)
+           Predict: 1.0
+        <BLANKLINE>
+        >>> model.predict(array([1.0]))
+        1.0
+        >>> model.predict(array([0.0]))
+        0.0
+        >>> rdd = sc.parallelize([[1.0], [0.0]])
+        >>> model.predict(rdd).collect()
+        [1.0, 0.0]
+        """
+        return cls._train(data, "classification", numClasses, categoricalFeaturesInfo,
+                          impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain)
+
+    @classmethod
+    @since("1.1.0")
+    def trainRegressor(cls, data, categoricalFeaturesInfo,
+                       impurity="variance", maxDepth=5, maxBins=32, minInstancesPerNode=1,
+                       minInfoGain=0.0):
+        """
+        Train a decision tree model for regression.
+
+        :param data:
+          Training data: RDD of LabeledPoint. Labels are real numbers.
+        :param categoricalFeaturesInfo:
+          Map storing arity of categorical features. An entry (n -> k)
+          indicates that feature n is categorical with k categories
+          indexed from 0: {0, 1, ..., k-1}.
+        :param impurity:
+          Criterion used for information gain calculation.
+          The only supported value for regression is "variance".
+          (default: "variance")
+        :param maxDepth:
+          Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1
+          means 1 internal node + 2 leaf nodes).
+          (default: 5)
+        :param maxBins:
+          Number of bins used for finding splits at each node.
+          (default: 32)
+        :param minInstancesPerNode:
+          Minimum number of instances required at child nodes to create
+          the parent split.
+          (default: 1)
+        :param minInfoGain:
+          Minimum info gain required to create a split.
+          (default: 0.0)
+        :return:
+          DecisionTreeModel.
+
+        Example usage:
+
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> from pyspark.mllib.tree import DecisionTree
+        >>> from pyspark.mllib.linalg import SparseVector
+        >>>
+        >>> sparse_data = [
+        ...     LabeledPoint(0.0, SparseVector(2, {0: 0.0})),
+        ...     LabeledPoint(1.0, SparseVector(2, {1: 1.0})),
+        ...     LabeledPoint(0.0, SparseVector(2, {0: 0.0})),
+        ...     LabeledPoint(1.0, SparseVector(2, {1: 2.0}))
+        ... ]
+        >>>
+        >>> model = DecisionTree.trainRegressor(sc.parallelize(sparse_data), {})
+        >>> model.predict(SparseVector(2, {1: 1.0}))
+        1.0
+        >>> model.predict(SparseVector(2, {1: 0.0}))
+        0.0
+        >>> rdd = sc.parallelize([[0.0, 1.0], [0.0, 0.0]])
+        >>> model.predict(rdd).collect()
+        [1.0, 0.0]
+        """
+        return cls._train(data, "regression", 0, categoricalFeaturesInfo,
+                          impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain)
+
+
+@inherit_doc
+class RandomForestModel(TreeEnsembleModel, JavaLoader):
+    """
+    Represents a random forest model.
+
+    .. versionadded:: 1.2.0
+    """
+
+    @classmethod
+    def _java_loader_class(cls):
+        return "org.apache.spark.mllib.tree.model.RandomForestModel"
+
+
+class RandomForest(object):
+    """
+    Learning algorithm for a random forest model for classification or
+    regression.
+
+    .. versionadded:: 1.2.0
+    """
+
+    supportedFeatureSubsetStrategies = ("auto", "all", "sqrt", "log2", "onethird")
+
+    @classmethod
+    def _train(cls, data, algo, numClasses, categoricalFeaturesInfo, numTrees,
+               featureSubsetStrategy, impurity, maxDepth, maxBins, seed):
+        first = data.first()
+        assert isinstance(first, LabeledPoint), "the data should be RDD of LabeledPoint"
+        if featureSubsetStrategy not in cls.supportedFeatureSubsetStrategies:
+            raise ValueError("unsupported featureSubsetStrategy: %s" % featureSubsetStrategy)
+        if seed is None:
+            seed = random.randint(0, 1 << 30)
+        model = callMLlibFunc("trainRandomForestModel", data, algo, numClasses,
+                              categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity,
+                              maxDepth, maxBins, seed)
+        return RandomForestModel(model)
+
+    @classmethod
+    @since("1.2.0")
+    def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees,
+                        featureSubsetStrategy="auto", impurity="gini", maxDepth=4, maxBins=32,
+                        seed=None):
+        """
+        Train a random forest model for binary or multiclass
+        classification.
+
+        :param data:
+          Training dataset: RDD of LabeledPoint. Labels should take values
+          {0, 1, ..., numClasses-1}.
+        :param numClasses:
+          Number of classes for classification.
+        :param categoricalFeaturesInfo:
+          Map storing arity of categorical features. An entry (n -> k)
+          indicates that feature n is categorical with k categories
+          indexed from 0: {0, 1, ..., k-1}.
+        :param numTrees:
+          Number of trees in the random forest.
+        :param featureSubsetStrategy:
+          Number of features to consider for splits at each node.
+          Supported values: "auto", "all", "sqrt", "log2", "onethird".
+          If "auto" is set, this parameter is set based on numTrees:
+          if numTrees == 1, set to "all";
+          if numTrees > 1 (forest), set to "sqrt".
+          (default: "auto")
+        :param impurity:
+          Criterion used for information gain calculation.
+          Supported values: "gini" or "entropy".
+          (default: "gini")
+        :param maxDepth:
+          Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1
+          means 1 internal node + 2 leaf nodes).
+          (default: 4)
+        :param maxBins:
+          Maximum number of bins used for splitting features.
+          (default: 32)
+        :param seed:
+          Random seed for bootstrapping and choosing feature subsets.
+          Set as None to generate seed based on system time.
+          (default: None)
+        :return:
+          RandomForestModel that can be used for prediction.
+
+        Example usage:
+
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> from pyspark.mllib.tree import RandomForest
+        >>>
+        >>> data = [
+        ...     LabeledPoint(0.0, [0.0]),
+        ...     LabeledPoint(0.0, [1.0]),
+        ...     LabeledPoint(1.0, [2.0]),
+        ...     LabeledPoint(1.0, [3.0])
+        ... ]
+        >>> model = RandomForest.trainClassifier(sc.parallelize(data), 2, {}, 3, seed=42)
+        >>> model.numTrees()
+        3
+        >>> model.totalNumNodes()
+        7
+        >>> print(model)
+        TreeEnsembleModel classifier with 3 trees
+        <BLANKLINE>
+        >>> print(model.toDebugString())
+        TreeEnsembleModel classifier with 3 trees
+        <BLANKLINE>
+          Tree 0:
+            Predict: 1.0
+          Tree 1:
+            If (feature 0 <= 1.5)
+             Predict: 0.0
+            Else (feature 0 > 1.5)
+             Predict: 1.0
+          Tree 2:
+            If (feature 0 <= 1.5)
+             Predict: 0.0
+            Else (feature 0 > 1.5)
+             Predict: 1.0
+        <BLANKLINE>
+        >>> model.predict([2.0])
+        1.0
+        >>> model.predict([0.0])
+        0.0
+        >>> rdd = sc.parallelize([[3.0], [1.0]])
+        >>> model.predict(rdd).collect()
+        [1.0, 0.0]
+        """
+        return cls._train(data, "classification", numClasses,
+                          categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity,
+                          maxDepth, maxBins, seed)
+
+    @classmethod
+    @since("1.2.0")
+    def trainRegressor(cls, data, categoricalFeaturesInfo, numTrees, featureSubsetStrategy="auto",
+                       impurity="variance", maxDepth=4, maxBins=32, seed=None):
+        """
+        Train a random forest model for regression.
+
+        :param data:
+          Training dataset: RDD of LabeledPoint. Labels are real numbers.
+        :param categoricalFeaturesInfo:
+          Map storing arity of categorical features. An entry (n -> k)
+          indicates that feature n is categorical with k categories
+          indexed from 0: {0, 1, ..., k-1}.
+        :param numTrees:
+          Number of trees in the random forest.
+        :param featureSubsetStrategy:
+          Number of features to consider for splits at each node.
+          Supported values: "auto", "all", "sqrt", "log2", "onethird".
+          If "auto" is set, this parameter is set based on numTrees:
+          if numTrees == 1, set to "all";
+          if numTrees > 1 (forest), set to "onethird" for regression.
+          (default: "auto")
+        :param impurity:
+          Criterion used for information gain calculation.
+          The only supported value for regression is "variance".
+          (default: "variance")
+        :param maxDepth:
+          Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1
+          means 1 internal node + 2 leaf nodes).
+          (default: 4)
+        :param maxBins:
+          Maximum number of bins used for splitting features.
+          (default: 32)
+        :param seed:
+          Random seed for bootstrapping and choosing feature subsets.
+          Set as None to generate seed based on system time.
+          (default: None)
+        :return:
+          RandomForestModel that can be used for prediction.
+
+        Example usage:
+
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> from pyspark.mllib.tree import RandomForest
+        >>> from pyspark.mllib.linalg import SparseVector
+        >>>
+        >>> sparse_data = [
+        ...     LabeledPoint(0.0, SparseVector(2, {0: 1.0})),
+        ...     LabeledPoint(1.0, SparseVector(2, {1: 1.0})),
+        ...     LabeledPoint(0.0, SparseVector(2, {0: 1.0})),
+        ...     LabeledPoint(1.0, SparseVector(2, {1: 2.0}))
+        ... ]
+        >>>
+        >>> model = RandomForest.trainRegressor(sc.parallelize(sparse_data), {}, 2, seed=42)
+        >>> model.numTrees()
+        2
+        >>> model.totalNumNodes()
+        4
+        >>> model.predict(SparseVector(2, {1: 1.0}))
+        1.0
+        >>> model.predict(SparseVector(2, {0: 1.0}))
+        0.5
+        >>> rdd = sc.parallelize([[0.0, 1.0], [1.0, 0.0]])
+        >>> model.predict(rdd).collect()
+        [1.0, 0.5]
+        """
+        return cls._train(data, "regression", 0, categoricalFeaturesInfo, numTrees,
+                          featureSubsetStrategy, impurity, maxDepth, maxBins, seed)
+
+
+@inherit_doc
+class GradientBoostedTreesModel(TreeEnsembleModel, JavaLoader):
+    """
+    Represents a gradient-boosted tree model.
+
+    .. versionadded:: 1.3.0
+    """
+
+    @classmethod
+    def _java_loader_class(cls):
+        return "org.apache.spark.mllib.tree.model.GradientBoostedTreesModel"
+
+
+class GradientBoostedTrees(object):
+    """
+    Learning algorithm for a gradient boosted trees model for
+    classification or regression.
+
+    .. versionadded:: 1.3.0
+    """
+
+    @classmethod
+    def _train(cls, data, algo, categoricalFeaturesInfo,
+               loss, numIterations, learningRate, maxDepth, maxBins):
+        first = data.first()
+        assert isinstance(first, LabeledPoint), "the data should be RDD of LabeledPoint"
+        model = callMLlibFunc("trainGradientBoostedTreesModel", data, algo, categoricalFeaturesInfo,
+                              loss, numIterations, learningRate, maxDepth, maxBins)
+        return GradientBoostedTreesModel(model)
+
+    @classmethod
+    @since("1.3.0")
+    def trainClassifier(cls, data, categoricalFeaturesInfo,
+                        loss="logLoss", numIterations=100, learningRate=0.1, maxDepth=3,
+                        maxBins=32):
+        """
+        Train a gradient-boosted trees model for classification.
+
+        :param data:
+          Training dataset: RDD of LabeledPoint. Labels should take values
+          {0, 1}.
+        :param categoricalFeaturesInfo:
+          Map storing arity of categorical features. An entry (n -> k)
+          indicates that feature n is categorical with k categories
+          indexed from 0: {0, 1, ..., k-1}.
+        :param loss:
+          Loss function used for minimization during gradient boosting.
+          Supported values: "logLoss", "leastSquaresError",
+          "leastAbsoluteError".
+          (default: "logLoss")
+        :param numIterations:
+          Number of iterations of boosting.
+          (default: 100)
+        :param learningRate:
+          Learning rate for shrinking the contribution of each estimator.
+          The learning rate should be in the interval (0, 1].
+          (default: 0.1)
+        :param maxDepth:
+          Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1
+          means 1 internal node + 2 leaf nodes).
+          (default: 3)
+        :param maxBins:
+          Maximum number of bins used for splitting features. DecisionTree
+          requires maxBins >= max categories.
+          (default: 32)
+        :return:
+          GradientBoostedTreesModel that can be used for prediction.
+
+        Example usage:
+
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> from pyspark.mllib.tree import GradientBoostedTrees
+        >>>
+        >>> data = [
+        ...     LabeledPoint(0.0, [0.0]),
+        ...     LabeledPoint(0.0, [1.0]),
+        ...     LabeledPoint(1.0, [2.0]),
+        ...     LabeledPoint(1.0, [3.0])
+        ... ]
+        >>>
+        >>> model = GradientBoostedTrees.trainClassifier(sc.parallelize(data), {}, numIterations=10)
+        >>> model.numTrees()
+        10
+        >>> model.totalNumNodes()
+        30
+        >>> print(model)  # it already has newline
+        TreeEnsembleModel classifier with 10 trees
+        <BLANKLINE>
+        >>> model.predict([2.0])
+        1.0
+        >>> model.predict([0.0])
+        0.0
+        >>> rdd = sc.parallelize([[2.0], [0.0]])
+        >>> model.predict(rdd).collect()
+        [1.0, 0.0]
+        """
+        return cls._train(data, "classification", categoricalFeaturesInfo,
+                          loss, numIterations, learningRate, maxDepth, maxBins)
+
+    @classmethod
+    @since("1.3.0")
+    def trainRegressor(cls, data, categoricalFeaturesInfo,
+                       loss="leastSquaresError", numIterations=100, learningRate=0.1, maxDepth=3,
+                       maxBins=32):
+        """
+        Train a gradient-boosted trees model for regression.
+
+        :param data:
+          Training dataset: RDD of LabeledPoint. Labels are real numbers.
+        :param categoricalFeaturesInfo:
+          Map storing arity of categorical features. An entry (n -> k)
+          indicates that feature n is categorical with k categories
+          indexed from 0: {0, 1, ..., k-1}.
+        :param loss:
+          Loss function used for minimization during gradient boosting.
+          Supported values: "logLoss", "leastSquaresError",
+          "leastAbsoluteError".
+          (default: "leastSquaresError")
+        :param numIterations:
+          Number of iterations of boosting.
+          (default: 100)
+        :param learningRate:
+          Learning rate for shrinking the contribution of each estimator.
+          The learning rate should be in the interval (0, 1].
+          (default: 0.1)
+        :param maxDepth:
+          Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1
+          means 1 internal node + 2 leaf nodes).
+          (default: 3)
+        :param maxBins:
+          Maximum number of bins used for splitting features. DecisionTree
+          requires maxBins >= max categories.
+          (default: 32)
+        :return:
+          GradientBoostedTreesModel that can be used for prediction.
+
+        Example usage:
+
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> from pyspark.mllib.tree import GradientBoostedTrees
+        >>> from pyspark.mllib.linalg import SparseVector
+        >>>
+        >>> sparse_data = [
+        ...     LabeledPoint(0.0, SparseVector(2, {0: 1.0})),
+        ...     LabeledPoint(1.0, SparseVector(2, {1: 1.0})),
+        ...     LabeledPoint(0.0, SparseVector(2, {0: 1.0})),
+        ...     LabeledPoint(1.0, SparseVector(2, {1: 2.0}))
+        ... ]
+        >>>
+        >>> data = sc.parallelize(sparse_data)
+        >>> model = GradientBoostedTrees.trainRegressor(data, {}, numIterations=10)
+        >>> model.numTrees()
+        10
+        >>> model.totalNumNodes()
+        12
+        >>> model.predict(SparseVector(2, {1: 1.0}))
+        1.0
+        >>> model.predict(SparseVector(2, {0: 1.0}))
+        0.0
+        >>> rdd = sc.parallelize([[0.0, 1.0], [1.0, 0.0]])
+        >>> model.predict(rdd).collect()
+        [1.0, 0.0]
+        """
+        return cls._train(data, "regression", categoricalFeaturesInfo,
+                          loss, numIterations, learningRate, maxDepth, maxBins)
+
+
+def _test():
+    import doctest
+    globs = globals().copy()
+    from pyspark.sql import SparkSession
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("mllib.tree tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/util.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc7809387b13a79a11e9c46fa4b121ed739cdfe6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/mllib/util.py
@@ -0,0 +1,528 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import numpy as np
+import warnings
+
+if sys.version > '3':
+    xrange = range
+    basestring = str
+
+from pyspark import SparkContext, since
+from pyspark.mllib.common import callMLlibFunc, inherit_doc
+from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector
+from pyspark.sql import DataFrame
+
+
+class MLUtils(object):
+
+    """
+    Helper methods to load, save and pre-process data used in MLlib.
+
+    .. versionadded:: 1.0.0
+    """
+
+    @staticmethod
+    def _parse_libsvm_line(line, multiclass=None):
+        """
+        Parses a line in LIBSVM format into (label, indices, values).
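
+        For example (illustrative), the line "1.0 1:2.5 3:4.0" parses to
+        (1.0, array([0, 2]), array([2.5, 4.0])); the one-based input indices
+        are converted to zero-based.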
+        """
+        if multiclass is not None:
+            warnings.warn("deprecated", DeprecationWarning)
+        items = line.split(None)
+        label = float(items[0])
+        nnz = len(items) - 1
+        indices = np.zeros(nnz, dtype=np.int32)
+        values = np.zeros(nnz)
+        for i in xrange(nnz):
+            index, value = items[1 + i].split(":")
+            indices[i] = int(index) - 1
+            values[i] = float(value)
+        return label, indices, values
+
+    @staticmethod
+    def _convert_labeled_point_to_libsvm(p):
+        """Converts a LabeledPoint to a string in LIBSVM format."""
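+        # For example (illustrative): LabeledPoint(1.0, [0.0, 2.0]) becomes
+        # "1.0 1:0.0 2:2.0"; a SparseVector emits only its stored entries,
+        # again with one-based indices.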
+        from pyspark.mllib.regression import LabeledPoint
+        assert isinstance(p, LabeledPoint)
+        items = [str(p.label)]
+        v = _convert_to_vector(p.features)
+        if isinstance(v, SparseVector):
+            nnz = len(v.indices)
+            for i in xrange(nnz):
+                items.append(str(v.indices[i] + 1) + ":" + str(v.values[i]))
+        else:
+            for i in xrange(len(v)):
+                items.append(str(i + 1) + ":" + str(v[i]))
+        return " ".join(items)
+
+    @staticmethod
+    @since("1.0.0")
+    def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None, multiclass=None):
+        """
+        Loads labeled data in the LIBSVM format into an RDD of
+        LabeledPoint. The LIBSVM format is a text-based format used by
+        LIBSVM and LIBLINEAR. Each line represents a labeled sparse
+        feature vector using the following format:
+
+        label index1:value1 index2:value2 ...
+
+        where the indices are one-based and in ascending order. This
+        method parses each line into a LabeledPoint, where the feature
+        indices are converted to zero-based.
+
+        :param sc: Spark context
+        :param path: file or directory path in any Hadoop-supported file
+                     system URI
+        :param numFeatures: number of features, which will be determined
+                            from the input data if a nonpositive value
+                            is given. This is useful when the dataset is
+                            already split into multiple files and you
+                            want to load them separately, because some
+                            features may not be present in certain files,
+                            which leads to inconsistent feature
+                            dimensions.
+        :param minPartitions: min number of partitions
+        :return: labeled data stored as an RDD of LabeledPoint
+
+        >>> from tempfile import NamedTemporaryFile
+        >>> from pyspark.mllib.util import MLUtils
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> tempFile = NamedTemporaryFile(delete=True)
+        >>> _ = tempFile.write(b"+1 1:1.0 3:2.0 5:3.0\\n-1\\n-1 2:4.0 4:5.0 6:6.0")
+        >>> tempFile.flush()
+        >>> examples = MLUtils.loadLibSVMFile(sc, tempFile.name).collect()
+        >>> tempFile.close()
+        >>> examples[0]
+        LabeledPoint(1.0, (6,[0,2,4],[1.0,2.0,3.0]))
+        >>> examples[1]
+        LabeledPoint(-1.0, (6,[],[]))
+        >>> examples[2]
+        LabeledPoint(-1.0, (6,[1,3,5],[4.0,5.0,6.0]))
+        """
+        from pyspark.mllib.regression import LabeledPoint
+        if multiclass is not None:
+            warnings.warn("deprecated", DeprecationWarning)
+
+        lines = sc.textFile(path, minPartitions)
+        parsed = lines.map(lambda l: MLUtils._parse_libsvm_line(l))
+        if numFeatures <= 0:
+            parsed.cache()
+            numFeatures = parsed.map(lambda x: -1 if x[1].size == 0 else x[1][-1]).reduce(max) + 1
+        return parsed.map(lambda x: LabeledPoint(x[0], Vectors.sparse(numFeatures, x[1], x[2])))
+
+    @staticmethod
+    @since("1.0.0")
+    def saveAsLibSVMFile(data, dir):
+        """
+        Save labeled data in LIBSVM format.
+
+        :param data: an RDD of LabeledPoint to be saved
+        :param dir: directory to save the data
+
+        >>> from tempfile import NamedTemporaryFile
+        >>> from fileinput import input
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> from glob import glob
+        >>> from pyspark.mllib.util import MLUtils
+        >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, 1.23), (2, 4.56)])),
+        ...             LabeledPoint(0.0, Vectors.dense([1.01, 2.02, 3.03]))]
+        >>> tempFile = NamedTemporaryFile(delete=True)
+        >>> tempFile.close()
+        >>> MLUtils.saveAsLibSVMFile(sc.parallelize(examples), tempFile.name)
+        >>> ''.join(sorted(input(glob(tempFile.name + "/part-0000*"))))
+        '0.0 1:1.01 2:2.02 3:3.03\\n1.1 1:1.23 3:4.56\\n'
+        """
+        lines = data.map(lambda p: MLUtils._convert_labeled_point_to_libsvm(p))
+        lines.saveAsTextFile(dir)
+
+    @staticmethod
+    @since("1.1.0")
+    def loadLabeledPoints(sc, path, minPartitions=None):
+        """
+        Load labeled points saved using RDD.saveAsTextFile.
+
+        :param sc: Spark context
+        :param path: file or directory path in any Hadoop-supported file
+                     system URI
+        :param minPartitions: min number of partitions
+        :return: labeled data stored as an RDD of LabeledPoint
+
+        >>> from tempfile import NamedTemporaryFile
+        >>> from pyspark.mllib.util import MLUtils
+        >>> from pyspark.mllib.regression import LabeledPoint
+        >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, -1.23), (2, 4.56e-7)])),
+        ...             LabeledPoint(0.0, Vectors.dense([1.01, 2.02, 3.03]))]
+        >>> tempFile = NamedTemporaryFile(delete=True)
+        >>> tempFile.close()
+        >>> sc.parallelize(examples, 1).saveAsTextFile(tempFile.name)
+        >>> MLUtils.loadLabeledPoints(sc, tempFile.name).collect()
+        [LabeledPoint(1.1, (3,[0,2],[-1.23,4.56e-07])), LabeledPoint(0.0, [1.01,2.02,3.03])]
+        """
+        minPartitions = minPartitions or min(sc.defaultParallelism, 2)
+        return callMLlibFunc("loadLabeledPoints", sc, path, minPartitions)
+
+    @staticmethod
+    @since("1.5.0")
+    def appendBias(data):
+        """
+        Returns a new vector with `1.0` (bias) appended to
+        the end of the input vector.
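
+        A minimal sketch (not executed as a doctest):

+        >>> from pyspark.mllib.linalg import Vectors
+        >>> MLUtils.appendBias(Vectors.dense([1.0, 2.0]))  # doctest: +SKIP
+        DenseVector([1.0, 2.0, 1.0])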
+        """
+        vec = _convert_to_vector(data)
+        if isinstance(vec, SparseVector):
+            newIndices = np.append(vec.indices, len(vec))
+            newValues = np.append(vec.values, 1.0)
+            return SparseVector(len(vec) + 1, newIndices, newValues)
+        else:
+            return _convert_to_vector(np.append(vec.toArray(), 1.0))
+
+    @staticmethod
+    @since("1.5.0")
+    def loadVectors(sc, path):
+        """
+        Loads vectors saved using `RDD[Vector].saveAsTextFile`
+        with the default number of partitions.
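
+        A minimal sketch (not executed as a doctest; the path and output
+        shown are hypothetical):

+        >>> vectors = MLUtils.loadVectors(sc, "/tmp/saved_vectors")  # doctest: +SKIP
+        >>> vectors.first()  # doctest: +SKIP
+        DenseVector([1.0, 2.0])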
+        """
+        return callMLlibFunc("loadVectors", sc, path)
+
+    @staticmethod
+    @since("2.0.0")
+    def convertVectorColumnsToML(dataset, *cols):
+        """
+        Converts vector columns in an input DataFrame from the
+        :py:class:`pyspark.mllib.linalg.Vector` type to the new
+        :py:class:`pyspark.ml.linalg.Vector` type under the `spark.ml`
+        package.
+
+        :param dataset:
+          input dataset
+        :param cols:
+          a list of vector columns to be converted.
+          New vector columns will be ignored. If unspecified, all old
+          vector columns will be converted except nested ones.
+        :return:
+          the input dataset with old vector columns converted to the
+          new vector type
+
+        >>> import pyspark
+        >>> from pyspark.mllib.linalg import Vectors
+        >>> from pyspark.mllib.util import MLUtils
+        >>> df = spark.createDataFrame(
+        ...     [(0, Vectors.sparse(2, [1], [1.0]), Vectors.dense(2.0, 3.0))],
+        ...     ["id", "x", "y"])
+        >>> r1 = MLUtils.convertVectorColumnsToML(df).first()
+        >>> isinstance(r1.x, pyspark.ml.linalg.SparseVector)
+        True
+        >>> isinstance(r1.y, pyspark.ml.linalg.DenseVector)
+        True
+        >>> r2 = MLUtils.convertVectorColumnsToML(df, "x").first()
+        >>> isinstance(r2.x, pyspark.ml.linalg.SparseVector)
+        True
+        >>> isinstance(r2.y, pyspark.mllib.linalg.DenseVector)
+        True
+        """
+        if not isinstance(dataset, DataFrame):
+            raise TypeError("Input dataset must be a DataFrame but got {}.".format(type(dataset)))
+        return callMLlibFunc("convertVectorColumnsToML", dataset, list(cols))
+
+    @staticmethod
+    @since("2.0.0")
+    def convertVectorColumnsFromML(dataset, *cols):
+        """
+        Converts vector columns in an input DataFrame to the
+        :py:class:`pyspark.mllib.linalg.Vector` type from the new
+        :py:class:`pyspark.ml.linalg.Vector` type under the `spark.ml`
+        package.
+
+        :param dataset:
+          input dataset
+        :param cols:
+          a list of vector columns to be converted.
+          Old vector columns will be ignored. If unspecified, all new
+          vector columns will be converted except nested ones.
+        :return:
+          the input dataset with new vector columns converted to the
+          old vector type
+
+        >>> import pyspark
+        >>> from pyspark.ml.linalg import Vectors
+        >>> from pyspark.mllib.util import MLUtils
+        >>> df = spark.createDataFrame(
+        ...     [(0, Vectors.sparse(2, [1], [1.0]), Vectors.dense(2.0, 3.0))],
+        ...     ["id", "x", "y"])
+        >>> r1 = MLUtils.convertVectorColumnsFromML(df).first()
+        >>> isinstance(r1.x, pyspark.mllib.linalg.SparseVector)
+        True
+        >>> isinstance(r1.y, pyspark.mllib.linalg.DenseVector)
+        True
+        >>> r2 = MLUtils.convertVectorColumnsFromML(df, "x").first()
+        >>> isinstance(r2.x, pyspark.mllib.linalg.SparseVector)
+        True
+        >>> isinstance(r2.y, pyspark.ml.linalg.DenseVector)
+        True
+        """
+        if not isinstance(dataset, DataFrame):
+            raise TypeError("Input dataset must be a DataFrame but got {}.".format(type(dataset)))
+        return callMLlibFunc("convertVectorColumnsFromML", dataset, list(cols))
+
+    @staticmethod
+    @since("2.0.0")
+    def convertMatrixColumnsToML(dataset, *cols):
+        """
+        Converts matrix columns in an input DataFrame from the
+        :py:class:`pyspark.mllib.linalg.Matrix` type to the new
+        :py:class:`pyspark.ml.linalg.Matrix` type under the `spark.ml`
+        package.
+
+        :param dataset:
+          input dataset
+        :param cols:
+          a list of matrix columns to be converted.
+          New matrix columns will be ignored. If unspecified, all old
+          matrix columns will be converted except nested ones.
+        :return:
+          the input dataset with old matrix columns converted to the
+          new matrix type
+
+        >>> import pyspark
+        >>> from pyspark.mllib.linalg import Matrices
+        >>> from pyspark.mllib.util import MLUtils
+        >>> df = spark.createDataFrame(
+        ...     [(0, Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]),
+        ...     Matrices.dense(2, 2, range(4)))], ["id", "x", "y"])
+        >>> r1 = MLUtils.convertMatrixColumnsToML(df).first()
+        >>> isinstance(r1.x, pyspark.ml.linalg.SparseMatrix)
+        True
+        >>> isinstance(r1.y, pyspark.ml.linalg.DenseMatrix)
+        True
+        >>> r2 = MLUtils.convertMatrixColumnsToML(df, "x").first()
+        >>> isinstance(r2.x, pyspark.ml.linalg.SparseMatrix)
+        True
+        >>> isinstance(r2.y, pyspark.mllib.linalg.DenseMatrix)
+        True
+        """
+        if not isinstance(dataset, DataFrame):
+            raise TypeError("Input dataset must be a DataFrame but got {}.".format(type(dataset)))
+        return callMLlibFunc("convertMatrixColumnsToML", dataset, list(cols))
+
+    @staticmethod
+    @since("2.0.0")
+    def convertMatrixColumnsFromML(dataset, *cols):
+        """
+        Converts matrix columns in an input DataFrame to the
+        :py:class:`pyspark.mllib.linalg.Matrix` type from the new
+        :py:class:`pyspark.ml.linalg.Matrix` type under the `spark.ml`
+        package.
+
+        :param dataset:
+          input dataset
+        :param cols:
+          a list of matrix columns to be converted.
+          Old matrix columns will be ignored. If unspecified, all new
+          matrix columns will be converted except nested ones.
+        :return:
+          the input dataset with new matrix columns converted to the
+          old matrix type
+
+        >>> import pyspark
+        >>> from pyspark.ml.linalg import Matrices
+        >>> from pyspark.mllib.util import MLUtils
+        >>> df = spark.createDataFrame(
+        ...     [(0, Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]),
+        ...     Matrices.dense(2, 2, range(4)))], ["id", "x", "y"])
+        >>> r1 = MLUtils.convertMatrixColumnsFromML(df).first()
+        >>> isinstance(r1.x, pyspark.mllib.linalg.SparseMatrix)
+        True
+        >>> isinstance(r1.y, pyspark.mllib.linalg.DenseMatrix)
+        True
+        >>> r2 = MLUtils.convertMatrixColumnsFromML(df, "x").first()
+        >>> isinstance(r2.x, pyspark.mllib.linalg.SparseMatrix)
+        True
+        >>> isinstance(r2.y, pyspark.ml.linalg.DenseMatrix)
+        True
+        """
+        if not isinstance(dataset, DataFrame):
+            raise TypeError("Input dataset must be a DataFrame but got {}.".format(type(dataset)))
+        return callMLlibFunc("convertMatrixColumnsFromML", dataset, list(cols))
+
+
+class Saveable(object):
+    """
+    Mixin for models and transformers which may be saved as files.
+
+    .. versionadded:: 1.3.0
+    """
+
+    def save(self, sc, path):
+        """
+        Save this model to the given path.
+
+        This saves:
+         * human-readable (JSON) model metadata to path/metadata/
+         * Parquet formatted data to path/data/
+
+        The model may be loaded using :py:meth:`Loader.load`.
+
+        :param sc: Spark context used to save model data.
+        :param path: Path specifying the directory in which to save
+                     this model. If the directory already exists,
+                     this method throws an exception.
+        """
+        raise NotImplementedError
+
+
+@inherit_doc
+class JavaSaveable(Saveable):
+    """
+    Mixin for models that provide save() through their Scala
+    implementation.
+
+    .. versionadded:: 1.3.0
+    """
+
+    @since("1.3.0")
+    def save(self, sc, path):
+        """Save this model to the given path."""
+        if not isinstance(sc, SparkContext):
+            raise TypeError("sc should be a SparkContext, got type %s" % type(sc))
+        if not isinstance(path, basestring):
+            raise TypeError("path should be a basestring, got type %s" % type(path))
+        self._java_model.save(sc._jsc.sc(), path)
+
+
+class Loader(object):
+    """
+    Mixin for classes which can load saved models from files.
+
+    .. versionadded:: 1.3.0
+    """
+
+    @classmethod
+    def load(cls, sc, path):
+        """
+        Load a model from the given path. The model should have been
+        saved using :py:meth:`Saveable.save`.
+
+        :param sc: Spark context used for loading model files.
+        :param path: Path specifying the directory to which the model
+                     was saved.
+        :return: model instance
+        """
+        raise NotImplementedError
+
+
+@inherit_doc
+class JavaLoader(Loader):
+    """
+    Mixin for classes which can load saved models using its Scala
+    implementation.
+
+    .. versionadded:: 1.3.0
+    """
+
+    @classmethod
+    def _java_loader_class(cls):
+        """
+        Returns the full class name of the Java loader. The default
+        implementation replaces "pyspark" by "org.apache.spark" in
+        the Python full class name.
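
+        For example (illustrative), a hypothetical class
+        ``pyspark.mllib.feature.SomeModel`` would map to
+        ``org.apache.spark.mllib.feature.SomeModel``.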
+        """
+        java_package = cls.__module__.replace("pyspark", "org.apache.spark")
+        return ".".join([java_package, cls.__name__])
+
+    @classmethod
+    def _load_java(cls, sc, path):
+        """
+        Load a Java model from the given path.
+        """
+        java_class = cls._java_loader_class()
+        java_obj = sc._jvm
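+        # Walk the dotted Java class name one attribute at a time through the
+        # Py4J gateway (e.g. org -> apache -> spark -> ...) to resolve the
+        # Java loader class, then delegate to its load() method below.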
+        for name in java_class.split("."):
+            java_obj = getattr(java_obj, name)
+        return java_obj.load(sc._jsc.sc(), path)
+
+    @classmethod
+    @since("1.3.0")
+    def load(cls, sc, path):
+        """Load a model from the given path."""
+        java_model = cls._load_java(sc, path)
+        return cls(java_model)
+
+
+class LinearDataGenerator(object):
+    """Utils for generating linear data.
+
+    .. versionadded:: 1.5.0
+    """
+
+    @staticmethod
+    @since("1.5.0")
+    def generateLinearInput(intercept, weights, xMean, xVariance,
+                            nPoints, seed, eps):
+        """
+        :param intercept: bias factor, the term c in X'w + c
+        :param weights: feature vector, the term w in X'w + c
+        :param xMean: point around which the data X is centered
+        :param xVariance: variance of the given data
+        :param nPoints: number of points to be generated
+        :param seed: random seed
+        :param eps: used to scale the noise; the higher eps is set,
+                    the more Gaussian noise is added
+
+        :return: a list of LabeledPoints of length nPoints
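
+        A minimal usage sketch (not executed as a doctest; the argument
+        values are illustrative and the output indicative only):

+        >>> points = LinearDataGenerator.generateLinearInput(
+        ...     0.0, [1.0, 2.0], [0.0, 0.0], [1.0, 1.0], 5, 42, 0.1)  # doctest: +SKIP
+        >>> len(points)  # doctest: +SKIP
+        5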
+        """
+        weights = [float(weight) for weight in weights]
+        xMean = [float(mean) for mean in xMean]
+        xVariance = [float(var) for var in xVariance]
+        return list(callMLlibFunc(
+            "generateLinearInputWrapper", float(intercept), weights, xMean,
+            xVariance, int(nPoints), int(seed), float(eps)))
+
+    @staticmethod
+    @since("1.5.0")
+    def generateLinearRDD(sc, nexamples, nfeatures, eps,
+                          nParts=2, intercept=0.0):
+        """
+        Generate an RDD of LabeledPoints.
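
+        A minimal usage sketch (not executed as a doctest; the argument
+        values are illustrative):

+        >>> rdd = LinearDataGenerator.generateLinearRDD(sc, 100, 3, 0.1)  # doctest: +SKIP
+        >>> rdd.count()  # doctest: +SKIP
+        100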
+        """
+        return callMLlibFunc(
+            "generateLinearRDDWrapper", sc, int(nexamples), int(nfeatures),
+            float(eps), int(nParts), float(intercept))
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    globs = globals().copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    spark = SparkSession.builder\
+        .master("local[2]")\
+        .appName("mllib.util tests")\
+        .getOrCreate()
+    globs['spark'] = spark
+    globs['sc'] = spark.sparkContext
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/profiler.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c7656ab5758c9c11abf74a1f8f2e84fadbdb2db
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/profiler.py
@@ -0,0 +1,177 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import cProfile
+import pstats
+import os
+import atexit
+import sys
+
+from pyspark.accumulators import AccumulatorParam
+
+
+class ProfilerCollector(object):
+    """
+    This class keeps track of different profilers on a per-stage basis
+    and is also used to create new profilers for the different stages.
+    """
+
+    def __init__(self, profiler_cls, dump_path=None):
+        self.profiler_cls = profiler_cls
+        self.profile_dump_path = dump_path
+        self.profilers = []
+
+    def new_profiler(self, ctx):
+        """ Create a new profiler using class `profiler_cls` """
+        return self.profiler_cls(ctx)
+
+    def add_profiler(self, id, profiler):
+        """ Add a profiler for RDD `id` """
+        if not self.profilers:
+            if self.profile_dump_path:
+                atexit.register(self.dump_profiles, self.profile_dump_path)
+            else:
+                atexit.register(self.show_profiles)
+
+        self.profilers.append([id, profiler, False])
+
+    def dump_profiles(self, path):
+        """ Dump the profile stats into directory `path` """
+        for id, profiler, _ in self.profilers:
+            profiler.dump(id, path)
+        self.profilers = []
+
+    def show_profiles(self):
+        """ Print the profile stats to stdout """
+        for i, (id, profiler, showed) in enumerate(self.profilers):
+            if not showed and profiler:
+                profiler.show(id)
+                # mark it as showed
+                self.profilers[i][2] = True
+
+
+class Profiler(object):
+    """
+    .. note:: DeveloperApi
+
+    PySpark supports custom profilers; this allows different profilers to be
+    used, as well as output in formats other than the one provided by the
+    BasicProfiler.
+
+    A custom profiler has to define or inherit the following methods:
+        profile - produces a system profile of some sort.
+        stats - returns the collected stats.
+        dump - dumps the profiles to a path.
+        add - adds a profile to the existing accumulated profile.
+
+    The profiler class is chosen when creating a SparkContext.
+
+    >>> from pyspark import SparkConf, SparkContext
+    >>> from pyspark import BasicProfiler
+    >>> class MyCustomProfiler(BasicProfiler):
+    ...     def show(self, id):
+    ...         print("My custom profiles for RDD:%s" % id)
+    ...
+    >>> conf = SparkConf().set("spark.python.profile", "true")
+    >>> sc = SparkContext('local', 'test', conf=conf, profiler_cls=MyCustomProfiler)
+    >>> sc.parallelize(range(1000)).map(lambda x: 2 * x).take(10)
+    [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
+    >>> sc.parallelize(range(1000)).count()
+    1000
+    >>> sc.show_profiles()
+    My custom profiles for RDD:1
+    My custom profiles for RDD:3
+    >>> sc.stop()
+    """
+
+    def __init__(self, ctx):
+        pass
+
+    def profile(self, func):
+        """ Do profiling on the function `func`"""
+        raise NotImplementedError
+
+    def stats(self):
+        """ Return the collected profiling stats (pstats.Stats)"""
+        raise NotImplementedError
+
+    def show(self, id):
+        """ Print the profile stats to stdout, id is the RDD id """
+        stats = self.stats()
+        if stats:
+            print("=" * 60)
+            print("Profile of RDD<id=%d>" % id)
+            print("=" * 60)
+            stats.sort_stats("time", "cumulative").print_stats()
+
+    def dump(self, id, path):
+        """ Dump the profile into path, id is the RDD id """
+        if not os.path.exists(path):
+            os.makedirs(path)
+        stats = self.stats()
+        if stats:
+            p = os.path.join(path, "rdd_%d.pstats" % id)
+            stats.dump_stats(p)
+
+
+class PStatsParam(AccumulatorParam):
+    """PStatsParam is used to merge pstats.Stats"""
+
+    @staticmethod
+    def zero(value):
+        return None
+
+    @staticmethod
+    def addInPlace(value1, value2):
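+        # Merge two pstats.Stats objects. The accumulator's zero value is
+        # None, so the first merge simply returns the other stats object.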
+        if value1 is None:
+            return value2
+        value1.add(value2)
+        return value1
+
+
+class BasicProfiler(Profiler):
+    """
+    BasicProfiler is the default profiler, which is implemented based on
+    cProfile and Accumulator.
+    """
+    def __init__(self, ctx):
+        Profiler.__init__(self, ctx)
+        # Creates a new accumulator for combining the profiles of different
+        # partitions of a stage
+        self._accumulator = ctx.accumulator(None, PStatsParam)
+
+    def profile(self, func):
+        """ Runs and profiles the function `func`; the resulting stats are merged into the accumulator. """
+        pr = cProfile.Profile()
+        pr.runcall(func)
+        st = pstats.Stats(pr)
+        st.stream = None  # make it picklable
+        st.strip_dirs()
+
+        # Adds a new profile to the existing accumulated value
+        self._accumulator.add(st)
+
+    def stats(self):
+        return self._accumulator.value
+
+
+if __name__ == "__main__":
+    import doctest
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/python/pyspark/shell.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/python/pyspark/shell.py
new file mode 100644
index 0000000000000000000000000000000000000000..65e3bdbc05ce8e014d9f5d1fe67a072c658ac06d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/python/pyspark/shell.py
@@ -0,0 +1,76 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An interactive shell.
+
+This file is designed to be launched as a PYTHONSTARTUP script.
+"""
+
+import atexit
+import os
+import platform
+import warnings
+
+import py4j
+
+from pyspark import SparkConf
+from pyspark.context import SparkContext
+from pyspark.sql import SparkSession, SQLContext
+
+if os.environ.get("SPARK_EXECUTOR_URI"):
+    SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"])
+
+SparkContext._ensure_initialized()
+
+try:
+    spark = SparkSession._create_shell_session()
+except Exception:
+    import sys
+    import traceback
+    warnings.warn("Failed to initialize Spark session.")
+    traceback.print_exc(file=sys.stderr)
+    sys.exit(1)
+
+sc = spark.sparkContext
+sql = spark.sql
+atexit.register(lambda: sc.stop())
+
+# for compatibility
+sqlContext = spark._wrapped
+sqlCtx = sqlContext
+
+print(r"""Welcome to
+      ____              __
+     / __/__  ___ _____/ /__
+    _\ \/ _ \/ _ `/ __/  '_/
+   /__ / .__/\_,_/_/ /_/\_\   version %s
+      /_/
+""" % sc.version)
+print("Using Python version %s (%s, %s)" % (
+    platform.python_version(),
+    platform.python_build()[0],
+    platform.python_build()[1]))
+print("SparkSession available as 'spark'.")
+
+# The ./bin/pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP,
+# which allows us to execute the user's PYTHONSTARTUP file:
+_pythonstartup = os.environ.get('OLD_PYTHONSTARTUP')
+if _pythonstartup and os.path.isfile(_pythonstartup):
+    with open(_pythonstartup) as f:
+        code = compile(f.read(), _pythonstartup, 'exec')
+        exec(code)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/rdd.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/rdd.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccf39e1ffbe96970a3e11f1e6723b1bc6fbb874d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/rdd.py
@@ -0,0 +1,2569 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import copy
+import sys
+import os
+import re
+import operator
+import shlex
+import warnings
+import heapq
+import bisect
+import random
+import socket
+from subprocess import Popen, PIPE
+from tempfile import NamedTemporaryFile
+from threading import Thread
+from collections import defaultdict
+from itertools import chain
+from functools import reduce
+from math import sqrt, log, isinf, isnan, pow, ceil
+
+if sys.version > '3':
+    basestring = unicode = str
+else:
+    from itertools import imap as map, ifilter as filter
+
+from pyspark.java_gateway import local_connect_and_auth
+from pyspark.serializers import NoOpSerializer, CartesianDeserializer, \
+    BatchedSerializer, CloudPickleSerializer, PairDeserializer, \
+    PickleSerializer, pack_long, AutoBatchedSerializer, write_with_length, \
+    UTF8Deserializer
+from pyspark.join import python_join, python_left_outer_join, \
+    python_right_outer_join, python_full_outer_join, python_cogroup
+from pyspark.statcounter import StatCounter
+from pyspark.rddsampler import RDDSampler, RDDRangeSampler, RDDStratifiedSampler
+from pyspark.storagelevel import StorageLevel
+from pyspark.resultiterable import ResultIterable
+from pyspark.shuffle import Aggregator, ExternalMerger, \
+    get_used_memory, ExternalSorter, ExternalGroupBy
+from pyspark.traceback_utils import SCCallSiteSync
+from pyspark.util import fail_on_stopiteration, _exception_message
+
+
+__all__ = ["RDD"]
+
+
+class PythonEvalType(object):
+    """
+    Evaluation type of python rdd.
+
+    These values are internal to PySpark.
+
+    These values should match values in org.apache.spark.api.python.PythonEvalType.
+    """
+    NON_UDF = 0
+
+    SQL_BATCHED_UDF = 100
+
+    SQL_SCALAR_PANDAS_UDF = 200
+    SQL_GROUPED_MAP_PANDAS_UDF = 201
+    SQL_GROUPED_AGG_PANDAS_UDF = 202
+    SQL_WINDOW_AGG_PANDAS_UDF = 203
+
+
+def portable_hash(x):
+    """
+    This function returns a consistent hash code for builtin types, especially
+    for None and tuples containing None.
+
+    The algorithm is similar to the one used by CPython 2.7.
+
+    >>> portable_hash(None)
+    0
+    >>> portable_hash((None, 1)) & 0xffffffff
+    219750521
+    """
+
+    if sys.version_info >= (3, 2, 3) and 'PYTHONHASHSEED' not in os.environ:
+        raise Exception("Randomness of hash of string should be disabled via PYTHONHASHSEED")
+
+    if x is None:
+        return 0
+    if isinstance(x, tuple):
+        h = 0x345678
+        for i in x:
+            h ^= portable_hash(i)
+            h *= 1000003
+            h &= sys.maxsize
+        h ^= len(x)
+        if h == -1:
+            h = -2
+        return int(h)
+    return hash(x)
+
+
+class BoundedFloat(float):
+    """
+    A bounded value generated by an approximate job, with a confidence level
+    and low and high bounds.
+
+    >>> BoundedFloat(100.0, 0.95, 95.0, 105.0)
+    100.0
+    """
+    def __new__(cls, mean, confidence, low, high):
+        obj = float.__new__(cls, mean)
+        obj.confidence = confidence
+        obj.low = low
+        obj.high = high
+        return obj
+
+
+def _parse_memory(s):
+    """
+    Parse a memory string in the format supported by Java (e.g. 1g, 200m) and
+    return the value in MB
+
+    >>> _parse_memory("256m")
+    256
+    >>> _parse_memory("2g")
+    2048
+    """
+    units = {'g': 1024, 'm': 1, 't': 1 << 20, 'k': 1.0 / 1024}
+    if s[-1].lower() not in units:
+        raise ValueError("invalid format: " + s)
+    return int(float(s[:-1]) * units[s[-1].lower()])
+
+
+def _load_from_socket(sock_info, serializer):
+    (sockfile, sock) = local_connect_and_auth(*sock_info)
+    # The RDD materialization time is unpredictable; if we set a timeout for the
+    # socket read operation, it will very likely fail. See SPARK-18281.
+    sock.settimeout(None)
+    # The socket will be automatically closed when garbage-collected.
+    return serializer.load_stream(sockfile)
+
+
+def ignore_unicode_prefix(f):
+    """
+    Ignore the 'u' prefix of strings in doctests, to make them work
+    in both Python 2 and 3.
+    """
+    if sys.version >= '3':
+        # the representation of unicode string in Python 3 does not have prefix 'u',
+        # so remove the prefix 'u' for doc tests
+        literal_re = re.compile(r"(\W|^)[uU](['])", re.UNICODE)
+        f.__doc__ = literal_re.sub(r'\1\2', f.__doc__)
+    return f
+
+
+class Partitioner(object):
+    def __init__(self, numPartitions, partitionFunc):
+        self.numPartitions = numPartitions
+        self.partitionFunc = partitionFunc
+
+    def __eq__(self, other):
+        return (isinstance(other, Partitioner) and self.numPartitions == other.numPartitions
+                and self.partitionFunc == other.partitionFunc)
+
+    def __call__(self, k):
+        return self.partitionFunc(k) % self.numPartitions
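
+    # For example (illustrative): Partitioner(2, portable_hash) maps a key k
+    # to partition portable_hash(k) % 2, so equal keys always land in the
+    # same partition.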
+
+
+class RDD(object):
+
+    """
+    A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
+    Represents an immutable, partitioned collection of elements that can be
+    operated on in parallel.
+    """
+
+    def __init__(self, jrdd, ctx, jrdd_deserializer=AutoBatchedSerializer(PickleSerializer())):
+        self._jrdd = jrdd
+        self.is_cached = False
+        self.is_checkpointed = False
+        self.ctx = ctx
+        self._jrdd_deserializer = jrdd_deserializer
+        self._id = jrdd.id()
+        self.partitioner = None
+
+    def _pickled(self):
+        return self._reserialize(AutoBatchedSerializer(PickleSerializer()))
+
+    def id(self):
+        """
+        A unique ID for this RDD (within its SparkContext).
+        """
+        return self._id
+
+    def __repr__(self):
+        return self._jrdd.toString()
+
+    def __getnewargs__(self):
+        # This method is called when attempting to pickle an RDD, which is always an error:
+        raise Exception(
+            "It appears that you are attempting to broadcast an RDD or reference an RDD from an "
+            "action or transformation. RDD transformations and actions can only be invoked by the "
+            "driver, not inside of other transformations; for example, "
+            "rdd1.map(lambda x: rdd2.values.count() * x) is invalid because the values "
+            "transformation and count action cannot be performed inside of the rdd1.map "
+            "transformation. For more information, see SPARK-5063."
+        )
+
+    @property
+    def context(self):
+        """
+        The L{SparkContext} that this RDD was created on.
+        """
+        return self.ctx
+
+    def cache(self):
+        """
+        Persist this RDD with the default storage level (C{MEMORY_ONLY}).
+        """
+        self.is_cached = True
+        self.persist(StorageLevel.MEMORY_ONLY)
+        return self
+
+    def persist(self, storageLevel=StorageLevel.MEMORY_ONLY):
+        """
+        Set this RDD's storage level to persist its values across operations
+        after the first time it is computed. This can only be used to assign
+        a new storage level if the RDD does not have a storage level set yet.
+        If no storage level is specified, defaults to (C{MEMORY_ONLY}).
+
+        >>> rdd = sc.parallelize(["b", "a", "c"])
+        >>> rdd.persist().is_cached
+        True
+        """
+        self.is_cached = True
+        javaStorageLevel = self.ctx._getJavaStorageLevel(storageLevel)
+        self._jrdd.persist(javaStorageLevel)
+        return self
+
+    def unpersist(self):
+        """
+        Mark the RDD as non-persistent, and remove all blocks for it from
+        memory and disk.
+        """
+        self.is_cached = False
+        self._jrdd.unpersist()
+        return self
+
+    def checkpoint(self):
+        """
+        Mark this RDD for checkpointing. It will be saved to a file inside the
+        checkpoint directory set with L{SparkContext.setCheckpointDir()} and
+        all references to its parent RDDs will be removed. This function must
+        be called before any job has been executed on this RDD. It is strongly
+        recommended that this RDD be persisted in memory, otherwise saving it
+        to a file will require recomputation.
+        """
+        self.is_checkpointed = True
+        self._jrdd.rdd().checkpoint()
+
+    def isCheckpointed(self):
+        """
+        Return whether this RDD is checkpointed and materialized, either reliably or locally.
+        """
+        return self._jrdd.rdd().isCheckpointed()
+
+    def localCheckpoint(self):
+        """
+        Mark this RDD for local checkpointing using Spark's existing caching layer.
+
+        This method is for users who wish to truncate RDD lineages while skipping the expensive
+        step of replicating the materialized data in a reliable distributed file system. This is
+        useful for RDDs with long lineages that need to be truncated periodically (e.g. GraphX).
+
+        Local checkpointing sacrifices fault-tolerance for performance. In particular, checkpointed
+        data is written to ephemeral local storage in the executors instead of to a reliable,
+        fault-tolerant storage. The effect is that if an executor fails during the computation,
+        the checkpointed data may no longer be accessible, causing an irrecoverable job failure.
+
+        This is NOT safe to use with dynamic allocation, which removes executors along
+        with their cached blocks. If you must use both features, you are advised to set
+        L{spark.dynamicAllocation.cachedExecutorIdleTimeout} to a high value.
+
+        The checkpoint directory set through L{SparkContext.setCheckpointDir()} is not used.
+        """
+        self._jrdd.rdd().localCheckpoint()
+
+    def isLocallyCheckpointed(self):
+        """
+        Return whether this RDD is marked for local checkpointing.
+
+        Exposed for testing.
+        """
+        return self._jrdd.rdd().isLocallyCheckpointed()
+
+    def getCheckpointFile(self):
+        """
+        Gets the name of the file to which this RDD was checkpointed.
+
+        Not defined if the RDD is checkpointed locally.
+        """
+        checkpointFile = self._jrdd.rdd().getCheckpointFile()
+        if checkpointFile.isDefined():
+            return checkpointFile.get()
+
+    def map(self, f, preservesPartitioning=False):
+        """
+        Return a new RDD by applying a function to each element of this RDD.
+
+        >>> rdd = sc.parallelize(["b", "a", "c"])
+        >>> sorted(rdd.map(lambda x: (x, 1)).collect())
+        [('a', 1), ('b', 1), ('c', 1)]
+        """
+        def func(_, iterator):
+            return map(fail_on_stopiteration(f), iterator)
+        return self.mapPartitionsWithIndex(func, preservesPartitioning)
+
+    def flatMap(self, f, preservesPartitioning=False):
+        """
+        Return a new RDD by first applying a function to all elements of this
+        RDD, and then flattening the results.
+
+        >>> rdd = sc.parallelize([2, 3, 4])
+        >>> sorted(rdd.flatMap(lambda x: range(1, x)).collect())
+        [1, 1, 1, 2, 2, 3]
+        >>> sorted(rdd.flatMap(lambda x: [(x, x), (x, x)]).collect())
+        [(2, 2), (2, 2), (3, 3), (3, 3), (4, 4), (4, 4)]
+        """
+        def func(s, iterator):
+            return chain.from_iterable(map(fail_on_stopiteration(f), iterator))
+        return self.mapPartitionsWithIndex(func, preservesPartitioning)
+
+    def mapPartitions(self, f, preservesPartitioning=False):
+        """
+        Return a new RDD by applying a function to each partition of this RDD.
+
+        >>> rdd = sc.parallelize([1, 2, 3, 4], 2)
+        >>> def f(iterator): yield sum(iterator)
+        >>> rdd.mapPartitions(f).collect()
+        [3, 7]
+        """
+        def func(s, iterator):
+            return f(iterator)
+        return self.mapPartitionsWithIndex(func, preservesPartitioning)
+
+    def mapPartitionsWithIndex(self, f, preservesPartitioning=False):
+        """
+        Return a new RDD by applying a function to each partition of this RDD,
+        while tracking the index of the original partition.
+
+        >>> rdd = sc.parallelize([1, 2, 3, 4], 4)
+        >>> def f(splitIndex, iterator): yield splitIndex
+        >>> rdd.mapPartitionsWithIndex(f).sum()
+        6
+        """
+        return PipelinedRDD(self, f, preservesPartitioning)
+
+    def mapPartitionsWithSplit(self, f, preservesPartitioning=False):
+        """
+        Deprecated: use mapPartitionsWithIndex instead.
+
+        Return a new RDD by applying a function to each partition of this RDD,
+        while tracking the index of the original partition.
+
+        >>> rdd = sc.parallelize([1, 2, 3, 4], 4)
+        >>> def f(splitIndex, iterator): yield splitIndex
+        >>> rdd.mapPartitionsWithSplit(f).sum()
+        6
+        """
+        warnings.warn("mapPartitionsWithSplit is deprecated; "
+                      "use mapPartitionsWithIndex instead", DeprecationWarning, stacklevel=2)
+        return self.mapPartitionsWithIndex(f, preservesPartitioning)
+
+    def getNumPartitions(self):
+        """
+        Returns the number of partitions in this RDD.
+
+        >>> rdd = sc.parallelize([1, 2, 3, 4], 2)
+        >>> rdd.getNumPartitions()
+        2
+        """
+        return self._jrdd.partitions().size()
+
+    def filter(self, f):
+        """
+        Return a new RDD containing only the elements that satisfy a predicate.
+
+        >>> rdd = sc.parallelize([1, 2, 3, 4, 5])
+        >>> rdd.filter(lambda x: x % 2 == 0).collect()
+        [2, 4]
+        """
+        def func(iterator):
+            return filter(fail_on_stopiteration(f), iterator)
+        return self.mapPartitions(func, True)
+
+    def distinct(self, numPartitions=None):
+        """
+        Return a new RDD containing the distinct elements in this RDD.
+
+        >>> sorted(sc.parallelize([1, 1, 2, 3]).distinct().collect())
+        [1, 2, 3]
+        """
+        return self.map(lambda x: (x, None)) \
+                   .reduceByKey(lambda x, _: x, numPartitions) \
+                   .map(lambda x: x[0])
+
+    def sample(self, withReplacement, fraction, seed=None):
+        """
+        Return a sampled subset of this RDD.
+
+        :param withReplacement: can elements be sampled multiple times (replaced when sampled out)
+        :param fraction: expected size of the sample as a fraction of this RDD's size
+            without replacement: probability that each element is chosen; fraction must be in [0, 1]
+            with replacement: expected number of times each element is chosen; fraction must be >= 0
+        :param seed: seed for the random number generator
+
+        .. note:: This is not guaranteed to provide exactly the fraction specified of the total
+            count of this RDD.
+
+        >>> rdd = sc.parallelize(range(100), 4)
+        >>> 6 <= rdd.sample(False, 0.1, 81).count() <= 14
+        True
+        """
+        assert fraction >= 0.0, "Negative fraction value: %s" % fraction
+        return self.mapPartitionsWithIndex(RDDSampler(withReplacement, fraction, seed).func, True)
+
+    def randomSplit(self, weights, seed=None):
+        """
+        Randomly splits this RDD with the provided weights.
+
+        :param weights: weights for splits, will be normalized if they don't sum to 1
+        :param seed: random seed
+        :return: split RDDs in a list
+
+        >>> rdd = sc.parallelize(range(500), 1)
+        >>> rdd1, rdd2 = rdd.randomSplit([2, 3], 17)
+        >>> len(rdd1.collect() + rdd2.collect())
+        500
+        >>> 150 < rdd1.count() < 250
+        True
+        >>> 250 < rdd2.count() < 350
+        True
+        """
+        s = float(sum(weights))
+        cweights = [0.0]
+        for w in weights:
+            cweights.append(cweights[-1] + w / s)
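+        # e.g. weights [2, 3] normalize to cumulative bounds [0.0, 0.4, 1.0];
+        # each split keeps the rows whose per-row random draw falls in its
+        # [lb, ub) range (illustrative sketch of the mechanism).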
+        if seed is None:
+            seed = random.randint(0, 2 ** 32 - 1)
+        return [self.mapPartitionsWithIndex(RDDRangeSampler(lb, ub, seed).func, True)
+                for lb, ub in zip(cweights, cweights[1:])]
+
+    # this is ported from scala/spark/RDD.scala
+    def takeSample(self, withReplacement, num, seed=None):
+        """
+        Return a fixed-size sampled subset of this RDD.
+
+        .. note:: This method should only be used if the resulting array is expected
+            to be small, as all the data is loaded into the driver's memory.
+
+        >>> rdd = sc.parallelize(range(0, 10))
+        >>> len(rdd.takeSample(True, 20, 1))
+        20
+        >>> len(rdd.takeSample(False, 5, 2))
+        5
+        >>> len(rdd.takeSample(False, 15, 3))
+        10
+        """
+        numStDev = 10.0
+
+        if num < 0:
+            raise ValueError("Sample size cannot be negative.")
+        elif num == 0:
+            return []
+
+        initialCount = self.count()
+        if initialCount == 0:
+            return []
+
+        rand = random.Random(seed)
+
+        if (not withReplacement) and num >= initialCount:
+            # shuffle current RDD and return
+            samples = self.collect()
+            rand.shuffle(samples)
+            return samples
+
+        maxSampleSize = sys.maxsize - int(numStDev * sqrt(sys.maxsize))
+        if num > maxSampleSize:
+            raise ValueError(
+                "Sample size cannot be greater than %d." % maxSampleSize)
+
+        fraction = RDD._computeFractionForSampleSize(
+            num, initialCount, withReplacement)
+        samples = self.sample(withReplacement, fraction, seed).collect()
+
+        # If the first sample didn't turn out large enough, keep trying to take samples;
+        # this shouldn't happen often because we use a big multiplier for their initial size.
+        # See: scala/spark/RDD.scala
+        while len(samples) < num:
+            # TODO: add log warning for when more than one iteration was run
+            seed = rand.randint(0, sys.maxsize)
+            samples = self.sample(withReplacement, fraction, seed).collect()
+
+        rand.shuffle(samples)
+
+        return samples[0:num]
+
+    @staticmethod
+    def _computeFractionForSampleSize(sampleSizeLowerBound, total, withReplacement):
+        """
+        Returns a sampling rate that guarantees a sample of
+        size >= sampleSizeLowerBound 99.99% of the time.
+
+        How the sampling rate is determined:
+        Let p = num / total, where num is the sample size and total is the
+        total number of data points in the RDD. We're trying to compute
+        q > p such that
+          - when sampling with replacement, we're drawing each data point
+            with prob_i ~ Pois(q), where we want to guarantee
+            Pr[s < num] < 0.0001 for s = sum(prob_i for i from 0 to
+            total), i.e. the failure rate of not having a sufficiently large
+            sample < 0.0001. Setting q = p + 5 * sqrt(p/total) is sufficient
+            to guarantee 0.9999 success rate for num > 12, but we need a
+            slightly larger q (9 empirically determined).
+          - when sampling without replacement, we're drawing each data point
+            with prob_i ~ Binomial(total, fraction) and our choice of q
+            guarantees 1-delta, or 0.9999 success rate, where success rate is
+            defined the same as in sampling with replacement.
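+
+        Worked example (approximate, for illustration only): without replacement,
+        for num = 100 out of total = 10000, p = 0.01 and
+        gamma = -log(0.00005) / 10000 ~= 0.00099, so
+        q ~= 0.01 + 0.00099 + sqrt(0.00099^2 + 2 * 0.00099 * 0.01) ~= 0.016,
+        i.e. we oversample relative to the naive rate of 0.01.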
+        """
+        fraction = float(sampleSizeLowerBound) / total
+        if withReplacement:
+            numStDev = 5
+            if (sampleSizeLowerBound < 12):
+                numStDev = 9
+            return fraction + numStDev * sqrt(fraction / total)
+        else:
+            delta = 0.00005
+            gamma = - log(delta) / total
+            return min(1, fraction + gamma + sqrt(gamma * gamma + 2 * gamma * fraction))
+
+    def union(self, other):
+        """
+        Return the union of this RDD and another one.
+
+        >>> rdd = sc.parallelize([1, 1, 2, 3])
+        >>> rdd.union(rdd).collect()
+        [1, 1, 2, 3, 1, 1, 2, 3]
+        """
+        if self._jrdd_deserializer == other._jrdd_deserializer:
+            rdd = RDD(self._jrdd.union(other._jrdd), self.ctx,
+                      self._jrdd_deserializer)
+        else:
+            # These RDDs contain data in different serialized formats, so we
+            # must normalize them to the default serializer.
+            self_copy = self._reserialize()
+            other_copy = other._reserialize()
+            rdd = RDD(self_copy._jrdd.union(other_copy._jrdd), self.ctx,
+                      self.ctx.serializer)
+        if (self.partitioner == other.partitioner and
+                self.getNumPartitions() == rdd.getNumPartitions()):
+            rdd.partitioner = self.partitioner
+        return rdd
+
+    def intersection(self, other):
+        """
+        Return the intersection of this RDD and another one. The output will
+        not contain any duplicate elements, even if the input RDDs did.
+
+        .. note:: This method performs a shuffle internally.
+
+        >>> rdd1 = sc.parallelize([1, 10, 2, 3, 4, 5])
+        >>> rdd2 = sc.parallelize([1, 6, 2, 3, 7, 8])
+        >>> rdd1.intersection(rdd2).collect()
+        [1, 2, 3]
+        """
+        return self.map(lambda v: (v, None)) \
+            .cogroup(other.map(lambda v: (v, None))) \
+            .filter(lambda k_vs: all(k_vs[1])) \
+            .keys()
+
+    def _reserialize(self, serializer=None):
+        serializer = serializer or self.ctx.serializer
+        if self._jrdd_deserializer != serializer:
+            self = self.map(lambda x: x, preservesPartitioning=True)
+            self._jrdd_deserializer = serializer
+        return self
+
+    def __add__(self, other):
+        """
+        Return the union of this RDD and another one.
+
+        >>> rdd = sc.parallelize([1, 1, 2, 3])
+        >>> (rdd + rdd).collect()
+        [1, 1, 2, 3, 1, 1, 2, 3]
+        """
+        if not isinstance(other, RDD):
+            raise TypeError
+        return self.union(other)
+
+    def repartitionAndSortWithinPartitions(self, numPartitions=None, partitionFunc=portable_hash,
+                                           ascending=True, keyfunc=lambda x: x):
+        """
+        Repartition the RDD according to the given partitioner and, within each resulting partition,
+        sort records by their keys.
+
+        >>> rdd = sc.parallelize([(0, 5), (3, 8), (2, 6), (0, 8), (3, 8), (1, 3)])
+        >>> rdd2 = rdd.repartitionAndSortWithinPartitions(2, lambda x: x % 2, True)
+        >>> rdd2.glom().collect()
+        [[(0, 5), (0, 8), (2, 6)], [(1, 3), (3, 8), (3, 8)]]
+        """
+        if numPartitions is None:
+            numPartitions = self._defaultReducePartitions()
+
+        memory = _parse_memory(self.ctx._conf.get("spark.python.worker.memory", "512m"))
+        serializer = self._jrdd_deserializer
+
+        def sortPartition(iterator):
+            sort = ExternalSorter(memory * 0.9, serializer).sorted
+            return iter(sort(iterator, key=lambda k_v: keyfunc(k_v[0]), reverse=(not ascending)))
+
+        return self.partitionBy(numPartitions, partitionFunc).mapPartitions(sortPartition, True)
+
+    def sortByKey(self, ascending=True, numPartitions=None, keyfunc=lambda x: x):
+        """
+        Sorts this RDD, which is assumed to consist of (key, value) pairs.
+
+        >>> tmp = [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)]
+        >>> sc.parallelize(tmp).sortByKey().first()
+        ('1', 3)
+        >>> sc.parallelize(tmp).sortByKey(True, 1).collect()
+        [('1', 3), ('2', 5), ('a', 1), ('b', 2), ('d', 4)]
+        >>> sc.parallelize(tmp).sortByKey(True, 2).collect()
+        [('1', 3), ('2', 5), ('a', 1), ('b', 2), ('d', 4)]
+        >>> tmp2 = [('Mary', 1), ('had', 2), ('a', 3), ('little', 4), ('lamb', 5)]
+        >>> tmp2.extend([('whose', 6), ('fleece', 7), ('was', 8), ('white', 9)])
+        >>> sc.parallelize(tmp2).sortByKey(True, 3, keyfunc=lambda k: k.lower()).collect()
+        [('a', 3), ('fleece', 7), ('had', 2), ('lamb', 5),...('white', 9), ('whose', 6)]
+        """
+        if numPartitions is None:
+            numPartitions = self._defaultReducePartitions()
+
+        memory = self._memory_limit()
+        serializer = self._jrdd_deserializer
+
+        def sortPartition(iterator):
+            sort = ExternalSorter(memory * 0.9, serializer).sorted
+            return iter(sort(iterator, key=lambda kv: keyfunc(kv[0]), reverse=(not ascending)))
+
+        if numPartitions == 1:
+            if self.getNumPartitions() > 1:
+                self = self.coalesce(1)
+            return self.mapPartitions(sortPartition, True)
+
+        # first compute the boundary of each part via sampling: we want to partition
+        # the key-space into bins such that the bins have roughly the same
+        # number of (key, value) pairs falling into them
+        rddSize = self.count()
+        if not rddSize:
+            return self  # empty RDD
+        maxSampleSize = numPartitions * 20.0  # constant from Spark's RangePartitioner
+        fraction = min(maxSampleSize / max(rddSize, 1), 1.0)
+        samples = self.sample(False, fraction, 1).map(lambda kv: kv[0]).collect()
+        samples = sorted(samples, key=keyfunc)
+
+        # we have numPartitions parts, but one of them has
+        # an implicit boundary
+        bounds = [samples[int(len(samples) * (i + 1) / numPartitions)]
+                  for i in range(0, numPartitions - 1)]
+
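+        # e.g. with numPartitions = 3 and 90 sorted sample keys, bounds are
+        # [samples[30], samples[60]]; rangePartitioner below bisects each key
+        # against these cut points to pick one of the 3 partitions
+        # (illustrative values; actual cut points depend on the sample).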
+        def rangePartitioner(k):
+            p = bisect.bisect_left(bounds, keyfunc(k))
+            if ascending:
+                return p
+            else:
+                return numPartitions - 1 - p
+
+        return self.partitionBy(numPartitions, rangePartitioner).mapPartitions(sortPartition, True)
+
+    def sortBy(self, keyfunc, ascending=True, numPartitions=None):
+        """
+        Sorts this RDD by the given keyfunc.
+
+        >>> tmp = [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)]
+        >>> sc.parallelize(tmp).sortBy(lambda x: x[0]).collect()
+        [('1', 3), ('2', 5), ('a', 1), ('b', 2), ('d', 4)]
+        >>> sc.parallelize(tmp).sortBy(lambda x: x[1]).collect()
+        [('a', 1), ('b', 2), ('1', 3), ('d', 4), ('2', 5)]
+        """
+        return self.keyBy(keyfunc).sortByKey(ascending, numPartitions).values()
+
+    def glom(self):
+        """
+        Return an RDD created by coalescing all elements within each partition
+        into a list.
+
+        >>> rdd = sc.parallelize([1, 2, 3, 4], 2)
+        >>> sorted(rdd.glom().collect())
+        [[1, 2], [3, 4]]
+        """
+        def func(iterator):
+            yield list(iterator)
+        return self.mapPartitions(func)
+
+    def cartesian(self, other):
+        """
+        Return the Cartesian product of this RDD and another one, that is, the
+        RDD of all pairs of elements C{(a, b)} where C{a} is in C{self} and
+        C{b} is in C{other}.
+
+        >>> rdd = sc.parallelize([1, 2])
+        >>> sorted(rdd.cartesian(rdd).collect())
+        [(1, 1), (1, 2), (2, 1), (2, 2)]
+        """
+        # Due to batching, we can't use the Java cartesian method.
+        deserializer = CartesianDeserializer(self._jrdd_deserializer,
+                                             other._jrdd_deserializer)
+        return RDD(self._jrdd.cartesian(other._jrdd), self.ctx, deserializer)
+
+    def groupBy(self, f, numPartitions=None, partitionFunc=portable_hash):
+        """
+        Return an RDD of grouped items.
+
+        >>> rdd = sc.parallelize([1, 1, 2, 3, 5, 8])
+        >>> result = rdd.groupBy(lambda x: x % 2).collect()
+        >>> sorted([(x, sorted(y)) for (x, y) in result])
+        [(0, [2, 8]), (1, [1, 1, 3, 5])]
+        """
+        return self.map(lambda x: (f(x), x)).groupByKey(numPartitions, partitionFunc)
+
+    @ignore_unicode_prefix
+    def pipe(self, command, env=None, checkCode=False):
+        """
+        Return an RDD created by piping elements to a forked external process.
+
+        >>> sc.parallelize(['1', '2', '', '3']).pipe('cat').collect()
+        [u'1', u'2', u'', u'3']
+
+        :param checkCode: whether or not to check the return value of the shell command.
+        """
+        if env is None:
+            env = dict()
+
+        def func(iterator):
+            pipe = Popen(
+                shlex.split(command), env=env, stdin=PIPE, stdout=PIPE)
+
+            def pipe_objs(out):
+                for obj in iterator:
+                    s = unicode(obj).rstrip('\n') + '\n'
+                    out.write(s.encode('utf-8'))
+                out.close()
+            Thread(target=pipe_objs, args=[pipe.stdin]).start()
+
+            def check_return_code():
+                pipe.wait()
+                if checkCode and pipe.returncode:
+                    raise Exception("Pipe function `%s' exited "
+                                    "with error code %d" % (command, pipe.returncode))
+                else:
+                    for i in range(0):
+                        yield i
+            return (x.rstrip(b'\n').decode('utf-8') for x in
+                    chain(iter(pipe.stdout.readline, b''), check_return_code()))
+        return self.mapPartitions(func)
+
+    def foreach(self, f):
+        """
+        Applies a function to all elements of this RDD.
+
+        >>> def f(x): print(x)
+        >>> sc.parallelize([1, 2, 3, 4, 5]).foreach(f)
+        """
+        f = fail_on_stopiteration(f)
+
+        def processPartition(iterator):
+            for x in iterator:
+                f(x)
+            return iter([])
+        self.mapPartitions(processPartition).count()  # Force evaluation
+
+    def foreachPartition(self, f):
+        """
+        Applies a function to each partition of this RDD.
+
+        >>> def f(iterator):
+        ...     for x in iterator:
+        ...          print(x)
+        >>> sc.parallelize([1, 2, 3, 4, 5]).foreachPartition(f)
+        """
+        def func(it):
+            r = f(it)
+            try:
+                return iter(r)
+            except TypeError:
+                return iter([])
+        self.mapPartitions(func).count()  # Force evaluation
+
+    def collect(self):
+        """
+        Return a list that contains all of the elements in this RDD.
+
+        .. note:: This method should only be used if the resulting array is expected
+            to be small, as all the data is loaded into the driver's memory.
+        """
+        with SCCallSiteSync(self.context) as css:
+            sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
+        return list(_load_from_socket(sock_info, self._jrdd_deserializer))
+
+    def reduce(self, f):
+        """
+        Reduces the elements of this RDD using the specified commutative and
+        associative binary operator. Currently reduces partitions locally.
+
+        >>> from operator import add
+        >>> sc.parallelize([1, 2, 3, 4, 5]).reduce(add)
+        15
+        >>> sc.parallelize((2 for _ in range(10))).map(lambda x: 1).cache().reduce(add)
+        10
+        >>> sc.parallelize([]).reduce(add)
+        Traceback (most recent call last):
+            ...
+        ValueError: Can not reduce() empty RDD
+        """
+        f = fail_on_stopiteration(f)
+
+        def func(iterator):
+            iterator = iter(iterator)
+            try:
+                initial = next(iterator)
+            except StopIteration:
+                return
+            yield reduce(f, iterator, initial)
+
+        vals = self.mapPartitions(func).collect()
+        if vals:
+            return reduce(f, vals)
+        raise ValueError("Can not reduce() empty RDD")
+
+    def treeReduce(self, f, depth=2):
+        """
+        Reduces the elements of this RDD in a multi-level tree pattern.
+
+        :param depth: suggested depth of the tree (default: 2)
+
+        >>> add = lambda x, y: x + y
+        >>> rdd = sc.parallelize([-5, -4, -3, -2, -1, 1, 2, 3, 4], 10)
+        >>> rdd.treeReduce(add)
+        -5
+        >>> rdd.treeReduce(add, 1)
+        -5
+        >>> rdd.treeReduce(add, 2)
+        -5
+        >>> rdd.treeReduce(add, 5)
+        -5
+        >>> rdd.treeReduce(add, 10)
+        -5
+        """
+        if depth < 1:
+            raise ValueError("Depth cannot be smaller than 1 but got %d." % depth)
+
+        zeroValue = None, True  # Use the second entry to indicate whether this is a dummy value.
+
+        def op(x, y):
+            if x[1]:
+                return y
+            elif y[1]:
+                return x
+            else:
+                return f(x[0], y[0]), False
+
+        reduced = self.map(lambda x: (x, False)).treeAggregate(zeroValue, op, op, depth)
+        if reduced[1]:
+            raise ValueError("Cannot reduce empty RDD.")
+        return reduced[0]
+
+    def fold(self, zeroValue, op):
+        """
+        Aggregate the elements of each partition, and then the results for all
+        the partitions, using a given associative function and a neutral "zero value."
+
+        The function C{op(t1, t2)} is allowed to modify C{t1} and return it
+        as its result value to avoid object allocation; however, it should not
+        modify C{t2}.
+
+        This behaves somewhat differently from fold operations implemented
+        for non-distributed collections in functional languages like Scala.
+        This fold operation may be applied to partitions individually, and then
+        those partial results are folded into the final result, rather than
+        applying the fold to each element sequentially in some defined ordering.
+        For functions that are not commutative, the result may differ from that
+        of a fold applied to a non-distributed collection.
+
+        >>> from operator import add
+        >>> sc.parallelize([1, 2, 3, 4, 5]).fold(0, add)
+        15
+        """
+        op = fail_on_stopiteration(op)
+
+        def func(iterator):
+            acc = zeroValue
+            for obj in iterator:
+                acc = op(acc, obj)
+            yield acc
+        # collecting result of mapPartitions here ensures that the copy of
+        # zeroValue provided to each partition is unique from the one provided
+        # to the final reduce call
+        vals = self.mapPartitions(func).collect()
+        return reduce(op, vals, zeroValue)
+
+    def aggregate(self, zeroValue, seqOp, combOp):
+        """
+        Aggregate the elements of each partition, and then the results for all
+        the partitions, using the given combine functions and a neutral "zero
+        value."
+
+        Both combine functions, of the form C{op(t1, t2)}, are allowed to modify C{t1}
+        and return it as their result value to avoid object allocation; however, they
+        should not modify C{t2}.
+
+        The first function (seqOp) can return a different result type, U, than
+        the type of this RDD. Thus, we need one operation for merging a T into
+        a U and one operation for merging two U's.
+
+        >>> seqOp = (lambda x, y: (x[0] + y, x[1] + 1))
+        >>> combOp = (lambda x, y: (x[0] + y[0], x[1] + y[1]))
+        >>> sc.parallelize([1, 2, 3, 4]).aggregate((0, 0), seqOp, combOp)
+        (10, 4)
+        >>> sc.parallelize([]).aggregate((0, 0), seqOp, combOp)
+        (0, 0)
+        """
+        seqOp = fail_on_stopiteration(seqOp)
+        combOp = fail_on_stopiteration(combOp)
+
+        def func(iterator):
+            acc = zeroValue
+            for obj in iterator:
+                acc = seqOp(acc, obj)
+            yield acc
+        # collecting result of mapPartitions here ensures that the copy of
+        # zeroValue provided to each partition is unique from the one provided
+        # to the final reduce call
+        vals = self.mapPartitions(func).collect()
+        return reduce(combOp, vals, zeroValue)
+
+    def treeAggregate(self, zeroValue, seqOp, combOp, depth=2):
+        """
+        Aggregates the elements of this RDD in a multi-level tree
+        pattern.
+
+        :param depth: suggested depth of the tree (default: 2)
+
+        >>> add = lambda x, y: x + y
+        >>> rdd = sc.parallelize([-5, -4, -3, -2, -1, 1, 2, 3, 4], 10)
+        >>> rdd.treeAggregate(0, add, add)
+        -5
+        >>> rdd.treeAggregate(0, add, add, 1)
+        -5
+        >>> rdd.treeAggregate(0, add, add, 2)
+        -5
+        >>> rdd.treeAggregate(0, add, add, 5)
+        -5
+        >>> rdd.treeAggregate(0, add, add, 10)
+        -5
+        """
+        if depth < 1:
+            raise ValueError("Depth cannot be smaller than 1 but got %d." % depth)
+
+        if self.getNumPartitions() == 0:
+            return zeroValue
+
+        def aggregatePartition(iterator):
+            acc = zeroValue
+            for obj in iterator:
+                acc = seqOp(acc, obj)
+            yield acc
+
+        partiallyAggregated = self.mapPartitions(aggregatePartition)
+        numPartitions = partiallyAggregated.getNumPartitions()
+        scale = max(int(ceil(pow(numPartitions, 1.0 / depth))), 2)
+        # If creating an extra level doesn't help reduce the wall-clock time, we stop the tree
+        # aggregation.
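+        # e.g. with 100 partially aggregated partitions and depth 2,
+        # scale = ceil(100 ** 0.5) = 10, so a single round of reduceByKey
+        # shrinks 100 partitions to 10 before the final reduce() on the driver
+        # (illustrative figures).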
+        while numPartitions > scale + numPartitions / scale:
+            numPartitions /= scale
+            curNumPartitions = int(numPartitions)
+
+            def mapPartition(i, iterator):
+                for obj in iterator:
+                    yield (i % curNumPartitions, obj)
+
+            partiallyAggregated = partiallyAggregated \
+                .mapPartitionsWithIndex(mapPartition) \
+                .reduceByKey(combOp, curNumPartitions) \
+                .values()
+
+        return partiallyAggregated.reduce(combOp)
+
+    def max(self, key=None):
+        """
+        Find the maximum item in this RDD.
+
+        :param key: A function used to generate the key for comparison
+
+        >>> rdd = sc.parallelize([1.0, 5.0, 43.0, 10.0])
+        >>> rdd.max()
+        43.0
+        >>> rdd.max(key=str)
+        5.0
+        """
+        if key is None:
+            return self.reduce(max)
+        return self.reduce(lambda a, b: max(a, b, key=key))
+
+    def min(self, key=None):
+        """
+        Find the minimum item in this RDD.
+
+        :param key: A function used to generate the key for comparison
+
+        >>> rdd = sc.parallelize([2.0, 5.0, 43.0, 10.0])
+        >>> rdd.min()
+        2.0
+        >>> rdd.min(key=str)
+        10.0
+        """
+        if key is None:
+            return self.reduce(min)
+        return self.reduce(lambda a, b: min(a, b, key=key))
+
+    def sum(self):
+        """
+        Add up the elements in this RDD.
+
+        >>> sc.parallelize([1.0, 2.0, 3.0]).sum()
+        6.0
+        """
+        return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
+
+    def count(self):
+        """
+        Return the number of elements in this RDD.
+
+        >>> sc.parallelize([2, 3, 4]).count()
+        3
+        """
+        return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
+
+    def stats(self):
+        """
+        Return a L{StatCounter} object that captures the mean, variance
+        and count of the RDD's elements in one operation.
+        """
+        def redFunc(left_counter, right_counter):
+            return left_counter.mergeStats(right_counter)
+
+        return self.mapPartitions(lambda i: [StatCounter(i)]).reduce(redFunc)
+
+    def histogram(self, buckets):
+        """
+        Compute a histogram using the provided buckets. The buckets
+        are all open to the right except for the last which is closed.
+        e.g. [1,10,20,50] means the buckets are [1,10) [10,20) [20,50],
+        i.e. 1<=x<10, 10<=x<20, 20<=x<=50. Given the inputs 1
+        and 50, the histogram would be [1, 0, 1].
+
+        If the buckets are evenly spaced (e.g. [0, 10, 20, 30]),
+        bucket lookup switches from an O(log n) binary search to O(1) per
+        element (where n is the number of buckets).
+
+        Buckets must be sorted, not contain any duplicates, and have
+        at least two elements.
+
+        If `buckets` is a number, it will generate buckets which are
+        evenly spaced between the minimum and maximum of the RDD. For
+        example, if the min value is 0 and the max is 100, given `buckets`
+        as 2, the resulting buckets will be [0,50) [50,100]. `buckets` must
+        be at least 1. An exception is raised if the RDD contains infinity.
+        If the elements in the RDD do not vary (max == min), a single bucket
+        will be used.
+
+        The return value is a tuple of buckets and histogram.
+
+        >>> rdd = sc.parallelize(range(51))
+        >>> rdd.histogram(2)
+        ([0, 25, 50], [25, 26])
+        >>> rdd.histogram([0, 5, 25, 50])
+        ([0, 5, 25, 50], [5, 20, 26])
+        >>> rdd.histogram([0, 15, 30, 45, 60])  # evenly spaced buckets
+        ([0, 15, 30, 45, 60], [15, 15, 15, 6])
+        >>> rdd = sc.parallelize(["ab", "ac", "b", "bd", "ef"])
+        >>> rdd.histogram(("a", "b", "c"))
+        (('a', 'b', 'c'), [2, 2])
+        """
+
+        if isinstance(buckets, int):
+            if buckets < 1:
+                raise ValueError("number of buckets must be >= 1")
+
+            # filter out non-comparable elements
+            def comparable(x):
+                if x is None:
+                    return False
+                if type(x) is float and isnan(x):
+                    return False
+                return True
+
+            filtered = self.filter(comparable)
+
+            # faster than stats()
+            def minmax(a, b):
+                return min(a[0], b[0]), max(a[1], b[1])
+            try:
+                minv, maxv = filtered.map(lambda x: (x, x)).reduce(minmax)
+            except TypeError as e:
+                if " empty " in str(e):
+                    raise ValueError("can not generate buckets from empty RDD")
+                raise
+
+            if minv == maxv or buckets == 1:
+                return [minv, maxv], [filtered.count()]
+
+            try:
+                inc = (maxv - minv) / buckets
+            except TypeError:
+                raise TypeError("Can not generate buckets with non-number in RDD")
+
+            if isinf(inc):
+                raise ValueError("Can not generate buckets with infinite value")
+
+            # keep them as integer if possible
+            inc = int(inc)
+            if inc * buckets != maxv - minv:
+                inc = (maxv - minv) * 1.0 / buckets
+
+            buckets = [i * inc + minv for i in range(buckets)]
+            buckets.append(maxv)  # fix accumulated error
+            even = True
+
+        elif isinstance(buckets, (list, tuple)):
+            if len(buckets) < 2:
+                raise ValueError("buckets should have more than one value")
+
+            if any(i is None or isinstance(i, float) and isnan(i) for i in buckets):
+                raise ValueError("can not have None or NaN in buckets")
+
+            if sorted(buckets) != list(buckets):
+                raise ValueError("buckets should be sorted")
+
+            if len(set(buckets)) != len(buckets):
+                raise ValueError("buckets should not contain duplicated values")
+
+            minv = buckets[0]
+            maxv = buckets[-1]
+            even = False
+            inc = None
+            try:
+                steps = [buckets[i + 1] - buckets[i] for i in range(len(buckets) - 1)]
+            except TypeError:
+                pass  # objects in buckets do not support '-'
+            else:
+                if max(steps) - min(steps) < 1e-10:  # handle precision errors
+                    even = True
+                    inc = (maxv - minv) / (len(buckets) - 1)
+
+        else:
+            raise TypeError("buckets should be a list, tuple, or number (int or long)")
+
+        def histogram(iterator):
+            counters = [0] * len(buckets)
+            for i in iterator:
+                if i is None or (type(i) is float and isnan(i)) or i > maxv or i < minv:
+                    continue
+                t = (int((i - minv) / inc) if even
+                     else bisect.bisect_right(buckets, i) - 1)
+                counters[t] += 1
+            # add last two together
+            last = counters.pop()
+            counters[-1] += last
+            return [counters]
+
+        def mergeCounters(a, b):
+            return [i + j for i, j in zip(a, b)]
+
+        return buckets, self.mapPartitions(histogram).reduce(mergeCounters)
+
+    def mean(self):
+        """
+        Compute the mean of this RDD's elements.
+
+        >>> sc.parallelize([1, 2, 3]).mean()
+        2.0
+        """
+        return self.stats().mean()
+
+    def variance(self):
+        """
+        Compute the variance of this RDD's elements.
+
+        >>> sc.parallelize([1, 2, 3]).variance()
+        0.666...
+        """
+        return self.stats().variance()
+
+    def stdev(self):
+        """
+        Compute the standard deviation of this RDD's elements.
+
+        >>> sc.parallelize([1, 2, 3]).stdev()
+        0.816...
+        """
+        return self.stats().stdev()
+
+    def sampleStdev(self):
+        """
+        Compute the sample standard deviation of this RDD's elements (which
+        corrects for bias in estimating the standard deviation by dividing by
+        N-1 instead of N).
+
+        >>> sc.parallelize([1, 2, 3]).sampleStdev()
+        1.0
+        """
+        return self.stats().sampleStdev()
+
+    def sampleVariance(self):
+        """
+        Compute the sample variance of this RDD's elements (which corrects
+        for bias in estimating the variance by dividing by N-1 instead of N).
+
+        >>> sc.parallelize([1, 2, 3]).sampleVariance()
+        1.0
+        """
+        return self.stats().sampleVariance()
+
+    def countByValue(self):
+        """
+        Return the count of each unique value in this RDD as a dictionary of
+        (value, count) pairs.
+
+        >>> sorted(sc.parallelize([1, 2, 1, 2, 2], 2).countByValue().items())
+        [(1, 2), (2, 3)]
+        """
+        def countPartition(iterator):
+            counts = defaultdict(int)
+            for obj in iterator:
+                counts[obj] += 1
+            yield counts
+
+        def mergeMaps(m1, m2):
+            for k, v in m2.items():
+                m1[k] += v
+            return m1
+        return self.mapPartitions(countPartition).reduce(mergeMaps)
+
+    def top(self, num, key=None):
+        """
+        Get the top N elements from an RDD.
+
+        .. note:: This method should only be used if the resulting array is expected
+            to be small, as all the data is loaded into the driver's memory.
+
+        .. note:: It returns the list sorted in descending order.
+
+        >>> sc.parallelize([10, 4, 2, 12, 3]).top(1)
+        [12]
+        >>> sc.parallelize([2, 3, 4, 5, 6], 2).top(2)
+        [6, 5]
+        >>> sc.parallelize([10, 4, 2, 12, 3]).top(3, key=str)
+        [4, 3, 2]
+        """
+        def topIterator(iterator):
+            yield heapq.nlargest(num, iterator, key=key)
+
+        def merge(a, b):
+            return heapq.nlargest(num, a + b, key=key)
+
+        return self.mapPartitions(topIterator).reduce(merge)
+
+    def takeOrdered(self, num, key=None):
+        """
+        Get the N elements from an RDD ordered in ascending order or as
+        specified by the optional key function.
+
+        .. note:: this method should only be used if the resulting array is expected
+            to be small, as all the data is loaded into the driver's memory.
+
+        >>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7]).takeOrdered(6)
+        [1, 2, 3, 4, 5, 6]
+        >>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7], 2).takeOrdered(6, key=lambda x: -x)
+        [10, 9, 7, 6, 5, 4]
+        """
+
+        def merge(a, b):
+            return heapq.nsmallest(num, a + b, key)
+
+        return self.mapPartitions(lambda it: [heapq.nsmallest(num, it, key)]).reduce(merge)
+
+    def take(self, num):
+        """
+        Take the first num elements of the RDD.
+
+        It works by first scanning one partition, then using the results from
+        that partition to estimate the number of additional partitions needed
+        to satisfy the limit.
+
+        Translated from the Scala implementation in RDD#take().
+
+        .. note:: this method should only be used if the resulting array is expected
+            to be small, as all the data is loaded into the driver's memory.
+
+        >>> sc.parallelize([2, 3, 4, 5, 6]).cache().take(2)
+        [2, 3]
+        >>> sc.parallelize([2, 3, 4, 5, 6]).take(10)
+        [2, 3, 4, 5, 6]
+        >>> sc.parallelize(range(100), 100).filter(lambda x: x > 90).take(3)
+        [91, 92, 93]
+        """
+        items = []
+        totalParts = self.getNumPartitions()
+        partsScanned = 0
+
+        while len(items) < num and partsScanned < totalParts:
+            # The number of partitions to try in this iteration.
+            # It is ok for this number to be greater than totalParts because
+            # we actually cap it at totalParts in runJob.
+            numPartsToTry = 1
+            if partsScanned > 0:
+                # If we didn't find any rows after the previous iteration,
+                # quadruple and retry.  Otherwise, interpolate the number of
+                # partitions we need to try, but overestimate it by 50%.
+                # We also cap the estimation in the end.
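+                # e.g. if scanning 1 partition yielded 2 of the 10 requested
+                # items, the interpolation gives int(1.5 * 10 * 1 / 2) - 1 = 6,
+                # which is then capped at partsScanned * 4 = 4 (illustrative).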
+                if len(items) == 0:
+                    numPartsToTry = partsScanned * 4
+                else:
+                    # the first parameter of max is >=1 whenever partsScanned >= 2
+                    numPartsToTry = int(1.5 * num * partsScanned / len(items)) - partsScanned
+                    numPartsToTry = min(max(numPartsToTry, 1), partsScanned * 4)
+
+            left = num - len(items)
+
+            def takeUpToNumLeft(iterator):
+                iterator = iter(iterator)
+                taken = 0
+                while taken < left:
+                    try:
+                        yield next(iterator)
+                    except StopIteration:
+                        return
+                    taken += 1
+
+            p = range(partsScanned, min(partsScanned + numPartsToTry, totalParts))
+            res = self.context.runJob(self, takeUpToNumLeft, p)
+
+            items += res
+            partsScanned += numPartsToTry
+
+        return items[:num]
+
+    def first(self):
+        """
+        Return the first element in this RDD.
+
+        >>> sc.parallelize([2, 3, 4]).first()
+        2
+        >>> sc.parallelize([]).first()
+        Traceback (most recent call last):
+            ...
+        ValueError: RDD is empty
+        """
+        rs = self.take(1)
+        if rs:
+            return rs[0]
+        raise ValueError("RDD is empty")
+
+    def isEmpty(self):
+        """
+        Returns true if and only if the RDD contains no elements at all.
+
+        .. note:: an RDD may be empty even when it has at least 1 partition.
+
+        >>> sc.parallelize([]).isEmpty()
+        True
+        >>> sc.parallelize([1]).isEmpty()
+        False
+        """
+        return self.getNumPartitions() == 0 or len(self.take(1)) == 0
+
+    def saveAsNewAPIHadoopDataset(self, conf, keyConverter=None, valueConverter=None):
+        """
+        Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file
+        system, using the new Hadoop OutputFormat API (mapreduce package). Keys/values are
+        converted for output using either user specified converters or, by default,
+        L{org.apache.spark.api.python.JavaToWritableConverter}.
+
+        :param conf: Hadoop job configuration, passed in as a dict
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        """
+        jconf = self.ctx._dictToJavaMap(conf)
+        pickledRDD = self._pickled()
+        self.ctx._jvm.PythonRDD.saveAsHadoopDataset(pickledRDD._jrdd, True, jconf,
+                                                    keyConverter, valueConverter, True)
+
+    def saveAsNewAPIHadoopFile(self, path, outputFormatClass, keyClass=None, valueClass=None,
+                               keyConverter=None, valueConverter=None, conf=None):
+        """
+        Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file
+        system, using the new Hadoop OutputFormat API (mapreduce package). Key and value types
+        will be inferred if not specified. Keys and values are converted for output using either
+        user specified converters or L{org.apache.spark.api.python.JavaToWritableConverter}. The
+        C{conf} is applied on top of the base Hadoop conf associated with the SparkContext
+        of this RDD to create a merged Hadoop MapReduce job configuration for saving the data.
+
+        :param path: path to Hadoop file
+        :param outputFormatClass: fully qualified classname of Hadoop OutputFormat
+               (e.g. "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat")
+        :param keyClass: fully qualified classname of key Writable class
+               (e.g. "org.apache.hadoop.io.IntWritable", None by default)
+        :param valueClass: fully qualified classname of value Writable class
+               (e.g. "org.apache.hadoop.io.Text", None by default)
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        :param conf: Hadoop job configuration, passed in as a dict (None by default)
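+
+        A hypothetical usage sketch (the path and Writable classes below are
+        illustrative placeholders, not verified against a running cluster)::
+
+            rdd = sc.parallelize([(1, "a"), (2, "b")])
+            rdd.saveAsNewAPIHadoopFile(
+                "/tmp/example-output",
+                "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat",
+                keyClass="org.apache.hadoop.io.IntWritable",
+                valueClass="org.apache.hadoop.io.Text")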
+        """
+        jconf = self.ctx._dictToJavaMap(conf)
+        pickledRDD = self._pickled()
+        self.ctx._jvm.PythonRDD.saveAsNewAPIHadoopFile(pickledRDD._jrdd, True, path,
+                                                       outputFormatClass,
+                                                       keyClass, valueClass,
+                                                       keyConverter, valueConverter, jconf)
+
+    def saveAsHadoopDataset(self, conf, keyConverter=None, valueConverter=None):
+        """
+        Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file
+        system, using the old Hadoop OutputFormat API (mapred package). Keys/values are
+        converted for output using either user specified converters or, by default,
+        L{org.apache.spark.api.python.JavaToWritableConverter}.
+
+        :param conf: Hadoop job configuration, passed in as a dict
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        """
+        jconf = self.ctx._dictToJavaMap(conf)
+        pickledRDD = self._pickled()
+        self.ctx._jvm.PythonRDD.saveAsHadoopDataset(pickledRDD._jrdd, True, jconf,
+                                                    keyConverter, valueConverter, False)
+
+    def saveAsHadoopFile(self, path, outputFormatClass, keyClass=None, valueClass=None,
+                         keyConverter=None, valueConverter=None, conf=None,
+                         compressionCodecClass=None):
+        """
+        Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file
+        system, using the old Hadoop OutputFormat API (mapred package). Key and value types
+        will be inferred if not specified. Keys and values are converted for output using either
+        user specified converters or L{org.apache.spark.api.python.JavaToWritableConverter}. The
+        C{conf} is applied on top of the base Hadoop conf associated with the SparkContext
+        of this RDD to create a merged Hadoop MapReduce job configuration for saving the data.
+
+        :param path: path to Hadoop file
+        :param outputFormatClass: fully qualified classname of Hadoop OutputFormat
+               (e.g. "org.apache.hadoop.mapred.SequenceFileOutputFormat")
+        :param keyClass: fully qualified classname of key Writable class
+               (e.g. "org.apache.hadoop.io.IntWritable", None by default)
+        :param valueClass: fully qualified classname of value Writable class
+               (e.g. "org.apache.hadoop.io.Text", None by default)
+        :param keyConverter: (None by default)
+        :param valueConverter: (None by default)
+        :param conf: (None by default)
+        :param compressionCodecClass: (None by default)
+        """
+        jconf = self.ctx._dictToJavaMap(conf)
+        pickledRDD = self._pickled()
+        self.ctx._jvm.PythonRDD.saveAsHadoopFile(pickledRDD._jrdd, True, path,
+                                                 outputFormatClass,
+                                                 keyClass, valueClass,
+                                                 keyConverter, valueConverter,
+                                                 jconf, compressionCodecClass)
+
+    def saveAsSequenceFile(self, path, compressionCodecClass=None):
+        """
+        Output a Python RDD of key-value pairs (of form C{RDD[(K, V)]}) to any Hadoop file
+        system, using the L{org.apache.hadoop.io.Writable} types that we convert from the
+        RDD's key and value types. The mechanism is as follows:
+
+            1. Pyrolite is used to convert pickled Python RDD into RDD of Java objects.
+            2. Keys and values of this Java RDD are converted to Writables and written out.
+
+        :param path: path to sequence file
+        :param compressionCodecClass: (None by default)
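+
+        A hypothetical usage sketch (the output path is an illustrative
+        placeholder)::
+
+            sc.parallelize([(1, "a"), (2, "b")]).saveAsSequenceFile("/tmp/seq-output")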
+        """
+        pickledRDD = self._pickled()
+        self.ctx._jvm.PythonRDD.saveAsSequenceFile(pickledRDD._jrdd, True,
+                                                   path, compressionCodecClass)
+
+    def saveAsPickleFile(self, path, batchSize=10):
+        """
+        Save this RDD as a SequenceFile of serialized objects. The serializer
+        used is L{pyspark.serializers.PickleSerializer}, default batch size
+        is 10.
+
+        >>> tmpFile = NamedTemporaryFile(delete=True)
+        >>> tmpFile.close()
+        >>> sc.parallelize([1, 2, 'spark', 'rdd']).saveAsPickleFile(tmpFile.name, 3)
+        >>> sorted(sc.pickleFile(tmpFile.name, 5).map(str).collect())
+        ['1', '2', 'rdd', 'spark']
+        """
+        if batchSize == 0:
+            ser = AutoBatchedSerializer(PickleSerializer())
+        else:
+            ser = BatchedSerializer(PickleSerializer(), batchSize)
+        self._reserialize(ser)._jrdd.saveAsObjectFile(path)
+
+    @ignore_unicode_prefix
+    def saveAsTextFile(self, path, compressionCodecClass=None):
+        """
+        Save this RDD as a text file, using string representations of elements.
+
+        :param path: path to text file
+        :param compressionCodecClass: (None by default) string, e.g.
+            "org.apache.hadoop.io.compress.GzipCodec"
+
+        >>> tempFile = NamedTemporaryFile(delete=True)
+        >>> tempFile.close()
+        >>> sc.parallelize(range(10)).saveAsTextFile(tempFile.name)
+        >>> from fileinput import input
+        >>> from glob import glob
+        >>> ''.join(sorted(input(glob(tempFile.name + "/part-0000*"))))
+        '0\\n1\\n2\\n3\\n4\\n5\\n6\\n7\\n8\\n9\\n'
+
+        Empty lines are tolerated when saving to text files.
+
+        >>> tempFile2 = NamedTemporaryFile(delete=True)
+        >>> tempFile2.close()
+        >>> sc.parallelize(['', 'foo', '', 'bar', '']).saveAsTextFile(tempFile2.name)
+        >>> ''.join(sorted(input(glob(tempFile2.name + "/part-0000*"))))
+        '\\n\\n\\nbar\\nfoo\\n'
+
+        Using compressionCodecClass
+
+        >>> tempFile3 = NamedTemporaryFile(delete=True)
+        >>> tempFile3.close()
+        >>> codec = "org.apache.hadoop.io.compress.GzipCodec"
+        >>> sc.parallelize(['foo', 'bar']).saveAsTextFile(tempFile3.name, codec)
+        >>> from fileinput import input, hook_compressed
+        >>> result = sorted(input(glob(tempFile3.name + "/part*.gz"), openhook=hook_compressed))
+        >>> b''.join(result).decode('utf-8')
+        u'bar\\nfoo\\n'
+        """
+        def func(split, iterator):
+            for x in iterator:
+                if not isinstance(x, (unicode, bytes)):
+                    x = unicode(x)
+                if isinstance(x, unicode):
+                    x = x.encode("utf-8")
+                yield x
+        keyed = self.mapPartitionsWithIndex(func)
+        keyed._bypass_serializer = True
+        if compressionCodecClass:
+            compressionCodec = self.ctx._jvm.java.lang.Class.forName(compressionCodecClass)
+            keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path, compressionCodec)
+        else:
+            keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path)
+
+    # Pair functions
+
+    def collectAsMap(self):
+        """
+        Return the key-value pairs in this RDD to the master as a dictionary.
+
+        .. note:: this method should only be used if the resulting data is expected
+            to be small, as all the data is loaded into the driver's memory.
+
+        >>> m = sc.parallelize([(1, 2), (3, 4)]).collectAsMap()
+        >>> m[1]
+        2
+        >>> m[3]
+        4
+        """
+        return dict(self.collect())
+
+    def keys(self):
+        """
+        Return an RDD with the keys of each tuple.
+
+        >>> m = sc.parallelize([(1, 2), (3, 4)]).keys()
+        >>> m.collect()
+        [1, 3]
+        """
+        return self.map(lambda x: x[0])
+
+    def values(self):
+        """
+        Return an RDD with the values of each tuple.
+
+        >>> m = sc.parallelize([(1, 2), (3, 4)]).values()
+        >>> m.collect()
+        [2, 4]
+        """
+        return self.map(lambda x: x[1])
+
+    def reduceByKey(self, func, numPartitions=None, partitionFunc=portable_hash):
+        """
+        Merge the values for each key using an associative and commutative reduce function.
+
+        This will also perform the merging locally on each mapper before
+        sending results to a reducer, similarly to a "combiner" in MapReduce.
+
+        Output will be partitioned with C{numPartitions} partitions, or
+        the default parallelism level if C{numPartitions} is not specified.
+        The default partitioner is hash-partitioning.
+
+        >>> from operator import add
+        >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
+        >>> sorted(rdd.reduceByKey(add).collect())
+        [('a', 2), ('b', 1)]
+        """
+        return self.combineByKey(lambda x: x, func, func, numPartitions, partitionFunc)
+
+    def reduceByKeyLocally(self, func):
+        """
+        Merge the values for each key using an associative and commutative reduce function, but
+        return the results immediately to the master as a dictionary.
+
+        This will also perform the merging locally on each mapper before
+        sending results to a reducer, similarly to a "combiner" in MapReduce.
+
+        >>> from operator import add
+        >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
+        >>> sorted(rdd.reduceByKeyLocally(add).items())
+        [('a', 2), ('b', 1)]
+        """
+        func = fail_on_stopiteration(func)
+
+        def reducePartition(iterator):
+            m = {}
+            for k, v in iterator:
+                m[k] = func(m[k], v) if k in m else v
+            yield m
+
+        def mergeMaps(m1, m2):
+            for k, v in m2.items():
+                m1[k] = func(m1[k], v) if k in m1 else v
+            return m1
+        return self.mapPartitions(reducePartition).reduce(mergeMaps)
+
+    def countByKey(self):
+        """
+        Count the number of elements for each key, and return the result to the
+        master as a dictionary.
+
+        >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
+        >>> sorted(rdd.countByKey().items())
+        [('a', 2), ('b', 1)]
+        """
+        return self.map(lambda x: x[0]).countByValue()
+
+    def join(self, other, numPartitions=None):
+        """
+        Return an RDD containing all pairs of elements with matching keys in
+        C{self} and C{other}.
+
+        Each pair of elements will be returned as a (k, (v1, v2)) tuple, where
+        (k, v1) is in C{self} and (k, v2) is in C{other}.
+
+        Performs a hash join across the cluster.
+
+        >>> x = sc.parallelize([("a", 1), ("b", 4)])
+        >>> y = sc.parallelize([("a", 2), ("a", 3)])
+        >>> sorted(x.join(y).collect())
+        [('a', (1, 2)), ('a', (1, 3))]
+        """
+        return python_join(self, other, numPartitions)
+
+    def leftOuterJoin(self, other, numPartitions=None):
+        """
+        Perform a left outer join of C{self} and C{other}.
+
+        For each element (k, v) in C{self}, the resulting RDD will either
+        contain all pairs (k, (v, w)) for w in C{other}, or the pair
+        (k, (v, None)) if no elements in C{other} have key k.
+
+        Hash-partitions the resulting RDD into the given number of partitions.
+
+        >>> x = sc.parallelize([("a", 1), ("b", 4)])
+        >>> y = sc.parallelize([("a", 2)])
+        >>> sorted(x.leftOuterJoin(y).collect())
+        [('a', (1, 2)), ('b', (4, None))]
+        """
+        return python_left_outer_join(self, other, numPartitions)
+
+    def rightOuterJoin(self, other, numPartitions=None):
+        """
+        Perform a right outer join of C{self} and C{other}.
+
+        For each element (k, w) in C{other}, the resulting RDD will either
+        contain all pairs (k, (v, w)) for v in this, or the pair (k, (None, w))
+        if no elements in C{self} have key k.
+
+        Hash-partitions the resulting RDD into the given number of partitions.
+
+        >>> x = sc.parallelize([("a", 1), ("b", 4)])
+        >>> y = sc.parallelize([("a", 2)])
+        >>> sorted(y.rightOuterJoin(x).collect())
+        [('a', (2, 1)), ('b', (None, 4))]
+        """
+        return python_right_outer_join(self, other, numPartitions)
+
+    def fullOuterJoin(self, other, numPartitions=None):
+        """
+        Perform a full outer join of C{self} and C{other}.
+
+        For each element (k, v) in C{self}, the resulting RDD will either
+        contain all pairs (k, (v, w)) for w in C{other}, or the pair
+        (k, (v, None)) if no elements in C{other} have key k.
+
+        Similarly, for each element (k, w) in C{other}, the resulting RDD will
+        either contain all pairs (k, (v, w)) for v in C{self}, or the pair
+        (k, (None, w)) if no elements in C{self} have key k.
+
+        Hash-partitions the resulting RDD into the given number of partitions.
+
+        >>> x = sc.parallelize([("a", 1), ("b", 4)])
+        >>> y = sc.parallelize([("a", 2), ("c", 8)])
+        >>> sorted(x.fullOuterJoin(y).collect())
+        [('a', (1, 2)), ('b', (4, None)), ('c', (None, 8))]
+        """
+        return python_full_outer_join(self, other, numPartitions)
+
+    # TODO: add option to control map-side combining
+    # portable_hash is used as the default because the builtin hash of None
+    # differs across machines.
+    def partitionBy(self, numPartitions, partitionFunc=portable_hash):
+        """
+        Return a copy of the RDD partitioned using the specified partitioner.
+
+        >>> pairs = sc.parallelize([1, 2, 3, 4, 2, 4, 1]).map(lambda x: (x, x))
+        >>> sets = pairs.partitionBy(2).glom().collect()
+        >>> len(set(sets[0]).intersection(set(sets[1])))
+        0
+        """
+        if numPartitions is None:
+            numPartitions = self._defaultReducePartitions()
+        partitioner = Partitioner(numPartitions, partitionFunc)
+        if self.partitioner == partitioner:
+            return self
+
+        # Transferring O(n) objects to Java is too expensive.
+        # Instead, we'll form the hash buckets in Python,
+        # transferring O(numPartitions) objects to Java.
+        # Each object is a (splitNumber, [objects]) pair.
+        # To avoid overly large objects, the objects are
+        # grouped into chunks.
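+        # e.g. with numPartitions = 2, the worker emits a byte stream of the
+        # form pack_long(0), dumps([(k, v), ...]), pack_long(1), dumps([...]),
+        # repeating whenever a chunk is flushed (illustrative sketch of the
+        # format consumed by PairwiseRDD on the JVM side).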
+        outputSerializer = self.ctx._unbatched_serializer
+
+        limit = (_parse_memory(self.ctx._conf.get(
+            "spark.python.worker.memory", "512m")) / 2)
+
+        def add_shuffle_key(split, iterator):
+
+            buckets = defaultdict(list)
+            c, batch = 0, min(10 * numPartitions, 1000)
+
+            for k, v in iterator:
+                buckets[partitionFunc(k) % numPartitions].append((k, v))
+                c += 1
+
+                # check used memory and avg size of chunk of objects
+                if (c % 1000 == 0 and get_used_memory() > limit
+                        or c > batch):
+                    n, size = len(buckets), 0
+                    for split in list(buckets.keys()):
+                        yield pack_long(split)
+                        d = outputSerializer.dumps(buckets[split])
+                        del buckets[split]
+                        yield d
+                        size += len(d)
+
+                    avg = int(size / n) >> 20
+                    # let 1M < avg < 10M
+                    if avg < 1:
+                        batch *= 1.5
+                    elif avg > 10:
+                        batch = max(int(batch / 1.5), 1)
+                    c = 0
+
+            for split, items in buckets.items():
+                yield pack_long(split)
+                yield outputSerializer.dumps(items)
+
+        keyed = self.mapPartitionsWithIndex(add_shuffle_key, preservesPartitioning=True)
+        keyed._bypass_serializer = True
+        with SCCallSiteSync(self.context) as css:
+            pairRDD = self.ctx._jvm.PairwiseRDD(
+                keyed._jrdd.rdd()).asJavaPairRDD()
+            jpartitioner = self.ctx._jvm.PythonPartitioner(numPartitions,
+                                                           id(partitionFunc))
+        jrdd = self.ctx._jvm.PythonRDD.valueOfPair(pairRDD.partitionBy(jpartitioner))
+        rdd = RDD(jrdd, self.ctx, BatchedSerializer(outputSerializer))
+        rdd.partitioner = partitioner
+        return rdd
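+
+    # Editor's note (not part of the original source): add_shuffle_key above
+    # streams (partition id, pickled chunk) pairs to the JVM. The chunk size
+    # `batch` grows by 1.5x while the average serialized chunk stays under
+    # 1 MB, shrinks by 1.5x when it exceeds 10 MB, and buckets are flushed
+    # early once used memory exceeds half of spark.python.worker.memory.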
+
+    # TODO: add control over map-side aggregation
+    def combineByKey(self, createCombiner, mergeValue, mergeCombiners,
+                     numPartitions=None, partitionFunc=portable_hash):
+        """
+        Generic function to combine the elements for each key using a custom
+        set of aggregation functions.
+
+        Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined
+        type" C.
+
+        Users provide three functions:
+
+            - C{createCombiner}, which turns a V into a C (e.g., creates
+              a one-element list)
+            - C{mergeValue}, to merge a V into a C (e.g., adds it to the end of
+              a list)
+            - C{mergeCombiners}, to combine two C's into a single one (e.g., merges
+              the lists)
+
+        To avoid memory allocation, both mergeValue and mergeCombiners are allowed to
+        modify and return their first argument instead of creating a new C.
+
+        In addition, users can control the partitioning of the output RDD.
+
+        .. note:: V and C can be different -- for example, one might group an RDD of type
+            (Int, Int) into an RDD of type (Int, List[Int]).
+
+        >>> x = sc.parallelize([("a", 1), ("b", 1), ("a", 2)])
+        >>> def to_list(a):
+        ...     return [a]
+        ...
+        >>> def append(a, b):
+        ...     a.append(b)
+        ...     return a
+        ...
+        >>> def extend(a, b):
+        ...     a.extend(b)
+        ...     return a
+        ...
+        >>> sorted(x.combineByKey(to_list, append, extend).collect())
+        [('a', [1, 2]), ('b', [1])]
+        """
+        if numPartitions is None:
+            numPartitions = self._defaultReducePartitions()
+
+        serializer = self.ctx.serializer
+        memory = self._memory_limit()
+        agg = Aggregator(createCombiner, mergeValue, mergeCombiners)
+
+        def combineLocally(iterator):
+            merger = ExternalMerger(agg, memory * 0.9, serializer)
+            merger.mergeValues(iterator)
+            return merger.items()
+
+        locally_combined = self.mapPartitions(combineLocally, preservesPartitioning=True)
+        shuffled = locally_combined.partitionBy(numPartitions, partitionFunc)
+
+        def _mergeCombiners(iterator):
+            merger = ExternalMerger(agg, memory, serializer)
+            merger.mergeCombiners(iterator)
+            return merger.items()
+
+        return shuffled.mapPartitions(_mergeCombiners, preservesPartitioning=True)
+
+    def aggregateByKey(self, zeroValue, seqFunc, combFunc, numPartitions=None,
+                       partitionFunc=portable_hash):
+        """
+        Aggregate the values of each key, using given combine functions and a neutral
+        "zero value". This function can return a different result type, U, than the type
+        of the values in this RDD, V. Thus, we need one operation for merging a V into
+        a U and one operation for merging two U's. The former operation is used for merging
+        values within a partition, and the latter is used for merging values between
+        partitions. To avoid memory allocation, both of these functions are
+        allowed to modify and return their first argument instead of creating a new U.
+        """
+        def createZero():
+            return copy.deepcopy(zeroValue)
+
+        return self.combineByKey(
+            lambda v: seqFunc(createZero(), v), seqFunc, combFunc, numPartitions, partitionFunc)
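+
+    # Editor's illustration (not part of the original source): assuming an
+    # existing SparkContext `sc`, aggregateByKey can build a (sum, count)
+    # pair per key in a single pass, e.g.
+    #
+    #   rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 2)])
+    #   seqFunc = lambda acc, v: (acc[0] + v, acc[1] + 1)
+    #   combFunc = lambda a, b: (a[0] + b[0], a[1] + b[1])
+    #   sorted(rdd.aggregateByKey((0, 0), seqFunc, combFunc).collect())
+    #   # [('a', (3, 2)), ('b', (1, 1))]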
+
+    def foldByKey(self, zeroValue, func, numPartitions=None, partitionFunc=portable_hash):
+        """
+        Merge the values for each key using an associative function "func"
+        and a neutral "zeroValue" which may be added to the result an
+        arbitrary number of times, and must not change the result
+        (e.g., 0 for addition, or 1 for multiplication).
+
+        >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
+        >>> from operator import add
+        >>> sorted(rdd.foldByKey(0, add).collect())
+        [('a', 2), ('b', 1)]
+        """
+        def createZero():
+            return copy.deepcopy(zeroValue)
+
+        return self.combineByKey(lambda v: func(createZero(), v), func, func, numPartitions,
+                                 partitionFunc)
+
+    def _memory_limit(self):
+        return _parse_memory(self.ctx._conf.get("spark.python.worker.memory", "512m"))
+
+    # TODO: support variant with custom partitioner
+    def groupByKey(self, numPartitions=None, partitionFunc=portable_hash):
+        """
+        Group the values for each key in the RDD into a single sequence.
+        Hash-partitions the resulting RDD with numPartitions partitions.
+
+        .. note:: If you are grouping in order to perform an aggregation (such as a
+            sum or average) over each key, using reduceByKey or aggregateByKey will
+            provide much better performance.
+
+        >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
+        >>> sorted(rdd.groupByKey().mapValues(len).collect())
+        [('a', 2), ('b', 1)]
+        >>> sorted(rdd.groupByKey().mapValues(list).collect())
+        [('a', [1, 1]), ('b', [1])]
+        """
+        def createCombiner(x):
+            return [x]
+
+        def mergeValue(xs, x):
+            xs.append(x)
+            return xs
+
+        def mergeCombiners(a, b):
+            a.extend(b)
+            return a
+
+        memory = self._memory_limit()
+        serializer = self._jrdd_deserializer
+        agg = Aggregator(createCombiner, mergeValue, mergeCombiners)
+
+        def combine(iterator):
+            merger = ExternalMerger(agg, memory * 0.9, serializer)
+            merger.mergeValues(iterator)
+            return merger.items()
+
+        locally_combined = self.mapPartitions(combine, preservesPartitioning=True)
+        shuffled = locally_combined.partitionBy(numPartitions, partitionFunc)
+
+        def groupByKey(it):
+            merger = ExternalGroupBy(agg, memory, serializer)
+            merger.mergeCombiners(it)
+            return merger.items()
+
+        return shuffled.mapPartitions(groupByKey, True).mapValues(ResultIterable)
+
+    def flatMapValues(self, f):
+        """
+        Pass each value in the key-value pair RDD through a flatMap function
+        without changing the keys; this also retains the original RDD's
+        partitioning.
+
+        >>> x = sc.parallelize([("a", ["x", "y", "z"]), ("b", ["p", "r"])])
+        >>> def f(x): return x
+        >>> x.flatMapValues(f).collect()
+        [('a', 'x'), ('a', 'y'), ('a', 'z'), ('b', 'p'), ('b', 'r')]
+        """
+        flat_map_fn = lambda kv: ((kv[0], x) for x in f(kv[1]))
+        return self.flatMap(flat_map_fn, preservesPartitioning=True)
+
+    def mapValues(self, f):
+        """
+        Pass each value in the key-value pair RDD through a map function
+        without changing the keys; this also retains the original RDD's
+        partitioning.
+
+        >>> x = sc.parallelize([("a", ["apple", "banana", "lemon"]), ("b", ["grapes"])])
+        >>> def f(x): return len(x)
+        >>> x.mapValues(f).collect()
+        [('a', 3), ('b', 1)]
+        """
+        map_values_fn = lambda kv: (kv[0], f(kv[1]))
+        return self.map(map_values_fn, preservesPartitioning=True)
+
+    def groupWith(self, other, *others):
+        """
+        Alias for cogroup but with support for multiple RDDs.
+
+        >>> w = sc.parallelize([("a", 5), ("b", 6)])
+        >>> x = sc.parallelize([("a", 1), ("b", 4)])
+        >>> y = sc.parallelize([("a", 2)])
+        >>> z = sc.parallelize([("b", 42)])
+        >>> [(x, tuple(map(list, y))) for x, y in sorted(list(w.groupWith(x, y, z).collect()))]
+        [('a', ([5], [1], [2], [])), ('b', ([6], [4], [], [42]))]
+
+        """
+        return python_cogroup((self, other) + others, numPartitions=None)
+
+    # TODO: add variant with custom partitioner
+    def cogroup(self, other, numPartitions=None):
+        """
+        For each key k in C{self} or C{other}, return a resulting RDD that
+        contains a tuple with the list of values for that key in C{self} as
+        well as C{other}.
+
+        >>> x = sc.parallelize([("a", 1), ("b", 4)])
+        >>> y = sc.parallelize([("a", 2)])
+        >>> [(x, tuple(map(list, y))) for x, y in sorted(list(x.cogroup(y).collect()))]
+        [('a', ([1], [2])), ('b', ([4], []))]
+        """
+        return python_cogroup((self, other), numPartitions)
+
+    def sampleByKey(self, withReplacement, fractions, seed=None):
+        """
+        Return a subset of this RDD sampled by key (via stratified sampling).
+        Create a sample of this RDD using variable sampling rates for
+        different keys as specified by fractions, a key to sampling rate map.
+
+        >>> fractions = {"a": 0.2, "b": 0.1}
+        >>> rdd = sc.parallelize(fractions.keys()).cartesian(sc.parallelize(range(0, 1000)))
+        >>> sample = dict(rdd.sampleByKey(False, fractions, 2).groupByKey().collect())
+        >>> 100 < len(sample["a"]) < 300 and 50 < len(sample["b"]) < 150
+        True
+        >>> max(sample["a"]) <= 999 and min(sample["a"]) >= 0
+        True
+        >>> max(sample["b"]) <= 999 and min(sample["b"]) >= 0
+        True
+        """
+        for fraction in fractions.values():
+            assert fraction >= 0.0, "Negative fraction value: %s" % fraction
+        return self.mapPartitionsWithIndex(
+            RDDStratifiedSampler(withReplacement, fractions, seed).func, True)
+
+    def subtractByKey(self, other, numPartitions=None):
+        """
+        Return each (key, value) pair in C{self} that has no pair with matching
+        key in C{other}.
+
+        >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 2)])
+        >>> y = sc.parallelize([("a", 3), ("c", None)])
+        >>> sorted(x.subtractByKey(y).collect())
+        [('b', 4), ('b', 5)]
+        """
+        def filter_func(pair):
+            key, (val1, val2) = pair
+            return val1 and not val2
+        return self.cogroup(other, numPartitions).filter(filter_func).flatMapValues(lambda x: x[0])
+
+    def subtract(self, other, numPartitions=None):
+        """
+        Return each value in C{self} that is not contained in C{other}.
+
+        >>> x = sc.parallelize([("a", 1), ("b", 4), ("b", 5), ("a", 3)])
+        >>> y = sc.parallelize([("a", 3), ("c", None)])
+        >>> sorted(x.subtract(y).collect())
+        [('a', 1), ('b', 4), ('b', 5)]
+        """
+        # note: here 'True' is just a placeholder
+        rdd = other.map(lambda x: (x, True))
+        return self.map(lambda x: (x, True)).subtractByKey(rdd, numPartitions).keys()
+
+    def keyBy(self, f):
+        """
+        Creates tuples of the elements in this RDD by applying C{f}.
+
+        >>> x = sc.parallelize(range(0,3)).keyBy(lambda x: x*x)
+        >>> y = sc.parallelize(zip(range(0,5), range(0,5)))
+        >>> [(x, list(map(list, y))) for x, y in sorted(x.cogroup(y).collect())]
+        [(0, [[0], [0]]), (1, [[1], [1]]), (2, [[], [2]]), (3, [[], [3]]), (4, [[2], [4]])]
+        """
+        return self.map(lambda x: (f(x), x))
+
+    def repartition(self, numPartitions):
+        """
+        Return a new RDD that has exactly numPartitions partitions.
+
+        Can increase or decrease the level of parallelism in this RDD.
+        Internally, this uses a shuffle to redistribute data.
+        If you are decreasing the number of partitions in this RDD, consider
+        using `coalesce`, which can avoid performing a shuffle.
+
+        >>> rdd = sc.parallelize([1,2,3,4,5,6,7], 4)
+        >>> sorted(rdd.glom().collect())
+        [[1], [2, 3], [4, 5], [6, 7]]
+        >>> len(rdd.repartition(2).glom().collect())
+        2
+        >>> len(rdd.repartition(10).glom().collect())
+        10
+        """
+        return self.coalesce(numPartitions, shuffle=True)
+
+    def coalesce(self, numPartitions, shuffle=False):
+        """
+        Return a new RDD that is reduced into `numPartitions` partitions.
+
+        >>> sc.parallelize([1, 2, 3, 4, 5], 3).glom().collect()
+        [[1], [2, 3], [4, 5]]
+        >>> sc.parallelize([1, 2, 3, 4, 5], 3).coalesce(1).glom().collect()
+        [[1, 2, 3, 4, 5]]
+        """
+        if shuffle:
+            # Decrease the batch size in order to distribute the elements evenly across
+            # output partitions. Otherwise, repartition will possibly produce highly
+            # skewed partitions.
+            batchSize = min(10, self.ctx._batchSize or 1024)
+            ser = BatchedSerializer(PickleSerializer(), batchSize)
+            selfCopy = self._reserialize(ser)
+            jrdd_deserializer = selfCopy._jrdd_deserializer
+            jrdd = selfCopy._jrdd.coalesce(numPartitions, shuffle)
+        else:
+            jrdd_deserializer = self._jrdd_deserializer
+            jrdd = self._jrdd.coalesce(numPartitions, shuffle)
+        return RDD(jrdd, self.ctx, jrdd_deserializer)
+
+    def zip(self, other):
+        """
+        Zips this RDD with another one, returning key-value pairs with the
+        first element in each RDD, second element in each RDD, etc. Assumes
+        that the two RDDs have the same number of partitions and the same
+        number of elements in each partition (e.g. one was made through
+        a map on the other).
+
+        >>> x = sc.parallelize(range(0,5))
+        >>> y = sc.parallelize(range(1000, 1005))
+        >>> x.zip(y).collect()
+        [(0, 1000), (1, 1001), (2, 1002), (3, 1003), (4, 1004)]
+        """
+        def get_batch_size(ser):
+            if isinstance(ser, BatchedSerializer):
+                return ser.batchSize
+            return 1  # not batched
+
+        def batch_as(rdd, batchSize):
+            return rdd._reserialize(BatchedSerializer(PickleSerializer(), batchSize))
+
+        my_batch = get_batch_size(self._jrdd_deserializer)
+        other_batch = get_batch_size(other._jrdd_deserializer)
+        if my_batch != other_batch or not my_batch:
+            # use the smallest batchSize for both of them
+            batchSize = min(my_batch, other_batch)
+            if batchSize <= 0:
+                # auto batched or unlimited
+                batchSize = 100
+            other = batch_as(other, batchSize)
+            self = batch_as(self, batchSize)
+
+        if self.getNumPartitions() != other.getNumPartitions():
+            raise ValueError("Can only zip with RDD which has the same number of partitions")
+
+        # There will be an Exception in the JVM if the partitions have
+        # different numbers of items.
+        pairRDD = self._jrdd.zip(other._jrdd)
+        deserializer = PairDeserializer(self._jrdd_deserializer,
+                                        other._jrdd_deserializer)
+        return RDD(pairRDD, self.ctx, deserializer)
+
+    def zipWithIndex(self):
+        """
+        Zips this RDD with its element indices.
+
+        The ordering is first based on the partition index and then the
+        ordering of items within each partition. So the first item in
+        the first partition gets index 0, and the last item in the last
+        partition receives the largest index.
+
+        This method needs to trigger a spark job when this RDD contains
+        more than one partition.
+
+        >>> sc.parallelize(["a", "b", "c", "d"], 3).zipWithIndex().collect()
+        [('a', 0), ('b', 1), ('c', 2), ('d', 3)]
+        """
+        starts = [0]
+        if self.getNumPartitions() > 1:
+            nums = self.mapPartitions(lambda it: [sum(1 for i in it)]).collect()
+            for i in range(len(nums) - 1):
+                starts.append(starts[-1] + nums[i])
+
+        def func(k, it):
+            for i, v in enumerate(it, starts[k]):
+                yield v, i
+
+        return self.mapPartitionsWithIndex(func)
+
+    def zipWithUniqueId(self):
+        """
+        Zips this RDD with generated unique Long ids.
+
+        Items in the kth partition will get ids k, n+k, 2*n+k, ..., where
+        n is the number of partitions. So there may be gaps, but this
+        method won't trigger a spark job, which is different from
+        L{zipWithIndex}.
+
+        >>> sc.parallelize(["a", "b", "c", "d", "e"], 3).zipWithUniqueId().collect()
+        [('a', 0), ('b', 1), ('c', 4), ('d', 2), ('e', 5)]
+        """
+        n = self.getNumPartitions()
+
+        def func(k, it):
+            for i, v in enumerate(it):
+                yield v, i * n + k
+
+        return self.mapPartitionsWithIndex(func)
+
+    def name(self):
+        """
+        Return the name of this RDD.
+        """
+        n = self._jrdd.name()
+        if n:
+            return n
+
+    @ignore_unicode_prefix
+    def setName(self, name):
+        """
+        Assign a name to this RDD.
+
+        >>> rdd1 = sc.parallelize([1, 2])
+        >>> rdd1.setName('RDD1').name()
+        u'RDD1'
+        """
+        self._jrdd.setName(name)
+        return self
+
+    def toDebugString(self):
+        """
+        A description of this RDD and its recursive dependencies for debugging.
+        """
+        debug_string = self._jrdd.toDebugString()
+        if debug_string:
+            return debug_string.encode('utf-8')
+
+    def getStorageLevel(self):
+        """
+        Get the RDD's current storage level.
+
+        >>> rdd1 = sc.parallelize([1,2])
+        >>> rdd1.getStorageLevel()
+        StorageLevel(False, False, False, False, 1)
+        >>> print(rdd1.getStorageLevel())
+        Serialized 1x Replicated
+        """
+        java_storage_level = self._jrdd.getStorageLevel()
+        storage_level = StorageLevel(java_storage_level.useDisk(),
+                                     java_storage_level.useMemory(),
+                                     java_storage_level.useOffHeap(),
+                                     java_storage_level.deserialized(),
+                                     java_storage_level.replication())
+        return storage_level
+
+    def _defaultReducePartitions(self):
+        """
+        Returns the default number of partitions to use during reduce tasks (e.g., groupBy).
+        If spark.default.parallelism is set, then we'll use the value from SparkContext
+        defaultParallelism, otherwise we'll use the number of partitions in this RDD.
+
+        This mirrors the behavior of the Scala Partitioner#defaultPartitioner, intended to reduce
+        the likelihood of OOMs. Once PySpark adopts Partitioner-based APIs, this behavior will
+        be inherent.
+        """
+        if self.ctx._conf.contains("spark.default.parallelism"):
+            return self.ctx.defaultParallelism
+        else:
+            return self.getNumPartitions()
+
+    def lookup(self, key):
+        """
+        Return the list of values in the RDD for key `key`. This operation
+        is done efficiently if the RDD has a known partitioner by only
+        searching the partition that the key maps to.
+
+        >>> l = range(1000)
+        >>> rdd = sc.parallelize(zip(l, l), 10)
+        >>> rdd.lookup(42)  # slow
+        [42]
+        >>> sorted = rdd.sortByKey()
+        >>> sorted.lookup(42)  # fast
+        [42]
+        >>> sorted.lookup(1024)
+        []
+        >>> rdd2 = sc.parallelize([(('a', 'b'), 'c')]).groupByKey()
+        >>> list(rdd2.lookup(('a', 'b'))[0])
+        ['c']
+        """
+        values = self.filter(lambda kv: kv[0] == key).values()
+
+        if self.partitioner is not None:
+            return self.ctx.runJob(values, lambda x: x, [self.partitioner(key)])
+
+        return values.collect()
+
+    def _to_java_object_rdd(self):
+        """ Return a JavaRDD of Object by unpickling
+
+        It will convert each Python object into a Java object by Pyrolite,
+        whether the RDD is serialized in batch or not.
+        """
+        rdd = self._pickled()
+        return self.ctx._jvm.SerDeUtil.pythonToJava(rdd._jrdd, True)
+
+    def countApprox(self, timeout, confidence=0.95):
+        """
+        .. note:: Experimental
+
+        Approximate version of count() that returns a potentially incomplete
+        result within a timeout, even if not all tasks have finished.
+
+        >>> rdd = sc.parallelize(range(1000), 10)
+        >>> rdd.countApprox(1000, 1.0)
+        1000
+        """
+        drdd = self.mapPartitions(lambda it: [float(sum(1 for i in it))])
+        return int(drdd.sumApprox(timeout, confidence))
+
+    def sumApprox(self, timeout, confidence=0.95):
+        """
+        .. note:: Experimental
+
+        Approximate operation to return the sum within a timeout
+        or meet the confidence.
+
+        >>> rdd = sc.parallelize(range(1000), 10)
+        >>> r = sum(range(1000))
+        >>> abs(rdd.sumApprox(1000) - r) / r < 0.05
+        True
+        """
+        jrdd = self.mapPartitions(lambda it: [float(sum(it))])._to_java_object_rdd()
+        jdrdd = self.ctx._jvm.JavaDoubleRDD.fromRDD(jrdd.rdd())
+        r = jdrdd.sumApprox(timeout, confidence).getFinalValue()
+        return BoundedFloat(r.mean(), r.confidence(), r.low(), r.high())
+
+    def meanApprox(self, timeout, confidence=0.95):
+        """
+        .. note:: Experimental
+
+        Approximate operation to return the mean within a timeout
+        or meet the confidence.
+
+        >>> rdd = sc.parallelize(range(1000), 10)
+        >>> r = sum(range(1000)) / 1000.0
+        >>> abs(rdd.meanApprox(1000) - r) / r < 0.05
+        True
+        """
+        jrdd = self.map(float)._to_java_object_rdd()
+        jdrdd = self.ctx._jvm.JavaDoubleRDD.fromRDD(jrdd.rdd())
+        r = jdrdd.meanApprox(timeout, confidence).getFinalValue()
+        return BoundedFloat(r.mean(), r.confidence(), r.low(), r.high())
+
+    def countApproxDistinct(self, relativeSD=0.05):
+        """
+        .. note:: Experimental
+
+        Return approximate number of distinct elements in the RDD.
+
+        The algorithm used is based on streamlib's implementation of
+        `"HyperLogLog in Practice: Algorithmic Engineering of a State
+        of The Art Cardinality Estimation Algorithm", available here
+        <http://dx.doi.org/10.1145/2452376.2452456>`_.
+
+        :param relativeSD: Relative accuracy. Smaller values create
+                           counters that require more space.
+                           It must be greater than 0.000017.
+
+        >>> n = sc.parallelize(range(1000)).map(str).countApproxDistinct()
+        >>> 900 < n < 1100
+        True
+        >>> n = sc.parallelize([i % 20 for i in range(1000)]).countApproxDistinct()
+        >>> 16 < n < 24
+        True
+        """
+        if relativeSD < 0.000017:
+            raise ValueError("relativeSD should be greater than 0.000017")
+        # the hash space in Java is 2^32
+        hashRDD = self.map(lambda x: portable_hash(x) & 0xFFFFFFFF)
+        return hashRDD._to_java_object_rdd().countApproxDistinct(relativeSD)
+
+    def toLocalIterator(self):
+        """
+        Return an iterator that contains all of the elements in this RDD.
+        The iterator will consume as much memory as the largest partition in this RDD.
+
+        >>> rdd = sc.parallelize(range(10))
+        >>> [x for x in rdd.toLocalIterator()]
+        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+        """
+        with SCCallSiteSync(self.context) as css:
+            sock_info = self.ctx._jvm.PythonRDD.toLocalIteratorAndServe(self._jrdd.rdd())
+        return _load_from_socket(sock_info, self._jrdd_deserializer)
+
+    def barrier(self):
+        """
+        .. note:: Experimental
+
+        Marks the current stage as a barrier stage, where Spark must launch all tasks together.
+        In case of a task failure, instead of only restarting the failed task, Spark will abort the
+        entire stage and relaunch all tasks for this stage.
+        The barrier execution mode feature is experimental and it only handles limited scenarios.
+        Please read the linked SPIP and design docs to understand the limitations and future plans.
+
+        :return: an :class:`RDDBarrier` instance that provides actions within a barrier stage.
+
+        .. seealso:: :class:`BarrierTaskContext`
+        .. seealso:: `SPIP: Barrier Execution Mode
+            <http://jira.apache.org/jira/browse/SPARK-24374>`_
+        .. seealso:: `Design Doc <https://jira.apache.org/jira/browse/SPARK-24582>`_
+
+        .. versionadded:: 2.4.0
+        """
+        return RDDBarrier(self)
+
+    def _is_barrier(self):
+        """
+        Whether this RDD is in a barrier stage.
+        """
+        return self._jrdd.rdd().isBarrier()
+
+
+def _prepare_for_python_RDD(sc, command):
+    # the serialized command will be compressed by broadcast
+    ser = CloudPickleSerializer()
+    pickled_command = ser.dumps(command)
+    if len(pickled_command) > (1 << 20):  # 1M
+        # The broadcast will have same life cycle as created PythonRDD
+        broadcast = sc.broadcast(pickled_command)
+        pickled_command = ser.dumps(broadcast)
+    broadcast_vars = [x._jbroadcast for x in sc._pickled_broadcast_vars]
+    sc._pickled_broadcast_vars.clear()
+    return pickled_command, broadcast_vars, sc.environment, sc._python_includes
+
+
+def _wrap_function(sc, func, deserializer, serializer, profiler=None):
+    assert deserializer, "deserializer should not be empty"
+    assert serializer, "serializer should not be empty"
+    command = (func, profiler, deserializer, serializer)
+    pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
+    return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
+                                  sc.pythonVer, broadcast_vars, sc._javaAccumulator)
+
+
+class RDDBarrier(object):
+
+    """
+    .. note:: Experimental
+
+    Wraps an RDD in a barrier stage, which forces Spark to launch tasks of this stage together.
+    :class:`RDDBarrier` instances are created by :func:`RDD.barrier`.
+
+    .. versionadded:: 2.4.0
+    """
+
+    def __init__(self, rdd):
+        self.rdd = rdd
+
+    def mapPartitions(self, f, preservesPartitioning=False):
+        """
+        .. note:: Experimental
+
+        Returns a new RDD by applying a function to each partition of the wrapped RDD,
+        where tasks are launched together in a barrier stage.
+        The interface is the same as :func:`RDD.mapPartitions`.
+        Please see the API doc there.
+
+        .. versionadded:: 2.4.0
+        """
+        def func(s, iterator):
+            return f(iterator)
+        return PipelinedRDD(self.rdd, func, preservesPartitioning, isFromBarrier=True)
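+
+    # Usage sketch (editor's addition, not in the original source; assumes an
+    # existing SparkContext `sc` with enough slots for all tasks):
+    #
+    #   rdd = sc.parallelize(range(8), 2)
+    #   rdd.barrier().mapPartitions(lambda it: [sum(it)]).collect()
+    #   # -> one partial sum per partition, with both tasks launched together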
+
+
+class PipelinedRDD(RDD):
+
+    """
+    Pipelined maps:
+
+    >>> rdd = sc.parallelize([1, 2, 3, 4])
+    >>> rdd.map(lambda x: 2 * x).cache().map(lambda x: 2 * x).collect()
+    [4, 8, 12, 16]
+    >>> rdd.map(lambda x: 2 * x).map(lambda x: 2 * x).collect()
+    [4, 8, 12, 16]
+
+    Pipelined reduces:
+    >>> from operator import add
+    >>> rdd.map(lambda x: 2 * x).reduce(add)
+    20
+    >>> rdd.flatMap(lambda x: [x, x]).reduce(add)
+    20
+    """
+
+    def __init__(self, prev, func, preservesPartitioning=False, isFromBarrier=False):
+        if not isinstance(prev, PipelinedRDD) or not prev._is_pipelinable():
+            # This transformation is the first in its stage:
+            self.func = func
+            self.preservesPartitioning = preservesPartitioning
+            self._prev_jrdd = prev._jrdd
+            self._prev_jrdd_deserializer = prev._jrdd_deserializer
+        else:
+            prev_func = prev.func
+
+            def pipeline_func(split, iterator):
+                return func(split, prev_func(split, iterator))
+            self.func = pipeline_func
+            self.preservesPartitioning = \
+                prev.preservesPartitioning and preservesPartitioning
+            self._prev_jrdd = prev._prev_jrdd  # maintain the pipeline
+            self._prev_jrdd_deserializer = prev._prev_jrdd_deserializer
+        self.is_cached = False
+        self.is_checkpointed = False
+        self.ctx = prev.ctx
+        self.prev = prev
+        self._jrdd_val = None
+        self._id = None
+        self._jrdd_deserializer = self.ctx.serializer
+        self._bypass_serializer = False
+        self.partitioner = prev.partitioner if self.preservesPartitioning else None
+        self.is_barrier = prev._is_barrier() or isFromBarrier
+
+    def getNumPartitions(self):
+        return self._prev_jrdd.partitions().size()
+
+    @property
+    def _jrdd(self):
+        if self._jrdd_val:
+            return self._jrdd_val
+        if self._bypass_serializer:
+            self._jrdd_deserializer = NoOpSerializer()
+
+        if self.ctx.profiler_collector:
+            profiler = self.ctx.profiler_collector.new_profiler(self.ctx)
+        else:
+            profiler = None
+
+        wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,
+                                      self._jrdd_deserializer, profiler)
+        python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), wrapped_func,
+                                             self.preservesPartitioning, self.is_barrier)
+        self._jrdd_val = python_rdd.asJavaRDD()
+
+        if profiler:
+            self._id = self._jrdd_val.id()
+            self.ctx.profiler_collector.add_profiler(self._id, profiler)
+        return self._jrdd_val
+
+    def id(self):
+        if self._id is None:
+            self._id = self._jrdd.id()
+        return self._id
+
+    def _is_pipelinable(self):
+        return not (self.is_cached or self.is_checkpointed)
+
+    def _is_barrier(self):
+        return self.is_barrier
+
+
+def _test():
+    import doctest
+    from pyspark.context import SparkContext
+    globs = globals().copy()
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    globs['sc'] = SparkContext('local[4]', 'PythonTest')
+    (failure_count, test_count) = doctest.testmod(
+        globs=globs, optionflags=doctest.ELLIPSIS)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/rddsampler.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/rddsampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe8f87324804bc2c8bf39af1f458f8279f78b325
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/rddsampler.py
@@ -0,0 +1,119 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import random
+import math
+
+
+class RDDSamplerBase(object):
+
+    def __init__(self, withReplacement, seed=None):
+        self._seed = seed if seed is not None else random.randint(0, sys.maxsize)
+        self._withReplacement = withReplacement
+        self._random = None
+
+    def initRandomGenerator(self, split):
+        self._random = random.Random(self._seed ^ split)
+
+        # mixing because the initial seeds are close to each other
+        for _ in range(10):
+            self._random.randint(0, 1)
+
+    def getUniformSample(self):
+        return self._random.random()
+
+    def getPoissonSample(self, mean):
+        # Using Knuth's algorithm described in
+        # http://en.wikipedia.org/wiki/Poisson_distribution
+        if mean < 20.0:
+            # one exp and k+1 random calls
+            l = math.exp(-mean)
+            p = self._random.random()
+            k = 0
+            while p > l:
+                k += 1
+                p *= self._random.random()
+        else:
+            # switch to the log domain, k+1 expovariate (random + log) calls
+            p = self._random.expovariate(mean)
+            k = 0
+            while p < 1.0:
+                k += 1
+                p += self._random.expovariate(mean)
+        return k
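+
+    # Editor's note (not in the original source): over many draws the sample
+    # mean of getPoissonSample(mean) approaches `mean`, e.g.
+    #
+    #   s = RDDSamplerBase(True, seed=42)
+    #   s.initRandomGenerator(0)
+    #   draws = [s.getPoissonSample(3.0) for _ in range(100000)]
+    #   sum(draws) / float(len(draws))   # roughly 3.0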
+
+    def func(self, split, iterator):
+        raise NotImplementedError
+
+
+class RDDSampler(RDDSamplerBase):
+
+    def __init__(self, withReplacement, fraction, seed=None):
+        RDDSamplerBase.__init__(self, withReplacement, seed)
+        self._fraction = fraction
+
+    def func(self, split, iterator):
+        self.initRandomGenerator(split)
+        if self._withReplacement:
+            for obj in iterator:
+                # For large datasets, the expected number of occurrences of each element in
+                # a sample with replacement is Poisson(frac). We use that to get a count for
+                # each element.
+                count = self.getPoissonSample(self._fraction)
+                for _ in range(0, count):
+                    yield obj
+        else:
+            for obj in iterator:
+                if self.getUniformSample() < self._fraction:
+                    yield obj
+
+
+class RDDRangeSampler(RDDSamplerBase):
+
+    def __init__(self, lowerBound, upperBound, seed=None):
+        RDDSamplerBase.__init__(self, False, seed)
+        self._lowerBound = lowerBound
+        self._upperBound = upperBound
+
+    def func(self, split, iterator):
+        self.initRandomGenerator(split)
+        for obj in iterator:
+            if self._lowerBound <= self.getUniformSample() < self._upperBound:
+                yield obj
+
+
+class RDDStratifiedSampler(RDDSamplerBase):
+
+    def __init__(self, withReplacement, fractions, seed=None):
+        RDDSamplerBase.__init__(self, withReplacement, seed)
+        self._fractions = fractions
+
+    def func(self, split, iterator):
+        self.initRandomGenerator(split)
+        if self._withReplacement:
+            for key, val in iterator:
+                # For large datasets, the expected number of occurrences of each element in
+                # a sample with replacement is Poisson(frac). We use that to get a count for
+                # each element.
+                count = self.getPoissonSample(self._fractions[key])
+                for _ in range(0, count):
+                    yield key, val
+        else:
+            for key, val in iterator:
+                if self.getUniformSample() < self._fractions[key]:
+                    yield key, val
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/resultiterable.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/resultiterable.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ab5ce14c3531c7aaaa7be5dbd31fae31802b47d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/resultiterable.py
@@ -0,0 +1,39 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import collections
+
+__all__ = ["ResultIterable"]
+
+
+class ResultIterable(collections.Iterable):
+
+    """
+    A special result iterable. This is used because the standard
+    iterator cannot be pickled.
+    """
+
+    def __init__(self, data):
+        self.data = data
+        self.index = 0
+        self.maxindex = len(data)
+
+    def __iter__(self):
+        return iter(self.data)
+
+    def __len__(self):
+        return len(self.data)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/serializers.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/serializers.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff9a612b77f6103b1a04f816fb7d097abb3576fc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/serializers.py
@@ -0,0 +1,789 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+PySpark supports custom serializers for transferring data; this can improve
+performance.
+
+By default, PySpark uses L{PickleSerializer} to serialize objects using Python's
+C{cPickle} serializer, which can serialize nearly any Python object.
+Other serializers, like L{MarshalSerializer}, support fewer datatypes but can be
+faster.
+
+The serializer is chosen when creating L{SparkContext}:
+
+>>> from pyspark.context import SparkContext
+>>> from pyspark.serializers import MarshalSerializer
+>>> sc = SparkContext('local', 'test', serializer=MarshalSerializer())
+>>> sc.parallelize(list(range(1000))).map(lambda x: 2 * x).take(10)
+[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
+>>> sc.stop()
+
+PySpark serializes objects in batches; by default, the batch size is chosen based
+on the size of objects and is also configurable by SparkContext's C{batchSize}
+parameter:
+
+>>> sc = SparkContext('local', 'test', batchSize=2)
+>>> rdd = sc.parallelize(range(16), 4).map(lambda x: x)
+
+Behind the scenes, this creates a JavaRDD with four partitions, each of
+which contains two batches of two objects:
+
+>>> rdd.glom().collect()
+[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]]
+>>> int(rdd._jrdd.count())
+8
+>>> sc.stop()
+"""
+
+import sys
+from itertools import chain, product
+import marshal
+import struct
+import types
+import collections
+import zlib
+import itertools
+
+if sys.version < '3':
+    import cPickle as pickle
+    protocol = 2
+    from itertools import izip as zip, imap as map
+else:
+    import pickle
+    protocol = 3
+    xrange = range
+
+from pyspark import cloudpickle
+from pyspark.util import _exception_message
+
+
+__all__ = ["PickleSerializer", "MarshalSerializer", "UTF8Deserializer"]
+
+
+class SpecialLengths(object):
+    END_OF_DATA_SECTION = -1
+    PYTHON_EXCEPTION_THROWN = -2
+    TIMING_DATA = -3
+    END_OF_STREAM = -4
+    NULL = -5
+    START_ARROW_STREAM = -6
+
+
+class Serializer(object):
+
+    def dump_stream(self, iterator, stream):
+        """
+        Serialize an iterator of objects to the output stream.
+        """
+        raise NotImplementedError
+
+    def load_stream(self, stream):
+        """
+        Return an iterator of deserialized objects from the input stream.
+        """
+        raise NotImplementedError
+
+    def _load_stream_without_unbatching(self, stream):
+        """
+        Return an iterator of deserialized batches (iterables) of objects from the input stream.
+        If the serializer does not operate on batches, the default implementation returns an
+        iterator of single-element lists.
+        """
+        return map(lambda x: [x], self.load_stream(stream))
+
+    # Note: our notion of "equality" is that output generated by
+    # equal serializers can be deserialized using the same serializer.
+
+    # This default implementation handles the simple cases;
+    # subclasses should override __eq__ as appropriate.
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        return "%s()" % self.__class__.__name__
+
+    def __hash__(self):
+        return hash(str(self))
+
+
+class FramedSerializer(Serializer):
+
+    """
+    Serializer that writes objects as a stream of (length, data) pairs,
+    where C{length} is a 32-bit integer and data is C{length} bytes.
+    """
+
+    def __init__(self):
+        # On Python 2.6, we can't write bytearrays to streams, so we need to convert them
+        # to strings first. Check if the version number is that old.
+        self._only_write_strings = sys.version_info[0:2] <= (2, 6)
+
+    def dump_stream(self, iterator, stream):
+        for obj in iterator:
+            self._write_with_length(obj, stream)
+
+    def load_stream(self, stream):
+        while True:
+            try:
+                yield self._read_with_length(stream)
+            except EOFError:
+                return
+
+    def _write_with_length(self, obj, stream):
+        serialized = self.dumps(obj)
+        if serialized is None:
+            raise ValueError("serialized value should not be None")
+        if len(serialized) > (1 << 31):
+            raise ValueError("can not serialize object larger than 2G")
+        write_int(len(serialized), stream)
+        if self._only_write_strings:
+            stream.write(str(serialized))
+        else:
+            stream.write(serialized)
+
+    def _read_with_length(self, stream):
+        length = read_int(stream)
+        if length == SpecialLengths.END_OF_DATA_SECTION:
+            raise EOFError
+        elif length == SpecialLengths.NULL:
+            return None
+        obj = stream.read(length)
+        if len(obj) < length:
+            raise EOFError
+        return self.loads(obj)
+
+    def dumps(self, obj):
+        """
+        Serialize an object into a byte array.
+        When batching is used, this will be called with an array of objects.
+        """
+        raise NotImplementedError
+
+    def loads(self, obj):
+        """
+        Deserialize an object from a byte array.
+        """
+        raise NotImplementedError
+
+
+class ArrowStreamSerializer(Serializer):
+    """
+    Serializes Arrow record batches as a stream.
+    """
+
+    def dump_stream(self, iterator, stream):
+        import pyarrow as pa
+        writer = None
+        try:
+            for batch in iterator:
+                if writer is None:
+                    writer = pa.RecordBatchStreamWriter(stream, batch.schema)
+                writer.write_batch(batch)
+        finally:
+            if writer is not None:
+                writer.close()
+
+    def load_stream(self, stream):
+        import pyarrow as pa
+        reader = pa.open_stream(stream)
+        for batch in reader:
+            yield batch
+
+    def __repr__(self):
+        return "ArrowStreamSerializer"
+
+
+def _create_batch(series, timezone):
+    """
+    Create an Arrow record batch from the given pandas.Series or list of Series, with optional type.
+
+    :param series: A single pandas.Series, list of Series, or list of (series, arrow_type)
+    :param timezone: A timezone to respect when handling timestamp values
+    :return: Arrow RecordBatch
+    """
+    import decimal
+    from distutils.version import LooseVersion
+    import pyarrow as pa
+    from pyspark.sql.types import _check_series_convert_timestamps_internal
+    # Make input conform to [(series1, type1), (series2, type2), ...]
+    if not isinstance(series, (list, tuple)) or \
+            (len(series) == 2 and isinstance(series[1], pa.DataType)):
+        series = [series]
+    series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series)
+
+    def create_array(s, t):
+        mask = s.isnull()
+        # Ensure timestamp series are in expected form for Spark internal representation
+        # TODO: maybe don't need None check anymore as of Arrow 0.9.1
+        if t is not None and pa.types.is_timestamp(t):
+            s = _check_series_convert_timestamps_internal(s.fillna(0), timezone)
+            # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2
+            return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False)
+        elif t is not None and pa.types.is_string(t) and sys.version < '3':
+            # TODO: need decode before converting to Arrow in Python 2
+            # TODO: don't need as of Arrow 0.9.1
+            return pa.Array.from_pandas(s.apply(
+                lambda v: v.decode("utf-8") if isinstance(v, str) else v), mask=mask, type=t)
+        elif t is not None and pa.types.is_decimal(t) and \
+                LooseVersion("0.9.0") <= LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            # TODO: see ARROW-2432. Remove when the minimum PyArrow version becomes 0.10.0.
+            return pa.Array.from_pandas(s.apply(
+                lambda v: decimal.Decimal('NaN') if v is None else v), mask=mask, type=t)
+        return pa.Array.from_pandas(s, mask=mask, type=t)
+
+    arrs = [create_array(s, t) for s, t in series]
+    return pa.RecordBatch.from_arrays(arrs, ["_%d" % i for i in xrange(len(arrs))])
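+
+# Editor's sketch (not part of the original source; assumes pandas and pyarrow
+# are installed):
+#
+#   import pandas as pd
+#   batch = _create_batch([pd.Series([1, 2, 3])], timezone=None)
+#   batch.num_rows      # 3
+#   batch.schema.names  # ['_0']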
+
+
+class ArrowStreamPandasSerializer(Serializer):
+    """
+    Serializes Pandas.Series as Arrow data with Arrow streaming format.
+    """
+
+    def __init__(self, timezone):
+        super(ArrowStreamPandasSerializer, self).__init__()
+        self._timezone = timezone
+
+    def arrow_to_pandas(self, arrow_column):
+        from pyspark.sql.types import from_arrow_type, \
+            _check_series_convert_date, _check_series_localize_timestamps
+
+        s = arrow_column.to_pandas()
+        s = _check_series_convert_date(s, from_arrow_type(arrow_column.type))
+        s = _check_series_localize_timestamps(s, self._timezone)
+        return s
+
+    def dump_stream(self, iterator, stream):
+        """
+        Make ArrowRecordBatches from Pandas Series and serialize. Input is a single series or
+        a list of series accompanied by an optional pyarrow type to coerce the data to.
+        """
+        import pyarrow as pa
+        writer = None
+        try:
+            for series in iterator:
+                batch = _create_batch(series, self._timezone)
+                if writer is None:
+                    write_int(SpecialLengths.START_ARROW_STREAM, stream)
+                    writer = pa.RecordBatchStreamWriter(stream, batch.schema)
+                writer.write_batch(batch)
+        finally:
+            if writer is not None:
+                writer.close()
+
+    def load_stream(self, stream):
+        """
+        Deserialize ArrowRecordBatches to an Arrow table and return as a list of pandas.Series.
+        """
+        import pyarrow as pa
+        reader = pa.open_stream(stream)
+
+        for batch in reader:
+            yield [self.arrow_to_pandas(c) for c in pa.Table.from_batches([batch]).itercolumns()]
+
+    def __repr__(self):
+        return "ArrowStreamPandasSerializer"
+
+
+class BatchedSerializer(Serializer):
+
+    """
+    Serializes a stream of objects in batches by calling its wrapped
+    Serializer with streams of objects.
+    """
+
+    UNLIMITED_BATCH_SIZE = -1
+    UNKNOWN_BATCH_SIZE = 0
+
+    def __init__(self, serializer, batchSize=UNLIMITED_BATCH_SIZE):
+        self.serializer = serializer
+        self.batchSize = batchSize
+
+    def _batched(self, iterator):
+        if self.batchSize == self.UNLIMITED_BATCH_SIZE:
+            yield list(iterator)
+        elif hasattr(iterator, "__len__") and hasattr(iterator, "__getslice__"):
+            n = len(iterator)
+            for i in xrange(0, n, self.batchSize):
+                yield iterator[i: i + self.batchSize]
+        else:
+            items = []
+            count = 0
+            for item in iterator:
+                items.append(item)
+                count += 1
+                if count == self.batchSize:
+                    yield items
+                    items = []
+                    count = 0
+            if items:
+                yield items
+
+    def dump_stream(self, iterator, stream):
+        self.serializer.dump_stream(self._batched(iterator), stream)
+
+    def load_stream(self, stream):
+        return chain.from_iterable(self._load_stream_without_unbatching(stream))
+
+    def _load_stream_without_unbatching(self, stream):
+        return self.serializer.load_stream(stream)
+
+    def __repr__(self):
+        return "BatchedSerializer(%s, %d)" % (str(self.serializer), self.batchSize)
+
+
+class FlattenedValuesSerializer(BatchedSerializer):
+
+    """
+    Serializes a stream of lists of pairs, splitting the lists of values
+    that contain more than a certain number of objects so that the resulting
+    chunks have similar sizes.
+    """
+    def __init__(self, serializer, batchSize=10):
+        BatchedSerializer.__init__(self, serializer, batchSize)
+
+    def _batched(self, iterator):
+        n = self.batchSize
+        for key, values in iterator:
+            for i in range(0, len(values), n):
+                yield key, values[i:i + n]
+
+    def load_stream(self, stream):
+        return self.serializer.load_stream(stream)
+
+    def __repr__(self):
+        return "FlattenedValuesSerializer(%s, %d)" % (self.serializer, self.batchSize)
+
+
+class AutoBatchedSerializer(BatchedSerializer):
+    """
+    Chooses the batch size automatically based on the size of the serialized objects.
+    """
+
+    def __init__(self, serializer, bestSize=1 << 16):
+        BatchedSerializer.__init__(self, serializer, self.UNKNOWN_BATCH_SIZE)
+        self.bestSize = bestSize
+
+    def dump_stream(self, iterator, stream):
+        batch, best = 1, self.bestSize
+        iterator = iter(iterator)
+        while True:
+            vs = list(itertools.islice(iterator, batch))
+            if not vs:
+                break
+
+            bytes = self.serializer.dumps(vs)
+            write_int(len(bytes), stream)
+            stream.write(bytes)
+
+            size = len(bytes)
+            if size < best:
+                batch *= 2
+            elif size > best * 10 and batch > 1:
+                batch //= 2
+
+    def __repr__(self):
+        return "AutoBatchedSerializer(%s)" % self.serializer
+
+
+class CartesianDeserializer(Serializer):
+
+    """
+    Deserializes the JavaRDD cartesian() of two PythonRDDs.
+    Due to pyspark batching we cannot simply use the result of the Java RDD cartesian;
+    we additionally need to do the cartesian within each pair of batches.
+    """
+
+    def __init__(self, key_ser, val_ser):
+        self.key_ser = key_ser
+        self.val_ser = val_ser
+
+    def _load_stream_without_unbatching(self, stream):
+        key_batch_stream = self.key_ser._load_stream_without_unbatching(stream)
+        val_batch_stream = self.val_ser._load_stream_without_unbatching(stream)
+        for (key_batch, val_batch) in zip(key_batch_stream, val_batch_stream):
+            # for correctness with repeated cartesian/zip this must be returned as one batch
+            yield product(key_batch, val_batch)
+
+    def load_stream(self, stream):
+        return chain.from_iterable(self._load_stream_without_unbatching(stream))
+
+    def __repr__(self):
+        return "CartesianDeserializer(%s, %s)" % \
+               (str(self.key_ser), str(self.val_ser))
+
+
+class PairDeserializer(Serializer):
+
+    """
+    Deserializes the JavaRDD zip() of two PythonRDDs.
+    Due to pyspark batching we cannot simply use the result of the Java RDD zip;
+    we additionally need to do the zip within each pair of batches.
+    """
+
+    def __init__(self, key_ser, val_ser):
+        self.key_ser = key_ser
+        self.val_ser = val_ser
+
+    def _load_stream_without_unbatching(self, stream):
+        key_batch_stream = self.key_ser._load_stream_without_unbatching(stream)
+        val_batch_stream = self.val_ser._load_stream_without_unbatching(stream)
+        for (key_batch, val_batch) in zip(key_batch_stream, val_batch_stream):
+            # For double-zipped RDDs, the batches can be iterators from other PairDeserializer,
+            # instead of lists. We need to convert them to lists if needed.
+            key_batch = key_batch if hasattr(key_batch, '__len__') else list(key_batch)
+            val_batch = val_batch if hasattr(val_batch, '__len__') else list(val_batch)
+            if len(key_batch) != len(val_batch):
+                raise ValueError("Can not deserialize PairRDD with different number of items"
+                                 " in batches: (%d, %d)" % (len(key_batch), len(val_batch)))
+            # for correctness with repeated cartesian/zip this must be returned as one batch
+            yield zip(key_batch, val_batch)
+
+    def load_stream(self, stream):
+        return chain.from_iterable(self._load_stream_without_unbatching(stream))
+
+    def __repr__(self):
+        return "PairDeserializer(%s, %s)" % (str(self.key_ser), str(self.val_ser))
+
+
+class NoOpSerializer(FramedSerializer):
+
+    def loads(self, obj):
+        return obj
+
+    def dumps(self, obj):
+        return obj
+
+
+# Hack namedtuple, make it picklable
+
+__cls = {}
+
+
+def _restore(name, fields, value):
+    """ Restore an object of namedtuple"""
+    k = (name, fields)
+    cls = __cls.get(k)
+    if cls is None:
+        cls = collections.namedtuple(name, fields)
+        __cls[k] = cls
+    return cls(*value)
+
+
+def _hack_namedtuple(cls):
+    """ Make class generated by namedtuple picklable """
+    name = cls.__name__
+    fields = cls._fields
+
+    def __reduce__(self):
+        return (_restore, (name, fields, tuple(self)))
+    cls.__reduce__ = __reduce__
+    cls._is_namedtuple_ = True
+    return cls
+
+
+def _hijack_namedtuple():
+    """ Hack namedtuple() to make it picklable """
+    # hijack only one time
+    if hasattr(collections.namedtuple, "__hijack"):
+        return
+
+    global _old_namedtuple  # or it will be put in the closure
+    global _old_namedtuple_kwdefaults  # or it will be put in the closure too
+
+    def _copy_func(f):
+        return types.FunctionType(f.__code__, f.__globals__, f.__name__,
+                                  f.__defaults__, f.__closure__)
+
+    def _kwdefaults(f):
+        # __kwdefaults__ contains the default values of keyword-only arguments, which were
+        # introduced in Python 3. The possible cases for __kwdefaults__ in namedtuple
+        # are as below:
+        #
+        # - Does not exist in Python 2.
+        # - Returns None in <= Python 3.5.x.
+        # - Returns a dictionary mapping keys to their default values from Python 3.6.x
+        #   (see https://bugs.python.org/issue25628).
+        kargs = getattr(f, "__kwdefaults__", None)
+        if kargs is None:
+            return {}
+        else:
+            return kargs
+
+    _old_namedtuple = _copy_func(collections.namedtuple)
+    _old_namedtuple_kwdefaults = _kwdefaults(collections.namedtuple)
+
+    def namedtuple(*args, **kwargs):
+        for k, v in _old_namedtuple_kwdefaults.items():
+            kwargs[k] = kwargs.get(k, v)
+        cls = _old_namedtuple(*args, **kwargs)
+        return _hack_namedtuple(cls)
+
+    # replace namedtuple with the new one
+    collections.namedtuple.__globals__["_old_namedtuple_kwdefaults"] = _old_namedtuple_kwdefaults
+    collections.namedtuple.__globals__["_old_namedtuple"] = _old_namedtuple
+    collections.namedtuple.__globals__["_hack_namedtuple"] = _hack_namedtuple
+    collections.namedtuple.__code__ = namedtuple.__code__
+    collections.namedtuple.__hijack = 1
+
+    # hack the classes already generated by namedtuple.
+    # Those created in other modules can be pickled as normal,
+    # so only hack those in the __main__ module
+    for n, o in sys.modules["__main__"].__dict__.items():
+        if (type(o) is type and o.__base__ is tuple
+                and hasattr(o, "_fields")
+                and "__reduce__" not in o.__dict__):
+            _hack_namedtuple(o)  # hack inplace
+
+
+_hijack_namedtuple()
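+
+# Illustrative sketch (not part of the original file): after the hijack above, any
+# namedtuple class created from now on reduces to (_restore, (name, fields, values)),
+# so instances can be unpickled on a worker even if the class itself is not
+# importable there. The class name below is made up:
+#
+#   >>> import collections, pickle
+#   >>> Point = collections.namedtuple("Point", "x y")
+#   >>> Point._is_namedtuple_
+#   True
+#   >>> pickle.loads(pickle.dumps(Point(1, 2)))
+#   Point(x=1, y=2)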
+
+
+class PickleSerializer(FramedSerializer):
+
+    """
+    Serializes objects using Python's pickle serializer:
+
+        http://docs.python.org/2/library/pickle.html
+
+    This serializer supports nearly any Python object, but may
+    not be as fast as more specialized serializers.
+    """
+
+    def dumps(self, obj):
+        return pickle.dumps(obj, protocol)
+
+    if sys.version >= '3':
+        def loads(self, obj, encoding="bytes"):
+            return pickle.loads(obj, encoding=encoding)
+    else:
+        def loads(self, obj, encoding=None):
+            return pickle.loads(obj)
+
+
+class CloudPickleSerializer(PickleSerializer):
+
+    def dumps(self, obj):
+        try:
+            return cloudpickle.dumps(obj, 2)
+        except pickle.PickleError:
+            raise
+        except Exception as e:
+            emsg = _exception_message(e)
+            if "'i' format requires" in emsg:
+                msg = "Object too large to serialize: %s" % emsg
+            else:
+                msg = "Could not serialize object: %s: %s" % (e.__class__.__name__, emsg)
+            cloudpickle.print_exec(sys.stderr)
+            raise pickle.PicklingError(msg)
+
+
+class MarshalSerializer(FramedSerializer):
+
+    """
+    Serializes objects using Python's Marshal serializer:
+
+        http://docs.python.org/2/library/marshal.html
+
+    This serializer is faster than PickleSerializer but supports fewer datatypes.
+    """
+
+    def dumps(self, obj):
+        return marshal.dumps(obj)
+
+    def loads(self, obj):
+        return marshal.loads(obj)
+
+
+class AutoSerializer(FramedSerializer):
+
+    """
+    Choose marshal or pickle as serialization protocol automatically
+    """
+
+    def __init__(self):
+        FramedSerializer.__init__(self)
+        self._type = None
+
+    def dumps(self, obj):
+        if self._type is not None:
+            return b'P' + pickle.dumps(obj, -1)
+        try:
+            return b'M' + marshal.dumps(obj)
+        except Exception:
+            self._type = b'P'
+            return b'P' + pickle.dumps(obj, -1)
+
+    def loads(self, obj):
+        _type = obj[0]
+        if _type == b'M':
+            return marshal.loads(obj[1:])
+        elif _type == b'P':
+            return pickle.loads(obj[1:])
+        else:
+            raise ValueError("invalid serialization type: %s" % _type)
+
+
+class CompressedSerializer(FramedSerializer):
+    """
+    Compress the serialized data
+    """
+    def __init__(self, serializer):
+        FramedSerializer.__init__(self)
+        assert isinstance(serializer, FramedSerializer), "serializer must be a FramedSerializer"
+        self.serializer = serializer
+
+    def dumps(self, obj):
+        return zlib.compress(self.serializer.dumps(obj), 1)
+
+    def loads(self, obj):
+        return self.serializer.loads(zlib.decompress(obj))
+
+    def __repr__(self):
+        return "CompressedSerializer(%s)" % self.serializer
+
+
+class UTF8Deserializer(Serializer):
+
+    """
+    Deserializes streams written by String.getBytes.
+    """
+
+    def __init__(self, use_unicode=True):
+        self.use_unicode = use_unicode
+
+    def loads(self, stream):
+        length = read_int(stream)
+        if length == SpecialLengths.END_OF_DATA_SECTION:
+            raise EOFError
+        elif length == SpecialLengths.NULL:
+            return None
+        s = stream.read(length)
+        return s.decode("utf-8") if self.use_unicode else s
+
+    def load_stream(self, stream):
+        try:
+            while True:
+                yield self.loads(stream)
+        except struct.error:
+            return
+        except EOFError:
+            return
+
+    def __repr__(self):
+        return "UTF8Deserializer(%s)" % self.use_unicode
+
+
+def read_long(stream):
+    length = stream.read(8)
+    if not length:
+        raise EOFError
+    return struct.unpack("!q", length)[0]
+
+
+def write_long(value, stream):
+    stream.write(struct.pack("!q", value))
+
+
+def pack_long(value):
+    return struct.pack("!q", value)
+
+
+def read_int(stream):
+    length = stream.read(4)
+    if not length:
+        raise EOFError
+    return struct.unpack("!i", length)[0]
+
+
+def write_int(value, stream):
+    stream.write(struct.pack("!i", value))
+
+
+def read_bool(stream):
+    length = stream.read(1)
+    if not length:
+        raise EOFError
+    return struct.unpack("!?", length)[0]
+
+
+def write_with_length(obj, stream):
+    write_int(len(obj), stream)
+    stream.write(obj)
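+
+# Illustrative sketch (not part of the original file): the framing helpers above use
+# big-endian, fixed-width encodings, so values round-trip through any file-like
+# object:
+#
+#   >>> import io
+#   >>> buf = io.BytesIO()
+#   >>> write_int(7, buf); write_long(2 ** 40, buf)
+#   >>> _ = buf.seek(0)
+#   >>> read_int(buf), read_long(buf)
+#   (7, 1099511627776)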
+
+
+class ChunkedStream(object):
+
+    """
+    This is a file-like object that takes a stream of data, of unknown length, and breaks it into
+    fixed-length frames.  The intended use case is serializing large data and sending it immediately
+    over a socket -- we do not want to buffer the entire data before sending it, but the receiving
+    end needs to know whether or not there is more data coming.
+
+    It works by buffering the incoming data in some fixed-size chunks.  If the buffer is full, it
+    first sends the buffer size, then the data.  This repeats as long as there is more data to send.
+    When this is closed, it sends the length of whatever data is in the buffer, then that data, and
+    finally a "length" of -1 to indicate the stream has completed.
+    """
+
+    def __init__(self, wrapped, buffer_size):
+        self.buffer_size = buffer_size
+        self.buffer = bytearray(buffer_size)
+        self.current_pos = 0
+        self.wrapped = wrapped
+
+    def write(self, bytes):
+        byte_pos = 0
+        byte_remaining = len(bytes)
+        while byte_remaining > 0:
+            new_pos = byte_remaining + self.current_pos
+            if new_pos < self.buffer_size:
+                # just put it in our buffer
+                self.buffer[self.current_pos:new_pos] = bytes[byte_pos:]
+                self.current_pos = new_pos
+                byte_remaining = 0
+            else:
+                # fill the buffer, send the length then the contents, and start filling again
+                space_left = self.buffer_size - self.current_pos
+                new_byte_pos = byte_pos + space_left
+                self.buffer[self.current_pos:self.buffer_size] = bytes[byte_pos:new_byte_pos]
+                write_int(self.buffer_size, self.wrapped)
+                self.wrapped.write(self.buffer)
+                byte_remaining -= space_left
+                byte_pos = new_byte_pos
+                self.current_pos = 0
+
+    def close(self):
+        # if there is anything left in the buffer, write it out first
+        if self.current_pos > 0:
+            write_int(self.current_pos, self.wrapped)
+            self.wrapped.write(self.buffer[:self.current_pos])
+        # -1 length indicates to the receiving end that we're done.
+        write_int(-1, self.wrapped)
+        self.wrapped.close()
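+
+# Illustrative sketch (not part of the original file): how the framing looks for a
+# small buffer size. Each frame is a 4-byte length followed by that many bytes; a
+# length of -1 written by close() marks the end of the stream:
+#
+#   >>> import io
+#   >>> out = io.BytesIO()
+#   >>> chunked = ChunkedStream(out, buffer_size=4)
+#   >>> chunked.write(b"abcdef")   # fills the 4-byte buffer once and flushes it
+#   >>> out.getvalue()             # "ef" is still buffered at this point
+#   b'\x00\x00\x00\x04abcd'
+#   >>> chunked.close()            # flushes "ef" as a 2-byte frame, writes -1, closes out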
+
+
+if __name__ == '__main__':
+    import doctest
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/shell.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/shell.py
new file mode 100644
index 0000000000000000000000000000000000000000..65e3bdbc05ce8e014d9f5d1fe67a072c658ac06d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/shell.py
@@ -0,0 +1,76 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+An interactive shell.
+
+This file is designed to be launched as a PYTHONSTARTUP script.
+"""
+
+import atexit
+import os
+import platform
+import warnings
+
+import py4j
+
+from pyspark import SparkConf
+from pyspark.context import SparkContext
+from pyspark.sql import SparkSession, SQLContext
+
+if os.environ.get("SPARK_EXECUTOR_URI"):
+    SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"])
+
+SparkContext._ensure_initialized()
+
+try:
+    spark = SparkSession._create_shell_session()
+except Exception:
+    import sys
+    import traceback
+    warnings.warn("Failed to initialize Spark session.")
+    traceback.print_exc(file=sys.stderr)
+    sys.exit(1)
+
+sc = spark.sparkContext
+sql = spark.sql
+atexit.register(lambda: sc.stop())
+
+# for compatibility
+sqlContext = spark._wrapped
+sqlCtx = sqlContext
+
+print(r"""Welcome to
+      ____              __
+     / __/__  ___ _____/ /__
+    _\ \/ _ \/ _ `/ __/  '_/
+   /__ / .__/\_,_/_/ /_/\_\   version %s
+      /_/
+""" % sc.version)
+print("Using Python version %s (%s, %s)" % (
+    platform.python_version(),
+    platform.python_build()[0],
+    platform.python_build()[1]))
+print("SparkSession available as 'spark'.")
+
+# The ./bin/pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP,
+# which allows us to execute the user's PYTHONSTARTUP file:
+_pythonstartup = os.environ.get('OLD_PYTHONSTARTUP')
+if _pythonstartup and os.path.isfile(_pythonstartup):
+    with open(_pythonstartup) as f:
+        code = compile(f.read(), _pythonstartup, 'exec')
+        exec(code)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/shuffle.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/shuffle.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd0ac0039ffe1b130a674ab7550e188001c44f13
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/shuffle.py
@@ -0,0 +1,815 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import platform
+import shutil
+import warnings
+import gc
+import itertools
+import operator
+import random
+import sys
+
+import pyspark.heapq3 as heapq
+from pyspark.serializers import BatchedSerializer, PickleSerializer, FlattenedValuesSerializer, \
+    CompressedSerializer, AutoBatchedSerializer
+from pyspark.util import fail_on_stopiteration
+
+
+try:
+    import psutil
+
+    process = None
+
+    def get_used_memory():
+        """ Return the used memory in MB """
+        global process
+        if process is None or process._pid != os.getpid():
+            process = psutil.Process(os.getpid())
+        if hasattr(process, "memory_info"):
+            info = process.memory_info()
+        else:
+            info = process.get_memory_info()
+        return info.rss >> 20
+
+except ImportError:
+
+    def get_used_memory():
+        """ Return the used memory in MB """
+        if platform.system() == 'Linux':
+            for line in open('/proc/self/status'):
+                if line.startswith('VmRSS:'):
+                    return int(line.split()[1]) >> 10
+
+        else:
+            warnings.warn("Please install psutil to have better "
+                          "support with spilling")
+            if platform.system() == "Darwin":
+                import resource
+                rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+                return rss >> 20
+            # TODO: support windows
+
+        return 0
+
+
+def _get_local_dirs(sub):
+    """ Get all the directories """
+    path = os.environ.get("SPARK_LOCAL_DIRS", "/tmp")
+    dirs = path.split(",")
+    if len(dirs) > 1:
+        # different order in different processes and instances
+        rnd = random.Random(os.getpid() + id(dirs))
+        random.shuffle(dirs, rnd.random)
+    return [os.path.join(d, "python", str(os.getpid()), sub) for d in dirs]
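+
+# Illustrative note (not part of the original file): with a hypothetical
+# SPARK_LOCAL_DIRS="/disk1,/disk2", the directories returned above look like
+# "/disk1/python/<pid>/<sub>" and "/disk2/python/<pid>/<sub>", shuffled per
+# process so that concurrent workers spread their spill files across disks.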
+
+
+# global stats
+MemoryBytesSpilled = 0
+DiskBytesSpilled = 0
+
+
+class Aggregator(object):
+
+    """
+    Aggregator has three functions to merge values into a combiner.
+
+    createCombiner:  (value) -> combiner
+    mergeValue:      (combiner, value) -> combiner
+    mergeCombiners:  (combiner, combiner) -> combiner
+    """
+
+    def __init__(self, createCombiner, mergeValue, mergeCombiners):
+        self.createCombiner = fail_on_stopiteration(createCombiner)
+        self.mergeValue = fail_on_stopiteration(mergeValue)
+        self.mergeCombiners = fail_on_stopiteration(mergeCombiners)
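+
+# Illustrative sketch (not part of the original file): an Aggregator whose combiner
+# is a (sum, count) pair, e.g. as the basis for computing an average. The lambdas
+# below are made up for illustration:
+#
+#   >>> agg = Aggregator(lambda v: (v, 1),
+#   ...                  lambda c, v: (c[0] + v, c[1] + 1),
+#   ...                  lambda c1, c2: (c1[0] + c2[0], c1[1] + c2[1]))
+#   >>> agg.mergeValue(agg.createCombiner(3), 5)
+#   (8, 2)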
+
+
+class SimpleAggregator(Aggregator):
+
+    """
+    SimpleAggregator is useful for the cases in which the combiners
+    have the same type as the values
+    """
+
+    def __init__(self, combiner):
+        Aggregator.__init__(self, lambda x: x, combiner, combiner)
+
+
+class Merger(object):
+
+    """
+    Merge shuffled data together using an aggregator
+    """
+
+    def __init__(self, aggregator):
+        self.agg = aggregator
+
+    def mergeValues(self, iterator):
+        """ Combine the items by creator and combiner """
+        raise NotImplementedError
+
+    def mergeCombiners(self, iterator):
+        """ Merge the combined items by mergeCombiner """
+        raise NotImplementedError
+
+    def items(self):
+        """ Return the merged items ad iterator """
+        raise NotImplementedError
+
+
+def _compressed_serializer(self, serializer=None):
+    # always use PickleSerializer to simplify implementation
+    ser = PickleSerializer()
+    return AutoBatchedSerializer(CompressedSerializer(ser))
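+
+# Illustrative sketch (not part of the original file): the spill serializer built
+# above is a small stack -- pickle each object, zlib-compress each frame, and
+# batch adaptively -- so spilled files stay compact. Reprs shown are approximate:
+#
+#   >>> _compressed_serializer(None)
+#   AutoBatchedSerializer(CompressedSerializer(PickleSerializer()))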
+
+
+class ExternalMerger(Merger):
+
+    """
+    ExternalMerger dumps the aggregated data to disk when memory
+    usage goes above the limit, then merges the spilled data back
+    together.
+
+    This class works as follows:
+
+    - It repeatedly combines the items and saves them in one dict
+      in memory.
+
+    - When the used memory goes above the memory limit, it splits
+      the combined data into partitions by hash code and dumps them
+      to disk, one file per partition.
+
+    - Then it goes through the rest of the iterator, combining items
+      into different dicts by hash. Whenever the used memory goes
+      over the memory limit, it dumps all the dicts to disk, one
+      file per dict. This repeats until all the items have been
+      combined.
+
+    - Before returning any items, it loads each partition and
+      combines its items separately, yielding them before loading
+      the next partition.
+
+    - While loading a partition, if the memory goes over the limit,
+      it partitions the loaded data, dumps it to disk, and loads it
+      back partition by partition again.
+
+    `data` and `pdata` are used to hold the merged items in memory.
+    At first, all the data is merged into `data`. Once the used
+    memory goes over the limit, the items in `data` are dumped to
+    disk and `data` is cleared; the rest of the items are then
+    merged into `pdata` and dumped to disk as well. Before
+    returning, all the items in `pdata` are dumped to disk.
+
+    Finally, if any items were spilled to disk, each partition is
+    merged into `data`, yielded, and then cleared.
+
+    >>> agg = SimpleAggregator(lambda x, y: x + y)
+    >>> merger = ExternalMerger(agg, 10)
+    >>> N = 10000
+    >>> merger.mergeValues(zip(range(N), range(N)))
+    >>> assert merger.spills > 0
+    >>> sum(v for k,v in merger.items())
+    49995000
+
+    >>> merger = ExternalMerger(agg, 10)
+    >>> merger.mergeCombiners(zip(range(N), range(N)))
+    >>> assert merger.spills > 0
+    >>> sum(v for k,v in merger.items())
+    49995000
+    """
+
+    # the max total partitions created recursively
+    MAX_TOTAL_PARTITIONS = 4096
+
+    def __init__(self, aggregator, memory_limit=512, serializer=None,
+                 localdirs=None, scale=1, partitions=59, batch=1000):
+        Merger.__init__(self, aggregator)
+        self.memory_limit = memory_limit
+        self.serializer = _compressed_serializer(serializer)
+        self.localdirs = localdirs or _get_local_dirs(str(id(self)))
+        # number of partitions used when spilling data to disk
+        self.partitions = partitions
+        # check memory usage after this many items have been merged
+        self.batch = batch
+        # scale is used to scale down the hash of the key for the recursive hash map
+        self.scale = scale
+        # un-partitioned merged data
+        self.data = {}
+        # partitioned merged data, list of dicts
+        self.pdata = []
+        # number of chunks dumped into disks
+        self.spills = 0
+        # randomize the hash of key, id(o) is the address of o (aligned by 8)
+        self._seed = id(self) + 7
+
+    def _get_spill_dir(self, n):
+        """ Choose one directory for spill by number n """
+        return os.path.join(self.localdirs[n % len(self.localdirs)], str(n))
+
+    def _next_limit(self):
+        """
+        Return the next memory limit. If the memory is not released
+        after spilling, it will dump the data only when the used memory
+        starts to increase.
+        """
+        return max(self.memory_limit, get_used_memory() * 1.05)
+
+    def mergeValues(self, iterator):
+        """ Combine the items by creator and combiner """
+        # speedup attribute lookup
+        creator, comb = self.agg.createCombiner, self.agg.mergeValue
+        c, data, pdata, hfun, batch = 0, self.data, self.pdata, self._partition, self.batch
+        limit = self.memory_limit
+
+        for k, v in iterator:
+            d = pdata[hfun(k)] if pdata else data
+            d[k] = comb(d[k], v) if k in d else creator(v)
+
+            c += 1
+            if c >= batch:
+                if get_used_memory() >= limit:
+                    self._spill()
+                    limit = self._next_limit()
+                    batch /= 2
+                    c = 0
+                else:
+                    batch *= 1.5
+
+        if get_used_memory() >= limit:
+            self._spill()
+
+    def _partition(self, key):
+        """ Return the partition for key """
+        return hash((key, self._seed)) % self.partitions
+
+    def _object_size(self, obj):
+        """ How much of memory for this obj, assume that all the objects
+        consume similar bytes of memory
+        """
+        return 1
+
+    def mergeCombiners(self, iterator, limit=None):
+        """ Merge (K,V) pair by mergeCombiner """
+        if limit is None:
+            limit = self.memory_limit
+        # speedup attribute lookup
+        comb, hfun, objsize = self.agg.mergeCombiners, self._partition, self._object_size
+        c, data, pdata, batch = 0, self.data, self.pdata, self.batch
+        for k, v in iterator:
+            d = pdata[hfun(k)] if pdata else data
+            d[k] = comb(d[k], v) if k in d else v
+            if not limit:
+                continue
+
+            c += objsize(v)
+            if c > batch:
+                if get_used_memory() > limit:
+                    self._spill()
+                    limit = self._next_limit()
+                    batch /= 2
+                    c = 0
+                else:
+                    batch *= 1.5
+
+        if limit and get_used_memory() >= limit:
+            self._spill()
+
+    def _spill(self):
+        """
+        Dump the already-partitioned data to disk.
+
+        The data is dumped in batches for better performance.
+        """
+        global MemoryBytesSpilled, DiskBytesSpilled
+        path = self._get_spill_dir(self.spills)
+        if not os.path.exists(path):
+            os.makedirs(path)
+
+        used_memory = get_used_memory()
+        if not self.pdata:
+            # The data has not been partitioned yet: iterate over the
+            # dataset once and write the items into different files,
+            # using no additional memory. This branch is only taken the
+            # first time memory goes above the limit.
+
+            # open all the files for writing
+            streams = [open(os.path.join(path, str(i)), 'wb')
+                       for i in range(self.partitions)]
+
+            for k, v in self.data.items():
+                h = self._partition(k)
+                # put one item per batch to stay compatible with load_stream;
+                # dumping them in larger batches would increase memory usage
+                self.serializer.dump_stream([(k, v)], streams[h])
+
+            for s in streams:
+                DiskBytesSpilled += s.tell()
+                s.close()
+
+            self.data.clear()
+            self.pdata.extend([{} for i in range(self.partitions)])
+
+        else:
+            for i in range(self.partitions):
+                p = os.path.join(path, str(i))
+                with open(p, "wb") as f:
+                    # dump items in batch
+                    self.serializer.dump_stream(iter(self.pdata[i].items()), f)
+                self.pdata[i].clear()
+                DiskBytesSpilled += os.path.getsize(p)
+
+        self.spills += 1
+        gc.collect()  # release the memory as much as possible
+        MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20
+
+    def items(self):
+        """ Return all merged items as iterator """
+        if not self.pdata and not self.spills:
+            return iter(self.data.items())
+        return self._external_items()
+
+    def _external_items(self):
+        """ Return all partitioned items as iterator """
+        assert not self.data
+        if any(self.pdata):
+            self._spill()
+        # disable partitioning and spilling when merge combiners from disk
+        self.pdata = []
+
+        try:
+            for i in range(self.partitions):
+                for v in self._merged_items(i):
+                    yield v
+                self.data.clear()
+
+                # remove the merged partition
+                for j in range(self.spills):
+                    path = self._get_spill_dir(j)
+                    os.remove(os.path.join(path, str(i)))
+        finally:
+            self._cleanup()
+
+    def _merged_items(self, index):
+        self.data = {}
+        limit = self._next_limit()
+        for j in range(self.spills):
+            path = self._get_spill_dir(j)
+            p = os.path.join(path, str(index))
+            # do not check memory during merging
+            with open(p, "rb") as f:
+                self.mergeCombiners(self.serializer.load_stream(f), 0)
+
+            # limit the total partitions
+            if (self.scale * self.partitions < self.MAX_TOTAL_PARTITIONS
+                    and j < self.spills - 1
+                    and get_used_memory() > limit):
+                self.data.clear()  # will read from disk again
+                gc.collect()  # release the memory as much as possible
+                return self._recursive_merged_items(index)
+
+        return self.data.items()
+
+    def _recursive_merged_items(self, index):
+        """
+        Merge the partitioned items and return them as an iterator.
+
+        If one partition cannot fit in memory, it will be
+        partitioned and merged recursively.
+        """
+        subdirs = [os.path.join(d, "parts", str(index)) for d in self.localdirs]
+        m = ExternalMerger(self.agg, self.memory_limit, self.serializer, subdirs,
+                           self.scale * self.partitions, self.partitions, self.batch)
+        m.pdata = [{} for _ in range(self.partitions)]
+        limit = self._next_limit()
+
+        for j in range(self.spills):
+            path = self._get_spill_dir(j)
+            p = os.path.join(path, str(index))
+            with open(p, 'rb') as f:
+                m.mergeCombiners(self.serializer.load_stream(f), 0)
+
+            if get_used_memory() > limit:
+                m._spill()
+                limit = self._next_limit()
+
+        return m._external_items()
+
+    def _cleanup(self):
+        """ Clean up all the files in disks """
+        for d in self.localdirs:
+            shutil.rmtree(d, True)
+
+
+class ExternalSorter(object):
+    """
+    ExternalSorter will divide the elements into chunks, sort them in
+    memory, dump them to disk, and finally merge them back together.
+
+    The spilling will only happen when the used memory goes above
+    the limit.
+
+
+    >>> sorter = ExternalSorter(1)  # 1M
+    >>> import random
+    >>> l = list(range(1024))
+    >>> random.shuffle(l)
+    >>> sorted(l) == list(sorter.sorted(l))
+    True
+    >>> sorted(l) == list(sorter.sorted(l, key=lambda x: -x, reverse=True))
+    True
+    """
+    def __init__(self, memory_limit, serializer=None):
+        self.memory_limit = memory_limit
+        self.local_dirs = _get_local_dirs("sort")
+        self.serializer = _compressed_serializer(serializer)
+
+    def _get_path(self, n):
+        """ Choose one directory for spill by number n """
+        d = self.local_dirs[n % len(self.local_dirs)]
+        if not os.path.exists(d):
+            os.makedirs(d)
+        return os.path.join(d, str(n))
+
+    def _next_limit(self):
+        """
+        Return the next memory limit. If the memory is not released
+        after spilling, it will dump the data only when the used memory
+        starts to increase.
+        """
+        return max(self.memory_limit, get_used_memory() * 1.05)
+
+    def sorted(self, iterator, key=None, reverse=False):
+        """
+        Sort the elements in the iterator, doing an external sort when the
+        memory usage goes above the limit.
+        """
+        global MemoryBytesSpilled, DiskBytesSpilled
+        batch, limit = 100, self._next_limit()
+        chunks, current_chunk = [], []
+        iterator = iter(iterator)
+        while True:
+            # pick elements in batch
+            chunk = list(itertools.islice(iterator, batch))
+            current_chunk.extend(chunk)
+            if len(chunk) < batch:
+                break
+
+            used_memory = get_used_memory()
+            if used_memory > limit:
+                # sorting them in place saves memory
+                current_chunk.sort(key=key, reverse=reverse)
+                path = self._get_path(len(chunks))
+                with open(path, 'wb') as f:
+                    self.serializer.dump_stream(current_chunk, f)
+
+                def load(f):
+                    for v in self.serializer.load_stream(f):
+                        yield v
+                    # close the file explicitly once we have consumed all the items
+                    # to avoid a ResourceWarning in Python 3
+                    f.close()
+                chunks.append(load(open(path, 'rb')))
+                current_chunk = []
+                MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20
+                DiskBytesSpilled += os.path.getsize(path)
+                os.unlink(path)  # data will be deleted after close
+
+            elif not chunks:
+                batch = min(int(batch * 1.5), 10000)
+
+        current_chunk.sort(key=key, reverse=reverse)
+        if not chunks:
+            return current_chunk
+
+        if current_chunk:
+            chunks.append(iter(current_chunk))
+
+        return heapq.merge(chunks, key=key, reverse=reverse)
+
+
+class ExternalList(object):
+    """
+    ExternalList can hold many items, more than can be held in memory
+    at the same time.
+
+    >>> l = ExternalList(list(range(100)))
+    >>> len(l)
+    100
+    >>> l.append(10)
+    >>> len(l)
+    101
+    >>> for i in range(20240):
+    ...     l.append(i)
+    >>> len(l)
+    20341
+    >>> import pickle
+    >>> l2 = pickle.loads(pickle.dumps(l))
+    >>> len(l2)
+    20341
+    >>> list(l2)[100]
+    10
+    """
+    LIMIT = 10240
+
+    def __init__(self, values):
+        self.values = values
+        self.count = len(values)
+        self._file = None
+        self._ser = None
+
+    def __getstate__(self):
+        if self._file is not None:
+            self._file.flush()
+            with os.fdopen(os.dup(self._file.fileno()), "rb") as f:
+                f.seek(0)
+                serialized = f.read()
+        else:
+            serialized = b''
+        return self.values, self.count, serialized
+
+    def __setstate__(self, item):
+        self.values, self.count, serialized = item
+        if serialized:
+            self._open_file()
+            self._file.write(serialized)
+        else:
+            self._file = None
+            self._ser = None
+
+    def __iter__(self):
+        if self._file is not None:
+            self._file.flush()
+            # read all items from disks first
+            with os.fdopen(os.dup(self._file.fileno()), 'rb') as f:
+                f.seek(0)
+                for v in self._ser.load_stream(f):
+                    yield v
+
+        for v in self.values:
+            yield v
+
+    def __len__(self):
+        return self.count
+
+    def append(self, value):
+        self.values.append(value)
+        self.count += 1
+        # dump the values to disk once there are too many of them
+        if len(self.values) >= self.LIMIT:
+            self._spill()
+
+    def _open_file(self):
+        dirs = _get_local_dirs("objects")
+        d = dirs[id(self) % len(dirs)]
+        if not os.path.exists(d):
+            os.makedirs(d)
+        p = os.path.join(d, str(id(self)))
+        self._file = open(p, "w+b", 65536)
+        self._ser = BatchedSerializer(CompressedSerializer(PickleSerializer()), 1024)
+        os.unlink(p)
+
+    def __del__(self):
+        if self._file:
+            self._file.close()
+            self._file = None
+
+    def _spill(self):
+        """ dump the values into disk """
+        global MemoryBytesSpilled, DiskBytesSpilled
+        if self._file is None:
+            self._open_file()
+
+        used_memory = get_used_memory()
+        pos = self._file.tell()
+        self._ser.dump_stream(self.values, self._file)
+        self.values = []
+        gc.collect()
+        DiskBytesSpilled += self._file.tell() - pos
+        MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20
+
+
+class ExternalListOfList(ExternalList):
+    """
+    An external list for list.
+
+    >>> l = ExternalListOfList([[i, i] for i in range(100)])
+    >>> len(l)
+    200
+    >>> l.append(range(10))
+    >>> len(l)
+    210
+    >>> len(list(l))
+    210
+    """
+
+    def __init__(self, values):
+        ExternalList.__init__(self, values)
+        self.count = sum(len(i) for i in values)
+
+    def append(self, value):
+        ExternalList.append(self, value)
+        # already counted 1 in ExternalList.append
+        self.count += len(value) - 1
+
+    def __iter__(self):
+        for values in ExternalList.__iter__(self):
+            for v in values:
+                yield v
+
+
+class GroupByKey(object):
+    """
+    Group a sorted iterator as [(k1, it1), (k2, it2), ...]
+
+    >>> k = [i // 3 for i in range(6)]
+    >>> v = [[i] for i in range(6)]
+    >>> g = GroupByKey(zip(k, v))
+    >>> [(k, list(it)) for k, it in g]
+    [(0, [0, 1, 2]), (1, [3, 4, 5])]
+    """
+
+    def __init__(self, iterator):
+        self.iterator = iterator
+
+    def __iter__(self):
+        key, values = None, None
+        for k, v in self.iterator:
+            if values is not None and k == key:
+                values.append(v)
+            else:
+                if values is not None:
+                    yield (key, values)
+                key = k
+                values = ExternalListOfList([v])
+        if values is not None:
+            yield (key, values)
+
+
+class ExternalGroupBy(ExternalMerger):
+
+    """
+    Group the items by key. If any partition cannot be held
+    in memory, it falls back to a sort-based group-by.
+
+    This class works as follows:
+
+    - It repeatedly groups the items by key and saves them in one
+      dict in memory.
+
+    - When the used memory goes above the memory limit, it splits
+      the combined data into partitions by hash code and dumps them
+      to disk, one file per partition. If the number of keys in one
+      partition is smaller than 1000, it sorts them by key before
+      dumping to disk.
+
+    - Then it goes through the rest of the iterator, grouping items
+      by key into different dicts by hash. Whenever the used memory
+      goes over the memory limit, it dumps all the dicts to disk,
+      one file per dict. This repeats until all the items have been
+      combined. It also tries to sort the items by key in each
+      partition before dumping to disk.
+
+    - It yields the grouped items partition by partition. If the
+      data in one partition can be held in memory, it loads and
+      combines the items in memory and yields them.
+
+    - If the dataset in one partition cannot be held in memory, it
+      sorts the items first: if all the spilled files are already
+      sorted, they are merged with heapq.merge(); otherwise an
+      external sort is done over all the files.
+
+    - After sorting, the `GroupByKey` class groups consecutive
+      items with the same key together and yields the values as
+      an iterator.
+    """
+    SORT_KEY_LIMIT = 1000
+
+    def flattened_serializer(self):
+        assert isinstance(self.serializer, BatchedSerializer)
+        ser = self.serializer
+        return FlattenedValuesSerializer(ser, 20)
+
+    def _object_size(self, obj):
+        return len(obj)
+
+    def _spill(self):
+        """
+        Dump the already-partitioned data to disk.
+        """
+        global MemoryBytesSpilled, DiskBytesSpilled
+        path = self._get_spill_dir(self.spills)
+        if not os.path.exists(path):
+            os.makedirs(path)
+
+        used_memory = get_used_memory()
+        if not self.pdata:
+            # The data has not been partitioned yet: iterate over the
+            # data once and write the items into different files,
+            # using no additional memory. This branch is only taken the
+            # first time memory goes above the limit.
+
+            # open all the files for writing
+            streams = [open(os.path.join(path, str(i)), 'wb')
+                       for i in range(self.partitions)]
+
+            # If the number of keys is small, the overhead of sorting is small,
+            # so sort them before dumping to disk
+            self._sorted = len(self.data) < self.SORT_KEY_LIMIT
+            if self._sorted:
+                self.serializer = self.flattened_serializer()
+                for k in sorted(self.data.keys()):
+                    h = self._partition(k)
+                    self.serializer.dump_stream([(k, self.data[k])], streams[h])
+            else:
+                for k, v in self.data.items():
+                    h = self._partition(k)
+                    self.serializer.dump_stream([(k, v)], streams[h])
+
+            for s in streams:
+                DiskBytesSpilled += s.tell()
+                s.close()
+
+            self.data.clear()
+            # self.pdata is cached in `mergeValues` and `mergeCombiners`
+            self.pdata.extend([{} for i in range(self.partitions)])
+
+        else:
+            for i in range(self.partitions):
+                p = os.path.join(path, str(i))
+                with open(p, "wb") as f:
+                    # dump items in batch
+                    if self._sorted:
+                        # sort by key only (stable)
+                        sorted_items = sorted(self.pdata[i].items(), key=operator.itemgetter(0))
+                        self.serializer.dump_stream(sorted_items, f)
+                    else:
+                        self.serializer.dump_stream(self.pdata[i].items(), f)
+                self.pdata[i].clear()
+                DiskBytesSpilled += os.path.getsize(p)
+
+        self.spills += 1
+        gc.collect()  # release the memory as much as possible
+        MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20
+
+    def _merged_items(self, index):
+        size = sum(os.path.getsize(os.path.join(self._get_spill_dir(j), str(index)))
+                   for j in range(self.spills))
+        # if memory cannot hold the whole partition, use a sort-based
+        # merge. Because of compression, the data on disk is much
+        # smaller than the memory needed once loaded
+        if size >= self.memory_limit << 17:  # * 1M / 8
+            return self._merge_sorted_items(index)
+
+        self.data = {}
+        for j in range(self.spills):
+            path = self._get_spill_dir(j)
+            p = os.path.join(path, str(index))
+            # do not check memory during merging
+            with open(p, "rb") as f:
+                self.mergeCombiners(self.serializer.load_stream(f), 0)
+        return self.data.items()
+
+    def _merge_sorted_items(self, index):
+        """ load a partition from disk, then sort and group by key """
+        def load_partition(j):
+            path = self._get_spill_dir(j)
+            p = os.path.join(path, str(index))
+            with open(p, 'rb', 65536) as f:
+                for v in self.serializer.load_stream(f):
+                    yield v
+
+        disk_items = [load_partition(j) for j in range(self.spills)]
+
+        if self._sorted:
+            # all the partitions are already sorted
+            sorted_items = heapq.merge(disk_items, key=operator.itemgetter(0))
+
+        else:
+            # Flatten the combined values so that the merge sort does not
+            # consume a huge amount of memory.
+            ser = self.flattened_serializer()
+            sorter = ExternalSorter(self.memory_limit, ser)
+            sorted_items = sorter.sorted(itertools.chain(*disk_items),
+                                         key=operator.itemgetter(0))
+        return ((k, vs) for k, vs in GroupByKey(sorted_items))
+
+
+if __name__ == "__main__":
+    import doctest
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3c06c81243623e9802e3d61110a5d36f89b571c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/__init__.py
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Important classes of Spark SQL and DataFrames:
+
+    - :class:`pyspark.sql.SparkSession`
+      Main entry point for :class:`DataFrame` and SQL functionality.
+    - :class:`pyspark.sql.DataFrame`
+      A distributed collection of data grouped into named columns.
+    - :class:`pyspark.sql.Column`
+      A column expression in a :class:`DataFrame`.
+    - :class:`pyspark.sql.Row`
+      A row of data in a :class:`DataFrame`.
+    - :class:`pyspark.sql.GroupedData`
+      Aggregation methods, returned by :func:`DataFrame.groupBy`.
+    - :class:`pyspark.sql.DataFrameNaFunctions`
+      Methods for handling missing data (null values).
+    - :class:`pyspark.sql.DataFrameStatFunctions`
+      Methods for statistics functionality.
+    - :class:`pyspark.sql.functions`
+      List of built-in functions available for :class:`DataFrame`.
+    - :class:`pyspark.sql.types`
+      List of data types available.
+    - :class:`pyspark.sql.Window`
+      For working with window functions.
+"""
+from __future__ import absolute_import
+
+
+from pyspark.sql.types import Row
+from pyspark.sql.context import SQLContext, HiveContext, UDFRegistration
+from pyspark.sql.session import SparkSession
+from pyspark.sql.column import Column
+from pyspark.sql.catalog import Catalog
+from pyspark.sql.dataframe import DataFrame, DataFrameNaFunctions, DataFrameStatFunctions
+from pyspark.sql.group import GroupedData
+from pyspark.sql.readwriter import DataFrameReader, DataFrameWriter
+from pyspark.sql.window import Window, WindowSpec
+
+
+__all__ = [
+    'SparkSession', 'SQLContext', 'HiveContext', 'UDFRegistration',
+    'DataFrame', 'GroupedData', 'Column', 'Catalog', 'Row',
+    'DataFrameNaFunctions', 'DataFrameStatFunctions', 'Window', 'WindowSpec',
+    'DataFrameReader', 'DataFrameWriter'
+]
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/catalog.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/catalog.py
new file mode 100644
index 0000000000000000000000000000000000000000..974251f63b37af72d85fc2efa7232332fdd5fd3f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/catalog.py
@@ -0,0 +1,312 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import warnings
+from collections import namedtuple
+
+from pyspark import since
+from pyspark.rdd import ignore_unicode_prefix, PythonEvalType
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.udf import UserDefinedFunction
+from pyspark.sql.types import IntegerType, StringType, StructType
+
+
+Database = namedtuple("Database", "name description locationUri")
+Table = namedtuple("Table", "name database description tableType isTemporary")
+Column = namedtuple("Column", "name description dataType nullable isPartition isBucket")
+Function = namedtuple("Function", "name description className isTemporary")
+
+
+class Catalog(object):
+    """User-facing catalog API, accessible through `SparkSession.catalog`.
+
+    This is a thin wrapper around its Scala implementation org.apache.spark.sql.catalog.Catalog.
+    """
+
+    def __init__(self, sparkSession):
+        """Create a new Catalog that wraps the underlying JVM object."""
+        self._sparkSession = sparkSession
+        self._jsparkSession = sparkSession._jsparkSession
+        self._jcatalog = sparkSession._jsparkSession.catalog()
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def currentDatabase(self):
+        """Returns the current default database in this session."""
+        return self._jcatalog.currentDatabase()
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def setCurrentDatabase(self, dbName):
+        """Sets the current default database in this session."""
+        return self._jcatalog.setCurrentDatabase(dbName)
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def listDatabases(self):
+        """Returns a list of databases available across all sessions."""
+        iter = self._jcatalog.listDatabases().toLocalIterator()
+        databases = []
+        while iter.hasNext():
+            jdb = iter.next()
+            databases.append(Database(
+                name=jdb.name(),
+                description=jdb.description(),
+                locationUri=jdb.locationUri()))
+        return databases
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def listTables(self, dbName=None):
+        """Returns a list of tables/views in the specified database.
+
+        If no database is specified, the current database is used.
+        This includes all temporary views.
+        """
+        if dbName is None:
+            dbName = self.currentDatabase()
+        iter = self._jcatalog.listTables(dbName).toLocalIterator()
+        tables = []
+        while iter.hasNext():
+            jtable = iter.next()
+            tables.append(Table(
+                name=jtable.name(),
+                database=jtable.database(),
+                description=jtable.description(),
+                tableType=jtable.tableType(),
+                isTemporary=jtable.isTemporary()))
+        return tables
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def listFunctions(self, dbName=None):
+        """Returns a list of functions registered in the specified database.
+
+        If no database is specified, the current database is used.
+        This includes all temporary functions.
+        """
+        if dbName is None:
+            dbName = self.currentDatabase()
+        iter = self._jcatalog.listFunctions(dbName).toLocalIterator()
+        functions = []
+        while iter.hasNext():
+            jfunction = iter.next()
+            functions.append(Function(
+                name=jfunction.name(),
+                description=jfunction.description(),
+                className=jfunction.className(),
+                isTemporary=jfunction.isTemporary()))
+        return functions
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def listColumns(self, tableName, dbName=None):
+        """Returns a list of columns for the given table/view in the specified database.
+
+        If no database is specified, the current database is used.
+
+        Note: the order of arguments here is different from that of its JVM counterpart
+        because Python does not support method overloading.
+        """
+        if dbName is None:
+            dbName = self.currentDatabase()
+        iter = self._jcatalog.listColumns(dbName, tableName).toLocalIterator()
+        columns = []
+        while iter.hasNext():
+            jcolumn = iter.next()
+            columns.append(Column(
+                name=jcolumn.name(),
+                description=jcolumn.description(),
+                dataType=jcolumn.dataType(),
+                nullable=jcolumn.nullable(),
+                isPartition=jcolumn.isPartition(),
+                isBucket=jcolumn.isBucket()))
+        return columns
+
+    @since(2.0)
+    def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
+        """Creates a table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the external table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created external table.
+
+        :return: :class:`DataFrame`
+        """
+        warnings.warn(
+            "createExternalTable is deprecated since Spark 2.2, please use createTable instead.",
+            DeprecationWarning)
+        return self.createTable(tableName, path, source, schema, **options)
+
+    @since(2.2)
+    def createTable(self, tableName, path=None, source=None, schema=None, **options):
+        """Creates a table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used. When ``path`` is specified, an external table is
+        created from the data at the given path. Otherwise a managed table is created.
+
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created table.
+
+        :return: :class:`DataFrame`
+        """
+        if path is not None:
+            options["path"] = path
+        if source is None:
+            source = self._sparkSession._wrapped._conf.defaultDataSourceName()
+        if schema is None:
+            df = self._jcatalog.createTable(tableName, source, options)
+        else:
+            if not isinstance(schema, StructType):
+                raise TypeError("schema should be StructType")
+            scala_datatype = self._jsparkSession.parseDataType(schema.json())
+            df = self._jcatalog.createTable(tableName, source, scala_datatype, options)
+        return DataFrame(df, self._sparkSession._wrapped)
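+
+    # Illustrative usage sketch (not part of the original file): creating an external
+    # table from a hypothetical CSV path with an explicit schema. The table name,
+    # path and options below are made up for illustration:
+    #
+    #   >>> from pyspark.sql.types import StructType, StructField, StringType
+    #   >>> schema = StructType([StructField("name", StringType(), True)])
+    #   >>> df = spark.catalog.createTable("people", path="/tmp/people.csv",
+    #   ...                                source="csv", schema=schema, header="true")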
+
+    @since(2.0)
+    def dropTempView(self, viewName):
+        """Drops the local temporary view with the given view name in the catalog.
+        If the view has been cached before, then it will also be uncached.
+        Returns true if this view is dropped successfully, false otherwise.
+
+        Note that the return type of this method was None in Spark 2.0, but changed to Boolean
+        in Spark 2.1.
+
+        >>> spark.createDataFrame([(1, 1)]).createTempView("my_table")
+        >>> spark.table("my_table").collect()
+        [Row(_1=1, _2=1)]
+        >>> spark.catalog.dropTempView("my_table")
+        >>> spark.table("my_table") # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+            ...
+        AnalysisException: ...
+        """
+        self._jcatalog.dropTempView(viewName)
+
+    @since(2.1)
+    def dropGlobalTempView(self, viewName):
+        """Drops the global temporary view with the given view name in the catalog.
+        If the view has been cached before, then it will also be uncached.
+        Returns true if this view is dropped successfully, false otherwise.
+
+        >>> spark.createDataFrame([(1, 1)]).createGlobalTempView("my_table")
+        >>> spark.table("global_temp.my_table").collect()
+        [Row(_1=1, _2=1)]
+        >>> spark.catalog.dropGlobalTempView("my_table")
+        >>> spark.table("global_temp.my_table") # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+            ...
+        AnalysisException: ...
+        """
+        self._jcatalog.dropGlobalTempView(viewName)
+
+    @since(2.0)
+    def registerFunction(self, name, f, returnType=None):
+        """An alias for :func:`spark.udf.register`.
+        See :meth:`pyspark.sql.UDFRegistration.register`.
+
+        .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` instead.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Use spark.udf.register instead.",
+            DeprecationWarning)
+        return self._sparkSession.udf.register(name, f, returnType)
+
+    @since(2.0)
+    def isCached(self, tableName):
+        """Returns true if the table is currently cached in-memory."""
+        return self._jcatalog.isCached(tableName)
+
+    @since(2.0)
+    def cacheTable(self, tableName):
+        """Caches the specified table in-memory."""
+        self._jcatalog.cacheTable(tableName)
+
+    @since(2.0)
+    def uncacheTable(self, tableName):
+        """Removes the specified table from the in-memory cache."""
+        self._jcatalog.uncacheTable(tableName)
+
+    @since(2.0)
+    def clearCache(self):
+        """Removes all cached tables from the in-memory cache."""
+        self._jcatalog.clearCache()
+
+    @since(2.0)
+    def refreshTable(self, tableName):
+        """Invalidates and refreshes all the cached data and metadata of the given table."""
+        self._jcatalog.refreshTable(tableName)
+
+    @since('2.1.1')
+    def recoverPartitions(self, tableName):
+        """Recovers all the partitions of the given table and update the catalog.
+
+        Only works with a partitioned table, and not a view.
+        """
+        self._jcatalog.recoverPartitions(tableName)
+
+    @since('2.2.0')
+    def refreshByPath(self, path):
+        """Invalidates and refreshes all the cached data (and the associated metadata) for any
+        DataFrame that contains the given data source path.
+        """
+        self._jcatalog.refreshByPath(path)
+
+    def _reset(self):
+        """(Internal use only) Drop all existing databases (except "default"), tables,
+        partitions and functions, and set the current database to "default".
+
+        This is mainly used for tests.
+        """
+        self._jsparkSession.sessionState().catalog().reset()
+
+
+def _test():
+    import os
+    import doctest
+    from pyspark.sql import SparkSession
+    import pyspark.sql.catalog
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.sql.catalog.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("sql.catalog tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    globs['spark'] = spark
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.catalog,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/column.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/column.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7dec11c69b57f0bc1ce41200270d9cf64582c82
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/column.py
@@ -0,0 +1,714 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import json
+
+if sys.version >= '3':
+    basestring = str
+    long = int
+
+from pyspark import copy_func, since
+from pyspark.context import SparkContext
+from pyspark.rdd import ignore_unicode_prefix
+from pyspark.sql.types import *
+
+__all__ = ["Column"]
+
+
+def _create_column_from_literal(literal):
+    sc = SparkContext._active_spark_context
+    return sc._jvm.functions.lit(literal)
+
+
+def _create_column_from_name(name):
+    sc = SparkContext._active_spark_context
+    return sc._jvm.functions.col(name)
+
+
+def _to_java_column(col):
+    if isinstance(col, Column):
+        jcol = col._jc
+    elif isinstance(col, basestring):
+        jcol = _create_column_from_name(col)
+    else:
+        raise TypeError(
+            "Invalid argument, not a string or column: "
+            "{0} of type {1}. "
+            "For column literals, use 'lit', 'array', 'struct' or 'create_map' "
+            "function.".format(col, type(col)))
+    return jcol
+
+
+def _to_seq(sc, cols, converter=None):
+    """
+    Convert a list of Column (or names) into a JVM Seq of Column.
+
+    An optional `converter` could be used to convert items in `cols`
+    into JVM Column objects.
+    """
+    if converter:
+        cols = [converter(c) for c in cols]
+    return sc._jvm.PythonUtils.toSeq(cols)
+
+
+def _to_list(sc, cols, converter=None):
+    """
+    Convert a list of Column (or names) into a JVM (Scala) List of Column.
+
+    An optional `converter` could be used to convert items in `cols`
+    into JVM Column objects.
+    """
+    if converter:
+        cols = [converter(c) for c in cols]
+    return sc._jvm.PythonUtils.toList(cols)
+
+
+def _unary_op(name, doc="unary operator"):
+    """ Create a method for given unary operator """
+    def _(self):
+        jc = getattr(self._jc, name)()
+        return Column(jc)
+    _.__doc__ = doc
+    return _
+
+
+def _func_op(name, doc=''):
+    def _(self):
+        sc = SparkContext._active_spark_context
+        jc = getattr(sc._jvm.functions, name)(self._jc)
+        return Column(jc)
+    _.__doc__ = doc
+    return _
+
+
+def _bin_func_op(name, reverse=False, doc="binary function"):
+    def _(self, other):
+        sc = SparkContext._active_spark_context
+        fn = getattr(sc._jvm.functions, name)
+        jc = other._jc if isinstance(other, Column) else _create_column_from_literal(other)
+        njc = fn(self._jc, jc) if not reverse else fn(jc, self._jc)
+        return Column(njc)
+    _.__doc__ = doc
+    return _
+
+
+def _bin_op(name, doc="binary operator"):
+    """ Create a method for given binary operator
+    """
+    def _(self, other):
+        jc = other._jc if isinstance(other, Column) else other
+        njc = getattr(self._jc, name)(jc)
+        return Column(njc)
+    _.__doc__ = doc
+    return _
+
+
+def _reverse_op(name, doc="binary operator"):
+    """ Create a method for binary operator (this object is on right side)
+    """
+    def _(self, other):
+        jother = _create_column_from_literal(other)
+        jc = getattr(jother, name)(self._jc)
+        return Column(jc)
+    _.__doc__ = doc
+    return _
+
+
+class Column(object):
+
+    """
+    A column in a DataFrame.
+
+    :class:`Column` instances can be created by::
+
+        # 1. Select a column out of a DataFrame
+
+        df.colName
+        df["colName"]
+
+        # 2. Create from an expression
+        df.colName + 1
+        1 / df.colName
+
+    .. versionadded:: 1.3
+    """
+
+    def __init__(self, jc):
+        self._jc = jc
+
+    # arithmetic operators
+    __neg__ = _func_op("negate")
+    __add__ = _bin_op("plus")
+    __sub__ = _bin_op("minus")
+    __mul__ = _bin_op("multiply")
+    __div__ = _bin_op("divide")
+    __truediv__ = _bin_op("divide")
+    __mod__ = _bin_op("mod")
+    __radd__ = _bin_op("plus")
+    __rsub__ = _reverse_op("minus")
+    __rmul__ = _bin_op("multiply")
+    __rdiv__ = _reverse_op("divide")
+    __rtruediv__ = _reverse_op("divide")
+    __rmod__ = _reverse_op("mod")
+    __pow__ = _bin_func_op("pow")
+    __rpow__ = _bin_func_op("pow", reverse=True)
+
+    # comparison operators
+    __eq__ = _bin_op("equalTo")
+    __ne__ = _bin_op("notEqual")
+    __lt__ = _bin_op("lt")
+    __le__ = _bin_op("leq")
+    __ge__ = _bin_op("geq")
+    __gt__ = _bin_op("gt")
+
+    _eqNullSafe_doc = """
+    Equality test that is safe for null values.
+
+    :param other: a value or :class:`Column`
+
+    >>> from pyspark.sql import Row
+    >>> df1 = spark.createDataFrame([
+    ...     Row(id=1, value='foo'),
+    ...     Row(id=2, value=None)
+    ... ])
+    >>> df1.select(
+    ...     df1['value'] == 'foo',
+    ...     df1['value'].eqNullSafe('foo'),
+    ...     df1['value'].eqNullSafe(None)
+    ... ).show()
+    +-------------+---------------+----------------+
+    |(value = foo)|(value <=> foo)|(value <=> NULL)|
+    +-------------+---------------+----------------+
+    |         true|           true|           false|
+    |         null|          false|            true|
+    +-------------+---------------+----------------+
+    >>> df2 = spark.createDataFrame([
+    ...     Row(value = 'bar'),
+    ...     Row(value = None)
+    ... ])
+    >>> df1.join(df2, df1["value"] == df2["value"]).count()
+    0
+    >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count()
+    1
+    >>> df2 = spark.createDataFrame([
+    ...     Row(id=1, value=float('NaN')),
+    ...     Row(id=2, value=42.0),
+    ...     Row(id=3, value=None)
+    ... ])
+    >>> df2.select(
+    ...     df2['value'].eqNullSafe(None),
+    ...     df2['value'].eqNullSafe(float('NaN')),
+    ...     df2['value'].eqNullSafe(42.0)
+    ... ).show()
+    +----------------+---------------+----------------+
+    |(value <=> NULL)|(value <=> NaN)|(value <=> 42.0)|
+    +----------------+---------------+----------------+
+    |           false|           true|           false|
+    |           false|          false|            true|
+    |            true|          false|           false|
+    +----------------+---------------+----------------+
+
+    .. note:: Unlike Pandas, PySpark doesn't consider NaN values to be NULL.
+       See the `NaN Semantics`_ for details.
+    .. _NaN Semantics:
+       https://spark.apache.org/docs/latest/sql-programming-guide.html#nan-semantics
+    .. versionadded:: 2.3.0
+    """
+    eqNullSafe = _bin_op("eqNullSafe", _eqNullSafe_doc)
+
+    # `and`, `or`, `not` cannot be overloaded in Python,
+    # so use bitwise operators as boolean operators
+    __and__ = _bin_op('and')
+    __or__ = _bin_op('or')
+    __invert__ = _func_op('not')
+    __rand__ = _bin_op("and")
+    __ror__ = _bin_op("or")
+
+    # container operators
+    def __contains__(self, item):
+        raise ValueError("Cannot apply 'in' operator against a column: please use 'contains' "
+                         "in a string column or 'array_contains' function for an array column.")
+
+    # bitwise operators
+    _bitwiseOR_doc = """
+    Compute bitwise OR of this expression with another expression.
+
+    :param other: a value or :class:`Column` to calculate bitwise or(|) against
+                  this :class:`Column`.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(a=170, b=75)])
+    >>> df.select(df.a.bitwiseOR(df.b)).collect()
+    [Row((a | b)=235)]
+    """
+    _bitwiseAND_doc = """
+    Compute bitwise AND of this expression with another expression.
+
+    :param other: a value or :class:`Column` to calculate bitwise and(&) against
+                  this :class:`Column`.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(a=170, b=75)])
+    >>> df.select(df.a.bitwiseAND(df.b)).collect()
+    [Row((a & b)=10)]
+    """
+    _bitwiseXOR_doc = """
+    Compute bitwise XOR of this expression with another expression.
+
+    :param other: a value or :class:`Column` to calculate bitwise xor(^) against
+                  this :class:`Column`.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(a=170, b=75)])
+    >>> df.select(df.a.bitwiseXOR(df.b)).collect()
+    [Row((a ^ b)=225)]
+    """
+
+    bitwiseOR = _bin_op("bitwiseOR", _bitwiseOR_doc)
+    bitwiseAND = _bin_op("bitwiseAND", _bitwiseAND_doc)
+    bitwiseXOR = _bin_op("bitwiseXOR", _bitwiseXOR_doc)
+
+    @since(1.3)
+    def getItem(self, key):
+        """
+        An expression that gets an item at position ``ordinal`` out of a list,
+        or gets an item by key out of a dict.
+
+        >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"])
+        >>> df.select(df.l.getItem(0), df.d.getItem("key")).show()
+        +----+------+
+        |l[0]|d[key]|
+        +----+------+
+        |   1| value|
+        +----+------+
+        >>> df.select(df.l[0], df.d["key"]).show()
+        +----+------+
+        |l[0]|d[key]|
+        +----+------+
+        |   1| value|
+        +----+------+
+        """
+        return self[key]
+
+    @since(1.3)
+    def getField(self, name):
+        """
+        An expression that gets a field by name in a StructType.
+
+        >>> from pyspark.sql import Row
+        >>> df = spark.createDataFrame([Row(r=Row(a=1, b="b"))])
+        >>> df.select(df.r.getField("b")).show()
+        +---+
+        |r.b|
+        +---+
+        |  b|
+        +---+
+        >>> df.select(df.r.a).show()
+        +---+
+        |r.a|
+        +---+
+        |  1|
+        +---+
+        """
+        return self[name]
+
+    def __getattr__(self, item):
+        if item.startswith("__"):
+            raise AttributeError(item)
+        return self.getField(item)
+
+    def __getitem__(self, k):
+        if isinstance(k, slice):
+            if k.step is not None:
+                raise ValueError("slice with step is not supported.")
+            return self.substr(k.start, k.stop)
+        else:
+            return _bin_op("apply")(self, k)
+
+    def __iter__(self):
+        raise TypeError("Column is not iterable")
+
+    # string methods
+    _contains_doc = """
+    Contains the other element. Returns a boolean :class:`Column` based on a string match.
+
+    :param other: string to search for in this column
+
+    >>> df.filter(df.name.contains('o')).collect()
+    [Row(age=5, name=u'Bob')]
+    """
+    _rlike_doc = """
+    SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex
+    match.
+
+    :param other: an extended regex expression
+
+    >>> df.filter(df.name.rlike('ice$')).collect()
+    [Row(age=2, name=u'Alice')]
+    """
+    _like_doc = """
+    SQL like expression. Returns a boolean :class:`Column` based on a SQL LIKE match.
+
+    :param other: a SQL LIKE pattern
+
+    See :func:`rlike` for a regex version
+
+    >>> df.filter(df.name.like('Al%')).collect()
+    [Row(age=2, name=u'Alice')]
+    """
+    _startswith_doc = """
+    String starts with. Returns a boolean :class:`Column` based on a string match.
+
+    :param other: string at start of line (do not use a regex `^`)
+
+    >>> df.filter(df.name.startswith('Al')).collect()
+    [Row(age=2, name=u'Alice')]
+    >>> df.filter(df.name.startswith('^Al')).collect()
+    []
+    """
+    _endswith_doc = """
+    String ends with. Returns a boolean :class:`Column` based on a string match.
+
+    :param other: string at end of line (do not use a regex `$`)
+
+    >>> df.filter(df.name.endswith('ice')).collect()
+    [Row(age=2, name=u'Alice')]
+    >>> df.filter(df.name.endswith('ice$')).collect()
+    []
+    """
+
+    contains = ignore_unicode_prefix(_bin_op("contains", _contains_doc))
+    rlike = ignore_unicode_prefix(_bin_op("rlike", _rlike_doc))
+    like = ignore_unicode_prefix(_bin_op("like", _like_doc))
+    startswith = ignore_unicode_prefix(_bin_op("startsWith", _startswith_doc))
+    endswith = ignore_unicode_prefix(_bin_op("endsWith", _endswith_doc))
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def substr(self, startPos, length):
+        """
+        Return a :class:`Column` which is a substring of the column.
+
+        :param startPos: start position (int or Column)
+        :param length:  length of the substring (int or Column)
+
+        >>> df.select(df.name.substr(1, 3).alias("col")).collect()
+        [Row(col=u'Ali'), Row(col=u'Bob')]
+        """
+        if type(startPos) != type(length):
+            raise TypeError(
+                "startPos and length must be the same type. "
+                "Got {startPos_t} and {length_t}, respectively."
+                .format(
+                    startPos_t=type(startPos),
+                    length_t=type(length),
+                ))
+        if isinstance(startPos, int):
+            jc = self._jc.substr(startPos, length)
+        elif isinstance(startPos, Column):
+            jc = self._jc.substr(startPos._jc, length._jc)
+        else:
+            raise TypeError("Unexpected type: %s" % type(startPos))
+        return Column(jc)
+
+    @ignore_unicode_prefix
+    @since(1.5)
+    def isin(self, *cols):
+        """
+        A boolean expression that is evaluated to true if the value of this
+        expression is contained by the evaluated values of the arguments.
+
+        >>> df[df.name.isin("Bob", "Mike")].collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df[df.age.isin([1, 2, 3])].collect()
+        [Row(age=2, name=u'Alice')]
+        """
+        if len(cols) == 1 and isinstance(cols[0], (list, set)):
+            cols = cols[0]
+        cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols]
+        sc = SparkContext._active_spark_context
+        jc = getattr(self._jc, "isin")(_to_seq(sc, cols))
+        return Column(jc)
+
+    # order
+    _asc_doc = """
+    Returns a sort expression based on ascending order of the column.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.asc()).collect()
+    [Row(name=u'Alice'), Row(name=u'Tom')]
+    """
+    _asc_nulls_first_doc = """
+    Returns a sort expression based on ascending order of the column, and null values
+    appear before non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect()
+    [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')]
+
+    .. versionadded:: 2.4
+    """
+    _asc_nulls_last_doc = """
+    Returns a sort expression based on ascending order of the column, and null values
+    appear after non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect()
+    [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)]
+
+    .. versionadded:: 2.4
+    """
+    _desc_doc = """
+    Returns a sort expression based on the descending order of the column.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.desc()).collect()
+    [Row(name=u'Tom'), Row(name=u'Alice')]
+    """
+    _desc_nulls_first_doc = """
+    Returns a sort expression based on the descending order of the column, and null values
+    appear before non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect()
+    [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')]
+
+    .. versionadded:: 2.4
+    """
+    _desc_nulls_last_doc = """
+    Returns a sort expression based on the descending order of the column, and null values
+    appear after non-null values.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"])
+    >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect()
+    [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)]
+
+    .. versionadded:: 2.4
+    """
+
+    asc = ignore_unicode_prefix(_unary_op("asc", _asc_doc))
+    asc_nulls_first = ignore_unicode_prefix(_unary_op("asc_nulls_first", _asc_nulls_first_doc))
+    asc_nulls_last = ignore_unicode_prefix(_unary_op("asc_nulls_last", _asc_nulls_last_doc))
+    desc = ignore_unicode_prefix(_unary_op("desc", _desc_doc))
+    desc_nulls_first = ignore_unicode_prefix(_unary_op("desc_nulls_first", _desc_nulls_first_doc))
+    desc_nulls_last = ignore_unicode_prefix(_unary_op("desc_nulls_last", _desc_nulls_last_doc))
+
+    _isNull_doc = """
+    True if the current expression is null.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)])
+    >>> df.filter(df.height.isNull()).collect()
+    [Row(height=None, name=u'Alice')]
+    """
+    _isNotNull_doc = """
+    True if the current expression is NOT null.
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(name=u'Tom', height=80), Row(name=u'Alice', height=None)])
+    >>> df.filter(df.height.isNotNull()).collect()
+    [Row(height=80, name=u'Tom')]
+    """
+
+    isNull = ignore_unicode_prefix(_unary_op("isNull", _isNull_doc))
+    isNotNull = ignore_unicode_prefix(_unary_op("isNotNull", _isNotNull_doc))
+
+    @since(1.3)
+    def alias(self, *alias, **kwargs):
+        """
+        Returns this column aliased with a new name or names (in the case of expressions that
+        return more than one column, such as explode).
+
+        :param alias: strings of desired column names (collects all positional arguments passed)
+        :param metadata: a dict of information to be stored in ``metadata`` attribute of the
+            corresponding :class:`StructField` (optional, keyword only argument)
+
+        .. versionchanged:: 2.2
+           Added optional ``metadata`` argument.
+
+        >>> df.select(df.age.alias("age2")).collect()
+        [Row(age2=2), Row(age2=5)]
+        >>> df.select(df.age.alias("age3", metadata={'max': 99})).schema['age3'].metadata['max']
+        99
+        """
+
+        metadata = kwargs.pop('metadata', None)
+        assert not kwargs, 'Unexpected kwargs were passed: %s' % kwargs
+
+        sc = SparkContext._active_spark_context
+        if len(alias) == 1:
+            if metadata:
+                jmeta = sc._jvm.org.apache.spark.sql.types.Metadata.fromJson(
+                    json.dumps(metadata))
+                return Column(getattr(self._jc, "as")(alias[0], jmeta))
+            else:
+                return Column(getattr(self._jc, "as")(alias[0]))
+        else:
+            if metadata:
+                raise ValueError('metadata can only be provided for a single column')
+            return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias))))
+
+    name = copy_func(alias, sinceversion=2.0, doc=":func:`name` is an alias for :func:`alias`.")
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def cast(self, dataType):
+        """ Convert the column into type ``dataType``.
+
+        >>> df.select(df.age.cast("string").alias('ages')).collect()
+        [Row(ages=u'2'), Row(ages=u'5')]
+        >>> df.select(df.age.cast(StringType()).alias('ages')).collect()
+        [Row(ages=u'2'), Row(ages=u'5')]
+        """
+        if isinstance(dataType, basestring):
+            jc = self._jc.cast(dataType)
+        elif isinstance(dataType, DataType):
+            from pyspark.sql import SparkSession
+            spark = SparkSession.builder.getOrCreate()
+            jdt = spark._jsparkSession.parseDataType(dataType.json())
+            jc = self._jc.cast(jdt)
+        else:
+            raise TypeError("unexpected type: %s" % type(dataType))
+        return Column(jc)
+
+    astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias for :func:`cast`.")
+
+    @since(1.3)
+    def between(self, lowerBound, upperBound):
+        """
+        A boolean expression that is evaluated to true if the value of this
+        expression is between the given lower and upper bounds (inclusive).
+
+        >>> df.select(df.name, df.age.between(2, 4)).show()
+        +-----+---------------------------+
+        | name|((age >= 2) AND (age <= 4))|
+        +-----+---------------------------+
+        |Alice|                       true|
+        |  Bob|                      false|
+        +-----+---------------------------+
+        """
+        return (self >= lowerBound) & (self <= upperBound)
+
+    @since(1.4)
+    def when(self, condition, value):
+        """
+        Evaluates a list of conditions and returns one of multiple possible result expressions.
+        If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.
+
+        See :func:`pyspark.sql.functions.when` for example usage.
+
+        :param condition: a boolean :class:`Column` expression.
+        :param value: a literal value, or a :class:`Column` expression.
+
+        >>> from pyspark.sql import functions as F
+        >>> df.select(df.name, F.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show()
+        +-----+------------------------------------------------------------+
+        | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0 END|
+        +-----+------------------------------------------------------------+
+        |Alice|                                                          -1|
+        |  Bob|                                                           1|
+        +-----+------------------------------------------------------------+
+        """
+        if not isinstance(condition, Column):
+            raise TypeError("condition should be a Column")
+        v = value._jc if isinstance(value, Column) else value
+        jc = self._jc.when(condition._jc, v)
+        return Column(jc)
+
+    @since(1.4)
+    def otherwise(self, value):
+        """
+        Evaluates a list of conditions and returns one of multiple possible result expressions.
+        If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.
+
+        See :func:`pyspark.sql.functions.when` for example usage.
+
+        :param value: a literal value, or a :class:`Column` expression.
+
+        >>> from pyspark.sql import functions as F
+        >>> df.select(df.name, F.when(df.age > 3, 1).otherwise(0)).show()
+        +-----+-------------------------------------+
+        | name|CASE WHEN (age > 3) THEN 1 ELSE 0 END|
+        +-----+-------------------------------------+
+        |Alice|                                    0|
+        |  Bob|                                    1|
+        +-----+-------------------------------------+
+        """
+        v = value._jc if isinstance(value, Column) else value
+        jc = self._jc.otherwise(v)
+        return Column(jc)
+
+    @since(1.4)
+    def over(self, window):
+        """
+        Define a windowing column.
+
+        :param window: a :class:`WindowSpec`
+        :return: a Column
+
+        >>> from pyspark.sql import Window
+        >>> window = Window.partitionBy("name").orderBy("age").rowsBetween(-1, 1)
+        >>> from pyspark.sql.functions import rank, min
+        >>> # df.select(rank().over(window), min('age').over(window))
+        """
+        from pyspark.sql.window import WindowSpec
+        if not isinstance(window, WindowSpec):
+            raise TypeError("window should be WindowSpec")
+        jc = self._jc.over(window._jspec)
+        return Column(jc)
+
+    def __nonzero__(self):
+        raise ValueError("Cannot convert column into bool: please use '&' for 'and', '|' for 'or', "
+                         "'~' for 'not' when building DataFrame boolean expressions.")
+    __bool__ = __nonzero__
+
+    def __repr__(self):
+        return 'Column<%s>' % self._jc.toString().encode('utf8')
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    import pyspark.sql.column
+    globs = pyspark.sql.column.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("sql.column tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['spark'] = spark
+    globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \
+        .toDF(StructType([StructField('age', IntegerType()),
+                          StructField('name', StringType())]))
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.column, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/conf.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..71ea1631718f13566f69b0fedbfd95f8fa578ab1
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/conf.py
@@ -0,0 +1,99 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+from pyspark import since, _NoValue
+from pyspark.rdd import ignore_unicode_prefix
+
+if sys.version_info[0] >= 3:
+    basestring = str
+
+
+class RuntimeConfig(object):
+    """User-facing configuration API, accessible through `SparkSession.conf`.
+
+    Options set here are automatically propagated to the Hadoop configuration during I/O.
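+
+    A minimal illustrative sketch (skipped by doctest; the exact string returned by
+    ``get`` may differ between Python versions):
+
+    >>> spark.conf.set("spark.sql.shuffle.partitions", "8")  # doctest: +SKIP
+    >>> spark.conf.get("spark.sql.shuffle.partitions")  # doctest: +SKIP
+    u'8'
+    >>> spark.conf.unset("spark.sql.shuffle.partitions")  # doctest: +SKIP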
+    """
+
+    def __init__(self, jconf):
+        """Create a new RuntimeConfig that wraps the underlying JVM object."""
+        self._jconf = jconf
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def set(self, key, value):
+        """Sets the given Spark runtime configuration property."""
+        self._jconf.set(key, value)
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def get(self, key, default=_NoValue):
+        """Returns the value of Spark runtime configuration property for the given key,
+        assuming it is set.
+        """
+        self._checkType(key, "key")
+        if default is _NoValue:
+            return self._jconf.get(key)
+        else:
+            if default is not None:
+                self._checkType(default, "default")
+            return self._jconf.get(key, default)
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def unset(self, key):
+        """Resets the configuration property for the given key."""
+        self._jconf.unset(key)
+
+    def _checkType(self, obj, identifier):
+        """Assert that an object is of type str."""
+        if not isinstance(obj, basestring):
+            raise TypeError("expected %s '%s' to be a string (was '%s')" %
+                            (identifier, obj, type(obj).__name__))
+
+    @ignore_unicode_prefix
+    @since(2.4)
+    def isModifiable(self, key):
+        """Indicates whether the configuration property with the given key
+        is modifiable in the current session.
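+
+        Illustrative example (skipped by doctest, since the result depends on the
+        configuration property that is queried):
+
+        >>> spark.conf.isModifiable("spark.sql.shuffle.partitions")  # doctest: +SKIP
+        True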
+        """
+        return self._jconf.isModifiable(key)
+
+
+def _test():
+    import os
+    import doctest
+    from pyspark.sql.session import SparkSession
+    import pyspark.sql.conf
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.sql.conf.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("sql.conf tests")\
+        .getOrCreate()
+    globs['sc'] = spark.sparkContext
+    globs['spark'] = spark
+    (failure_count, test_count) = doctest.testmod(pyspark.sql.conf, globs=globs)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/context.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/context.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c094dd9a90337d5044994e57659e4ae7fd0e924
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/context.py
@@ -0,0 +1,555 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import sys
+import warnings
+
+if sys.version >= '3':
+    basestring = unicode = str
+
+from pyspark import since, _NoValue
+from pyspark.rdd import ignore_unicode_prefix
+from pyspark.sql.session import _monkey_patch_RDD, SparkSession
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.readwriter import DataFrameReader
+from pyspark.sql.streaming import DataStreamReader
+from pyspark.sql.types import IntegerType, Row, StringType
+from pyspark.sql.udf import UDFRegistration
+from pyspark.sql.utils import install_exception_handler
+
+__all__ = ["SQLContext", "HiveContext"]
+
+
+class SQLContext(object):
+    """The entry point for working with structured data (rows and columns) in Spark, in Spark 1.x.
+
+    As of Spark 2.0, this is replaced by :class:`SparkSession`. However, we are keeping the class
+    here for backward compatibility.
+
+    A SQLContext can be used to create :class:`DataFrame`, register :class:`DataFrame` as
+    tables, execute SQL over tables, cache tables, and read parquet files.
+
+    :param sparkContext: The :class:`SparkContext` backing this SQLContext.
+    :param sparkSession: The :class:`SparkSession` that this SQLContext wraps.
+    :param jsqlContext: An optional JVM Scala SQLContext. If set, we do not instantiate a new
+        SQLContext in the JVM, instead we make all calls to this object.
+    """
+
+    _instantiatedContext = None
+
+    @ignore_unicode_prefix
+    def __init__(self, sparkContext, sparkSession=None, jsqlContext=None):
+        """Creates a new SQLContext.
+
+        >>> from datetime import datetime
+        >>> sqlContext = SQLContext(sc)
+        >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1,
+        ...     b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1),
+        ...     time=datetime(2014, 8, 1, 14, 1, 5))])
+        >>> df = allTypes.toDF()
+        >>> df.createOrReplaceTempView("allTypes")
+        >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a '
+        ...            'from allTypes where b and i > 0').collect()
+        [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \
+            dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)]
+        >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect()
+        [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])]
+        """
+        self._sc = sparkContext
+        self._jsc = self._sc._jsc
+        self._jvm = self._sc._jvm
+        if sparkSession is None:
+            sparkSession = SparkSession.builder.getOrCreate()
+        if jsqlContext is None:
+            jsqlContext = sparkSession._jwrapped
+        self.sparkSession = sparkSession
+        self._jsqlContext = jsqlContext
+        _monkey_patch_RDD(self.sparkSession)
+        install_exception_handler()
+        if SQLContext._instantiatedContext is None:
+            SQLContext._instantiatedContext = self
+
+    @property
+    def _ssql_ctx(self):
+        """Accessor for the JVM Spark SQL context.
+
+        Subclasses can override this property to provide their own
+        JVM Contexts.
+        """
+        return self._jsqlContext
+
+    @property
+    def _conf(self):
+        """Accessor for the JVM SQL-specific configurations"""
+        return self.sparkSession._jsparkSession.sessionState().conf()
+
+    @classmethod
+    @since(1.6)
+    def getOrCreate(cls, sc):
+        """
+        Get the existing SQLContext or create a new one with given SparkContext.
+
+        :param sc: SparkContext
+        """
+        if cls._instantiatedContext is None:
+            jsqlContext = sc._jvm.SQLContext.getOrCreate(sc._jsc.sc())
+            sparkSession = SparkSession(sc, jsqlContext.sparkSession())
+            cls(sc, sparkSession, jsqlContext)
+        return cls._instantiatedContext
+
+    @since(1.6)
+    def newSession(self):
+        """
+        Returns a new SQLContext as a new session, which has separate SQLConf,
+        registered temporary views and UDFs, but shares the SparkContext and
+        table cache.
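+
+        Illustrative example (skipped by doctest):
+
+        >>> newSqlContext = sqlContext.newSession()  # doctest: +SKIP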
+        """
+        return self.__class__(self._sc, self.sparkSession.newSession())
+
+    @since(1.3)
+    def setConf(self, key, value):
+        """Sets the given Spark SQL configuration property.
+        """
+        self.sparkSession.conf.set(key, value)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def getConf(self, key, defaultValue=_NoValue):
+        """Returns the value of Spark SQL configuration property for the given key.
+
+        If the key is not set and defaultValue is set, return
+        defaultValue. If the key is not set and defaultValue is not set, return
+        the system default value.
+
+        >>> sqlContext.getConf("spark.sql.shuffle.partitions")
+        u'200'
+        >>> sqlContext.getConf("spark.sql.shuffle.partitions", u"10")
+        u'10'
+        >>> sqlContext.setConf("spark.sql.shuffle.partitions", u"50")
+        >>> sqlContext.getConf("spark.sql.shuffle.partitions", u"10")
+        u'50'
+        """
+        return self.sparkSession.conf.get(key, defaultValue)
+
+    @property
+    @since("1.3.1")
+    def udf(self):
+        """Returns a :class:`UDFRegistration` for UDF registration.
+
+        :return: :class:`UDFRegistration`
+        """
+        return self.sparkSession.udf
+
+    @since(1.4)
+    def range(self, start, end=None, step=1, numPartitions=None):
+        """
+        Create a :class:`DataFrame` with a single :class:`pyspark.sql.types.LongType` column named
+        ``id``, containing elements in a range from ``start`` to ``end`` (exclusive) with
+        step value ``step``.
+
+        :param start: the start value
+        :param end: the end value (exclusive)
+        :param step: the incremental step (default: 1)
+        :param numPartitions: the number of partitions of the DataFrame
+        :return: :class:`DataFrame`
+
+        >>> sqlContext.range(1, 7, 2).collect()
+        [Row(id=1), Row(id=3), Row(id=5)]
+
+        If only one argument is specified, it will be used as the end value.
+
+        >>> sqlContext.range(3).collect()
+        [Row(id=0), Row(id=1), Row(id=2)]
+        """
+        return self.sparkSession.range(start, end, step, numPartitions)
+
+    @since(1.2)
+    def registerFunction(self, name, f, returnType=None):
+        """An alias for :func:`spark.udf.register`.
+        See :meth:`pyspark.sql.UDFRegistration.register`.
+
+        .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` instead.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Use spark.udf.register instead.",
+            DeprecationWarning)
+        return self.sparkSession.udf.register(name, f, returnType)
+
+    @since(2.1)
+    def registerJavaFunction(self, name, javaClassName, returnType=None):
+        """An alias for :func:`spark.udf.registerJavaFunction`.
+        See :meth:`pyspark.sql.UDFRegistration.registerJavaFunction`.
+
+        .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.registerJavaFunction` instead.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Use spark.udf.registerJavaFunction instead.",
+            DeprecationWarning)
+        return self.sparkSession.udf.registerJavaFunction(name, javaClassName, returnType)
+
+    # TODO(andrew): delete this once we refactor things to take in SparkSession
+    def _inferSchema(self, rdd, samplingRatio=None):
+        """
+        Infer schema from an RDD of Row or tuple.
+
+        :param rdd: an RDD of Row or tuple
+        :param samplingRatio: sampling ratio, or no sampling (default)
+        :return: :class:`pyspark.sql.types.StructType`
+        """
+        return self.sparkSession._inferSchema(rdd, samplingRatio)
+
+    @since(1.3)
+    @ignore_unicode_prefix
+    def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=True):
+        """
+        Creates a :class:`DataFrame` from an :class:`RDD`, a list or a :class:`pandas.DataFrame`.
+
+        When ``schema`` is a list of column names, the type of each column
+        will be inferred from ``data``.
+
+        When ``schema`` is ``None``, it will try to infer the schema (column names and types)
+        from ``data``, which should be an RDD of :class:`Row`,
+        or :class:`namedtuple`, or :class:`dict`.
+
+        When ``schema`` is :class:`pyspark.sql.types.DataType` or a datatype string it must match
+        the real data, or an exception will be thrown at runtime. If the given schema is not
+        :class:`pyspark.sql.types.StructType`, it will be wrapped into a
+        :class:`pyspark.sql.types.StructType` as its only field, and the field name will be
+        "value". Each record will also be wrapped into a tuple, which can be converted to a
+        row later.
+
+        If schema inference is needed, ``samplingRatio`` is used to determine the ratio of
+        rows used for schema inference. The first row will be used if ``samplingRatio`` is ``None``.
+
+        :param data: an RDD of any kind of SQL data representation(e.g. :class:`Row`,
+            :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, or
+            :class:`pandas.DataFrame`.
+        :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of
+            column names, default is None.  The data type string format equals to
+            :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can
+            omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use
+            ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`.
+            We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`.
+        :param samplingRatio: the sample ratio of rows used for inferring
+        :param verifySchema: verify data types of every row against schema.
+        :return: :class:`DataFrame`
+
+        .. versionchanged:: 2.0
+           The ``schema`` parameter can be a :class:`pyspark.sql.types.DataType` or a
+           datatype string after 2.0.
+           If it's not a :class:`pyspark.sql.types.StructType`, it will be wrapped into a
+           :class:`pyspark.sql.types.StructType` and each record will also be wrapped into a tuple.
+
+        .. versionchanged:: 2.1
+           Added verifySchema.
+
+        >>> l = [('Alice', 1)]
+        >>> sqlContext.createDataFrame(l).collect()
+        [Row(_1=u'Alice', _2=1)]
+        >>> sqlContext.createDataFrame(l, ['name', 'age']).collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> d = [{'name': 'Alice', 'age': 1}]
+        >>> sqlContext.createDataFrame(d).collect()
+        [Row(age=1, name=u'Alice')]
+
+        >>> rdd = sc.parallelize(l)
+        >>> sqlContext.createDataFrame(rdd).collect()
+        [Row(_1=u'Alice', _2=1)]
+        >>> df = sqlContext.createDataFrame(rdd, ['name', 'age'])
+        >>> df.collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> from pyspark.sql import Row
+        >>> Person = Row('name', 'age')
+        >>> person = rdd.map(lambda r: Person(*r))
+        >>> df2 = sqlContext.createDataFrame(person)
+        >>> df2.collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> from pyspark.sql.types import *
+        >>> schema = StructType([
+        ...    StructField("name", StringType(), True),
+        ...    StructField("age", IntegerType(), True)])
+        >>> df3 = sqlContext.createDataFrame(rdd, schema)
+        >>> df3.collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> sqlContext.createDataFrame(df.toPandas()).collect()  # doctest: +SKIP
+        [Row(name=u'Alice', age=1)]
+        >>> sqlContext.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  # doctest: +SKIP
+        [Row(0=1, 1=2)]
+
+        >>> sqlContext.createDataFrame(rdd, "a: string, b: int").collect()
+        [Row(a=u'Alice', b=1)]
+        >>> rdd = rdd.map(lambda row: row[1])
+        >>> sqlContext.createDataFrame(rdd, "int").collect()
+        [Row(value=1)]
+        >>> sqlContext.createDataFrame(rdd, "boolean").collect() # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+            ...
+        Py4JJavaError: ...
+        """
+        return self.sparkSession.createDataFrame(data, schema, samplingRatio, verifySchema)
+
+    @since(1.3)
+    def registerDataFrameAsTable(self, df, tableName):
+        """Registers the given :class:`DataFrame` as a temporary table in the catalog.
+
+        Temporary tables exist only during the lifetime of this instance of :class:`SQLContext`.
+
+        >>> sqlContext.registerDataFrameAsTable(df, "table1")
+        """
+        df.createOrReplaceTempView(tableName)
+
+    @since(1.6)
+    def dropTempTable(self, tableName):
+        """ Remove the temp table from catalog.
+
+        >>> sqlContext.registerDataFrameAsTable(df, "table1")
+        >>> sqlContext.dropTempTable("table1")
+        """
+        self.sparkSession.catalog.dropTempView(tableName)
+
+    @since(1.3)
+    def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
+        """Creates an external table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the external table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created external table.
+
+        :return: :class:`DataFrame`
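+
+        Illustrative example (the table name, format and path are placeholders, so
+        the snippet is skipped by doctest):
+
+        >>> people = sqlContext.createExternalTable(
+        ...     "people", path="/tmp/people.parquet", source="parquet")  # doctest: +SKIP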
+        """
+        return self.sparkSession.catalog.createExternalTable(
+            tableName, path, source, schema, **options)
+
+    @ignore_unicode_prefix
+    @since(1.0)
+    def sql(self, sqlQuery):
+        """Returns a :class:`DataFrame` representing the result of the given query.
+
+        :return: :class:`DataFrame`
+
+        >>> sqlContext.registerDataFrameAsTable(df, "table1")
+        >>> df2 = sqlContext.sql("SELECT field1 AS f1, field2 as f2 from table1")
+        >>> df2.collect()
+        [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
+        """
+        return self.sparkSession.sql(sqlQuery)
+
+    @since(1.0)
+    def table(self, tableName):
+        """Returns the specified table or view as a :class:`DataFrame`.
+
+        :return: :class:`DataFrame`
+
+        >>> sqlContext.registerDataFrameAsTable(df, "table1")
+        >>> df2 = sqlContext.table("table1")
+        >>> sorted(df.collect()) == sorted(df2.collect())
+        True
+        """
+        return self.sparkSession.table(tableName)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def tables(self, dbName=None):
+        """Returns a :class:`DataFrame` containing names of tables in the given database.
+
+        If ``dbName`` is not specified, the current database will be used.
+
+        The returned DataFrame has three columns: ``database``, ``tableName`` and ``isTemporary``
+        (a column with :class:`BooleanType` indicating if a table is a temporary one or not).
+
+        :param dbName: string, name of the database to use.
+        :return: :class:`DataFrame`
+
+        >>> sqlContext.registerDataFrameAsTable(df, "table1")
+        >>> df2 = sqlContext.tables()
+        >>> df2.filter("tableName = 'table1'").first()
+        Row(database=u'', tableName=u'table1', isTemporary=True)
+        """
+        if dbName is None:
+            return DataFrame(self._ssql_ctx.tables(), self)
+        else:
+            return DataFrame(self._ssql_ctx.tables(dbName), self)
+
+    @since(1.3)
+    def tableNames(self, dbName=None):
+        """Returns a list of names of tables in the database ``dbName``.
+
+        :param dbName: string, name of the database to use. Defaults to the current database.
+        :return: list of table names, in string
+
+        >>> sqlContext.registerDataFrameAsTable(df, "table1")
+        >>> "table1" in sqlContext.tableNames()
+        True
+        >>> "table1" in sqlContext.tableNames("default")
+        True
+        """
+        if dbName is None:
+            return [name for name in self._ssql_ctx.tableNames()]
+        else:
+            return [name for name in self._ssql_ctx.tableNames(dbName)]
+
+    @since(1.0)
+    def cacheTable(self, tableName):
+        """Caches the specified table in-memory."""
+        self._ssql_ctx.cacheTable(tableName)
+
+    @since(1.0)
+    def uncacheTable(self, tableName):
+        """Removes the specified table from the in-memory cache."""
+        self._ssql_ctx.uncacheTable(tableName)
+
+    @since(1.3)
+    def clearCache(self):
+        """Removes all cached tables from the in-memory cache. """
+        self._ssql_ctx.clearCache()
+
+    @property
+    @since(1.4)
+    def read(self):
+        """
+        Returns a :class:`DataFrameReader` that can be used to read data
+        in as a :class:`DataFrame`.
+
+        :return: :class:`DataFrameReader`
+        """
+        return DataFrameReader(self)
+
+    @property
+    @since(2.0)
+    def readStream(self):
+        """
+        Returns a :class:`DataStreamReader` that can be used to read data streams
+        as a streaming :class:`DataFrame`.
+
+        .. note:: Evolving.
+
+        :return: :class:`DataStreamReader`
+
+        >>> text_sdf = sqlContext.readStream.text(tempfile.mkdtemp())
+        >>> text_sdf.isStreaming
+        True
+        """
+        return DataStreamReader(self)
+
+    @property
+    @since(2.0)
+    def streams(self):
+        """Returns a :class:`StreamingQueryManager` that allows managing all the
+        :class:`StreamingQuery` instances active on `this` context.
+
+        .. note:: Evolving.
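+
+        Illustrative example (skipped by doctest, since the set of active queries
+        depends on the application):
+
+        >>> sqlContext.streams.active  # doctest: +SKIP
+        []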
+        """
+        from pyspark.sql.streaming import StreamingQueryManager
+        return StreamingQueryManager(self._ssql_ctx.streams())
+
+
+class HiveContext(SQLContext):
+    """A variant of Spark SQL that integrates with data stored in Hive.
+
+    Configuration for Hive is read from ``hive-site.xml`` on the classpath.
+    It supports running both SQL and HiveQL commands.
+
+    :param sparkContext: The SparkContext to wrap.
+    :param jhiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new
+        :class:`HiveContext` in the JVM, instead we make all calls to this object.
+
+    .. note:: Deprecated in 2.0.0. Use SparkSession.builder.enableHiveSupport().getOrCreate().
+    """
+
+    def __init__(self, sparkContext, jhiveContext=None):
+        warnings.warn(
+            "HiveContext is deprecated in Spark 2.0.0. Please use " +
+            "SparkSession.builder.enableHiveSupport().getOrCreate() instead.",
+            DeprecationWarning)
+        if jhiveContext is None:
+            sparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()
+        else:
+            sparkSession = SparkSession(sparkContext, jhiveContext.sparkSession())
+        SQLContext.__init__(self, sparkContext, sparkSession, jhiveContext)
+
+    @classmethod
+    def _createForTesting(cls, sparkContext):
+        """(Internal use only) Create a new HiveContext for testing.
+
+        All test code that touches HiveContext *must* go through this method. Otherwise,
+        you may end up launching multiple Derby instances and encountering incredibly
+        confusing error messages.
+        """
+        jsc = sparkContext._jsc.sc()
+        jtestHive = sparkContext._jvm.org.apache.spark.sql.hive.test.TestHiveContext(jsc, False)
+        return cls(sparkContext, jtestHive)
+
+    def refreshTable(self, tableName):
+        """Invalidate and refresh all the cached the metadata of the given
+        table. For performance reasons, Spark SQL or the external data source
+        library it uses might cache certain metadata about a table, such as the
+        location of blocks. When those change outside of Spark SQL, users should
+        call this function to invalidate the cache.
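+
+        Illustrative example (``hiveContext`` and the table name are placeholders,
+        so the snippet is skipped by doctest):
+
+        >>> hiveContext.refreshTable("my_hive_table")  # doctest: +SKIP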
+        """
+        self._ssql_ctx.refreshTable(tableName)
+
+
+def _test():
+    import os
+    import doctest
+    import tempfile
+    from pyspark.context import SparkContext
+    from pyspark.sql import Row, SQLContext
+    import pyspark.sql.context
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.sql.context.__dict__.copy()
+    sc = SparkContext('local[4]', 'PythonTest')
+    globs['tempfile'] = tempfile
+    globs['os'] = os
+    globs['sc'] = sc
+    globs['sqlContext'] = SQLContext(sc)
+    globs['rdd'] = rdd = sc.parallelize(
+        [Row(field1=1, field2="row1"),
+         Row(field1=2, field2="row2"),
+         Row(field1=3, field2="row3")]
+    )
+    globs['df'] = rdd.toDF()
+    jsonStrings = [
+        '{"field1": 1, "field2": "row1", "field3":{"field4":11}}',
+        '{"field1" : 2, "field3":{"field4":22, "field5": [10, 11]},'
+        '"field6":[{"field7": "row2"}]}',
+        '{"field1" : null, "field2": "row3", '
+        '"field3":{"field4":33, "field5": []}}'
+    ]
+    globs['jsonStrings'] = jsonStrings
+    globs['json'] = sc.parallelize(jsonStrings)
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.context, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/dataframe.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/dataframe.py
new file mode 100644
index 0000000000000000000000000000000000000000..1affc9b4fcf6c832c66601d84dff747639a2dad8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/dataframe.py
@@ -0,0 +1,2327 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import random
+
+if sys.version >= '3':
+    basestring = unicode = str
+    long = int
+    from functools import reduce
+else:
+    from itertools import imap as map
+
+import warnings
+
+from pyspark import copy_func, since, _NoValue
+from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix
+from pyspark.serializers import ArrowStreamSerializer, BatchedSerializer, PickleSerializer, \
+    UTF8Deserializer
+from pyspark.storagelevel import StorageLevel
+from pyspark.traceback_utils import SCCallSiteSync
+from pyspark.sql.types import _parse_datatype_json_string
+from pyspark.sql.column import Column, _to_seq, _to_list, _to_java_column
+from pyspark.sql.readwriter import DataFrameWriter
+from pyspark.sql.streaming import DataStreamWriter
+from pyspark.sql.types import IntegralType
+from pyspark.sql.types import *
+from pyspark.util import _exception_message
+
+__all__ = ["DataFrame", "DataFrameNaFunctions", "DataFrameStatFunctions"]
+
+
+class DataFrame(object):
+    """A distributed collection of data grouped into named columns.
+
+    A :class:`DataFrame` is equivalent to a relational table in Spark SQL,
+    and can be created using various functions in :class:`SparkSession`::
+
+        people = spark.read.parquet("...")
+
+    Once created, it can be manipulated using the various domain-specific-language
+    (DSL) functions defined in: :class:`DataFrame`, :class:`Column`.
+
+    To select a column from the data frame, use the apply method::
+
+        ageCol = people.age
+
+    A more concrete example::
+
+        # To create DataFrame using SparkSession
+        people = spark.read.parquet("...")
+        department = spark.read.parquet("...")
+
+        people.filter(people.age > 30).join(department, people.deptId == department.id) \\
+          .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"})
+
+    .. versionadded:: 1.3
+    """
+
+    def __init__(self, jdf, sql_ctx):
+        self._jdf = jdf
+        self.sql_ctx = sql_ctx
+        self._sc = sql_ctx and sql_ctx._sc
+        self.is_cached = False
+        self._schema = None  # initialized lazily
+        self._lazy_rdd = None
+        # Check whether _repr_html is supported or not, we use it to avoid calling _jdf twice
+        # by __repr__ and _repr_html_ while eager evaluation opened.
+        self._support_repr_html = False
+
+    @property
+    @since(1.3)
+    def rdd(self):
+        """Returns the content as an :class:`pyspark.RDD` of :class:`Row`.
+        """
+        if self._lazy_rdd is None:
+            jrdd = self._jdf.javaToPython()
+            self._lazy_rdd = RDD(jrdd, self.sql_ctx._sc, BatchedSerializer(PickleSerializer()))
+        return self._lazy_rdd
+
+    @property
+    @since("1.3.1")
+    def na(self):
+        """Returns a :class:`DataFrameNaFunctions` for handling missing values.
+        """
+        return DataFrameNaFunctions(self)
+
+    @property
+    @since(1.4)
+    def stat(self):
+        """Returns a :class:`DataFrameStatFunctions` for statistic functions.
+        """
+        return DataFrameStatFunctions(self)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def toJSON(self, use_unicode=True):
+        """Converts a :class:`DataFrame` into a :class:`RDD` of string.
+
+        Each row is turned into a JSON document as one element in the returned RDD.
+
+        >>> df.toJSON().first()
+        u'{"age":2,"name":"Alice"}'
+        """
+        rdd = self._jdf.toJSON()
+        return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode))
+
+    @since(1.3)
+    def registerTempTable(self, name):
+        """Registers this DataFrame as a temporary table using the given name.
+
+        The lifetime of this temporary table is tied to the :class:`SparkSession`
+        that was used to create this :class:`DataFrame`.
+
+        >>> df.registerTempTable("people")
+        >>> df2 = spark.sql("select * from people")
+        >>> sorted(df.collect()) == sorted(df2.collect())
+        True
+        >>> spark.catalog.dropTempView("people")
+
+        .. note:: Deprecated in 2.0, use createOrReplaceTempView instead.
+        """
+        warnings.warn(
+            "Deprecated in 2.0, use createOrReplaceTempView instead.", DeprecationWarning)
+        self._jdf.createOrReplaceTempView(name)
+
+    @since(2.0)
+    def createTempView(self, name):
+        """Creates a local temporary view with this DataFrame.
+
+        The lifetime of this temporary view is tied to the :class:`SparkSession`
+        that was used to create this :class:`DataFrame`.
+        Throws :class:`TempTableAlreadyExistsException` if the view name already exists in the
+        catalog.
+
+        >>> df.createTempView("people")
+        >>> df2 = spark.sql("select * from people")
+        >>> sorted(df.collect()) == sorted(df2.collect())
+        True
+        >>> df.createTempView("people")  # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+        ...
+        AnalysisException: u"Temporary table 'people' already exists;"
+        >>> spark.catalog.dropTempView("people")
+
+        """
+        self._jdf.createTempView(name)
+
+    @since(2.0)
+    def createOrReplaceTempView(self, name):
+        """Creates or replaces a local temporary view with this DataFrame.
+
+        The lifetime of this temporary view is tied to the :class:`SparkSession`
+        that was used to create this :class:`DataFrame`.
+
+        >>> df.createOrReplaceTempView("people")
+        >>> df2 = df.filter(df.age > 3)
+        >>> df2.createOrReplaceTempView("people")
+        >>> df3 = spark.sql("select * from people")
+        >>> sorted(df3.collect()) == sorted(df2.collect())
+        True
+        >>> spark.catalog.dropTempView("people")
+
+        """
+        self._jdf.createOrReplaceTempView(name)
+
+    @since(2.1)
+    def createGlobalTempView(self, name):
+        """Creates a global temporary view with this DataFrame.
+
+        The lifetime of this temporary view is tied to this Spark application.
+        Throws :class:`TempTableAlreadyExistsException` if the view name already exists in the
+        catalog.
+
+        >>> df.createGlobalTempView("people")
+        >>> df2 = spark.sql("select * from global_temp.people")
+        >>> sorted(df.collect()) == sorted(df2.collect())
+        True
+        >>> df.createGlobalTempView("people")  # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+        ...
+        AnalysisException: u"Temporary table 'people' already exists;"
+        >>> spark.catalog.dropGlobalTempView("people")
+
+        """
+        self._jdf.createGlobalTempView(name)
+
+    @since(2.2)
+    def createOrReplaceGlobalTempView(self, name):
+        """Creates or replaces a global temporary view using the given name.
+
+        The lifetime of this temporary view is tied to this Spark application.
+
+        >>> df.createOrReplaceGlobalTempView("people")
+        >>> df2 = df.filter(df.age > 3)
+        >>> df2.createOrReplaceGlobalTempView("people")
+        >>> df3 = spark.sql("select * from global_temp.people")
+        >>> sorted(df3.collect()) == sorted(df2.collect())
+        True
+        >>> spark.catalog.dropGlobalTempView("people")
+
+        """
+        self._jdf.createOrReplaceGlobalTempView(name)
+
+    @property
+    @since(1.4)
+    def write(self):
+        """
+        Interface for saving the content of the non-streaming :class:`DataFrame` out into external
+        storage.
+
+        :return: :class:`DataFrameWriter`
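+
+        For example, a minimal usage sketch (the output path below is hypothetical)::
+
+            df.write.mode("overwrite").parquet("/tmp/people.parquet")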
+        """
+        return DataFrameWriter(self)
+
+    @property
+    @since(2.0)
+    def writeStream(self):
+        """
+        Interface for saving the content of the streaming :class:`DataFrame` out into external
+        storage.
+
+        .. note:: Evolving.
+
+        :return: :class:`DataStreamWriter`
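+
+        For example, assuming ``sdf`` is a streaming :class:`DataFrame`::
+
+            query = sdf.writeStream.format("console").start()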
+        """
+        return DataStreamWriter(self)
+
+    @property
+    @since(1.3)
+    def schema(self):
+        """Returns the schema of this :class:`DataFrame` as a :class:`pyspark.sql.types.StructType`.
+
+        >>> df.schema
+        StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true)))
+        """
+        if self._schema is None:
+            try:
+                self._schema = _parse_datatype_json_string(self._jdf.schema().json())
+            except AttributeError as e:
+                raise Exception(
+                    "Unable to parse datatype from schema. %s" % e)
+        return self._schema
+
+    @since(1.3)
+    def printSchema(self):
+        """Prints out the schema in the tree format.
+
+        >>> df.printSchema()
+        root
+         |-- age: integer (nullable = true)
+         |-- name: string (nullable = true)
+        <BLANKLINE>
+        """
+        print(self._jdf.schema().treeString())
+
+    @since(1.3)
+    def explain(self, extended=False):
+        """Prints the (logical and physical) plans to the console for debugging purpose.
+
+        :param extended: boolean, default ``False``. If ``False``, prints only the physical plan.
+
+        >>> df.explain()
+        == Physical Plan ==
+        Scan ExistingRDD[age#0,name#1]
+
+        >>> df.explain(True)
+        == Parsed Logical Plan ==
+        ...
+        == Analyzed Logical Plan ==
+        ...
+        == Optimized Logical Plan ==
+        ...
+        == Physical Plan ==
+        ...
+        """
+        if extended:
+            print(self._jdf.queryExecution().toString())
+        else:
+            print(self._jdf.queryExecution().simpleString())
+
+    @since(2.4)
+    def exceptAll(self, other):
+        """Return a new :class:`DataFrame` containing rows in this :class:`DataFrame` but
+        not in another :class:`DataFrame` while preserving duplicates.
+
+        This is equivalent to `EXCEPT ALL` in SQL.
+
+        >>> df1 = spark.createDataFrame(
+        ...         [("a", 1), ("a", 1), ("a", 1), ("a", 2), ("b",  3), ("c", 4)], ["C1", "C2"])
+        >>> df2 = spark.createDataFrame([("a", 1), ("b", 3)], ["C1", "C2"])
+
+        >>> df1.exceptAll(df2).show()
+        +---+---+
+        | C1| C2|
+        +---+---+
+        |  a|  1|
+        |  a|  1|
+        |  a|  2|
+        |  c|  4|
+        +---+---+
+
+        Also as standard in SQL, this function resolves columns by position (not by name).
+        """
+        return DataFrame(self._jdf.exceptAll(other._jdf), self.sql_ctx)
+
+    @since(1.3)
+    def isLocal(self):
+        """Returns ``True`` if the :func:`collect` and :func:`take` methods can be run locally
+        (without any Spark executors).
+        """
+        return self._jdf.isLocal()
+
+    @property
+    @since(2.0)
+    def isStreaming(self):
+        """Returns true if this :class:`Dataset` contains one or more sources that continuously
+        return data as it arrives. A :class:`Dataset` that reads data from a streaming source
+        must be executed as a :class:`StreamingQuery` using the :func:`start` method in
+        :class:`DataStreamWriter`.  Methods that return a single answer (e.g., :func:`count` or
+        :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming
+        source present.
+
+        .. note:: Evolving
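+
+        For example (the ``rate`` source below is only an illustration)::
+
+            sdf = spark.readStream.format("rate").load()
+            sdf.isStreaming  # True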
+        """
+        return self._jdf.isStreaming()
+
+    @since(1.3)
+    def show(self, n=20, truncate=True, vertical=False):
+        """Prints the first ``n`` rows to the console.
+
+        :param n: Number of rows to show.
+        :param truncate: If set to True, truncate strings longer than 20 chars by default.
+            If set to a number greater than one, truncate long strings to length ``truncate``
+            and align cells to the right.
+        :param vertical: If set to True, print output rows vertically (one line
+            per column value).
+
+        >>> df
+        DataFrame[age: int, name: string]
+        >>> df.show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  2|Alice|
+        |  5|  Bob|
+        +---+-----+
+        >>> df.show(truncate=3)
+        +---+----+
+        |age|name|
+        +---+----+
+        |  2| Ali|
+        |  5| Bob|
+        +---+----+
+        >>> df.show(vertical=True)
+        -RECORD 0-----
+         age  | 2
+         name | Alice
+        -RECORD 1-----
+         age  | 5
+         name | Bob
+        """
+        if isinstance(truncate, bool) and truncate:
+            print(self._jdf.showString(n, 20, vertical))
+        else:
+            print(self._jdf.showString(n, int(truncate), vertical))
+
+    def __repr__(self):
+        if not self._support_repr_html and self.sql_ctx._conf.isReplEagerEvalEnabled():
+            vertical = False
+            return self._jdf.showString(
+                self.sql_ctx._conf.replEagerEvalMaxNumRows(),
+                self.sql_ctx._conf.replEagerEvalTruncate(), vertical)
+        else:
+            return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes))
+
+    def _repr_html_(self):
+        """Returns a dataframe with html code when you enabled eager evaluation
+        by 'spark.sql.repl.eagerEval.enabled', this only called by REPL you are
+        using support eager evaluation with HTML.
+        """
+        import cgi
+        if not self._support_repr_html:
+            self._support_repr_html = True
+        if self.sql_ctx._conf.isReplEagerEvalEnabled():
+            max_num_rows = max(self.sql_ctx._conf.replEagerEvalMaxNumRows(), 0)
+            sock_info = self._jdf.getRowsToPython(
+                max_num_rows, self.sql_ctx._conf.replEagerEvalTruncate())
+            rows = list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
+            head = rows[0]
+            row_data = rows[1:]
+            has_more_data = len(row_data) > max_num_rows
+            row_data = row_data[:max_num_rows]
+
+            html = "<table border='1'>\n"
+            # generate table head
+            html += "<tr><th>%s</th></tr>\n" % "</th><th>".join(map(lambda x: cgi.escape(x), head))
+            # generate table rows
+            for row in row_data:
+                html += "<tr><td>%s</td></tr>\n" % "</td><td>".join(
+                    map(lambda x: cgi.escape(x), row))
+            html += "</table>\n"
+            if has_more_data:
+                html += "only showing top %d %s\n" % (
+                    max_num_rows, "row" if max_num_rows == 1 else "rows")
+            return html
+        else:
+            return None
+
+    @since(2.1)
+    def checkpoint(self, eager=True):
+        """Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the
+        logical plan of this DataFrame, which is especially useful in iterative algorithms where the
+        plan may grow exponentially. It will be saved to files inside the checkpoint
+        directory set with L{SparkContext.setCheckpointDir()}.
+
+        :param eager: Whether to checkpoint this DataFrame immediately
+
+        .. note:: Experimental
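+
+        A minimal sketch (the checkpoint directory below is hypothetical)::
+
+            spark.sparkContext.setCheckpointDir("/tmp/checkpoints")
+            df2 = df.checkpoint()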
+        """
+        jdf = self._jdf.checkpoint(eager)
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(2.3)
+    def localCheckpoint(self, eager=True):
+        """Returns a locally checkpointed version of this Dataset. Checkpointing can be used to
+        truncate the logical plan of this DataFrame, which is especially useful in iterative
+        algorithms where the plan may grow exponentially. Local checkpoints are stored in the
+        executors using the caching subsystem and therefore they are not reliable.
+
+        :param eager: Whether to checkpoint this DataFrame immediately
+
+        .. note:: Experimental
+        """
+        jdf = self._jdf.localCheckpoint(eager)
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(2.1)
+    def withWatermark(self, eventTime, delayThreshold):
+        """Defines an event time watermark for this :class:`DataFrame`. A watermark tracks a point
+        in time before which we assume no more late data is going to arrive.
+
+        Spark will use this watermark for several purposes:
+          - To know when a given time window aggregation can be finalized and thus can be emitted
+            when using output modes that do not allow updates.
+
+          - To minimize the amount of state that we need to keep for on-going aggregations.
+
+        The current watermark is computed by looking at the `MAX(eventTime)` seen across
+        all of the partitions in the query minus a user specified `delayThreshold`.  Due to the cost
+        of coordinating this value across partitions, the actual watermark used is only guaranteed
+        to be at least `delayThreshold` behind the actual event time.  In some cases we may still
+        process records that arrive more than `delayThreshold` late.
+
+        :param eventTime: the name of the column that contains the event time of the row.
+        :param delayThreshold: the minimum delay to wait for data to arrive late, relative to the
+            latest record that has been processed in the form of an interval
+            (e.g. "1 minute" or "5 hours").
+
+        .. note:: Evolving
+
+        >>> sdf.select('name', sdf.time.cast('timestamp')).withWatermark('time', '10 minutes')
+        DataFrame[name: string, time: timestamp]
+        """
+        if not eventTime or type(eventTime) is not str:
+            raise TypeError("eventTime should be provided as a string")
+        if not delayThreshold or type(delayThreshold) is not str:
+            raise TypeError("delayThreshold should be provided as a string interval")
+        jdf = self._jdf.withWatermark(eventTime, delayThreshold)
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(2.2)
+    def hint(self, name, *parameters):
+        """Specifies some hint on the current DataFrame.
+
+        :param name: A name of the hint.
+        :param parameters: Optional parameters.
+        :return: :class:`DataFrame`
+
+        >>> df.join(df2.hint("broadcast"), "name").show()
+        +----+---+------+
+        |name|age|height|
+        +----+---+------+
+        | Bob|  5|    85|
+        +----+---+------+
+        """
+        if len(parameters) == 1 and isinstance(parameters[0], list):
+            parameters = parameters[0]
+
+        if not isinstance(name, str):
+            raise TypeError("name should be provided as str, got {0}".format(type(name)))
+
+        for p in parameters:
+            if not isinstance(p, str):
+                raise TypeError(
+                    "all parameters should be str, got {0} of type {1}".format(p, type(p)))
+
+        jdf = self._jdf.hint(name, self._jseq(parameters))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(1.3)
+    def count(self):
+        """Returns the number of rows in this :class:`DataFrame`.
+
+        >>> df.count()
+        2
+        """
+        return int(self._jdf.count())
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def collect(self):
+        """Returns all the records as a list of :class:`Row`.
+
+        >>> df.collect()
+        [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+        """
+        with SCCallSiteSync(self._sc) as css:
+            sock_info = self._jdf.collectToPython()
+        return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def toLocalIterator(self):
+        """
+        Returns an iterator that contains all of the rows in this :class:`DataFrame`.
+        The iterator will consume as much memory as the largest partition in this DataFrame.
+
+        >>> list(df.toLocalIterator())
+        [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+        """
+        with SCCallSiteSync(self._sc) as css:
+            sock_info = self._jdf.toPythonIterator()
+        return _load_from_socket(sock_info, BatchedSerializer(PickleSerializer()))
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def limit(self, num):
+        """Limits the result count to the number specified.
+
+        >>> df.limit(1).collect()
+        [Row(age=2, name=u'Alice')]
+        >>> df.limit(0).collect()
+        []
+        """
+        jdf = self._jdf.limit(num)
+        return DataFrame(jdf, self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def take(self, num):
+        """Returns the first ``num`` rows as a :class:`list` of :class:`Row`.
+
+        >>> df.take(2)
+        [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+        """
+        return self.limit(num).collect()
+
+    @since(1.3)
+    def foreach(self, f):
+        """Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`.
+
+        This is a shorthand for ``df.rdd.foreach()``.
+
+        >>> def f(person):
+        ...     print(person.name)
+        >>> df.foreach(f)
+        """
+        self.rdd.foreach(f)
+
+    @since(1.3)
+    def foreachPartition(self, f):
+        """Applies the ``f`` function to each partition of this :class:`DataFrame`.
+
+        This is a shorthand for ``df.rdd.foreachPartition()``.
+
+        >>> def f(people):
+        ...     for person in people:
+        ...         print(person.name)
+        >>> df.foreachPartition(f)
+        """
+        self.rdd.foreachPartition(f)
+
+    @since(1.3)
+    def cache(self):
+        """Persists the :class:`DataFrame` with the default storage level (C{MEMORY_AND_DISK}).
+
+        .. note:: The default storage level has changed to C{MEMORY_AND_DISK} to match Scala in 2.0.
+        """
+        self.is_cached = True
+        self._jdf.cache()
+        return self
+
+    @since(1.3)
+    def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK):
+        """Sets the storage level to persist the contents of the :class:`DataFrame` across
+        operations after the first time it is computed. This can only be used to assign
+        a new storage level if the :class:`DataFrame` does not have a storage level set yet.
+        If no storage level is specified, it defaults to C{MEMORY_AND_DISK}.
+
+        .. note:: The default storage level has changed to C{MEMORY_AND_DISK} to match Scala in 2.0.
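+
+        For example, to persist with a non-default storage level::
+
+            from pyspark import StorageLevel
+            df.persist(StorageLevel.DISK_ONLY)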
+        """
+        self.is_cached = True
+        javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel)
+        self._jdf.persist(javaStorageLevel)
+        return self
+
+    @property
+    @since(2.1)
+    def storageLevel(self):
+        """Get the :class:`DataFrame`'s current storage level.
+
+        >>> df.storageLevel
+        StorageLevel(False, False, False, False, 1)
+        >>> df.cache().storageLevel
+        StorageLevel(True, True, False, True, 1)
+        >>> df2.persist(StorageLevel.DISK_ONLY_2).storageLevel
+        StorageLevel(True, False, False, False, 2)
+        """
+        java_storage_level = self._jdf.storageLevel()
+        storage_level = StorageLevel(java_storage_level.useDisk(),
+                                     java_storage_level.useMemory(),
+                                     java_storage_level.useOffHeap(),
+                                     java_storage_level.deserialized(),
+                                     java_storage_level.replication())
+        return storage_level
+
+    @since(1.3)
+    def unpersist(self, blocking=False):
+        """Marks the :class:`DataFrame` as non-persistent, and remove all blocks for it from
+        memory and disk.
+
+        .. note:: `blocking` default has changed to False to match Scala in 2.0.
+        """
+        self.is_cached = False
+        self._jdf.unpersist(blocking)
+        return self
+
+    @since(1.4)
+    def coalesce(self, numPartitions):
+        """
+        Returns a new :class:`DataFrame` that has exactly `numPartitions` partitions.
+
+        :param numPartitions: int, to specify the target number of partitions
+
+        Similar to coalesce defined on an :class:`RDD`, this operation results in a
+        narrow dependency, e.g. if you go from 1000 partitions to 100 partitions,
+        there will not be a shuffle; instead, each of the 100 new partitions will
+        claim 10 of the current partitions. If a larger number of partitions is requested,
+        it will stay at the current number of partitions.
+
+        However, if you're doing a drastic coalesce, e.g. to numPartitions = 1,
+        this may result in your computation taking place on fewer nodes than
+        you would like (e.g. one node in the case of numPartitions = 1). To avoid this,
+        you can call repartition(). This will add a shuffle step, but means the
+        current upstream partitions will be executed in parallel (per whatever
+        the current partitioning is).
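+
+        For example, to reduce to a single partition while still shuffling (so the
+        upstream work runs in parallel), a repartition-based alternative is::
+
+            df.repartition(1)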
+
+        >>> df.coalesce(1).rdd.getNumPartitions()
+        1
+        """
+        return DataFrame(self._jdf.coalesce(numPartitions), self.sql_ctx)
+
+    @since(1.3)
+    def repartition(self, numPartitions, *cols):
+        """
+        Returns a new :class:`DataFrame` partitioned by the given partitioning expressions. The
+        resulting DataFrame is hash partitioned.
+
+        :param numPartitions:
+            can be an int to specify the target number of partitions or a Column.
+            If it is a Column, it will be used as the first partitioning column. If not specified,
+            the default number of partitions is used.
+
+        .. versionchanged:: 1.6
+           Added optional arguments to specify the partitioning columns. Also made numPartitions
+           optional if partitioning columns are specified.
+
+        >>> df.repartition(10).rdd.getNumPartitions()
+        10
+        >>> data = df.union(df).repartition("age")
+        >>> data.show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  5|  Bob|
+        |  5|  Bob|
+        |  2|Alice|
+        |  2|Alice|
+        +---+-----+
+        >>> data = data.repartition(7, "age")
+        >>> data.show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  2|Alice|
+        |  5|  Bob|
+        |  2|Alice|
+        |  5|  Bob|
+        +---+-----+
+        >>> data.rdd.getNumPartitions()
+        7
+        >>> data = data.repartition("name", "age")
+        >>> data.show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  5|  Bob|
+        |  5|  Bob|
+        |  2|Alice|
+        |  2|Alice|
+        +---+-----+
+        """
+        if isinstance(numPartitions, int):
+            if len(cols) == 0:
+                return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx)
+            else:
+                return DataFrame(
+                    self._jdf.repartition(numPartitions, self._jcols(*cols)), self.sql_ctx)
+        elif isinstance(numPartitions, (basestring, Column)):
+            cols = (numPartitions, ) + cols
+            return DataFrame(self._jdf.repartition(self._jcols(*cols)), self.sql_ctx)
+        else:
+            raise TypeError("numPartitions should be an int or Column")
+
+    @since("2.4.0")
+    def repartitionByRange(self, numPartitions, *cols):
+        """
+        Returns a new :class:`DataFrame` partitioned by the given partitioning expressions. The
+        resulting DataFrame is range partitioned.
+
+        :param numPartitions:
+            can be an int to specify the target number of partitions or a Column.
+            If it is a Column, it will be used as the first partitioning column. If not specified,
+            the default number of partitions is used.
+
+        At least one partition-by expression must be specified.
+        When no explicit sort order is specified, "ascending nulls first" is assumed.
+
+        >>> df.repartitionByRange(2, "age").rdd.getNumPartitions()
+        2
+        >>> df.show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  2|Alice|
+        |  5|  Bob|
+        +---+-----+
+        >>> df.repartitionByRange(1, "age").rdd.getNumPartitions()
+        1
+        >>> data = df.repartitionByRange("age")
+        >>> df.show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  2|Alice|
+        |  5|  Bob|
+        +---+-----+
+        """
+        if isinstance(numPartitions, int):
+            if len(cols) == 0:
+                raise ValueError("At least one partition-by expression must be specified.")
+            else:
+                return DataFrame(
+                    self._jdf.repartitionByRange(numPartitions, self._jcols(*cols)), self.sql_ctx)
+        elif isinstance(numPartitions, (basestring, Column)):
+            cols = (numPartitions,) + cols
+            return DataFrame(self._jdf.repartitionByRange(self._jcols(*cols)), self.sql_ctx)
+        else:
+            raise TypeError("numPartitions should be an int, string or Column")
+
+    @since(1.3)
+    def distinct(self):
+        """Returns a new :class:`DataFrame` containing the distinct rows in this :class:`DataFrame`.
+
+        >>> df.distinct().count()
+        2
+        """
+        return DataFrame(self._jdf.distinct(), self.sql_ctx)
+
+    @since(1.3)
+    def sample(self, withReplacement=None, fraction=None, seed=None):
+        """Returns a sampled subset of this :class:`DataFrame`.
+
+        :param withReplacement: Sample with replacement or not (default False).
+        :param fraction: Fraction of rows to generate, range [0.0, 1.0].
+        :param seed: Seed for sampling (default a random seed).
+
+        .. note:: This is not guaranteed to provide exactly the fraction specified of the total
+            count of the given :class:`DataFrame`.
+
+        .. note:: `fraction` is required, while `withReplacement` and `seed` are optional.
+
+        >>> df = spark.range(10)
+        >>> df.sample(0.5, 3).count()
+        4
+        >>> df.sample(fraction=0.5, seed=3).count()
+        4
+        >>> df.sample(withReplacement=True, fraction=0.5, seed=3).count()
+        1
+        >>> df.sample(1.0).count()
+        10
+        >>> df.sample(fraction=1.0).count()
+        10
+        >>> df.sample(False, fraction=1.0).count()
+        10
+        """
+
+        # For the cases below:
+        #   sample(True, 0.5 [, seed])
+        #   sample(True, fraction=0.5 [, seed])
+        #   sample(withReplacement=False, fraction=0.5 [, seed])
+        is_withReplacement_set = \
+            type(withReplacement) == bool and isinstance(fraction, float)
+
+        # For the case below:
+        #   sample(fraction=0.5 [, seed])
+        is_withReplacement_omitted_kwargs = \
+            withReplacement is None and isinstance(fraction, float)
+
+        # For the case below:
+        #   sample(0.5 [, seed])
+        is_withReplacement_omitted_args = isinstance(withReplacement, float)
+
+        if not (is_withReplacement_set
+                or is_withReplacement_omitted_kwargs
+                or is_withReplacement_omitted_args):
+            argtypes = [
+                str(type(arg)) for arg in [withReplacement, fraction, seed] if arg is not None]
+            raise TypeError(
+                "withReplacement (optional), fraction (required) and seed (optional)"
+                " should be a bool, float and number; however, "
+                "got [%s]." % ", ".join(argtypes))
+
+        if is_withReplacement_omitted_args:
+            if fraction is not None:
+                seed = fraction
+            fraction = withReplacement
+            withReplacement = None
+
+        seed = long(seed) if seed is not None else None
+        args = [arg for arg in [withReplacement, fraction, seed] if arg is not None]
+        jdf = self._jdf.sample(*args)
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(1.5)
+    def sampleBy(self, col, fractions, seed=None):
+        """
+        Returns a stratified sample without replacement based on the
+        fraction given on each stratum.
+
+        :param col: column that defines strata
+        :param fractions:
+            sampling fraction for each stratum. If a stratum is not
+            specified, we treat its fraction as zero.
+        :param seed: random seed
+        :return: a new DataFrame that represents the stratified sample
+
+        >>> from pyspark.sql.functions import col
+        >>> dataset = sqlContext.range(0, 100).select((col("id") % 3).alias("key"))
+        >>> sampled = dataset.sampleBy("key", fractions={0: 0.1, 1: 0.2}, seed=0)
+        >>> sampled.groupBy("key").count().orderBy("key").show()
+        +---+-----+
+        |key|count|
+        +---+-----+
+        |  0|    5|
+        |  1|    9|
+        +---+-----+
+
+        """
+        if not isinstance(col, basestring):
+            raise ValueError("col must be a string, but got %r" % type(col))
+        if not isinstance(fractions, dict):
+            raise ValueError("fractions must be a dict but got %r" % type(fractions))
+        for k, v in fractions.items():
+            if not isinstance(k, (float, int, long, basestring)):
+                raise ValueError("key must be float, int, long, or string, but got %r" % type(k))
+            fractions[k] = float(v)
+        seed = seed if seed is not None else random.randint(0, sys.maxsize)
+        return DataFrame(self._jdf.stat().sampleBy(col, self._jmap(fractions), seed), self.sql_ctx)
+
+    @since(1.4)
+    def randomSplit(self, weights, seed=None):
+        """Randomly splits this :class:`DataFrame` with the provided weights.
+
+        :param weights: list of doubles as weights with which to split the DataFrame. Weights will
+            be normalized if they don't sum up to 1.0.
+        :param seed: The seed for sampling.
+
+        >>> splits = df4.randomSplit([1.0, 2.0], 24)
+        >>> splits[0].count()
+        1
+
+        >>> splits[1].count()
+        3
+        """
+        for w in weights:
+            if w < 0.0:
+                raise ValueError("Weights must be positive. Found weight value: %s" % w)
+        seed = seed if seed is not None else random.randint(0, sys.maxsize)
+        rdd_array = self._jdf.randomSplit(_to_list(self.sql_ctx._sc, weights), long(seed))
+        return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array]
+
+    @property
+    @since(1.3)
+    def dtypes(self):
+        """Returns all column names and their data types as a list.
+
+        >>> df.dtypes
+        [('age', 'int'), ('name', 'string')]
+        """
+        return [(str(f.name), f.dataType.simpleString()) for f in self.schema.fields]
+
+    @property
+    @since(1.3)
+    def columns(self):
+        """Returns all column names as a list.
+
+        >>> df.columns
+        ['age', 'name']
+        """
+        return [f.name for f in self.schema.fields]
+
+    @since(2.3)
+    def colRegex(self, colName):
+        """
+        Selects column based on the column name specified as a regex and returns it
+        as :class:`Column`.
+
+        :param colName: string, column name specified as a regex.
+
+        >>> df = spark.createDataFrame([("a", 1), ("b", 2), ("c",  3)], ["Col1", "Col2"])
+        >>> df.select(df.colRegex("`(Col1)?+.+`")).show()
+        +----+
+        |Col2|
+        +----+
+        |   1|
+        |   2|
+        |   3|
+        +----+
+        """
+        if not isinstance(colName, basestring):
+            raise ValueError("colName should be provided as string")
+        jc = self._jdf.colRegex(colName)
+        return Column(jc)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def alias(self, alias):
+        """Returns a new :class:`DataFrame` with an alias set.
+
+        :param alias: string, an alias name to be set for the DataFrame.
+
+        >>> from pyspark.sql.functions import *
+        >>> df_as1 = df.alias("df_as1")
+        >>> df_as2 = df.alias("df_as2")
+        >>> joined_df = df_as1.join(df_as2, col("df_as1.name") == col("df_as2.name"), 'inner')
+        >>> joined_df.select("df_as1.name", "df_as2.name", "df_as2.age").collect()
+        [Row(name=u'Bob', name=u'Bob', age=5), Row(name=u'Alice', name=u'Alice', age=2)]
+        """
+        assert isinstance(alias, basestring), "alias should be a string"
+        return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(2.1)
+    def crossJoin(self, other):
+        """Returns the cartesian product with another :class:`DataFrame`.
+
+        :param other: Right side of the cartesian product.
+
+        >>> df.select("age", "name").collect()
+        [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+        >>> df2.select("name", "height").collect()
+        [Row(name=u'Tom', height=80), Row(name=u'Bob', height=85)]
+        >>> df.crossJoin(df2.select("height")).select("age", "name", "height").collect()
+        [Row(age=2, name=u'Alice', height=80), Row(age=2, name=u'Alice', height=85),
+         Row(age=5, name=u'Bob', height=80), Row(age=5, name=u'Bob', height=85)]
+        """
+
+        jdf = self._jdf.crossJoin(other._jdf)
+        return DataFrame(jdf, self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def join(self, other, on=None, how=None):
+        """Joins with another :class:`DataFrame`, using the given join expression.
+
+        :param other: Right side of the join
+        :param on: a string for the join column name, a list of column names,
+            a join expression (Column), or a list of Columns.
+            If `on` is a string or a list of strings indicating the name of the join column(s),
+            the column(s) must exist on both sides, and this performs an equi-join.
+        :param how: str, default ``inner``. Must be one of: ``inner``, ``cross``, ``outer``,
+            ``full``, ``full_outer``, ``left``, ``left_outer``, ``right``, ``right_outer``,
+            ``left_semi``, and ``left_anti``.
+
+        The following performs a full outer join between ``df1`` and ``df2``.
+
+        >>> df.join(df2, df.name == df2.name, 'outer').select(df.name, df2.height).collect()
+        [Row(name=None, height=80), Row(name=u'Bob', height=85), Row(name=u'Alice', height=None)]
+
+        >>> df.join(df2, 'name', 'outer').select('name', 'height').collect()
+        [Row(name=u'Tom', height=80), Row(name=u'Bob', height=85), Row(name=u'Alice', height=None)]
+
+        >>> cond = [df.name == df3.name, df.age == df3.age]
+        >>> df.join(df3, cond, 'outer').select(df.name, df3.age).collect()
+        [Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]
+
+        >>> df.join(df2, 'name').select(df.name, df2.height).collect()
+        [Row(name=u'Bob', height=85)]
+
+        >>> df.join(df4, ['name', 'age']).select(df.name, df.age).collect()
+        [Row(name=u'Bob', age=5)]
+        """
+
+        if on is not None and not isinstance(on, list):
+            on = [on]
+
+        if on is not None:
+            if isinstance(on[0], basestring):
+                on = self._jseq(on)
+            else:
+                assert isinstance(on[0], Column), "on should be Column or list of Column"
+                on = reduce(lambda x, y: x.__and__(y), on)
+                on = on._jc
+
+        if on is None and how is None:
+            jdf = self._jdf.join(other._jdf)
+        else:
+            if how is None:
+                how = "inner"
+            if on is None:
+                on = self._jseq([])
+            assert isinstance(how, basestring), "how should be basestring"
+            jdf = self._jdf.join(other._jdf, on, how)
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(1.6)
+    def sortWithinPartitions(self, *cols, **kwargs):
+        """Returns a new :class:`DataFrame` with each partition sorted by the specified column(s).
+
+        :param cols: list of :class:`Column` or column names to sort by.
+        :param ascending: boolean or list of boolean (default True).
+            Sort ascending vs. descending. Specify list for multiple sort orders.
+            If a list is specified, length of the list must equal length of the `cols`.
+
+        >>> df.sortWithinPartitions("age", ascending=False).show()
+        +---+-----+
+        |age| name|
+        +---+-----+
+        |  2|Alice|
+        |  5|  Bob|
+        +---+-----+
+        """
+        jdf = self._jdf.sortWithinPartitions(self._sort_cols(cols, kwargs))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def sort(self, *cols, **kwargs):
+        """Returns a new :class:`DataFrame` sorted by the specified column(s).
+
+        :param cols: list of :class:`Column` or column names to sort by.
+        :param ascending: boolean or list of boolean (default True).
+            Sort ascending vs. descending. Specify list for multiple sort orders.
+            If a list is specified, length of the list must equal length of the `cols`.
+
+        >>> df.sort(df.age.desc()).collect()
+        [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
+        >>> df.sort("age", ascending=False).collect()
+        [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
+        >>> df.orderBy(df.age.desc()).collect()
+        [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
+        >>> from pyspark.sql.functions import *
+        >>> df.sort(asc("age")).collect()
+        [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+        >>> df.orderBy(desc("age"), "name").collect()
+        [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
+        >>> df.orderBy(["age", "name"], ascending=[0, 1]).collect()
+        [Row(age=5, name=u'Bob'), Row(age=2, name=u'Alice')]
+        """
+        jdf = self._jdf.sort(self._sort_cols(cols, kwargs))
+        return DataFrame(jdf, self.sql_ctx)
+
+    orderBy = sort
+
+    def _jseq(self, cols, converter=None):
+        """Return a JVM Seq of Columns from a list of Column or names"""
+        return _to_seq(self.sql_ctx._sc, cols, converter)
+
+    def _jmap(self, jm):
+        """Return a JVM Scala Map from a dict"""
+        return _to_scala_map(self.sql_ctx._sc, jm)
+
+    def _jcols(self, *cols):
+        """Return a JVM Seq of Columns from a list of Column or column names
+
+        If `cols` has only one list in it, cols[0] will be used as the list.
+        """
+        if len(cols) == 1 and isinstance(cols[0], list):
+            cols = cols[0]
+        return self._jseq(cols, _to_java_column)
+
+    def _sort_cols(self, cols, kwargs):
+        """ Return a JVM Seq of Columns that describes the sort order
+        """
+        if not cols:
+            raise ValueError("should sort by at least one column")
+        if len(cols) == 1 and isinstance(cols[0], list):
+            cols = cols[0]
+        jcols = [_to_java_column(c) for c in cols]
+        ascending = kwargs.get('ascending', True)
+        if isinstance(ascending, (bool, int)):
+            if not ascending:
+                jcols = [jc.desc() for jc in jcols]
+        elif isinstance(ascending, list):
+            jcols = [jc if asc else jc.desc()
+                     for asc, jc in zip(ascending, jcols)]
+        else:
+            raise TypeError("ascending can only be boolean or list, but got %s" % type(ascending))
+        return self._jseq(jcols)
+
+    @since("1.3.1")
+    def describe(self, *cols):
+        """Computes basic statistics for numeric and string columns.
+
+        This includes count, mean, stddev, min, and max. If no columns are
+        given, this function computes statistics for all numerical or string columns.
+
+        .. note:: This function is meant for exploratory data analysis, as we make no
+            guarantee about the backward compatibility of the schema of the resulting DataFrame.
+
+        >>> df.describe(['age']).show()
+        +-------+------------------+
+        |summary|               age|
+        +-------+------------------+
+        |  count|                 2|
+        |   mean|               3.5|
+        | stddev|2.1213203435596424|
+        |    min|                 2|
+        |    max|                 5|
+        +-------+------------------+
+        >>> df.describe().show()
+        +-------+------------------+-----+
+        |summary|               age| name|
+        +-------+------------------+-----+
+        |  count|                 2|    2|
+        |   mean|               3.5| null|
+        | stddev|2.1213203435596424| null|
+        |    min|                 2|Alice|
+        |    max|                 5|  Bob|
+        +-------+------------------+-----+
+
+        Use summary for expanded statistics and control over which statistics to compute.
+        """
+        if len(cols) == 1 and isinstance(cols[0], list):
+            cols = cols[0]
+        jdf = self._jdf.describe(self._jseq(cols))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since("2.3.0")
+    def summary(self, *statistics):
+        """Computes specified statistics for numeric and string columns. Available statistics are:
+        - count
+        - mean
+        - stddev
+        - min
+        - max
+        - arbitrary approximate percentiles specified as a percentage (e.g., 75%)
+
+        If no statistics are given, this function computes count, mean, stddev, min,
+        approximate quartiles (percentiles at 25%, 50%, and 75%), and max.
+
+        .. note:: This function is meant for exploratory data analysis, as we make no
+            guarantee about the backward compatibility of the schema of the resulting DataFrame.
+
+        >>> df.summary().show()
+        +-------+------------------+-----+
+        |summary|               age| name|
+        +-------+------------------+-----+
+        |  count|                 2|    2|
+        |   mean|               3.5| null|
+        | stddev|2.1213203435596424| null|
+        |    min|                 2|Alice|
+        |    25%|                 2| null|
+        |    50%|                 2| null|
+        |    75%|                 5| null|
+        |    max|                 5|  Bob|
+        +-------+------------------+-----+
+
+        >>> df.summary("count", "min", "25%", "75%", "max").show()
+        +-------+---+-----+
+        |summary|age| name|
+        +-------+---+-----+
+        |  count|  2|    2|
+        |    min|  2|Alice|
+        |    25%|  2| null|
+        |    75%|  5| null|
+        |    max|  5|  Bob|
+        +-------+---+-----+
+
+        To do a summary for specific columns, first select them:
+
+        >>> df.select("age", "name").summary("count").show()
+        +-------+---+----+
+        |summary|age|name|
+        +-------+---+----+
+        |  count|  2|   2|
+        +-------+---+----+
+
+        See also describe for basic statistics.
+        """
+        if len(statistics) == 1 and isinstance(statistics[0], list):
+            statistics = statistics[0]
+        jdf = self._jdf.summary(self._jseq(statistics))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def head(self, n=None):
+        """Returns the first ``n`` rows.
+
+        .. note:: This method should only be used if the resulting array is expected
+            to be small, as all the data is loaded into the driver's memory.
+
+        :param n: int, default None. Number of rows to return.
+        :return: If n is None, return a single :class:`Row`.
+            Otherwise, return a list of :class:`Row` of at most length ``n``.
+
+        >>> df.head()
+        Row(age=2, name=u'Alice')
+        >>> df.head(1)
+        [Row(age=2, name=u'Alice')]
+        """
+        if n is None:
+            rs = self.head(1)
+            return rs[0] if rs else None
+        return self.take(n)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def first(self):
+        """Returns the first row as a :class:`Row`.
+
+        >>> df.first()
+        Row(age=2, name=u'Alice')
+        """
+        return self.head()
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def __getitem__(self, item):
+        """Returns the column as a :class:`Column`.
+
+        >>> df.select(df['age']).collect()
+        [Row(age=2), Row(age=5)]
+        >>> df[ ["name", "age"]].collect()
+        [Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]
+        >>> df[ df.age > 3 ].collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df[df[0] > 3].collect()
+        [Row(age=5, name=u'Bob')]
+        """
+        if isinstance(item, basestring):
+            jc = self._jdf.apply(item)
+            return Column(jc)
+        elif isinstance(item, Column):
+            return self.filter(item)
+        elif isinstance(item, (list, tuple)):
+            return self.select(*item)
+        elif isinstance(item, int):
+            jc = self._jdf.apply(self.columns[item])
+            return Column(jc)
+        else:
+            raise TypeError("unexpected item type: %s" % type(item))
+
+    @since(1.3)
+    def __getattr__(self, name):
+        """Returns the :class:`Column` denoted by ``name``.
+
+        >>> df.select(df.age).collect()
+        [Row(age=2), Row(age=5)]
+        """
+        if name not in self.columns:
+            raise AttributeError(
+                "'%s' object has no attribute '%s'" % (self.__class__.__name__, name))
+        jc = self._jdf.apply(name)
+        return Column(jc)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def select(self, *cols):
+        """Projects a set of expressions and returns a new :class:`DataFrame`.
+
+        :param cols: list of column names (string) or expressions (:class:`Column`).
+            If one of the column names is '*', that column is expanded to include all columns
+            in the current DataFrame.
+
+        >>> df.select('*').collect()
+        [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
+        >>> df.select('name', 'age').collect()
+        [Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]
+        >>> df.select(df.name, (df.age + 10).alias('age')).collect()
+        [Row(name=u'Alice', age=12), Row(name=u'Bob', age=15)]
+        """
+        jdf = self._jdf.select(self._jcols(*cols))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(1.3)
+    def selectExpr(self, *expr):
+        """Projects a set of SQL expressions and returns a new :class:`DataFrame`.
+
+        This is a variant of :func:`select` that accepts SQL expressions.
+
+        >>> df.selectExpr("age * 2", "abs(age)").collect()
+        [Row((age * 2)=4, abs(age)=2), Row((age * 2)=10, abs(age)=5)]
+        """
+        if len(expr) == 1 and isinstance(expr[0], list):
+            expr = expr[0]
+        jdf = self._jdf.selectExpr(self._jseq(expr))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def filter(self, condition):
+        """Filters rows using the given condition.
+
+        :func:`where` is an alias for :func:`filter`.
+
+        :param condition: a :class:`Column` of :class:`types.BooleanType`
+            or a string of SQL expression.
+
+        >>> df.filter(df.age > 3).collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df.where(df.age == 2).collect()
+        [Row(age=2, name=u'Alice')]
+
+        >>> df.filter("age > 3").collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df.where("age = 2").collect()
+        [Row(age=2, name=u'Alice')]
+        """
+        if isinstance(condition, basestring):
+            jdf = self._jdf.filter(condition)
+        elif isinstance(condition, Column):
+            jdf = self._jdf.filter(condition._jc)
+        else:
+            raise TypeError("condition should be string or Column")
+        return DataFrame(jdf, self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def groupBy(self, *cols):
+        """Groups the :class:`DataFrame` using the specified columns,
+        so we can run aggregation on them. See :class:`GroupedData`
+        for all the available aggregate functions.
+
+        :func:`groupby` is an alias for :func:`groupBy`.
+
+        :param cols: list of columns to group by.
+            Each element should be a column name (string) or an expression (:class:`Column`).
+
+        >>> df.groupBy().avg().collect()
+        [Row(avg(age)=3.5)]
+        >>> sorted(df.groupBy('name').agg({'age': 'mean'}).collect())
+        [Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]
+        >>> sorted(df.groupBy(df.name).avg().collect())
+        [Row(name=u'Alice', avg(age)=2.0), Row(name=u'Bob', avg(age)=5.0)]
+        >>> sorted(df.groupBy(['name', df.age]).count().collect())
+        [Row(name=u'Alice', age=2, count=1), Row(name=u'Bob', age=5, count=1)]
+        """
+        jgd = self._jdf.groupBy(self._jcols(*cols))
+        from pyspark.sql.group import GroupedData
+        return GroupedData(jgd, self)
+
+    @since(1.4)
+    def rollup(self, *cols):
+        """
+        Create a multi-dimensional rollup for the current :class:`DataFrame` using
+        the specified columns, so we can run aggregation on them.
+
+        >>> df.rollup("name", df.age).count().orderBy("name", "age").show()
+        +-----+----+-----+
+        | name| age|count|
+        +-----+----+-----+
+        | null|null|    2|
+        |Alice|null|    1|
+        |Alice|   2|    1|
+        |  Bob|null|    1|
+        |  Bob|   5|    1|
+        +-----+----+-----+
+        """
+        jgd = self._jdf.rollup(self._jcols(*cols))
+        from pyspark.sql.group import GroupedData
+        return GroupedData(jgd, self)
+
+    @since(1.4)
+    def cube(self, *cols):
+        """
+        Create a multi-dimensional cube for the current :class:`DataFrame` using
+        the specified columns, so we can run aggregation on them.
+
+        >>> df.cube("name", df.age).count().orderBy("name", "age").show()
+        +-----+----+-----+
+        | name| age|count|
+        +-----+----+-----+
+        | null|null|    2|
+        | null|   2|    1|
+        | null|   5|    1|
+        |Alice|null|    1|
+        |Alice|   2|    1|
+        |  Bob|null|    1|
+        |  Bob|   5|    1|
+        +-----+----+-----+
+        """
+        jgd = self._jdf.cube(self._jcols(*cols))
+        from pyspark.sql.group import GroupedData
+        return GroupedData(jgd, self)
+
+    @since(1.3)
+    def agg(self, *exprs):
+        """ Aggregate on the entire :class:`DataFrame` without groups
+        (shorthand for ``df.groupBy.agg()``).
+
+        >>> df.agg({"age": "max"}).collect()
+        [Row(max(age)=5)]
+        >>> from pyspark.sql import functions as F
+        >>> df.agg(F.min(df.age)).collect()
+        [Row(min(age)=2)]
+        """
+        return self.groupBy().agg(*exprs)
+
+    @since(2.0)
+    def union(self, other):
+        """ Return a new :class:`DataFrame` containing union of rows in this and another frame.
+
+        This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union
+        (that does deduplication of elements), use this function followed by :func:`distinct`.
+
+        Also as standard in SQL, this function resolves columns by position (not by name).
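+
+        For example, to get SQL-style ``UNION DISTINCT`` semantics (assuming ``df1`` and
+        ``df2`` share the same schema)::
+
+            df1.union(df2).distinct()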
+        """
+        return DataFrame(self._jdf.union(other._jdf), self.sql_ctx)
+
+    @since(1.3)
+    def unionAll(self, other):
+        """ Return a new :class:`DataFrame` containing union of rows in this and another frame.
+
+        This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union
+        (that does deduplication of elements), use this function followed by :func:`distinct`.
+
+        Also as standard in SQL, this function resolves columns by position (not by name).
+
+        .. note:: Deprecated in 2.0, use :func:`union` instead.
+        """
+        warnings.warn("Deprecated in 2.0, use union instead.", DeprecationWarning)
+        return self.union(other)
+
+    @since(2.3)
+    def unionByName(self, other):
+        """ Returns a new :class:`DataFrame` containing union of rows in this and another frame.
+
+        This is different from both `UNION ALL` and `UNION DISTINCT` in SQL. To do a SQL-style set
+        union (that does deduplication of elements), use this function followed by :func:`distinct`.
+
+        The difference between this function and :func:`union` is that this function
+        resolves columns by name (not by position):
+
+        >>> df1 = spark.createDataFrame([[1, 2, 3]], ["col0", "col1", "col2"])
+        >>> df2 = spark.createDataFrame([[4, 5, 6]], ["col1", "col2", "col0"])
+        >>> df1.unionByName(df2).show()
+        +----+----+----+
+        |col0|col1|col2|
+        +----+----+----+
+        |   1|   2|   3|
+        |   6|   4|   5|
+        +----+----+----+
+        """
+        return DataFrame(self._jdf.unionByName(other._jdf), self.sql_ctx)
+
+    @since(1.3)
+    def intersect(self, other):
+        """ Return a new :class:`DataFrame` containing rows only in
+        both this frame and another frame.
+
+        This is equivalent to `INTERSECT` in SQL.
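+
+        For example, assuming ``df1`` and ``df2`` share the same schema::
+
+            df1.intersect(df2).show()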
+        """
+        return DataFrame(self._jdf.intersect(other._jdf), self.sql_ctx)
+
+    @since(2.4)
+    def intersectAll(self, other):
+        """ Return a new :class:`DataFrame` containing rows in both this dataframe and other
+        dataframe while preserving duplicates.
+
+        This is equivalent to `INTERSECT ALL` in SQL.
+
+        >>> df1 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"])
+        >>> df2 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"])
+
+        >>> df1.intersectAll(df2).sort("C1", "C2").show()
+        +---+---+
+        | C1| C2|
+        +---+---+
+        |  a|  1|
+        |  a|  1|
+        |  b|  3|
+        +---+---+
+
+        Also as standard in SQL, this function resolves columns by position (not by name).
+        """
+        return DataFrame(self._jdf.intersectAll(other._jdf), self.sql_ctx)
+
+    @since(1.3)
+    def subtract(self, other):
+        """ Return a new :class:`DataFrame` containing rows in this frame
+        but not in another frame.
+
+        This is equivalent to `EXCEPT DISTINCT` in SQL.
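+
+        For example, assuming ``df1`` and ``df2`` share the same schema::
+
+            df1.subtract(df2).show()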
+
+        """
+        return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx)
+
+    @since(1.4)
+    def dropDuplicates(self, subset=None):
+        """Return a new :class:`DataFrame` with duplicate rows removed,
+        optionally only considering certain columns.
+
+        For a static batch :class:`DataFrame`, it just drops duplicate rows. For a streaming
+        :class:`DataFrame`, it will keep all data across triggers as intermediate state to drop
+        duplicate rows. You can use :func:`withWatermark` to limit how late the duplicate data can
+        be and the system will accordingly limit the state. In addition, data older than the
+        watermark will be dropped to avoid any possibility of duplicates.
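+
+        For a streaming :class:`DataFrame`, a sketch of watermark-bounded deduplication
+        (the column names below are illustrative)::
+
+            sdf.withWatermark("eventTime", "10 minutes").dropDuplicates(["id", "eventTime"])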
+
+        :func:`drop_duplicates` is an alias for :func:`dropDuplicates`.
+
+        >>> from pyspark.sql import Row
+        >>> df = sc.parallelize([ \\
+        ...     Row(name='Alice', age=5, height=80), \\
+        ...     Row(name='Alice', age=5, height=80), \\
+        ...     Row(name='Alice', age=10, height=80)]).toDF()
+        >>> df.dropDuplicates().show()
+        +---+------+-----+
+        |age|height| name|
+        +---+------+-----+
+        |  5|    80|Alice|
+        | 10|    80|Alice|
+        +---+------+-----+
+
+        >>> df.dropDuplicates(['name', 'height']).show()
+        +---+------+-----+
+        |age|height| name|
+        +---+------+-----+
+        |  5|    80|Alice|
+        +---+------+-----+
+        """
+        if subset is None:
+            jdf = self._jdf.dropDuplicates()
+        else:
+            jdf = self._jdf.dropDuplicates(self._jseq(subset))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since("1.3.1")
+    def dropna(self, how='any', thresh=None, subset=None):
+        """Returns a new :class:`DataFrame` omitting rows with null values.
+        :func:`DataFrame.dropna` and :func:`DataFrameNaFunctions.drop` are aliases of each other.
+
+        :param how: 'any' or 'all'.
+            If 'any', drop a row if it contains any nulls.
+            If 'all', drop a row only if all its values are null.
+        :param thresh: int, default None
+            If specified, drop rows that have fewer than `thresh` non-null values.
+            This overrides the `how` parameter.
+        :param subset: optional list of column names to consider.
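+
+        For example, to keep only rows with at least two non-null values (an
+        illustrative threshold)::
+
+            df4.dropna(thresh=2)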
+
+        >>> df4.na.drop().show()
+        +---+------+-----+
+        |age|height| name|
+        +---+------+-----+
+        | 10|    80|Alice|
+        +---+------+-----+
+        """
+        if how is not None and how not in ['any', 'all']:
+            raise ValueError("how ('" + how + "') should be 'any' or 'all'")
+
+        if subset is None:
+            subset = self.columns
+        elif isinstance(subset, basestring):
+            subset = [subset]
+        elif not isinstance(subset, (list, tuple)):
+            raise ValueError("subset should be a list or tuple of column names")
+
+        if thresh is None:
+            thresh = len(subset) if how == 'any' else 1
+
+        return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sql_ctx)
+
+    @since("1.3.1")
+    def fillna(self, value, subset=None):
+        """Replace null values, alias for ``na.fill()``.
+        :func:`DataFrame.fillna` and :func:`DataFrameNaFunctions.fill` are aliases of each other.
+
+        :param value: int, long, float, string, bool or dict.
+            Value to replace null values with.
+            If the value is a dict, then `subset` is ignored and `value` must be a mapping
+            from column name (string) to replacement value. The replacement value must be
+            an int, long, float, boolean, or string.
+        :param subset: optional list of column names to consider.
+            Columns specified in subset that do not have matching data type are ignored.
+            For example, if `value` is a string, and subset contains a non-string column,
+            then the non-string column is simply ignored.
+
+        >>> df4.na.fill(50).show()
+        +---+------+-----+
+        |age|height| name|
+        +---+------+-----+
+        | 10|    80|Alice|
+        |  5|    50|  Bob|
+        | 50|    50|  Tom|
+        | 50|    50| null|
+        +---+------+-----+
+
+        >>> df5.na.fill(False).show()
+        +----+-------+-----+
+        | age|   name|  spy|
+        +----+-------+-----+
+        |  10|  Alice|false|
+        |   5|    Bob|false|
+        |null|Mallory| true|
+        +----+-------+-----+
+
+        >>> df4.na.fill({'age': 50, 'name': 'unknown'}).show()
+        +---+------+-------+
+        |age|height|   name|
+        +---+------+-------+
+        | 10|    80|  Alice|
+        |  5|  null|    Bob|
+        | 50|  null|    Tom|
+        | 50|  null|unknown|
+        +---+------+-------+
+        """
+        if not isinstance(value, (float, int, long, basestring, bool, dict)):
+            raise ValueError("value should be a float, int, long, string, bool or dict")
+
+        # Note that bool is a subclass of int, so isinstance(True, int) holds,
+        # but we don't want to convert bools to floats
+
+        if not isinstance(value, bool) and isinstance(value, (int, long)):
+            value = float(value)
+
+        if isinstance(value, dict):
+            return DataFrame(self._jdf.na().fill(value), self.sql_ctx)
+        elif subset is None:
+            return DataFrame(self._jdf.na().fill(value), self.sql_ctx)
+        else:
+            if isinstance(subset, basestring):
+                subset = [subset]
+            elif not isinstance(subset, (list, tuple)):
+                raise ValueError("subset should be a list or tuple of column names")
+
+            return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx)
+
+    @since(1.4)
+    def replace(self, to_replace, value=_NoValue, subset=None):
+        """Returns a new :class:`DataFrame` replacing a value with another value.
+        :func:`DataFrame.replace` and :func:`DataFrameNaFunctions.replace` are
+        aliases of each other.
+        Values to_replace and value must have the same type and can only be numerics, booleans,
+        or strings. Value can be None. When replacing, the new value will be cast
+        to the type of the existing column.
+        For numeric replacements, all values to be replaced should have unique
+        floating-point representations. In case of conflicts (for example with `{42: -1, 42.0: 1}`),
+        an arbitrary replacement will be used.
+
+        :param to_replace: bool, int, long, float, string, list or dict.
+            Value to be replaced.
+            If the value is a dict, then `value` is ignored or can be omitted, and `to_replace`
+            must be a mapping between a value and a replacement.
+        :param value: bool, int, long, float, string, list or None.
+            The replacement value must be a bool, int, long, float, string or None. If `value` is a
+            list, `value` should be of the same length and type as `to_replace`.
+            If `value` is a scalar and `to_replace` is a sequence, then `value` is
+            used as a replacement for each item in `to_replace`.
+        :param subset: optional list of column names to consider.
+            Columns specified in subset that do not have matching data type are ignored.
+            For example, if `value` is a string, and subset contains a non-string column,
+            then the non-string column is simply ignored.
+
+        >>> df4.na.replace(10, 20).show()
+        +----+------+-----+
+        | age|height| name|
+        +----+------+-----+
+        |  20|    80|Alice|
+        |   5|  null|  Bob|
+        |null|  null|  Tom|
+        |null|  null| null|
+        +----+------+-----+
+
+        >>> df4.na.replace('Alice', None).show()
+        +----+------+----+
+        | age|height|name|
+        +----+------+----+
+        |  10|    80|null|
+        |   5|  null| Bob|
+        |null|  null| Tom|
+        |null|  null|null|
+        +----+------+----+
+
+        >>> df4.na.replace({'Alice': None}).show()
+        +----+------+----+
+        | age|height|name|
+        +----+------+----+
+        |  10|    80|null|
+        |   5|  null| Bob|
+        |null|  null| Tom|
+        |null|  null|null|
+        +----+------+----+
+
+        >>> df4.na.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
+        +----+------+----+
+        | age|height|name|
+        +----+------+----+
+        |  10|    80|   A|
+        |   5|  null|   B|
+        |null|  null| Tom|
+        |null|  null|null|
+        +----+------+----+
+        """
+        if value is _NoValue:
+            if isinstance(to_replace, dict):
+                value = None
+            else:
+                raise TypeError("value argument is required when to_replace is not a dictionary.")
+
+        # Helper functions
+        def all_of(types):
+            """Given a type or tuple of types and a sequence of xs
+            check if each x is instance of type(s)
+
+            >>> all_of(bool)([True, False])
+            True
+            >>> all_of(basestring)(["a", 1])
+            False
+            """
+            def all_of_(xs):
+                return all(isinstance(x, types) for x in xs)
+            return all_of_
+
+        all_of_bool = all_of(bool)
+        all_of_str = all_of(basestring)
+        all_of_numeric = all_of((float, int, long))
+
+        # Validate input types
+        valid_types = (bool, float, int, long, basestring, list, tuple)
+        if not isinstance(to_replace, valid_types + (dict, )):
+            raise ValueError(
+                "to_replace should be a bool, float, int, long, string, list, tuple, or dict. "
+                "Got {0}".format(type(to_replace)))
+
+        if not isinstance(value, valid_types) and value is not None \
+                and not isinstance(to_replace, dict):
+            raise ValueError("If to_replace is not a dict, value should be "
+                             "a bool, float, int, long, string, list, tuple or None. "
+                             "Got {0}".format(type(value)))
+
+        if isinstance(to_replace, (list, tuple)) and isinstance(value, (list, tuple)):
+            if len(to_replace) != len(value):
+                raise ValueError("to_replace and value lists should be of the same length. "
+                                 "Got {0} and {1}".format(len(to_replace), len(value)))
+
+        if not (subset is None or isinstance(subset, (list, tuple, basestring))):
+            raise ValueError("subset should be a list or tuple of column names, "
+                             "column name or None. Got {0}".format(type(subset)))
+
+        # Reshape input arguments if necessary
+        if isinstance(to_replace, (float, int, long, basestring)):
+            to_replace = [to_replace]
+
+        if isinstance(to_replace, dict):
+            rep_dict = to_replace
+            if value is not None:
+                warnings.warn("to_replace is a dict and value is not None. value will be ignored.")
+        else:
+            if isinstance(value, (float, int, long, basestring)) or value is None:
+                value = [value for _ in range(len(to_replace))]
+            rep_dict = dict(zip(to_replace, value))
+
+        if isinstance(subset, basestring):
+            subset = [subset]
+
+        # Verify that we were not passed mixed-type replacements.
+        if not any(all_of_type(rep_dict.keys())
+                   and all_of_type(x for x in rep_dict.values() if x is not None)
+                   for all_of_type in [all_of_bool, all_of_str, all_of_numeric]):
+            raise ValueError("Mixed type replacements are not supported")
+
+        if subset is None:
+            return DataFrame(self._jdf.na().replace('*', rep_dict), self.sql_ctx)
+        else:
+            return DataFrame(
+                self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx)
+
+    @since(2.0)
+    def approxQuantile(self, col, probabilities, relativeError):
+        """
+        Calculates the approximate quantiles of numerical columns of a
+        DataFrame.
+
+        The result of this algorithm has the following deterministic bound:
+        If the DataFrame has N elements and if we request the quantile at
+        probability `p` up to error `err`, then the algorithm will return
+        a sample `x` from the DataFrame so that the *exact* rank of `x` is
+        close to (p * N). More precisely,
+
+          floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
+
+        This method implements a variation of the Greenwald-Khanna
+        algorithm (with some speed optimizations). The algorithm was first
+        presented in [[http://dx.doi.org/10.1145/375663.375670
+        Space-efficient Online Computation of Quantile Summaries]]
+        by Greenwald and Khanna.
+
+        Note that null values will be ignored in numerical columns before calculation.
+        For columns only containing null values, an empty list is returned.
+
+        :param col: str, list.
+          Can be a single column name, or a list of names for multiple columns.
+        :param probabilities: a list of quantile probabilities
+          Each number must belong to [0, 1].
+          For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
+        :param relativeError:  The relative target precision to achieve
+          (>= 0). If set to zero, the exact quantiles are computed, which
+          could be very expensive. Note that values greater than 1 are
+          accepted but give the same result as 1.
+        :return:  the approximate quantiles at the given probabilities. If
+          the input `col` is a string, the output is a list of floats. If the
+          input `col` is a list or tuple of strings, the output is also a
+          list, but each element in it is a list of floats, i.e., the output
+          is a list of list of floats.
+
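+        For illustration only (skipped by the doctest runner; ``df4`` comes from the shared
+        doctest globals and the returned values depend on the data and ``relativeError``):
+
+        >>> df4.approxQuantile("age", [0.5], 0.25)  # doctest: +SKIP
+        >>> df4.approxQuantile(["age", "height"], [0.25, 0.5, 0.75], 0.0)  # doctest: +SKIP
+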
+        .. versionchanged:: 2.2
+           Added support for multiple columns.
+        """
+
+        if not isinstance(col, (basestring, list, tuple)):
+            raise ValueError("col should be a string, list or tuple, but got %r" % type(col))
+
+        isStr = isinstance(col, basestring)
+
+        if isinstance(col, tuple):
+            col = list(col)
+        elif isStr:
+            col = [col]
+
+        for c in col:
+            if not isinstance(c, basestring):
+                raise ValueError("columns should be strings, but got %r" % type(c))
+        col = _to_list(self._sc, col)
+
+        if not isinstance(probabilities, (list, tuple)):
+            raise ValueError("probabilities should be a list or tuple")
+        if isinstance(probabilities, tuple):
+            probabilities = list(probabilities)
+        for p in probabilities:
+            if not isinstance(p, (float, int, long)) or p < 0 or p > 1:
+                raise ValueError("probabilities should be numerical (float, int, long) in [0,1].")
+        probabilities = _to_list(self._sc, probabilities)
+
+        if not isinstance(relativeError, (float, int, long)) or relativeError < 0:
+            raise ValueError("relativeError should be numerical (float, int, long) >= 0.")
+        relativeError = float(relativeError)
+
+        jaq = self._jdf.stat().approxQuantile(col, probabilities, relativeError)
+        jaq_list = [list(j) for j in jaq]
+        return jaq_list[0] if isStr else jaq_list
+
+    @since(1.4)
+    def corr(self, col1, col2, method=None):
+        """
+        Calculates the correlation of two columns of a DataFrame as a double value.
+        Currently only supports the Pearson Correlation Coefficient.
+        :func:`DataFrame.corr` and :func:`DataFrameStatFunctions.corr` are aliases of each other.
+
+        :param col1: The name of the first column
+        :param col2: The name of the second column
+        :param method: The correlation method. Currently only supports "pearson"
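+
+        A hedged sketch (skipped; ``df_xy`` is a hypothetical, perfectly correlated frame):
+
+        >>> df_xy = spark.createDataFrame([(1, 2.0), (2, 4.0), (3, 6.0)], ["x", "y"])  # doctest: +SKIP
+        >>> df_xy.corr("x", "y")  # doctest: +SKIP
+        1.0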
+        """
+        if not isinstance(col1, basestring):
+            raise ValueError("col1 should be a string.")
+        if not isinstance(col2, basestring):
+            raise ValueError("col2 should be a string.")
+        if not method:
+            method = "pearson"
+        if not method == "pearson":
+            raise ValueError("Currently only the calculation of the Pearson Correlation " +
+                             "coefficient is supported.")
+        return self._jdf.stat().corr(col1, col2, method)
+
+    @since(1.4)
+    def cov(self, col1, col2):
+        """
+        Calculate the sample covariance for the given columns, specified by their names, as a
+        double value. :func:`DataFrame.cov` and :func:`DataFrameStatFunctions.cov` are aliases.
+
+        :param col1: The name of the first column
+        :param col2: The name of the second column
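+
+        A hedged sketch (skipped; ``df_xy`` is a hypothetical frame where ``y`` is twice ``x``,
+        so the sample covariance equals twice the sample variance of ``x``):
+
+        >>> df_xy = spark.createDataFrame([(1, 2.0), (2, 4.0), (3, 6.0)], ["x", "y"])  # doctest: +SKIP
+        >>> df_xy.cov("x", "y")  # doctest: +SKIP
+        2.0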
+        """
+        if not isinstance(col1, basestring):
+            raise ValueError("col1 should be a string.")
+        if not isinstance(col2, basestring):
+            raise ValueError("col2 should be a string.")
+        return self._jdf.stat().cov(col1, col2)
+
+    @since(1.4)
+    def crosstab(self, col1, col2):
+        """
+        Computes a pair-wise frequency table of the given columns. Also known as a contingency
+        table. The number of distinct values for each column should be less than 1e4. At most 1e6
+        non-zero pair frequencies will be returned.
+        The first column of each row will be the distinct values of `col1` and the column names
+        will be the distinct values of `col2`. The name of the first column will be `$col1_$col2`.
+        Pairs that have no occurrences will have zero as their counts.
+        :func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases.
+
+        :param col1: The name of the first column. Distinct items will make the first item of
+            each row.
+        :param col2: The name of the second column. Distinct items will make the column names
+            of the DataFrame.
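+
+        A hedged sketch (skipped; with the shared doctest ``df``, the result has a first column
+        named ``age_name`` followed by one column per distinct name):
+
+        >>> df.crosstab("age", "name").show()  # doctest: +SKIP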
+        """
+        if not isinstance(col1, basestring):
+            raise ValueError("col1 should be a string.")
+        if not isinstance(col2, basestring):
+            raise ValueError("col2 should be a string.")
+        return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sql_ctx)
+
+    @since(1.4)
+    def freqItems(self, cols, support=None):
+        """
+        Finds frequent items for columns, possibly with false positives, using the
+        frequent element count algorithm described in
+        http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou.
+        :func:`DataFrame.freqItems` and :func:`DataFrameStatFunctions.freqItems` are aliases.
+
+        .. note:: This function is meant for exploratory data analysis, as we make no
+            guarantee about the backward compatibility of the schema of the resulting DataFrame.
+
+        :param cols: Names of the columns to calculate frequent items for as a list or tuple of
+            strings.
+        :param support: The frequency with which to consider an item 'frequent'. Default is 1%.
+            The support must be greater than 1e-4.
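+
+        A hedged sketch (skipped; uses the shared doctest ``df``):
+
+        >>> df.freqItems(["age", "name"], support=0.5).collect()  # doctest: +SKIP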
+        """
+        if isinstance(cols, tuple):
+            cols = list(cols)
+        if not isinstance(cols, list):
+            raise ValueError("cols must be a list or tuple of column names as strings.")
+        if not support:
+            support = 0.01
+        return DataFrame(self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def withColumn(self, colName, col):
+        """
+        Returns a new :class:`DataFrame` by adding a column or replacing the
+        existing column that has the same name.
+
+        The column expression must be an expression over this DataFrame; attempting to add
+        a column from some other DataFrame will raise an error.
+
+        :param colName: string, name of the new column.
+        :param col: a :class:`Column` expression for the new column.
+
+        >>> df.withColumn('age2', df.age + 2).collect()
+        [Row(age=2, name=u'Alice', age2=4), Row(age=5, name=u'Bob', age2=7)]
+
+        """
+        assert isinstance(col, Column), "col should be Column"
+        return DataFrame(self._jdf.withColumn(colName, col._jc), self.sql_ctx)
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def withColumnRenamed(self, existing, new):
+        """Returns a new :class:`DataFrame` by renaming an existing column.
+        This is a no-op if schema doesn't contain the given column name.
+
+        :param existing: string, name of the existing column to rename.
+        :param new: string, new name of the column.
+
+        >>> df.withColumnRenamed('age', 'age2').collect()
+        [Row(age2=2, name=u'Alice'), Row(age2=5, name=u'Bob')]
+        """
+        return DataFrame(self._jdf.withColumnRenamed(existing, new), self.sql_ctx)
+
+    @since(1.4)
+    @ignore_unicode_prefix
+    def drop(self, *cols):
+        """Returns a new :class:`DataFrame` that drops the specified column.
+        This is a no-op if schema doesn't contain the given column name(s).
+
+        :param cols: a string name of the column to drop, or a
+            :class:`Column` to drop, or a list of string name of the columns to drop.
+
+        >>> df.drop('age').collect()
+        [Row(name=u'Alice'), Row(name=u'Bob')]
+
+        >>> df.drop(df.age).collect()
+        [Row(name=u'Alice'), Row(name=u'Bob')]
+
+        >>> df.join(df2, df.name == df2.name, 'inner').drop(df.name).collect()
+        [Row(age=5, height=85, name=u'Bob')]
+
+        >>> df.join(df2, df.name == df2.name, 'inner').drop(df2.name).collect()
+        [Row(age=5, name=u'Bob', height=85)]
+
+        >>> df.join(df2, 'name', 'inner').drop('age', 'height').collect()
+        [Row(name=u'Bob')]
+        """
+        if len(cols) == 1:
+            col = cols[0]
+            if isinstance(col, basestring):
+                jdf = self._jdf.drop(col)
+            elif isinstance(col, Column):
+                jdf = self._jdf.drop(col._jc)
+            else:
+                raise TypeError("col should be a string or a Column")
+        else:
+            for col in cols:
+                if not isinstance(col, basestring):
+                    raise TypeError("each col in the param list should be a string")
+            jdf = self._jdf.drop(self._jseq(cols))
+
+        return DataFrame(jdf, self.sql_ctx)
+
+    @ignore_unicode_prefix
+    def toDF(self, *cols):
+        """Returns a new class:`DataFrame` that with new specified column names
+
+        :param cols: list of new column names (string)
+
+        >>> df.toDF('f1', 'f2').collect()
+        [Row(f1=2, f2=u'Alice'), Row(f1=5, f2=u'Bob')]
+        """
+        jdf = self._jdf.toDF(self._jseq(cols))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @since(1.3)
+    def toPandas(self):
+        """
+        Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``.
+
+        This is only available if Pandas is installed and available.
+
+        .. note:: This method should only be used if the resulting Pandas DataFrame is expected
+            to be small, as all the data is loaded into the driver's memory.
+
+        .. note:: Usage with spark.sql.execution.arrow.enabled=True is experimental.
+
+        >>> df.toPandas()  # doctest: +SKIP
+           age   name
+        0    2  Alice
+        1    5    Bob
+        """
+        from pyspark.sql.utils import require_minimum_pandas_version
+        require_minimum_pandas_version()
+
+        import pandas as pd
+
+        if self.sql_ctx._conf.pandasRespectSessionTimeZone():
+            timezone = self.sql_ctx._conf.sessionLocalTimeZone()
+        else:
+            timezone = None
+
+        if self.sql_ctx._conf.arrowEnabled():
+            use_arrow = True
+            try:
+                from pyspark.sql.types import to_arrow_schema
+                from pyspark.sql.utils import require_minimum_pyarrow_version
+
+                require_minimum_pyarrow_version()
+                to_arrow_schema(self.schema)
+            except Exception as e:
+
+                if self.sql_ctx._conf.arrowFallbackEnabled():
+                    msg = (
+                        "toPandas attempted Arrow optimization because "
+                        "'spark.sql.execution.arrow.enabled' is set to true; however, "
+                        "failed by the reason below:\n  %s\n"
+                        "Attempting non-optimization as "
+                        "'spark.sql.execution.arrow.fallback.enabled' is set to "
+                        "true." % _exception_message(e))
+                    warnings.warn(msg)
+                    use_arrow = False
+                else:
+                    msg = (
+                        "toPandas attempted Arrow optimization because "
+                        "'spark.sql.execution.arrow.enabled' is set to true, but has reached "
+                        "the error below and will not continue because automatic fallback "
+                        "with 'spark.sql.execution.arrow.fallback.enabled' has been set to "
+                        "false.\n  %s" % _exception_message(e))
+                    warnings.warn(msg)
+                    raise
+
+            # Try to use Arrow optimization when the schema is supported and the required version
+            # of PyArrow is found, if 'spark.sql.execution.arrow.enabled' is enabled.
+            if use_arrow:
+                try:
+                    from pyspark.sql.types import _check_dataframe_convert_date, \
+                        _check_dataframe_localize_timestamps
+                    import pyarrow
+                    batches = self._collectAsArrow()
+                    if len(batches) > 0:
+                        table = pyarrow.Table.from_batches(batches)
+                        pdf = table.to_pandas()
+                        pdf = _check_dataframe_convert_date(pdf, self.schema)
+                        return _check_dataframe_localize_timestamps(pdf, timezone)
+                    else:
+                        return pd.DataFrame.from_records([], columns=self.columns)
+                except Exception as e:
+                    # We might have to allow fallback here as well but multiple Spark jobs can
+                    # be executed. So, simply fail in this case for now.
+                    msg = (
+                        "toPandas attempted Arrow optimization because "
+                        "'spark.sql.execution.arrow.enabled' is set to true, but has reached "
+                        "the error below and can not continue. Note that "
+                        "'spark.sql.execution.arrow.fallback.enabled' does not have an effect "
+                        "on failures in the middle of computation.\n  %s" % _exception_message(e))
+                    warnings.warn(msg)
+                    raise
+
+        # Below is toPandas without Arrow optimization.
+        pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
+
+        dtype = {}
+        for field in self.schema:
+            pandas_type = _to_corrected_pandas_type(field.dataType)
+            # SPARK-21766: if an integer field is nullable and has null values, it can be
+            # inferred by pandas as float column. Once we convert the column with NaN back
+            # to integer type e.g., np.int16, we will hit exception. So we use the inferred
+            # float type, not the corrected type from the schema in this case.
+            if pandas_type is not None and \
+                not(isinstance(field.dataType, IntegralType) and field.nullable and
+                    pdf[field.name].isnull().any()):
+                dtype[field.name] = pandas_type
+
+        for f, t in dtype.items():
+            pdf[f] = pdf[f].astype(t, copy=False)
+
+        if timezone is None:
+            return pdf
+        else:
+            from pyspark.sql.types import _check_series_convert_timestamps_local_tz
+            for field in self.schema:
+                # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+                if isinstance(field.dataType, TimestampType):
+                    pdf[field.name] = \
+                        _check_series_convert_timestamps_local_tz(pdf[field.name], timezone)
+            return pdf
+
+    def _collectAsArrow(self):
+        """
+        Returns all records as a list of ArrowRecordBatches. PyArrow must be installed
+        and available in both the driver and worker Python environments.
+
+        .. note:: Experimental.
+        """
+        with SCCallSiteSync(self._sc) as css:
+            sock_info = self._jdf.collectAsArrowToPython()
+        return list(_load_from_socket(sock_info, ArrowStreamSerializer()))
+
+    ##########################################################################################
+    # Pandas compatibility
+    ##########################################################################################
+
+    groupby = copy_func(
+        groupBy,
+        sinceversion=1.4,
+        doc=":func:`groupby` is an alias for :func:`groupBy`.")
+
+    drop_duplicates = copy_func(
+        dropDuplicates,
+        sinceversion=1.4,
+        doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.")
+
+    where = copy_func(
+        filter,
+        sinceversion=1.3,
+        doc=":func:`where` is an alias for :func:`filter`.")
+
+
+def _to_scala_map(sc, jm):
+    """
+    Convert a dict into a JVM Map.
+    """
+    return sc._jvm.PythonUtils.toScalaMap(jm)
+
+
+def _to_corrected_pandas_type(dt):
+    """
+    When converting Spark SQL records to a Pandas DataFrame, the inferred data type may be wrong.
+    This method returns the corrected data type for Pandas if that type may be inferred incorrectly.
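+
+    For illustration only (skipped; requires NumPy):
+
+    >>> import numpy as np  # doctest: +SKIP
+    >>> _to_corrected_pandas_type(ShortType()) is np.int16  # doctest: +SKIP
+    True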
+    """
+    import numpy as np
+    if type(dt) == ByteType:
+        return np.int8
+    elif type(dt) == ShortType:
+        return np.int16
+    elif type(dt) == IntegerType:
+        return np.int32
+    elif type(dt) == FloatType:
+        return np.float32
+    else:
+        return None
+
+
+class DataFrameNaFunctions(object):
+    """Functionality for working with missing data in :class:`DataFrame`.
+
+    .. versionadded:: 1.4
+    """
+
+    def __init__(self, df):
+        self.df = df
+
+    def drop(self, how='any', thresh=None, subset=None):
+        return self.df.dropna(how=how, thresh=thresh, subset=subset)
+
+    drop.__doc__ = DataFrame.dropna.__doc__
+
+    def fill(self, value, subset=None):
+        return self.df.fillna(value=value, subset=subset)
+
+    fill.__doc__ = DataFrame.fillna.__doc__
+
+    def replace(self, to_replace, value=_NoValue, subset=None):
+        return self.df.replace(to_replace, value, subset)
+
+    replace.__doc__ = DataFrame.replace.__doc__
+
+
+class DataFrameStatFunctions(object):
+    """Functionality for statistic functions with :class:`DataFrame`.
+
+    .. versionadded:: 1.4
+    """
+
+    def __init__(self, df):
+        self.df = df
+
+    def approxQuantile(self, col, probabilities, relativeError):
+        return self.df.approxQuantile(col, probabilities, relativeError)
+
+    approxQuantile.__doc__ = DataFrame.approxQuantile.__doc__
+
+    def corr(self, col1, col2, method=None):
+        return self.df.corr(col1, col2, method)
+
+    corr.__doc__ = DataFrame.corr.__doc__
+
+    def cov(self, col1, col2):
+        return self.df.cov(col1, col2)
+
+    cov.__doc__ = DataFrame.cov.__doc__
+
+    def crosstab(self, col1, col2):
+        return self.df.crosstab(col1, col2)
+
+    crosstab.__doc__ = DataFrame.crosstab.__doc__
+
+    def freqItems(self, cols, support=None):
+        return self.df.freqItems(cols, support)
+
+    freqItems.__doc__ = DataFrame.freqItems.__doc__
+
+    def sampleBy(self, col, fractions, seed=None):
+        return self.df.sampleBy(col, fractions, seed)
+
+    sampleBy.__doc__ = DataFrame.sampleBy.__doc__
+
+
+def _test():
+    import doctest
+    from pyspark.context import SparkContext
+    from pyspark.sql import Row, SQLContext, SparkSession
+    import pyspark.sql.dataframe
+    from pyspark.sql.functions import from_unixtime
+    globs = pyspark.sql.dataframe.__dict__.copy()
+    sc = SparkContext('local[4]', 'PythonTest')
+    globs['sc'] = sc
+    globs['sqlContext'] = SQLContext(sc)
+    globs['spark'] = SparkSession(sc)
+    globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')])\
+        .toDF(StructType([StructField('age', IntegerType()),
+                          StructField('name', StringType())]))
+    globs['df2'] = sc.parallelize([Row(name='Tom', height=80), Row(name='Bob', height=85)]).toDF()
+    globs['df3'] = sc.parallelize([Row(name='Alice', age=2),
+                                   Row(name='Bob', age=5)]).toDF()
+    globs['df4'] = sc.parallelize([Row(name='Alice', age=10, height=80),
+                                   Row(name='Bob', age=5, height=None),
+                                   Row(name='Tom', age=None, height=None),
+                                   Row(name=None, age=None, height=None)]).toDF()
+    globs['df5'] = sc.parallelize([Row(name='Alice', spy=False, age=10),
+                                   Row(name='Bob', spy=None, age=5),
+                                   Row(name='Mallory', spy=True, age=None)]).toDF()
+    globs['sdf'] = sc.parallelize([Row(name='Tom', time=1479441846),
+                                   Row(name='Bob', time=1479442946)]).toDF()
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.dataframe, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/functions.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1d6ea34daaca8ddf288bf263411ff7ccd4a4bae
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/functions.py
@@ -0,0 +1,2954 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+A collections of builtin functions
+"""
+import sys
+import functools
+import warnings
+
+if sys.version < "3":
+    from itertools import imap as map
+
+if sys.version >= '3':
+    basestring = str
+
+from pyspark import since, SparkContext
+from pyspark.rdd import ignore_unicode_prefix, PythonEvalType
+from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_from_literal
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import StringType, DataType
+# Keep UserDefinedFunction import for backwards compatible import; moved in SPARK-22409
+from pyspark.sql.udf import UserDefinedFunction, _create_udf
+
+
+def _create_function(name, doc=""):
+    """ Create a function for aggregator by name"""
+    def _(col):
+        sc = SparkContext._active_spark_context
+        jc = getattr(sc._jvm.functions, name)(col._jc if isinstance(col, Column) else col)
+        return Column(jc)
+    _.__name__ = name
+    _.__doc__ = doc
+    return _
+
+
+def _wrap_deprecated_function(func, message):
+    """ Wrap the deprecated function to print out deprecation warnings"""
+    def _(col):
+        warnings.warn(message, DeprecationWarning)
+        return func(col)
+    return functools.wraps(func)(_)
+
+
+def _create_binary_mathfunction(name, doc=""):
+    """ Create a binary mathfunction by name"""
+    def _(col1, col2):
+        sc = SparkContext._active_spark_context
+        # users might write ints for simplicity. This would throw an error on the JVM side.
+        jc = getattr(sc._jvm.functions, name)(col1._jc if isinstance(col1, Column) else float(col1),
+                                              col2._jc if isinstance(col2, Column) else float(col2))
+        return Column(jc)
+    _.__name__ = name
+    _.__doc__ = doc
+    return _
+
+
+def _create_window_function(name, doc=''):
+    """ Create a window function by name """
+    def _():
+        sc = SparkContext._active_spark_context
+        jc = getattr(sc._jvm.functions, name)()
+        return Column(jc)
+    _.__name__ = name
+    _.__doc__ = 'Window function: ' + doc
+    return _
+
+_lit_doc = """
+    Creates a :class:`Column` of literal value.
+
+    >>> df.select(lit(5).alias('height')).withColumn('spark_user', lit(True)).take(1)
+    [Row(height=5, spark_user=True)]
+    """
+_functions = {
+    'lit': _lit_doc,
+    'col': 'Returns a :class:`Column` based on the given column name.',
+    'column': 'Returns a :class:`Column` based on the given column name.',
+    'asc': 'Returns a sort expression based on the ascending order of the given column name.',
+    'desc': 'Returns a sort expression based on the descending order of the given column name.',
+
+    'upper': 'Converts a string expression to upper case.',
+    'lower': 'Converts a string expression to lower case.',
+    'sqrt': 'Computes the square root of the specified float value.',
+    'abs': 'Computes the absolute value.',
+
+    'max': 'Aggregate function: returns the maximum value of the expression in a group.',
+    'min': 'Aggregate function: returns the minimum value of the expression in a group.',
+    'count': 'Aggregate function: returns the number of items in a group.',
+    'sum': 'Aggregate function: returns the sum of all values in the expression.',
+    'avg': 'Aggregate function: returns the average of the values in a group.',
+    'mean': 'Aggregate function: returns the average of the values in a group.',
+    'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.',
+}
+
+_functions_1_4 = {
+    # unary math functions
+    'acos': ':return: inverse cosine of `col`, as if computed by `java.lang.Math.acos()`',
+    'asin': ':return: inverse sine of `col`, as if computed by `java.lang.Math.asin()`',
+    'atan': ':return: inverse tangent of `col`, as if computed by `java.lang.Math.atan()`',
+    'cbrt': 'Computes the cube-root of the given value.',
+    'ceil': 'Computes the ceiling of the given value.',
+    'cos': """:param col: angle in radians
+           :return: cosine of the angle, as if computed by `java.lang.Math.cos()`.""",
+    'cosh': """:param col: hyperbolic angle
+           :return: hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()`""",
+    'exp': 'Computes the exponential of the given value.',
+    'expm1': 'Computes the exponential of the given value minus one.',
+    'floor': 'Computes the floor of the given value.',
+    'log': 'Computes the natural logarithm of the given value.',
+    'log10': 'Computes the logarithm of the given value in Base 10.',
+    'log1p': 'Computes the natural logarithm of the given value plus one.',
+    'rint': 'Returns the double value that is closest in value to the argument and' +
+            ' is equal to a mathematical integer.',
+    'signum': 'Computes the signum of the given value.',
+    'sin': """:param col: angle in radians
+           :return: sine of the angle, as if computed by `java.lang.Math.sin()`""",
+    'sinh': """:param col: hyperbolic angle
+           :return: hyperbolic sine of the given value,
+                    as if computed by `java.lang.Math.sinh()`""",
+    'tan': """:param col: angle in radians
+           :return: tangent of the given value, as if computed by `java.lang.Math.tan()`""",
+    'tanh': """:param col: hyperbolic angle
+            :return: hyperbolic tangent of the given value,
+                     as if computed by `java.lang.Math.tanh()`""",
+    'toDegrees': '.. note:: Deprecated in 2.1, use :func:`degrees` instead.',
+    'toRadians': '.. note:: Deprecated in 2.1, use :func:`radians` instead.',
+    'bitwiseNOT': 'Computes bitwise not.',
+}
+
+_functions_2_4 = {
+    'asc_nulls_first': 'Returns a sort expression based on the ascending order of the given' +
+                       ' column name, and null values appear before non-null values.',
+    'asc_nulls_last': 'Returns a sort expression based on the ascending order of the given' +
+                      ' column name, and null values appear after non-null values.',
+    'desc_nulls_first': 'Returns a sort expression based on the descending order of the given' +
+                        ' column name, and null values appear before non-null values.',
+    'desc_nulls_last': 'Returns a sort expression based on the descending order of the given' +
+                       ' column name, and null values appear after non-null values.',
+}
+
+_collect_list_doc = """
+    Aggregate function: returns a list of objects with duplicates.
+
+    .. note:: The function is non-deterministic because the order of collected results depends
+        on order of rows which may be non-deterministic after a shuffle.
+
+    >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',))
+    >>> df2.agg(collect_list('age')).collect()
+    [Row(collect_list(age)=[2, 5, 5])]
+    """
+_collect_set_doc = """
+    Aggregate function: returns a set of objects with duplicate elements eliminated.
+
+    .. note:: The function is non-deterministic because the order of collected results depends
+        on order of rows which may be non-deterministic after a shuffle.
+
+    >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',))
+    >>> df2.agg(collect_set('age')).collect()
+    [Row(collect_set(age)=[5, 2])]
+    """
+_functions_1_6 = {
+    # unary math functions
+    'stddev': 'Aggregate function: returns the unbiased sample standard deviation of' +
+              ' the expression in a group.',
+    'stddev_samp': 'Aggregate function: returns the unbiased sample standard deviation of' +
+                   ' the expression in a group.',
+    'stddev_pop': 'Aggregate function: returns population standard deviation of' +
+                  ' the expression in a group.',
+    'variance': 'Aggregate function: returns the population variance of the values in a group.',
+    'var_samp': 'Aggregate function: returns the unbiased variance of the values in a group.',
+    'var_pop':  'Aggregate function: returns the population variance of the values in a group.',
+    'skewness': 'Aggregate function: returns the skewness of the values in a group.',
+    'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.',
+    'collect_list': _collect_list_doc,
+    'collect_set': _collect_set_doc
+}
+
+_functions_2_1 = {
+    # unary math functions
+    'degrees': """
+               Converts an angle measured in radians to an approximately equivalent angle
+               measured in degrees.
+               :param col: angle in radians
+               :return: angle in degrees, as if computed by `java.lang.Math.toDegrees()`
+               """,
+    'radians': """
+               Converts an angle measured in degrees to an approximately equivalent angle
+               measured in radians.
+               :param col: angle in degrees
+               :return: angle in radians, as if computed by `java.lang.Math.toRadians()`
+               """,
+}
+
+# math functions that take two arguments as input
+_binary_mathfunctions = {
+    'atan2': """
+             :param col1: coordinate on y-axis
+             :param col2: coordinate on x-axis
+             :return: the `theta` component of the point
+                (`r`, `theta`)
+                in polar coordinates that corresponds to the point
+                (`x`, `y`) in Cartesian coordinates,
+                as if computed by `java.lang.Math.atan2()`
+             """,
+    'hypot': 'Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow.',
+    'pow': 'Returns the value of the first argument raised to the power of the second argument.',
+}
+
+_window_functions = {
+    'row_number':
+        """returns a sequential number starting at 1 within a window partition.""",
+    'dense_rank':
+        """returns the rank of rows within a window partition, without any gaps.
+
+        The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
+        sequence when there are ties. That is, if you were ranking a competition using dense_rank
+        and had three people tie for second place, you would say that all three were in second
+        place and that the next person came in third. Rank, by contrast, would give sequential
+        numbers, so the person that came in third place (after the ties) would register as coming
+        in fifth.
+
+        This is equivalent to the DENSE_RANK function in SQL.""",
+    'rank':
+        """returns the rank of rows within a window partition.
+
+        The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
+        sequence when there are ties. That is, if you were ranking a competition using dense_rank
+        and had three people tie for second place, you would say that all three were in second
+        place and that the next person came in third. Rank, by contrast, would give sequential
+        numbers, so the person that came in third place (after the ties) would register as coming
+        in fifth.
+
+        This is equivalent to the RANK function in SQL.""",
+    'cume_dist':
+        """returns the cumulative distribution of values within a window partition,
+        i.e. the fraction of rows that are below the current row.""",
+    'percent_rank':
+        """returns the relative rank (i.e. percentile) of rows within a window partition.""",
+}
+
+# Wraps deprecated functions (keys) with the messages (values).
+_functions_deprecated = {
+    'toDegrees': 'Deprecated in 2.1, use degrees instead.',
+    'toRadians': 'Deprecated in 2.1, use radians instead.',
+}
+
+for _name, _doc in _functions.items():
+    globals()[_name] = since(1.3)(_create_function(_name, _doc))
+for _name, _doc in _functions_1_4.items():
+    globals()[_name] = since(1.4)(_create_function(_name, _doc))
+for _name, _doc in _binary_mathfunctions.items():
+    globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc))
+for _name, _doc in _window_functions.items():
+    globals()[_name] = since(1.6)(_create_window_function(_name, _doc))
+for _name, _doc in _functions_1_6.items():
+    globals()[_name] = since(1.6)(_create_function(_name, _doc))
+for _name, _doc in _functions_2_1.items():
+    globals()[_name] = since(2.1)(_create_function(_name, _doc))
+for _name, _message in _functions_deprecated.items():
+    globals()[_name] = _wrap_deprecated_function(globals()[_name], _message)
+for _name, _doc in _functions_2_4.items():
+    globals()[_name] = since(2.4)(_create_function(_name, _doc))
+del _name, _doc
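+
+# Hedged usage note (illustrative comment only): the helpers generated above behave like any
+# other column function, e.g. df.select(sqrt(df.age), atan2(df.age, lit(1.0))), where `df` is
+# a hypothetical DataFrame with a numeric `age` column and `lit` is generated in this module.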
+
+
+@since(1.3)
+def approxCountDistinct(col, rsd=None):
+    """
+    .. note:: Deprecated in 2.1, use :func:`approx_count_distinct` instead.
+    """
+    warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning)
+    return approx_count_distinct(col, rsd)
+
+
+@since(2.1)
+def approx_count_distinct(col, rsd=None):
+    """Aggregate function: returns a new :class:`Column` for approximate distinct count of
+    column `col`.
+
+    :param rsd: maximum estimation error allowed (default = 0.05). For rsd < 0.01, it is more
+        efficient to use :func:`countDistinct`
+
+    >>> df.agg(approx_count_distinct(df.age).alias('distinct_ages')).collect()
+    [Row(distinct_ages=2)]
+    """
+    sc = SparkContext._active_spark_context
+    if rsd is None:
+        jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col))
+    else:
+        jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col), rsd)
+    return Column(jc)
+
+
+@since(1.6)
+def broadcast(df):
+    """Marks a DataFrame as small enough for use in broadcast joins."""
+
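+    # Hedged usage sketch (illustrative comment only): mark the smaller side of a join, e.g.
+    #   df.join(broadcast(df2), 'name').show()
+    # where `df` and `df2` are small frames such as the ones in this module's doctests.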
+    sc = SparkContext._active_spark_context
+    return DataFrame(sc._jvm.functions.broadcast(df._jdf), df.sql_ctx)
+
+
+@since(1.4)
+def coalesce(*cols):
+    """Returns the first column that is not null.
+
+    >>> cDf = spark.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b"))
+    >>> cDf.show()
+    +----+----+
+    |   a|   b|
+    +----+----+
+    |null|null|
+    |   1|null|
+    |null|   2|
+    +----+----+
+
+    >>> cDf.select(coalesce(cDf["a"], cDf["b"])).show()
+    +--------------+
+    |coalesce(a, b)|
+    +--------------+
+    |          null|
+    |             1|
+    |             2|
+    +--------------+
+
+    >>> cDf.select('*', coalesce(cDf["a"], lit(0.0))).show()
+    +----+----+----------------+
+    |   a|   b|coalesce(a, 0.0)|
+    +----+----+----------------+
+    |null|null|             0.0|
+    |   1|null|             1.0|
+    |null|   2|             0.0|
+    +----+----+----------------+
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.coalesce(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+@since(1.6)
+def corr(col1, col2):
+    """Returns a new :class:`Column` for the Pearson Correlation Coefficient for ``col1``
+    and ``col2``.
+
+    >>> a = range(20)
+    >>> b = [2 * x for x in range(20)]
+    >>> df = spark.createDataFrame(zip(a, b), ["a", "b"])
+    >>> df.agg(corr("a", "b").alias('c')).collect()
+    [Row(c=1.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.corr(_to_java_column(col1), _to_java_column(col2)))
+
+
+@since(2.0)
+def covar_pop(col1, col2):
+    """Returns a new :class:`Column` for the population covariance of ``col1`` and ``col2``.
+
+    >>> a = [1] * 10
+    >>> b = [1] * 10
+    >>> df = spark.createDataFrame(zip(a, b), ["a", "b"])
+    >>> df.agg(covar_pop("a", "b").alias('c')).collect()
+    [Row(c=0.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.covar_pop(_to_java_column(col1), _to_java_column(col2)))
+
+
+@since(2.0)
+def covar_samp(col1, col2):
+    """Returns a new :class:`Column` for the sample covariance of ``col1`` and ``col2``.
+
+    >>> a = [1] * 10
+    >>> b = [1] * 10
+    >>> df = spark.createDataFrame(zip(a, b), ["a", "b"])
+    >>> df.agg(covar_samp("a", "b").alias('c')).collect()
+    [Row(c=0.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.covar_samp(_to_java_column(col1), _to_java_column(col2)))
+
+
+@since(1.3)
+def countDistinct(col, *cols):
+    """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
+
+    >>> df.agg(countDistinct(df.age, df.name).alias('c')).collect()
+    [Row(c=2)]
+
+    >>> df.agg(countDistinct("age", "name").alias('c')).collect()
+    [Row(c=2)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.countDistinct(_to_java_column(col), _to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+@since(1.3)
+def first(col, ignorenulls=False):
+    """Aggregate function: returns the first value in a group.
+
+    By default the function returns the first value it sees. It will return the first non-null
+    value it sees when ignoreNulls is set to true. If all values are null, then null is returned.
+
+    .. note:: The function is non-deterministic because its result depends on the order of rows,
+        which may be non-deterministic after a shuffle.
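+
+    A hedged sketch (skipped; ``df`` is the shared doctest frame):
+
+    >>> df.agg(first(df.age, ignorenulls=True)).collect()  # doctest: +SKIP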
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.first(_to_java_column(col), ignorenulls)
+    return Column(jc)
+
+
+@since(2.0)
+def grouping(col):
+    """
+    Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated
+    or not, returns 1 for aggregated or 0 for not aggregated in the result set.
+
+    >>> df.cube("name").agg(grouping("name"), sum("age")).orderBy("name").show()
+    +-----+--------------+--------+
+    | name|grouping(name)|sum(age)|
+    +-----+--------------+--------+
+    | null|             1|       7|
+    |Alice|             0|       2|
+    |  Bob|             0|       5|
+    +-----+--------------+--------+
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.grouping(_to_java_column(col))
+    return Column(jc)
+
+
+@since(2.0)
+def grouping_id(*cols):
+    """
+    Aggregate function: returns the level of grouping, equals to
+
+       (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)
+
+    .. note:: The list of columns should match with grouping columns exactly, or empty (means all
+        the grouping columns).
+
+    >>> df.cube("name").agg(grouping_id(), sum("age")).orderBy("name").show()
+    +-----+-------------+--------+
+    | name|grouping_id()|sum(age)|
+    +-----+-------------+--------+
+    | null|            1|       7|
+    |Alice|            0|       2|
+    |  Bob|            0|       5|
+    +-----+-------------+--------+
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.grouping_id(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+@since(1.6)
+def input_file_name():
+    """Creates a string column for the file name of the current Spark task.
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.input_file_name())
+
+
+@since(1.6)
+def isnan(col):
+    """An expression that returns true iff the column is NaN.
+
+    >>> df = spark.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b"))
+    >>> df.select(isnan("a").alias("r1"), isnan(df.a).alias("r2")).collect()
+    [Row(r1=False, r2=False), Row(r1=True, r2=True)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.isnan(_to_java_column(col)))
+
+
+@since(1.6)
+def isnull(col):
+    """An expression that returns true iff the column is null.
+
+    >>> df = spark.createDataFrame([(1, None), (None, 2)], ("a", "b"))
+    >>> df.select(isnull("a").alias("r1"), isnull(df.a).alias("r2")).collect()
+    [Row(r1=False, r2=False), Row(r1=True, r2=True)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.isnull(_to_java_column(col)))
+
+
+@since(1.3)
+def last(col, ignorenulls=False):
+    """Aggregate function: returns the last value in a group.
+
+    By default the function returns the last value it sees. It will return the last non-null
+    value it sees when ignoreNulls is set to true. If all values are null, then null is returned.
+
+    .. note:: The function is non-deterministic because its result depends on the order of rows,
+        which may be non-deterministic after a shuffle.
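+
+    A hedged sketch (skipped; ``df`` is the shared doctest frame):
+
+    >>> df.agg(last(df.age, ignorenulls=True)).collect()  # doctest: +SKIP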
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.last(_to_java_column(col), ignorenulls)
+    return Column(jc)
+
+
+@since(1.6)
+def monotonically_increasing_id():
+    """A column that generates monotonically increasing 64-bit integers.
+
+    The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive.
+    The current implementation puts the partition ID in the upper 31 bits, and the record number
+    within each partition in the lower 33 bits. The assumption is that the data frame has
+    less than 1 billion partitions, and each partition has less than 8 billion records.
+
+    .. note:: The function is non-deterministic because its result depends on partition IDs.
+
+    As an example, consider a :class:`DataFrame` with two partitions, each with 3 records.
+    This expression would return the following IDs:
+    0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594.
+
+    >>> df0 = sc.parallelize(range(2), 2).mapPartitions(lambda x: [(1,), (2,), (3,)]).toDF(['col1'])
+    >>> df0.select(monotonically_increasing_id().alias('id')).collect()
+    [Row(id=0), Row(id=1), Row(id=2), Row(id=8589934592), Row(id=8589934593), Row(id=8589934594)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.monotonically_increasing_id())
+
+
+@since(1.6)
+def nanvl(col1, col2):
+    """Returns col1 if it is not NaN, or col2 if col1 is NaN.
+
+    Both inputs should be floating point columns (:class:`DoubleType` or :class:`FloatType`).
+
+    >>> df = spark.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b"))
+    >>> df.select(nanvl("a", "b").alias("r1"), nanvl(df.a, df.b).alias("r2")).collect()
+    [Row(r1=1.0, r2=1.0), Row(r1=2.0, r2=2.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.nanvl(_to_java_column(col1), _to_java_column(col2)))
+
+
+@ignore_unicode_prefix
+@since(1.4)
+def rand(seed=None):
+    """Generates a random column with independent and identically distributed (i.i.d.) samples
+    from U[0.0, 1.0].
+
+    .. note:: The function is non-deterministic in the general case.
+
+    >>> df.withColumn('rand', rand(seed=42) * 3).collect()
+    [Row(age=2, name=u'Alice', rand=1.1568609015300986),
+     Row(age=5, name=u'Bob', rand=1.403379671529166)]
+    """
+    sc = SparkContext._active_spark_context
+    if seed is not None:
+        jc = sc._jvm.functions.rand(seed)
+    else:
+        jc = sc._jvm.functions.rand()
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.4)
+def randn(seed=None):
+    """Generates a column with independent and identically distributed (i.i.d.) samples from
+    the standard normal distribution.
+
+    .. note:: The function is non-deterministic in the general case.
+
+    >>> df.withColumn('randn', randn(seed=42)).collect()
+    [Row(age=2, name=u'Alice', randn=-0.7556247885860078),
+    Row(age=5, name=u'Bob', randn=-0.0861619008451133)]
+    """
+    sc = SparkContext._active_spark_context
+    if seed is not None:
+        jc = sc._jvm.functions.randn(seed)
+    else:
+        jc = sc._jvm.functions.randn()
+    return Column(jc)
+
+
+@since(1.5)
+def round(col, scale=0):
+    """
+    Round the given value to `scale` decimal places using HALF_UP rounding mode if `scale` >= 0
+    or at the integral part when `scale` < 0.
+
+    >>> spark.createDataFrame([(2.5,)], ['a']).select(round('a', 0).alias('r')).collect()
+    [Row(r=3.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.round(_to_java_column(col), scale))
+
+
+@since(2.0)
+def bround(col, scale=0):
+    """
+    Round the given value to `scale` decimal places using HALF_EVEN rounding mode if `scale` >= 0
+    or at the integral part when `scale` < 0.
+
+    >>> spark.createDataFrame([(2.5,)], ['a']).select(bround('a', 0).alias('r')).collect()
+    [Row(r=2.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.bround(_to_java_column(col), scale))
+
+
+@since(1.5)
+def shiftLeft(col, numBits):
+    """Shift the given value numBits left.
+
+    >>> spark.createDataFrame([(21,)], ['a']).select(shiftLeft('a', 1).alias('r')).collect()
+    [Row(r=42)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.shiftLeft(_to_java_column(col), numBits))
+
+
+@since(1.5)
+def shiftRight(col, numBits):
+    """(Signed) shift the given value numBits right.
+
+    >>> spark.createDataFrame([(42,)], ['a']).select(shiftRight('a', 1).alias('r')).collect()
+    [Row(r=21)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.shiftRight(_to_java_column(col), numBits)
+    return Column(jc)
+
+
+@since(1.5)
+def shiftRightUnsigned(col, numBits):
+    """Unsigned shift the given value numBits right.
+
+    >>> df = spark.createDataFrame([(-42,)], ['a'])
+    >>> df.select(shiftRightUnsigned('a', 1).alias('r')).collect()
+    [Row(r=9223372036854775787)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.shiftRightUnsigned(_to_java_column(col), numBits)
+    return Column(jc)
+
+
+@since(1.6)
+def spark_partition_id():
+    """A column for partition ID.
+
+    .. note:: This is non-deterministic because it depends on data partitioning and task scheduling.
+
+    >>> df.repartition(1).select(spark_partition_id().alias("pid")).collect()
+    [Row(pid=0), Row(pid=0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.spark_partition_id())
+
+
+@since(1.5)
+def expr(str):
+    """Parses the expression string into the column that it represents
+
+    >>> df.select(expr("length(name)")).collect()
+    [Row(length(name)=5), Row(length(name)=3)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.expr(str))
+
+
+@ignore_unicode_prefix
+@since(1.4)
+def struct(*cols):
+    """Creates a new struct column.
+
+    :param cols: list of column names (string) or list of :class:`Column` expressions
+
+    >>> df.select(struct('age', 'name').alias("struct")).collect()
+    [Row(struct=Row(age=2, name=u'Alice')), Row(struct=Row(age=5, name=u'Bob'))]
+    >>> df.select(struct([df.age, df.name]).alias("struct")).collect()
+    [Row(struct=Row(age=2, name=u'Alice')), Row(struct=Row(age=5, name=u'Bob'))]
+    """
+    sc = SparkContext._active_spark_context
+    if len(cols) == 1 and isinstance(cols[0], (list, set)):
+        cols = cols[0]
+    jc = sc._jvm.functions.struct(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+@since(1.5)
+def greatest(*cols):
+    """
+    Returns the greatest value of the list of column names, skipping null values.
+    This function takes at least 2 parameters. It will return null iff all parameters are null.
+
+    >>> df = spark.createDataFrame([(1, 4, 3)], ['a', 'b', 'c'])
+    >>> df.select(greatest(df.a, df.b, df.c).alias("greatest")).collect()
+    [Row(greatest=4)]
+    """
+    if len(cols) < 2:
+        raise ValueError("greatest should take at least two columns")
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.greatest(_to_seq(sc, cols, _to_java_column)))
+
+
+@since(1.5)
+def least(*cols):
+    """
+    Returns the least value of the list of column names, skipping null values.
+    This function takes at least 2 parameters. It will return null iff all parameters are null.
+
+    >>> df = spark.createDataFrame([(1, 4, 3)], ['a', 'b', 'c'])
+    >>> df.select(least(df.a, df.b, df.c).alias("least")).collect()
+    [Row(least=1)]
+    """
+    if len(cols) < 2:
+        raise ValueError("least should take at least two columns")
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.least(_to_seq(sc, cols, _to_java_column)))
+
+
+@since(1.4)
+def when(condition, value):
+    """Evaluates a list of conditions and returns one of multiple possible result expressions.
+    If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.
+
+    :param condition: a boolean :class:`Column` expression.
+    :param value: a literal value, or a :class:`Column` expression.
+
+    >>> df.select(when(df['age'] == 2, 3).otherwise(4).alias("age")).collect()
+    [Row(age=3), Row(age=4)]
+
+    >>> df.select(when(df.age == 2, df.age + 1).alias("age")).collect()
+    [Row(age=3), Row(age=None)]
+    """
+    sc = SparkContext._active_spark_context
+    if not isinstance(condition, Column):
+        raise TypeError("condition should be a Column")
+    v = value._jc if isinstance(value, Column) else value
+    jc = sc._jvm.functions.when(condition._jc, v)
+    return Column(jc)
+
+
+@since(1.5)
+def log(arg1, arg2=None):
+    """Returns the first argument-based logarithm of the second argument.
+
+    If there is only one argument, then this takes the natural logarithm of the argument.
+
+    >>> df.select(log(10.0, df.age).alias('ten')).rdd.map(lambda l: str(l.ten)[:7]).collect()
+    ['0.30102', '0.69897']
+
+    >>> df.select(log(df.age).alias('e')).rdd.map(lambda l: str(l.e)[:7]).collect()
+    ['0.69314', '1.60943']
+    """
+    sc = SparkContext._active_spark_context
+    if arg2 is None:
+        jc = sc._jvm.functions.log(_to_java_column(arg1))
+    else:
+        jc = sc._jvm.functions.log(arg1, _to_java_column(arg2))
+    return Column(jc)
+
+
+@since(1.5)
+def log2(col):
+    """Returns the base-2 logarithm of the argument.
+
+    >>> spark.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect()
+    [Row(log2=2.0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.log2(_to_java_column(col)))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def conv(col, fromBase, toBase):
+    """
+    Convert a number in a string column from one base to another.
+
+    >>> df = spark.createDataFrame([("010101",)], ['n'])
+    >>> df.select(conv(df.n, 2, 16).alias('hex')).collect()
+    [Row(hex=u'15')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.conv(_to_java_column(col), fromBase, toBase))
+
+
+@since(1.5)
+def factorial(col):
+    """
+    Computes the factorial of the given value.
+
+    >>> df = spark.createDataFrame([(5,)], ['n'])
+    >>> df.select(factorial(df.n).alias('f')).collect()
+    [Row(f=120)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.factorial(_to_java_column(col)))
+
+
+# ---------------  Window functions ------------------------
+
+@since(1.4)
+def lag(col, count=1, default=None):
+    """
+    Window function: returns the value that is `count` rows before the current row, and
+    `default` if there are fewer than `count` rows before the current row. For example,
+    a `count` of one will return the previous row at any given point in the window partition.
+
+    This is equivalent to the LAG function in SQL.
+
+    :param col: name of column or expression
+    :param count: number of rows to look back (the offset)
+    :param default: default value to return when the offset row does not exist
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.lag(_to_java_column(col), count, default))
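+
+# A minimal usage sketch (comments only, not a doctest): lag() is only meaningful
+# when applied over a window spec via Column.over(). The DataFrame `df` and the
+# column names "grp", "ts", "val" below are hypothetical.
+#
+#     from pyspark.sql import Window
+#     w = Window.partitionBy("grp").orderBy("ts")
+#     df.withColumn("prev_val", lag("val", 1).over(w))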
+
+
+@since(1.4)
+def lead(col, count=1, default=None):
+    """
+    Window function: returns the value that is `count` rows after the current row, and
+    `default` if there are fewer than `count` rows after the current row. For example,
+    a `count` of one will return the next row at any given point in the window partition.
+
+    This is equivalent to the LEAD function in SQL.
+
+    :param col: name of column or expression
+    :param count: number of rows to look ahead (the offset)
+    :param default: default value to return when the offset row does not exist
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.lead(_to_java_column(col), count, default))
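+
+# A minimal usage sketch (comments only, not a doctest): like lag(), lead() must be
+# applied over a window spec. `df`, "grp", "ts", "val" are hypothetical; the third
+# argument supplies a default for rows that have no following row.
+#
+#     from pyspark.sql import Window
+#     w = Window.partitionBy("grp").orderBy("ts")
+#     df.withColumn("next_val", lead("val", 1, 0).over(w))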
+
+
+@since(1.4)
+def ntile(n):
+    """
+    Window function: returns the ntile group id (from 1 to `n` inclusive)
+    in an ordered window partition. For example, if `n` is 4, the first
+    quarter of the rows will get value 1, the second quarter will get 2,
+    the third quarter will get 3, and the last quarter will get 4.
+
+    This is equivalent to the NTILE function in SQL.
+
+    :param n: an integer
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.ntile(int(n)))
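+
+# A minimal usage sketch (comments only, not a doctest): ntile(4) assigns each row
+# of an ordered window partition to one of four buckets. `df`, "dept" and "salary"
+# are hypothetical.
+#
+#     from pyspark.sql import Window
+#     w = Window.partitionBy("dept").orderBy("salary")
+#     df.withColumn("quartile", ntile(4).over(w))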
+
+
+# ---------------------- Date/Timestamp functions ------------------------------
+
+@since(1.5)
+def current_date():
+    """
+    Returns the current date as a :class:`DateType` column.
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.current_date())
+
+
+def current_timestamp():
+    """
+    Returns the current timestamp as a :class:`TimestampType` column.
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.current_timestamp())
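+
+# A minimal usage sketch (comments only, not a doctest): stamping rows with the
+# current date and timestamp at query time. The DataFrame `df` is hypothetical.
+#
+#     stamped = df.withColumn("load_date", current_date()) \
+#                 .withColumn("load_ts", current_timestamp())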
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def date_format(date, format):
+    """
+    Converts a date/timestamp/string to a value of string in the format specified by the date
+    format given by the second argument.
+
+    A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All
+    pattern letters of the Java class `java.text.SimpleDateFormat` can be used.
+
+    .. note:: Whenever possible, use specialized functions like `year`. These benefit from a
+        specialized implementation.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(date_format('dt', 'MM/dd/yyy').alias('date')).collect()
+    [Row(date=u'04/08/2015')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.date_format(_to_java_column(date), format))
+
+
+@since(1.5)
+def year(col):
+    """
+    Extract the year of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(year('dt').alias('year')).collect()
+    [Row(year=2015)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.year(_to_java_column(col)))
+
+
+@since(1.5)
+def quarter(col):
+    """
+    Extract the quarter of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(quarter('dt').alias('quarter')).collect()
+    [Row(quarter=2)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.quarter(_to_java_column(col)))
+
+
+@since(1.5)
+def month(col):
+    """
+    Extract the month of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(month('dt').alias('month')).collect()
+    [Row(month=4)]
+   """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.month(_to_java_column(col)))
+
+
+@since(2.3)
+def dayofweek(col):
+    """
+    Extract the day of the week of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(dayofweek('dt').alias('day')).collect()
+    [Row(day=4)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.dayofweek(_to_java_column(col)))
+
+
+@since(1.5)
+def dayofmonth(col):
+    """
+    Extract the day of the month of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(dayofmonth('dt').alias('day')).collect()
+    [Row(day=8)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.dayofmonth(_to_java_column(col)))
+
+
+@since(1.5)
+def dayofyear(col):
+    """
+    Extract the day of the year of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(dayofyear('dt').alias('day')).collect()
+    [Row(day=98)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.dayofyear(_to_java_column(col)))
+
+
+@since(1.5)
+def hour(col):
+    """
+    Extract the hours of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts'])
+    >>> df.select(hour('ts').alias('hour')).collect()
+    [Row(hour=13)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.hour(_to_java_column(col)))
+
+
+@since(1.5)
+def minute(col):
+    """
+    Extract the minutes of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts'])
+    >>> df.select(minute('ts').alias('minute')).collect()
+    [Row(minute=8)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.minute(_to_java_column(col)))
+
+
+@since(1.5)
+def second(col):
+    """
+    Extract the seconds of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts'])
+    >>> df.select(second('ts').alias('second')).collect()
+    [Row(second=15)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.second(_to_java_column(col)))
+
+
+@since(1.5)
+def weekofyear(col):
+    """
+    Extract the week number of a given date as integer.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(weekofyear(df.dt).alias('week')).collect()
+    [Row(week=15)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.weekofyear(_to_java_column(col)))
+
+
+@since(1.5)
+def date_add(start, days):
+    """
+    Returns the date that is `days` days after `start`.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(date_add(df.dt, 1).alias('next_date')).collect()
+    [Row(next_date=datetime.date(2015, 4, 9))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.date_add(_to_java_column(start), days))
+
+
+@since(1.5)
+def date_sub(start, days):
+    """
+    Returns the date that is `days` days before `start`.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(date_sub(df.dt, 1).alias('prev_date')).collect()
+    [Row(prev_date=datetime.date(2015, 4, 7))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.date_sub(_to_java_column(start), days))
+
+
+@since(1.5)
+def datediff(end, start):
+    """
+    Returns the number of days from `start` to `end`.
+
+    >>> df = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2'])
+    >>> df.select(datediff(df.d2, df.d1).alias('diff')).collect()
+    [Row(diff=32)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.datediff(_to_java_column(end), _to_java_column(start)))
+
+
+@since(1.5)
+def add_months(start, months):
+    """
+    Returns the date that is `months` months after `start`.
+
+    >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> df.select(add_months(df.dt, 1).alias('next_month')).collect()
+    [Row(next_month=datetime.date(2015, 5, 8))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.add_months(_to_java_column(start), months))
+
+
+@since(1.5)
+def months_between(date1, date2, roundOff=True):
+    """
+    Returns the number of months between dates date1 and date2.
+    If date1 is later than date2, then the result is positive.
+    If date1 and date2 are on the same day of the month, or both are the last day of the month,
+    the result is a whole number (the time of day is ignored).
+    The result is rounded off to 8 digits unless `roundOff` is set to `False`.
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['date1', 'date2'])
+    >>> df.select(months_between(df.date1, df.date2).alias('months')).collect()
+    [Row(months=3.94959677)]
+    >>> df.select(months_between(df.date1, df.date2, False).alias('months')).collect()
+    [Row(months=3.9495967741935485)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.months_between(
+        _to_java_column(date1), _to_java_column(date2), roundOff))
+
+
+@since(2.2)
+def to_date(col, format=None):
+    """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or
+    :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.DateType`
+    using the optionally specified format. Specify formats according to
+    `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
+    By default, it follows casting rules to :class:`pyspark.sql.types.DateType` if the format
+    is omitted (equivalent to ``col.cast("date")``).
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+    >>> df.select(to_date(df.t).alias('date')).collect()
+    [Row(date=datetime.date(1997, 2, 28))]
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+    >>> df.select(to_date(df.t, 'yyyy-MM-dd HH:mm:ss').alias('date')).collect()
+    [Row(date=datetime.date(1997, 2, 28))]
+    """
+    sc = SparkContext._active_spark_context
+    if format is None:
+        jc = sc._jvm.functions.to_date(_to_java_column(col))
+    else:
+        jc = sc._jvm.functions.to_date(_to_java_column(col), format)
+    return Column(jc)
+
+
+@since(2.2)
+def to_timestamp(col, format=None):
+    """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or
+    :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.TimestampType`
+    using the optionally specified format. Specify formats according to
+    `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
+    By default, it follows casting rules to :class:`pyspark.sql.types.TimestampType` if the format
+    is omitted (equivalent to ``col.cast("timestamp")``).
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+    >>> df.select(to_timestamp(df.t).alias('dt')).collect()
+    [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
+    >>> df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).collect()
+    [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
+    """
+    sc = SparkContext._active_spark_context
+    if format is None:
+        jc = sc._jvm.functions.to_timestamp(_to_java_column(col))
+    else:
+        jc = sc._jvm.functions.to_timestamp(_to_java_column(col), format)
+    return Column(jc)
+
+
+@since(1.5)
+def trunc(date, format):
+    """
+    Returns date truncated to the unit specified by the format.
+
+    :param format: 'year', 'yyyy', 'yy' or 'month', 'mon', 'mm'
+
+    >>> df = spark.createDataFrame([('1997-02-28',)], ['d'])
+    >>> df.select(trunc(df.d, 'year').alias('year')).collect()
+    [Row(year=datetime.date(1997, 1, 1))]
+    >>> df.select(trunc(df.d, 'mon').alias('month')).collect()
+    [Row(month=datetime.date(1997, 2, 1))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.trunc(_to_java_column(date), format))
+
+
+@since(2.3)
+def date_trunc(format, timestamp):
+    """
+    Returns timestamp truncated to the unit specified by the format.
+
+    :param format: 'year', 'yyyy', 'yy', 'month', 'mon', 'mm',
+        'day', 'dd', 'hour', 'minute', 'second', 'week', 'quarter'
+
+    >>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['t'])
+    >>> df.select(date_trunc('year', df.t).alias('year')).collect()
+    [Row(year=datetime.datetime(1997, 1, 1, 0, 0))]
+    >>> df.select(date_trunc('mon', df.t).alias('month')).collect()
+    [Row(month=datetime.datetime(1997, 2, 1, 0, 0))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.date_trunc(format, _to_java_column(timestamp)))
+
+
+@since(1.5)
+def next_day(date, dayOfWeek):
+    """
+    Returns the first date which is later than the value of the date column and falls
+    on the specified day of the week.
+
+    Day of the week parameter is case insensitive, and accepts:
+        "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
+
+    >>> df = spark.createDataFrame([('2015-07-27',)], ['d'])
+    >>> df.select(next_day(df.d, 'Sun').alias('date')).collect()
+    [Row(date=datetime.date(2015, 8, 2))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.next_day(_to_java_column(date), dayOfWeek))
+
+
+@since(1.5)
+def last_day(date):
+    """
+    Returns the last day of the month which the given date belongs to.
+
+    >>> df = spark.createDataFrame([('1997-02-10',)], ['d'])
+    >>> df.select(last_day(df.d).alias('date')).collect()
+    [Row(date=datetime.date(1997, 2, 28))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.last_day(_to_java_column(date)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def from_unixtime(timestamp, format="yyyy-MM-dd HH:mm:ss"):
+    """
+    Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
+    representing the timestamp of that moment in the current system time zone in the given
+    format.
+
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> time_df = spark.createDataFrame([(1428476400,)], ['unix_time'])
+    >>> time_df.select(from_unixtime('unix_time').alias('ts')).collect()
+    [Row(ts=u'2015-04-08 00:00:00')]
+    >>> spark.conf.unset("spark.sql.session.timeZone")
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.from_unixtime(_to_java_column(timestamp), format))
+
+
+@since(1.5)
+def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'):
+    """
+    Converts a time string with the given pattern ('yyyy-MM-dd HH:mm:ss', by default)
+    to a Unix timestamp (in seconds), using the default timezone and the default
+    locale; returns null on failure.
+
+    If `timestamp` is None, then it returns the current timestamp.
+
+    >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+    >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+    >>> time_df.select(unix_timestamp('dt', 'yyyy-MM-dd').alias('unix_time')).collect()
+    [Row(unix_time=1428476400)]
+    >>> spark.conf.unset("spark.sql.session.timeZone")
+    """
+    sc = SparkContext._active_spark_context
+    if timestamp is None:
+        return Column(sc._jvm.functions.unix_timestamp())
+    return Column(sc._jvm.functions.unix_timestamp(_to_java_column(timestamp), format))
+
+
+@since(1.5)
+def from_utc_timestamp(timestamp, tz):
+    """
+    This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. This function
+    takes a timestamp which is timezone-agnostic, and interprets it as a timestamp in UTC, and
+    renders that timestamp as a timestamp in the given time zone.
+
+    However, a timestamp in Spark represents a number of microseconds from the Unix epoch, which
+    is not timezone-agnostic. So in Spark this function simply shifts the timestamp value from
+    the UTC timezone to the given timezone.
+
+    This function may return a confusing result if the input is a string with a timezone, e.g.
+    '2018-03-13T06:18:23+00:00'. The reason is that Spark first casts the string to a timestamp
+    according to the timezone in the string, and finally displays the result by converting the
+    timestamp to a string according to the session-local timezone.
+
+    :param timestamp: the column that contains timestamps
+    :param tz: a string that has the ID of timezone, e.g. "GMT", "America/Los_Angeles", etc
+
+    .. versionchanged:: 2.4
+       `tz` can take a :class:`Column` containing timezone ID strings.
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz'])
+    >>> df.select(from_utc_timestamp(df.ts, "PST").alias('local_time')).collect()
+    [Row(local_time=datetime.datetime(1997, 2, 28, 2, 30))]
+    >>> df.select(from_utc_timestamp(df.ts, df.tz).alias('local_time')).collect()
+    [Row(local_time=datetime.datetime(1997, 2, 28, 19, 30))]
+    """
+    sc = SparkContext._active_spark_context
+    if isinstance(tz, Column):
+        tz = _to_java_column(tz)
+    return Column(sc._jvm.functions.from_utc_timestamp(_to_java_column(timestamp), tz))
+
+
+@since(1.5)
+def to_utc_timestamp(timestamp, tz):
+    """
+    This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. This function
+    takes a timestamp which is timezone-agnostic, and interprets it as a timestamp in the given
+    timezone, and renders that timestamp as a timestamp in UTC.
+
+    However, a timestamp in Spark represents a number of microseconds from the Unix epoch, which
+    is not timezone-agnostic. So in Spark this function simply shifts the timestamp value from
+    the given timezone to the UTC timezone.
+
+    This function may return a confusing result if the input is a string with a timezone, e.g.
+    '2018-03-13T06:18:23+00:00'. The reason is that Spark first casts the string to a timestamp
+    according to the timezone in the string, and finally displays the result by converting the
+    timestamp to a string according to the session-local timezone.
+
+    :param timestamp: the column that contains timestamps
+    :param tz: a string that has the ID of timezone, e.g. "GMT", "America/Los_Angeles", etc
+
+    .. versionchanged:: 2.4
+       `tz` can take a :class:`Column` containing timezone ID strings.
+
+    >>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz'])
+    >>> df.select(to_utc_timestamp(df.ts, "PST").alias('utc_time')).collect()
+    [Row(utc_time=datetime.datetime(1997, 2, 28, 18, 30))]
+    >>> df.select(to_utc_timestamp(df.ts, df.tz).alias('utc_time')).collect()
+    [Row(utc_time=datetime.datetime(1997, 2, 28, 1, 30))]
+    """
+    sc = SparkContext._active_spark_context
+    if isinstance(tz, Column):
+        tz = _to_java_column(tz)
+    return Column(sc._jvm.functions.to_utc_timestamp(_to_java_column(timestamp), tz))
+
+
+@since(2.0)
+@ignore_unicode_prefix
+def window(timeColumn, windowDuration, slideDuration=None, startTime=None):
+    """Bucketize rows into one or more time windows given a timestamp specifying column. Window
+    starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window
+    [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in
+    the order of months are not supported.
+
+    The time column must be of :class:`pyspark.sql.types.TimestampType`.
+
+    Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid
+    interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'.
+    If the ``slideDuration`` is not provided, the windows will be tumbling windows.
+
+    The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start
+    window intervals. For example, in order to have hourly tumbling windows that start 15 minutes
+    past the hour, e.g. 12:15-13:15, 13:15-14:15... provide `startTime` as `15 minutes`.
+
+    The output column will be a struct called 'window' by default with the nested columns 'start'
+    and 'end', where 'start' and 'end' will be of :class:`pyspark.sql.types.TimestampType`.
+
+    >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date", "val")
+    >>> w = df.groupBy(window("date", "5 seconds")).agg(sum("val").alias("sum"))
+    >>> w.select(w.window.start.cast("string").alias("start"),
+    ...          w.window.end.cast("string").alias("end"), "sum").collect()
+    [Row(start=u'2016-03-11 09:00:05', end=u'2016-03-11 09:00:10', sum=1)]
+    """
+    def check_string_field(field, fieldName):
+        if not field or type(field) is not str:
+            raise TypeError("%s should be provided as a string" % fieldName)
+
+    sc = SparkContext._active_spark_context
+    time_col = _to_java_column(timeColumn)
+    check_string_field(windowDuration, "windowDuration")
+    if slideDuration and startTime:
+        check_string_field(slideDuration, "slideDuration")
+        check_string_field(startTime, "startTime")
+        res = sc._jvm.functions.window(time_col, windowDuration, slideDuration, startTime)
+    elif slideDuration:
+        check_string_field(slideDuration, "slideDuration")
+        res = sc._jvm.functions.window(time_col, windowDuration, slideDuration)
+    elif startTime:
+        check_string_field(startTime, "startTime")
+        res = sc._jvm.functions.window(time_col, windowDuration, windowDuration, startTime)
+    else:
+        res = sc._jvm.functions.window(time_col, windowDuration)
+    return Column(res)
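+
+# A minimal usage sketch (comments only, not a doctest) of the sliding and offset
+# forms described in the docstring above. `df` and its "ts" column are hypothetical.
+#
+#     # 10-minute windows that advance every 5 minutes:
+#     df.groupBy(window("ts", "10 minutes", "5 minutes")).count()
+#     # hourly tumbling windows starting 15 minutes past the hour:
+#     df.groupBy(window("ts", "1 hour", startTime="15 minutes")).count()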
+
+
+# ---------------------------- misc functions ----------------------------------
+
+@since(1.5)
+@ignore_unicode_prefix
+def crc32(col):
+    """
+    Calculates the cyclic redundancy check value (CRC32) of a binary column and
+    returns the value as a bigint.
+
+    >>> spark.createDataFrame([('ABC',)], ['a']).select(crc32('a').alias('crc32')).collect()
+    [Row(crc32=2743272264)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.crc32(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def md5(col):
+    """Calculates the MD5 digest and returns the value as a 32 character hex string.
+
+    >>> spark.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect()
+    [Row(hash=u'902fbdd2b1df0c4f70b4a5d23525e932')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.md5(_to_java_column(col))
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def sha1(col):
+    """Returns the hex string result of SHA-1.
+
+    >>> spark.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect()
+    [Row(hash=u'3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.sha1(_to_java_column(col))
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def sha2(col, numBits):
+    """Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384,
+    and SHA-512). The numBits indicates the desired bit length of the result, which must have a
+    value of 224, 256, 384, 512, or 0 (which is equivalent to 256).
+
+    >>> digests = df.select(sha2(df.name, 256).alias('s')).collect()
+    >>> digests[0]
+    Row(s=u'3bc51062973c458d5a6f2d8d64a023246354ad7e064b1e4e009ec8a0699a3043')
+    >>> digests[1]
+    Row(s=u'cd9fb1e148ccd8442e5aa74904cc73bf6fb54d1d54d333bd596aa9bb4bb4e961')
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.sha2(_to_java_column(col), numBits)
+    return Column(jc)
+
+
+@since(2.0)
+def hash(*cols):
+    """Calculates the hash code of given columns, and returns the result as an int column.
+
+    >>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect()
+    [Row(hash=-757602832)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.hash(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+# ---------------------- String/Binary functions ------------------------------
+
+_string_functions = {
+    'ascii': 'Computes the numeric value of the first character of the string column.',
+    'base64': 'Computes the BASE64 encoding of a binary column and returns it as a string column.',
+    'unbase64': 'Decodes a BASE64 encoded string column and returns it as a binary column.',
+    'initcap': 'Returns a new string column by converting the first letter of each word to ' +
+               'uppercase. Words are delimited by whitespace.',
+    'lower': 'Converts a string column to lower case.',
+    'upper': 'Converts a string column to upper case.',
+    'ltrim': 'Trims the spaces from the left end of the specified string value.',
+    'rtrim': 'Trims the spaces from the right end of the specified string value.',
+    'trim': 'Trims the spaces from both ends of the specified string column.',
+}
+
+
+for _name, _doc in _string_functions.items():
+    globals()[_name] = since(1.5)(_create_function(_name, _doc))
+del _name, _doc
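+
+# The functions generated above (ascii, base64, lower, upper, trim, ...) behave like
+# any other column function. A minimal usage sketch (comments only, not a doctest);
+# the DataFrame `df` and its "name" column are hypothetical.
+#
+#     df.select(trim(lower(df.name)).alias("clean_name"))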
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def concat_ws(sep, *cols):
+    """
+    Concatenates multiple input string columns together into a single string column,
+    using the given separator.
+
+    >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd'])
+    >>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect()
+    [Row(s=u'abcd-123')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.concat_ws(sep, _to_seq(sc, cols, _to_java_column)))
+
+
+@since(1.5)
+def decode(col, charset):
+    """
+    Decodes the first argument from a binary column into a string using the provided character set
+    (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.decode(_to_java_column(col), charset))
+
+
+@since(1.5)
+def encode(col, charset):
+    """
+    Encodes the first argument from a string column into a binary value using the provided
+    character set
+    (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.encode(_to_java_column(col), charset))
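+
+# A minimal round-trip sketch (comments only, not a doctest): encode a string column
+# to binary and decode it back. `df` and its "text" column are hypothetical.
+#
+#     binary_df = df.select(encode(df.text, "UTF-8").alias("raw"))
+#     binary_df.select(decode("raw", "UTF-8").alias("text_again"))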
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def format_number(col, d):
+    """
+    Formats the number X to a format like '#,###,###.##', rounded to d decimal places
+    with HALF_EVEN round mode, and returns the result as a string.
+
+    :param col: the column name of the numeric value to be formatted
+    :param d: the number of decimal places to round to
+
+    >>> spark.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect()
+    [Row(v=u'5.0000')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.format_number(_to_java_column(col), d))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def format_string(format, *cols):
+    """
+    Formats the arguments in printf-style and returns the result as a string column.
+
+    :param format: the printf-style format string
+    :param cols: list of column names (string) or list of :class:`Column` expressions to be
+        formatted into the string
+
+    >>> df = spark.createDataFrame([(5, "hello")], ['a', 'b'])
+    >>> df.select(format_string('%d %s', df.a, df.b).alias('v')).collect()
+    [Row(v=u'5 hello')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.format_string(format, _to_seq(sc, cols, _to_java_column)))
+
+
+@since(1.5)
+def instr(str, substr):
+    """
+    Locate the position of the first occurrence of substr in the given string column.
+    Returns null if either of the arguments is null.
+
+    .. note:: The position is not zero based, but 1 based index. Returns 0 if substr
+        could not be found in str.
+
+    >>> df = spark.createDataFrame([('abcd',)], ['s',])
+    >>> df.select(instr(df.s, 'b').alias('s')).collect()
+    [Row(s=2)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.instr(_to_java_column(str), substr))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def substring(str, pos, len):
+    """
+    Returns the substring that starts at `pos` and is of length `len` when `str` is String type,
+    or the slice of the byte array that starts at `pos` (in bytes) and is of length `len`
+    when `str` is Binary type.
+
+    .. note:: The position is not zero based, but 1 based index.
+
+    >>> df = spark.createDataFrame([('abcd',)], ['s',])
+    >>> df.select(substring(df.s, 1, 2).alias('s')).collect()
+    [Row(s=u'ab')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.substring(_to_java_column(str), pos, len))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def substring_index(str, delim, count):
+    """
+    Returns the substring from string str before count occurrences of the delimiter delim.
+    If count is positive, everything to the left of the final delimiter (counting from the left)
+    is returned. If count is negative, everything to the right of the final delimiter (counting
+    from the right) is returned. substring_index performs a case-sensitive match when searching
+    for delim.
+
+    >>> df = spark.createDataFrame([('a.b.c.d',)], ['s'])
+    >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect()
+    [Row(s=u'a.b')]
+    >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect()
+    [Row(s=u'b.c.d')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def levenshtein(left, right):
+    """Computes the Levenshtein distance of the two given strings.
+
+    >>> df0 = spark.createDataFrame([('kitten', 'sitting',)], ['l', 'r'])
+    >>> df0.select(levenshtein('l', 'r').alias('d')).collect()
+    [Row(d=3)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.levenshtein(_to_java_column(left), _to_java_column(right))
+    return Column(jc)
+
+
+@since(1.5)
+def locate(substr, str, pos=1):
+    """
+    Locate the position of the first occurrence of substr in a string column, after position pos.
+
+    .. note:: The position is not zero based, but 1 based index. Returns 0 if substr
+        could not be found in str.
+
+    :param substr: a string
+    :param str: a Column of :class:`pyspark.sql.types.StringType`
+    :param pos: start position (1-based index)
+
+    >>> df = spark.createDataFrame([('abcd',)], ['s',])
+    >>> df.select(locate('b', df.s, 1).alias('s')).collect()
+    [Row(s=2)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.locate(substr, _to_java_column(str), pos))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def lpad(col, len, pad):
+    """
+    Left-pad the string column to width `len` with `pad`.
+
+    >>> df = spark.createDataFrame([('abcd',)], ['s',])
+    >>> df.select(lpad(df.s, 6, '#').alias('s')).collect()
+    [Row(s=u'##abcd')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.lpad(_to_java_column(col), len, pad))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def rpad(col, len, pad):
+    """
+    Right-pad the string column to width `len` with `pad`.
+
+    >>> df = spark.createDataFrame([('abcd',)], ['s',])
+    >>> df.select(rpad(df.s, 6, '#').alias('s')).collect()
+    [Row(s=u'abcd##')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.rpad(_to_java_column(col), len, pad))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def repeat(col, n):
+    """
+    Repeats a string column n times, and returns it as a new string column.
+
+    >>> df = spark.createDataFrame([('ab',)], ['s',])
+    >>> df.select(repeat(df.s, 3).alias('s')).collect()
+    [Row(s=u'ababab')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.repeat(_to_java_column(col), n))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def split(str, pattern):
+    """
+    Splits str around pattern (pattern is a regular expression).
+
+    .. note:: pattern is a string representing the regular expression.
+
+    >>> df = spark.createDataFrame([('ab12cd',)], ['s',])
+    >>> df.select(split(df.s, '[0-9]+').alias('s')).collect()
+    [Row(s=[u'ab', u'cd'])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.split(_to_java_column(str), pattern))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def regexp_extract(str, pattern, idx):
+    r"""Extract a specific group matched by a Java regex, from the specified string column.
+    If the regex did not match, or the specified group did not match, an empty string is returned.
+
+    >>> df = spark.createDataFrame([('100-200',)], ['str'])
+    >>> df.select(regexp_extract('str', r'(\d+)-(\d+)', 1).alias('d')).collect()
+    [Row(d=u'100')]
+    >>> df = spark.createDataFrame([('foo',)], ['str'])
+    >>> df.select(regexp_extract('str', r'(\d+)', 1).alias('d')).collect()
+    [Row(d=u'')]
+    >>> df = spark.createDataFrame([('aaaac',)], ['str'])
+    >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect()
+    [Row(d=u'')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.regexp_extract(_to_java_column(str), pattern, idx)
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def regexp_replace(str, pattern, replacement):
+    r"""Replace all substrings of the specified string value that match regexp with rep.
+
+    >>> df = spark.createDataFrame([('100-200',)], ['str'])
+    >>> df.select(regexp_replace('str', r'(\d+)', '--').alias('d')).collect()
+    [Row(d=u'-----')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.regexp_replace(_to_java_column(str), pattern, replacement)
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def initcap(col):
+    """Translate the first letter of each word to upper case in the sentence.
+
+    >>> spark.createDataFrame([('ab cd',)], ['a']).select(initcap("a").alias('v')).collect()
+    [Row(v=u'Ab Cd')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.initcap(_to_java_column(col)))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def soundex(col):
+    """
+    Returns the SoundEx encoding for a string.
+
+    >>> df = spark.createDataFrame([("Peters",),("Uhrbach",)], ['name'])
+    >>> df.select(soundex(df.name).alias("soundex")).collect()
+    [Row(soundex=u'P362'), Row(soundex=u'U612')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.soundex(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def bin(col):
+    """Returns the string representation of the binary value of the given column.
+
+    >>> df.select(bin(df.age).alias('c')).collect()
+    [Row(c=u'10'), Row(c=u'101')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.bin(_to_java_column(col))
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def hex(col):
+    """Computes hex value of the given column, which could be :class:`pyspark.sql.types.StringType`,
+    :class:`pyspark.sql.types.BinaryType`, :class:`pyspark.sql.types.IntegerType` or
+    :class:`pyspark.sql.types.LongType`.
+
+    >>> spark.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect()
+    [Row(hex(a)=u'414243', hex(b)=u'3')]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.hex(_to_java_column(col))
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def unhex(col):
+    """Inverse of hex. Interprets each pair of characters as a hexadecimal number
+    and converts it to the byte representation of the number.
+
+    >>> spark.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect()
+    [Row(unhex(a)=bytearray(b'ABC'))]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.unhex(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def length(col):
+    """Computes the character length of string data or number of bytes of binary data.
+    The length of character data includes the trailing spaces. The length of binary data
+    includes binary zeros.
+
+    >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect()
+    [Row(length=4)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.length(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(1.5)
+def translate(srcCol, matching, replace):
+    """A function translate any character in the `srcCol` by a character in `matching`.
+    The characters in `replace` is corresponding to the characters in `matching`.
+    The translate will happen when any character in the string matching with the character
+    in the `matching`.
+
+    >>> spark.createDataFrame([('translate',)], ['a']).select(translate('a', "rnlt", "123") \\
+    ...     .alias('r')).collect()
+    [Row(r=u'1a2s3ae')]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.translate(_to_java_column(srcCol), matching, replace))
+
+
+# ---------------------- Collection functions ------------------------------
+
+@ignore_unicode_prefix
+@since(2.0)
+def create_map(*cols):
+    """Creates a new map column.
+
+    :param cols: list of column names (string) or list of :class:`Column` expressions that are
+        grouped as key-value pairs, e.g. (key1, value1, key2, value2, ...).
+
+    >>> df.select(create_map('name', 'age').alias("map")).collect()
+    [Row(map={u'Alice': 2}), Row(map={u'Bob': 5})]
+    >>> df.select(create_map([df.name, df.age]).alias("map")).collect()
+    [Row(map={u'Alice': 2}), Row(map={u'Bob': 5})]
+    """
+    sc = SparkContext._active_spark_context
+    if len(cols) == 1 and isinstance(cols[0], (list, set)):
+        cols = cols[0]
+    jc = sc._jvm.functions.map(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+@since(2.4)
+def map_from_arrays(col1, col2):
+    """Creates a new map from two arrays.
+
+    :param col1: name of column containing a set of keys. All elements should not be null
+    :param col2: name of column containing a set of values
+
+    >>> df = spark.createDataFrame([([2, 5], ['a', 'b'])], ['k', 'v'])
+    >>> df.select(map_from_arrays(df.k, df.v).alias("map")).show()
+    +----------------+
+    |             map|
+    +----------------+
+    |[2 -> a, 5 -> b]|
+    +----------------+
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.map_from_arrays(_to_java_column(col1), _to_java_column(col2)))
+
+
+@since(1.4)
+def array(*cols):
+    """Creates a new array column.
+
+    :param cols: list of column names (string) or list of :class:`Column` expressions that have
+        the same data type.
+
+    >>> df.select(array('age', 'age').alias("arr")).collect()
+    [Row(arr=[2, 2]), Row(arr=[5, 5])]
+    >>> df.select(array([df.age, df.age]).alias("arr")).collect()
+    [Row(arr=[2, 2]), Row(arr=[5, 5])]
+    """
+    sc = SparkContext._active_spark_context
+    if len(cols) == 1 and isinstance(cols[0], (list, set)):
+        cols = cols[0]
+    jc = sc._jvm.functions.array(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+@since(1.5)
+def array_contains(col, value):
+    """
+    Collection function: returns null if the array is null, true if the array contains the
+    given value, and false otherwise.
+
+    :param col: name of column containing array
+    :param value: value to check for in array
+
+    >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data'])
+    >>> df.select(array_contains(df.data, "a")).collect()
+    [Row(array_contains(data, a)=True), Row(array_contains(data, a)=False)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_contains(_to_java_column(col), value))
+
+
+@since(2.4)
+def arrays_overlap(a1, a2):
+    """
+    Collection function: returns true if the arrays contain any common non-null element; if not,
+    returns null if both the arrays are non-empty and any of them contains a null element; returns
+    false otherwise.
+
+    >>> df = spark.createDataFrame([(["a", "b"], ["b", "c"]), (["a"], ["b", "c"])], ['x', 'y'])
+    >>> df.select(arrays_overlap(df.x, df.y).alias("overlap")).collect()
+    [Row(overlap=True), Row(overlap=False)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.arrays_overlap(_to_java_column(a1), _to_java_column(a2)))
+
+
+@since(2.4)
+def slice(x, start, length):
+    """
+    Collection function: returns an array containing all the elements in `x` from index `start`
+    (or starting from the end if `start` is negative) with the specified `length`.
+
+    >>> df = spark.createDataFrame([([1, 2, 3],), ([4, 5],)], ['x'])
+    >>> df.select(slice(df.x, 2, 2).alias("sliced")).collect()
+    [Row(sliced=[2, 3]), Row(sliced=[5])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.slice(_to_java_column(x), start, length))
+
+
+@ignore_unicode_prefix
+@since(2.4)
+def array_join(col, delimiter, null_replacement=None):
+    """
+    Concatenates the elements of `col` using the `delimiter`. Null values are replaced with
+    `null_replacement` if set, otherwise they are ignored.
+
+    >>> df = spark.createDataFrame([(["a", "b", "c"],), (["a", None],)], ['data'])
+    >>> df.select(array_join(df.data, ",").alias("joined")).collect()
+    [Row(joined=u'a,b,c'), Row(joined=u'a')]
+    >>> df.select(array_join(df.data, ",", "NULL").alias("joined")).collect()
+    [Row(joined=u'a,b,c'), Row(joined=u'a,NULL')]
+    """
+    sc = SparkContext._active_spark_context
+    if null_replacement is None:
+        return Column(sc._jvm.functions.array_join(_to_java_column(col), delimiter))
+    else:
+        return Column(sc._jvm.functions.array_join(
+            _to_java_column(col), delimiter, null_replacement))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def concat(*cols):
+    """
+    Concatenates multiple input columns together into a single column.
+    The function works with strings, binary and compatible array columns.
+
+    >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd'])
+    >>> df.select(concat(df.s, df.d).alias('s')).collect()
+    [Row(s=u'abcd123')]
+
+    >>> df = spark.createDataFrame([([1, 2], [3, 4], [5]), ([1, 2], None, [3])], ['a', 'b', 'c'])
+    >>> df.select(concat(df.a, df.b, df.c).alias("arr")).collect()
+    [Row(arr=[1, 2, 3, 4, 5]), Row(arr=None)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.concat(_to_seq(sc, cols, _to_java_column)))
+
+
+@since(2.4)
+def array_position(col, value):
+    """
+    Collection function: Locates the position of the first occurrence of the given value
+    in the given array. Returns null if either of the arguments are null.
+
+    .. note:: The position is not zero based, but 1 based index. Returns 0 if the given
+        value could not be found in the array.
+
+    >>> df = spark.createDataFrame([(["c", "b", "a"],), ([],)], ['data'])
+    >>> df.select(array_position(df.data, "a")).collect()
+    [Row(array_position(data, a)=3), Row(array_position(data, a)=0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_position(_to_java_column(col), value))
+
+
+@ignore_unicode_prefix
+@since(2.4)
+def element_at(col, extraction):
+    """
+    Collection function: Returns element of array at given index in extraction if col is array.
+    Returns value for the given key in extraction if col is map.
+
+    :param col: name of column containing array or map
+    :param extraction: index to check for in array or key to check for in map
+
+    .. note:: The position is not zero based, but 1 based index.
+
+    >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data'])
+    >>> df.select(element_at(df.data, 1)).collect()
+    [Row(element_at(data, 1)=u'a'), Row(element_at(data, 1)=None)]
+
+    >>> df = spark.createDataFrame([({"a": 1.0, "b": 2.0},), ({},)], ['data'])
+    >>> df.select(element_at(df.data, "a")).collect()
+    [Row(element_at(data, a)=1.0), Row(element_at(data, a)=None)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.element_at(_to_java_column(col), extraction))
+
+
+@since(2.4)
+def array_remove(col, element):
+    """
+    Collection function: Removes all elements that are equal to `element` from the given array.
+
+    :param col: name of column containing array
+    :param element: element to be removed from the array
+
+    >>> df = spark.createDataFrame([([1, 2, 3, 1, 1],), ([],)], ['data'])
+    >>> df.select(array_remove(df.data, 1)).collect()
+    [Row(array_remove(data, 1)=[2, 3]), Row(array_remove(data, 1)=[])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_remove(_to_java_column(col), element))
+
+
+@since(2.4)
+def array_distinct(col):
+    """
+    Collection function: removes duplicate values from the array.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([1, 2, 3, 2],), ([4, 5, 5, 4],)], ['data'])
+    >>> df.select(array_distinct(df.data)).collect()
+    [Row(array_distinct(data)=[1, 2, 3]), Row(array_distinct(data)=[4, 5])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_distinct(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(2.4)
+def array_intersect(col1, col2):
+    """
+    Collection function: returns an array of the elements in the intersection of col1 and col2,
+    without duplicates.
+
+    :param col1: name of column containing array
+    :param col2: name of column containing array
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])])
+    >>> df.select(array_intersect(df.c1, df.c2)).collect()
+    [Row(array_intersect(c1, c2)=[u'a', u'c'])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_intersect(_to_java_column(col1), _to_java_column(col2)))
+
+
+@ignore_unicode_prefix
+@since(2.4)
+def array_union(col1, col2):
+    """
+    Collection function: returns an array of the elements in the union of col1 and col2,
+    without duplicates.
+
+    :param col1: name of column containing array
+    :param col2: name of column containing array
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])])
+    >>> df.select(array_union(df.c1, df.c2)).collect()
+    [Row(array_union(c1, c2)=[u'b', u'a', u'c', u'd', u'f'])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_union(_to_java_column(col1), _to_java_column(col2)))
+
+
+@ignore_unicode_prefix
+@since(2.4)
+def array_except(col1, col2):
+    """
+    Collection function: returns an array of the elements in col1 but not in col2,
+    without duplicates.
+
+    :param col1: name of column containing array
+    :param col2: name of column containing array
+
+    >>> from pyspark.sql import Row
+    >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])])
+    >>> df.select(array_except(df.c1, df.c2)).collect()
+    [Row(array_except(c1, c2)=[u'b'])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_except(_to_java_column(col1), _to_java_column(col2)))
+
+
+@since(1.4)
+def explode(col):
+    """Returns a new row for each element in the given array or map.
+
+    >>> from pyspark.sql import Row
+    >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})])
+    >>> eDF.select(explode(eDF.intlist).alias("anInt")).collect()
+    [Row(anInt=1), Row(anInt=2), Row(anInt=3)]
+
+    >>> eDF.select(explode(eDF.mapfield).alias("key", "value")).show()
+    +---+-----+
+    |key|value|
+    +---+-----+
+    |  a|    b|
+    +---+-----+
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.explode(_to_java_column(col))
+    return Column(jc)
+
+
+@since(2.1)
+def posexplode(col):
+    """Returns a new row for each element with position in the given array or map.
+
+    >>> from pyspark.sql import Row
+    >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})])
+    >>> eDF.select(posexplode(eDF.intlist)).collect()
+    [Row(pos=0, col=1), Row(pos=1, col=2), Row(pos=2, col=3)]
+
+    >>> eDF.select(posexplode(eDF.mapfield)).show()
+    +---+---+-----+
+    |pos|key|value|
+    +---+---+-----+
+    |  0|  a|    b|
+    +---+---+-----+
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.posexplode(_to_java_column(col))
+    return Column(jc)
+
+
+@since(2.3)
+def explode_outer(col):
+    """Returns a new row for each element in the given array or map.
+    Unlike explode, if the array/map is null or empty then null is produced.
+
+    >>> df = spark.createDataFrame(
+    ...     [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)],
+    ...     ("id", "an_array", "a_map")
+    ... )
+    >>> df.select("id", "an_array", explode_outer("a_map")).show()
+    +---+----------+----+-----+
+    | id|  an_array| key|value|
+    +---+----------+----+-----+
+    |  1|[foo, bar]|   x|  1.0|
+    |  2|        []|null| null|
+    |  3|      null|null| null|
+    +---+----------+----+-----+
+
+    >>> df.select("id", "a_map", explode_outer("an_array")).show()
+    +---+----------+----+
+    | id|     a_map| col|
+    +---+----------+----+
+    |  1|[x -> 1.0]| foo|
+    |  1|[x -> 1.0]| bar|
+    |  2|        []|null|
+    |  3|      null|null|
+    +---+----------+----+
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.explode_outer(_to_java_column(col))
+    return Column(jc)
+
+
+@since(2.3)
+def posexplode_outer(col):
+    """Returns a new row for each element with position in the given array or map.
+    Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced.
+
+    >>> df = spark.createDataFrame(
+    ...     [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)],
+    ...     ("id", "an_array", "a_map")
+    ... )
+    >>> df.select("id", "an_array", posexplode_outer("a_map")).show()
+    +---+----------+----+----+-----+
+    | id|  an_array| pos| key|value|
+    +---+----------+----+----+-----+
+    |  1|[foo, bar]|   0|   x|  1.0|
+    |  2|        []|null|null| null|
+    |  3|      null|null|null| null|
+    +---+----------+----+----+-----+
+    >>> df.select("id", "a_map", posexplode_outer("an_array")).show()
+    +---+----------+----+----+
+    | id|     a_map| pos| col|
+    +---+----------+----+----+
+    |  1|[x -> 1.0]|   0| foo|
+    |  1|[x -> 1.0]|   1| bar|
+    |  2|        []|null|null|
+    |  3|      null|null|null|
+    +---+----------+----+----+
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.posexplode_outer(_to_java_column(col))
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.6)
+def get_json_object(col, path):
+    """
+    Extracts a JSON object from a JSON string based on the specified JSON path, and returns a
+    JSON string of the extracted JSON object. It returns null if the input JSON string is invalid.
+
+    :param col: string column in json format
+    :param path: path to the json object to extract
+
+    >>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')]
+    >>> df = spark.createDataFrame(data, ("key", "jstring"))
+    >>> df.select(df.key, get_json_object(df.jstring, '$.f1').alias("c0"), \\
+    ...                   get_json_object(df.jstring, '$.f2').alias("c1") ).collect()
+    [Row(key=u'1', c0=u'value1', c1=u'value2'), Row(key=u'2', c0=u'value12', c1=None)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.get_json_object(_to_java_column(col), path)
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(1.6)
+def json_tuple(col, *fields):
+    """Creates a new row for a json column according to the given field names.
+
+    :param col: string column in json format
+    :param fields: list of fields to extract
+
+    >>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')]
+    >>> df = spark.createDataFrame(data, ("key", "jstring"))
+    >>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect()
+    [Row(key=u'1', c0=u'value1', c1=u'value2'), Row(key=u'2', c0=u'value12', c1=None)]
+    """
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.json_tuple(_to_java_column(col), _to_seq(sc, fields))
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(2.1)
+def from_json(col, schema, options={}):
+    """
+    Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType`
+    as keys type, a :class:`StructType` or an :class:`ArrayType` with the specified schema.
+    Returns ``null`` in the case of an unparseable string.
+
+    :param col: string column in json format
+    :param schema: a StructType or ArrayType of StructType to use when parsing the json column.
+    :param options: options to control parsing. Accepts the same options as the JSON datasource
+
+    .. note:: Since Spark 2.3, the DDL-formatted string or a JSON format string is also
+              supported for ``schema``.
+
+    >>> from pyspark.sql.types import *
+    >>> data = [(1, '''{"a": 1}''')]
+    >>> schema = StructType([StructField("a", IntegerType())])
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(from_json(df.value, schema).alias("json")).collect()
+    [Row(json=Row(a=1))]
+    >>> df.select(from_json(df.value, "a INT").alias("json")).collect()
+    [Row(json=Row(a=1))]
+    >>> df.select(from_json(df.value, "MAP<STRING,INT>").alias("json")).collect()
+    [Row(json={u'a': 1})]
+    >>> data = [(1, '''[{"a": 1}]''')]
+    >>> schema = ArrayType(StructType([StructField("a", IntegerType())]))
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(from_json(df.value, schema).alias("json")).collect()
+    [Row(json=[Row(a=1)])]
+    >>> schema = schema_of_json(lit('''{"a": 0}'''))
+    >>> df.select(from_json(df.value, schema).alias("json")).collect()
+    [Row(json=Row(a=1))]
+    >>> data = [(1, '''[1, 2, 3]''')]
+    >>> schema = ArrayType(IntegerType())
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(from_json(df.value, schema).alias("json")).collect()
+    [Row(json=[1, 2, 3])]
+    """
+
+    sc = SparkContext._active_spark_context
+    if isinstance(schema, DataType):
+        schema = schema.json()
+    elif isinstance(schema, Column):
+        schema = _to_java_column(schema)
+    jc = sc._jvm.functions.from_json(_to_java_column(col), schema, options)
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(2.1)
+def to_json(col, options={}):
+    """
+    Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType`
+    into a JSON string. Throws an exception, in the case of an unsupported type.
+
+    :param col: name of column containing a struct, an array or a map.
+    :param options: options to control converting. Accepts the same options as the JSON datasource
+
+    >>> from pyspark.sql import Row
+    >>> from pyspark.sql.types import *
+    >>> data = [(1, Row(name='Alice', age=2))]
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(to_json(df.value).alias("json")).collect()
+    [Row(json=u'{"age":2,"name":"Alice"}')]
+    >>> data = [(1, [Row(name='Alice', age=2), Row(name='Bob', age=3)])]
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(to_json(df.value).alias("json")).collect()
+    [Row(json=u'[{"age":2,"name":"Alice"},{"age":3,"name":"Bob"}]')]
+    >>> data = [(1, {"name": "Alice"})]
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(to_json(df.value).alias("json")).collect()
+    [Row(json=u'{"name":"Alice"}')]
+    >>> data = [(1, [{"name": "Alice"}, {"name": "Bob"}])]
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(to_json(df.value).alias("json")).collect()
+    [Row(json=u'[{"name":"Alice"},{"name":"Bob"}]')]
+    >>> data = [(1, ["Alice", "Bob"])]
+    >>> df = spark.createDataFrame(data, ("key", "value"))
+    >>> df.select(to_json(df.value).alias("json")).collect()
+    [Row(json=u'["Alice","Bob"]')]
+    """
+
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.to_json(_to_java_column(col), options)
+    return Column(jc)
+
+
+@ignore_unicode_prefix
+@since(2.4)
+def schema_of_json(json):
+    """
+    Parses a JSON string and infers its schema in DDL format.
+
+    :param json: a JSON string or a string literal containing a JSON string.
+
+    >>> df = spark.range(1)
+    >>> df.select(schema_of_json('{"a": 0}').alias("json")).collect()
+    [Row(json=u'struct<a:bigint>')]
+    """
+    if isinstance(json, basestring):
+        col = _create_column_from_literal(json)
+    elif isinstance(json, Column):
+        col = _to_java_column(json)
+    else:
+        raise TypeError("schema argument should be a column or string")
+
+    sc = SparkContext._active_spark_context
+    jc = sc._jvm.functions.schema_of_json(col)
+    return Column(jc)
+
+
+@since(1.5)
+def size(col):
+    """
+    Collection function: returns the length of the array or map stored in the column.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([1, 2, 3],),([1],),([],)], ['data'])
+    >>> df.select(size(df.data)).collect()
+    [Row(size(data)=3), Row(size(data)=1), Row(size(data)=0)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.size(_to_java_column(col)))
+
+
+@since(2.4)
+def array_min(col):
+    """
+    Collection function: returns the minimum value of the array.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data'])
+    >>> df.select(array_min(df.data).alias('min')).collect()
+    [Row(min=1), Row(min=-1)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_min(_to_java_column(col)))
+
+
+@since(2.4)
+def array_max(col):
+    """
+    Collection function: returns the maximum value of the array.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data'])
+    >>> df.select(array_max(df.data).alias('max')).collect()
+    [Row(max=3), Row(max=10)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_max(_to_java_column(col)))
+
+
+@since(1.5)
+def sort_array(col, asc=True):
+    """
+    Collection function: sorts the input array in ascending or descending order according
+    to the natural ordering of the array elements. Null elements will be placed at the beginning
+    of the returned array in ascending order or at the end of the returned array in descending
+    order.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], ['data'])
+    >>> df.select(sort_array(df.data).alias('r')).collect()
+    [Row(r=[None, 1, 2, 3]), Row(r=[1]), Row(r=[])]
+    >>> df.select(sort_array(df.data, asc=False).alias('r')).collect()
+    [Row(r=[3, 2, 1, None]), Row(r=[1]), Row(r=[])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc))
+
+
+@since(2.4)
+def array_sort(col):
+    """
+    Collection function: sorts the input array in ascending order. The elements of the input array
+    must be orderable. Null elements will be placed at the end of the returned array.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], ['data'])
+    >>> df.select(array_sort(df.data).alias('r')).collect()
+    [Row(r=[1, 2, 3, None]), Row(r=[1]), Row(r=[])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_sort(_to_java_column(col)))
+
+
+@since(2.4)
+def shuffle(col):
+    """
+    Collection function: Generates a random permutation of the given array.
+
+    .. note:: The function is non-deterministic.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([1, 20, 3, 5],), ([1, 20, None, 3],)], ['data'])
+    >>> df.select(shuffle(df.data).alias('s')).collect()  # doctest: +SKIP
+    [Row(s=[3, 1, 5, 20]), Row(s=[20, None, 3, 1])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.shuffle(_to_java_column(col)))
+
+
+@since(1.5)
+@ignore_unicode_prefix
+def reverse(col):
+    """
+    Collection function: returns a reversed string or an array with its elements in reverse order.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([('Spark SQL',)], ['data'])
+    >>> df.select(reverse(df.data).alias('s')).collect()
+    [Row(s=u'LQS krapS')]
+    >>> df = spark.createDataFrame([([2, 1, 3],) ,([1],) ,([],)], ['data'])
+    >>> df.select(reverse(df.data).alias('r')).collect()
+    [Row(r=[3, 1, 2]), Row(r=[1]), Row(r=[])]
+     """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.reverse(_to_java_column(col)))
+
+
+@since(2.4)
+def flatten(col):
+    """
+    Collection function: creates a single array from an array of arrays.
+    If a structure of nested arrays is deeper than two levels,
+    only one level of nesting is removed.
+
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([[1, 2, 3], [4, 5], [6]],), ([None, [4, 5]],)], ['data'])
+    >>> df.select(flatten(df.data).alias('r')).collect()
+    [Row(r=[1, 2, 3, 4, 5, 6]), Row(r=None)]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.flatten(_to_java_column(col)))
+
+
+@since(2.3)
+def map_keys(col):
+    """
+    Collection function: Returns an unordered array containing the keys of the map.
+
+    :param col: name of column or expression
+
+    >>> from pyspark.sql.functions import map_keys
+    >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data")
+    >>> df.select(map_keys("data").alias("keys")).show()
+    +------+
+    |  keys|
+    +------+
+    |[1, 2]|
+    +------+
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.map_keys(_to_java_column(col)))
+
+
+@since(2.3)
+def map_values(col):
+    """
+    Collection function: Returns an unordered array containing the values of the map.
+
+    :param col: name of column or expression
+
+    >>> from pyspark.sql.functions import map_values
+    >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data")
+    >>> df.select(map_values("data").alias("values")).show()
+    +------+
+    |values|
+    +------+
+    |[a, b]|
+    +------+
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.map_values(_to_java_column(col)))
+
+
+@since(2.4)
+def map_from_entries(col):
+    """
+    Collection function: Returns a map created from the given array of entries.
+
+    :param col: name of column or expression
+
+    >>> from pyspark.sql.functions import map_from_entries
+    >>> df = spark.sql("SELECT array(struct(1, 'a'), struct(2, 'b')) as data")
+    >>> df.select(map_from_entries("data").alias("map")).show()
+    +----------------+
+    |             map|
+    +----------------+
+    |[1 -> a, 2 -> b]|
+    +----------------+
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.map_from_entries(_to_java_column(col)))
+
+
+@ignore_unicode_prefix
+@since(2.4)
+def array_repeat(col, count):
+    """
+    Collection function: creates an array containing a column repeated count times.
+
+    >>> df = spark.createDataFrame([('ab',)], ['data'])
+    >>> df.select(array_repeat(df.data, 3).alias('r')).collect()
+    [Row(r=[u'ab', u'ab', u'ab'])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_repeat(_to_java_column(col), count))
+
+
+@since(2.4)
+def arrays_zip(*cols):
+    """
+    Collection function: Returns a merged array of structs in which the N-th struct contains all
+    N-th values of input arrays.
+
+    :param cols: columns of arrays to be merged.
+
+    >>> from pyspark.sql.functions import arrays_zip
+    >>> df = spark.createDataFrame([(([1, 2, 3], [2, 3, 4]))], ['vals1', 'vals2'])
+    >>> df.select(arrays_zip(df.vals1, df.vals2).alias('zipped')).collect()
+    [Row(zipped=[Row(vals1=1, vals2=2), Row(vals1=2, vals2=3), Row(vals1=3, vals2=4)])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.arrays_zip(_to_seq(sc, cols, _to_java_column)))
+
+
+@since(2.4)
+def map_concat(*cols):
+    """Returns the union of all the given maps.
+
+    :param cols: list of column names (string) or list of :class:`Column` expressions
+
+    >>> from pyspark.sql.functions import map_concat
+    >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as map1, map(3, 'c', 1, 'd') as map2")
+    >>> df.select(map_concat("map1", "map2").alias("map3")).show(truncate=False)
+    +--------------------------------+
+    |map3                            |
+    +--------------------------------+
+    |[1 -> a, 2 -> b, 3 -> c, 1 -> d]|
+    +--------------------------------+
+    """
+    sc = SparkContext._active_spark_context
+    if len(cols) == 1 and isinstance(cols[0], (list, set)):
+        cols = cols[0]
+    jc = sc._jvm.functions.map_concat(_to_seq(sc, cols, _to_java_column))
+    return Column(jc)
+
+
+@since(2.4)
+def sequence(start, stop, step=None):
+    """
+    Generate a sequence of integers from `start` to `stop`, incrementing by `step`.
+    If `step` is not set, it increments by 1 if `start` is less than or equal to `stop`,
+    otherwise by -1.
+
+    >>> df1 = spark.createDataFrame([(-2, 2)], ('C1', 'C2'))
+    >>> df1.select(sequence('C1', 'C2').alias('r')).collect()
+    [Row(r=[-2, -1, 0, 1, 2])]
+    >>> df2 = spark.createDataFrame([(4, -4, -2)], ('C1', 'C2', 'C3'))
+    >>> df2.select(sequence('C1', 'C2', 'C3').alias('r')).collect()
+    [Row(r=[4, 2, 0, -2, -4])]
+    """
+    sc = SparkContext._active_spark_context
+    if step is None:
+        return Column(sc._jvm.functions.sequence(_to_java_column(start), _to_java_column(stop)))
+    else:
+        return Column(sc._jvm.functions.sequence(
+            _to_java_column(start), _to_java_column(stop), _to_java_column(step)))
+
+
+# ---------------------------- User Defined Function ----------------------------------
+
+class PandasUDFType(object):
+    """Pandas UDF Types. See :meth:`pyspark.sql.functions.pandas_udf`.
+    """
+    SCALAR = PythonEvalType.SQL_SCALAR_PANDAS_UDF
+
+    GROUPED_MAP = PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF
+
+    GROUPED_AGG = PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF
+
+
+@since(1.3)
+def udf(f=None, returnType=StringType()):
+    """Creates a user defined function (UDF).
+
+    .. note:: The user-defined functions are considered deterministic by default. Due to
+        optimization, duplicate invocations may be eliminated or the function may even be invoked
+        more times than it is present in the query. If your function is not deterministic, call
+        `asNondeterministic` on the user defined function. E.g.:
+
+    >>> from pyspark.sql.types import IntegerType
+    >>> import random
+    >>> random_udf = udf(lambda: int(random.random() * 100), IntegerType()).asNondeterministic()
+
+    .. note:: The user-defined functions do not support conditional expressions or short circuiting
+        in boolean expressions, and so they end up being executed for all rows internally. If the
+        functions can fail on special rows, the workaround is to incorporate the condition into the
+        functions.
+
+    .. note:: The user-defined functions do not take keyword arguments on the calling side.
+
+    :param f: python function if used as a standalone function
+    :param returnType: the return type of the user-defined function. The value can be either a
+        :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string.
+
+    >>> from pyspark.sql.types import IntegerType
+    >>> slen = udf(lambda s: len(s), IntegerType())
+    >>> @udf
+    ... def to_upper(s):
+    ...     if s is not None:
+    ...         return s.upper()
+    ...
+    >>> @udf(returnType=IntegerType())
+    ... def add_one(x):
+    ...     if x is not None:
+    ...         return x + 1
+    ...
+    >>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age"))
+    >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")).show()
+    +----------+--------------+------------+
+    |slen(name)|to_upper(name)|add_one(age)|
+    +----------+--------------+------------+
+    |         8|      JOHN DOE|          22|
+    +----------+--------------+------------+
+    """
+    # decorator @udf, @udf(), @udf(dataType())
+    if f is None or isinstance(f, (str, DataType)):
+        # If DataType has been passed as a positional argument
+        # for decorator use it as a returnType
+        return_type = f or returnType
+        return functools.partial(_create_udf, returnType=return_type,
+                                 evalType=PythonEvalType.SQL_BATCHED_UDF)
+    else:
+        return _create_udf(f=f, returnType=returnType,
+                           evalType=PythonEvalType.SQL_BATCHED_UDF)
+
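+# A brief sketch of how the call forms above are dispatched (mirrors the branch in udf()):
+#   udf(f) or @udf              -> _create_udf(f, StringType(), SQL_BATCHED_UDF)
+#   udf(f, IntegerType())       -> _create_udf(f, IntegerType(), SQL_BATCHED_UDF)
+#   @udf("int") / @udf()        -> functools.partial(_create_udf, ...) applied to the
+#                                  decorated function
+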
+
+@since(2.3)
+def pandas_udf(f=None, returnType=None, functionType=None):
+    """
+    Creates a vectorized user defined function (UDF).
+
+    :param f: user-defined function. A python function if used as a standalone function
+    :param returnType: the return type of the user-defined function. The value can be either a
+        :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string.
+    :param functionType: an enum value in :class:`pyspark.sql.functions.PandasUDFType`.
+                         Default: SCALAR.
+
+    .. note:: Experimental
+
+    The function type of the UDF can be one of the following:
+
+    1. SCALAR
+
+       A scalar UDF defines a transformation: One or more `pandas.Series` -> A `pandas.Series`.
+       The length of the returned `pandas.Series` must be the same as that of the input
+       `pandas.Series`.
+
+       :class:`MapType`, :class:`StructType` are currently not supported as output types.
+
+       Scalar UDFs are used with :meth:`pyspark.sql.DataFrame.withColumn` and
+       :meth:`pyspark.sql.DataFrame.select`.
+
+       >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+       >>> from pyspark.sql.types import IntegerType, StringType
+       >>> slen = pandas_udf(lambda s: s.str.len(), IntegerType())  # doctest: +SKIP
+       >>> @pandas_udf(StringType())  # doctest: +SKIP
+       ... def to_upper(s):
+       ...     return s.str.upper()
+       ...
+       >>> @pandas_udf("integer", PandasUDFType.SCALAR)  # doctest: +SKIP
+       ... def add_one(x):
+       ...     return x + 1
+       ...
+       >>> df = spark.createDataFrame([(1, "John Doe", 21)],
+       ...                            ("id", "name", "age"))  # doctest: +SKIP
+       >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\
+       ...     .show()  # doctest: +SKIP
+       +----------+--------------+------------+
+       |slen(name)|to_upper(name)|add_one(age)|
+       +----------+--------------+------------+
+       |         8|      JOHN DOE|          22|
+       +----------+--------------+------------+
+
+       .. note:: The length of `pandas.Series` within a scalar UDF is not that of the whole input
+           column, but is the length of an internal batch used for each call to the function.
+           Therefore, it can be used, for example, to ensure the length of each returned
+           `pandas.Series`, but it cannot be used as the column length.
+
+    2. GROUPED_MAP
+
+       A grouped map UDF defines a transformation: A `pandas.DataFrame` -> A `pandas.DataFrame`.
+       The returnType should be a :class:`StructType` describing the schema of the returned
+       `pandas.DataFrame`. The column labels of the returned `pandas.DataFrame` must either match
+       the field names in the defined returnType schema if specified as strings, or match the
+       field data types by position if not strings, e.g. integer indices.
+       The length of the returned `pandas.DataFrame` can be arbitrary.
+
+       Grouped map UDFs are used with :meth:`pyspark.sql.GroupedData.apply`.
+
+       >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+       >>> df = spark.createDataFrame(
+       ...     [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)],
+       ...     ("id", "v"))  # doctest: +SKIP
+       >>> @pandas_udf("id long, v double", PandasUDFType.GROUPED_MAP)  # doctest: +SKIP
+       ... def normalize(pdf):
+       ...     v = pdf.v
+       ...     return pdf.assign(v=(v - v.mean()) / v.std())
+       >>> df.groupby("id").apply(normalize).show()  # doctest: +SKIP
+       +---+-------------------+
+       | id|                  v|
+       +---+-------------------+
+       |  1|-0.7071067811865475|
+       |  1| 0.7071067811865475|
+       |  2|-0.8320502943378437|
+       |  2|-0.2773500981126146|
+       |  2| 1.1094003924504583|
+       +---+-------------------+
+
+       Alternatively, the user can define a function that takes two arguments.
+       In this case, the grouping key(s) will be passed as the first argument and the data will
+       be passed as the second argument. The grouping key(s) will be passed as a tuple of numpy
+       data types, e.g., `numpy.int32` and `numpy.float64`. The data will still be passed in
+       as a `pandas.DataFrame` containing all columns from the original Spark DataFrame.
+       This is useful when the user does not want to hardcode grouping key(s) in the function.
+
+       >>> import pandas as pd  # doctest: +SKIP
+       >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+       >>> df = spark.createDataFrame(
+       ...     [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)],
+       ...     ("id", "v"))  # doctest: +SKIP
+       >>> @pandas_udf("id long, v double", PandasUDFType.GROUPED_MAP)  # doctest: +SKIP
+       ... def mean_udf(key, pdf):
+       ...     # key is a tuple of one numpy.int64, which is the value
+       ...     # of 'id' for the current group
+       ...     return pd.DataFrame([key + (pdf.v.mean(),)])
+       >>> df.groupby('id').apply(mean_udf).show()  # doctest: +SKIP
+       +---+---+
+       | id|  v|
+       +---+---+
+       |  1|1.5|
+       |  2|6.0|
+       +---+---+
+       >>> @pandas_udf(
+       ...    "id long, `ceil(v / 2)` long, v double",
+       ...    PandasUDFType.GROUPED_MAP)  # doctest: +SKIP
+       ... def sum_udf(key, pdf):
+       ...     # key is a tuple of two numpy.int64s, which is the values
+       ...     # of 'id' and 'ceil(df.v / 2)' for the current group
+       ...     return pd.DataFrame([key + (pdf.v.sum(),)])
+       >>> df.groupby(df.id, ceil(df.v / 2)).apply(sum_udf).show()  # doctest: +SKIP
+       +---+-----------+----+
+       | id|ceil(v / 2)|   v|
+       +---+-----------+----+
+       |  2|          5|10.0|
+       |  1|          1| 3.0|
+       |  2|          3| 5.0|
+       |  2|          2| 3.0|
+       +---+-----------+----+
+
+       .. note:: If returning a new `pandas.DataFrame` constructed with a dictionary, it is
+           recommended to explicitly index the columns by name to ensure the positions are correct,
+           or alternatively use an `OrderedDict`.
+           For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or
+           `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`.
+
+       .. seealso:: :meth:`pyspark.sql.GroupedData.apply`
+
+    3. GROUPED_AGG
+
+       A grouped aggregate UDF defines a transformation: One or more `pandas.Series` -> A scalar
+       The `returnType` should be a primitive data type, e.g., :class:`DoubleType`.
+       The returned scalar can be either a python primitive type, e.g., `int` or `float`,
+       or a numpy data type, e.g., `numpy.int64` or `numpy.float64`.
+
+       :class:`MapType` and :class:`StructType` are currently not supported as output types.
+
+       Group aggregate UDFs are used with :meth:`pyspark.sql.GroupedData.agg` and
+       :class:`pyspark.sql.Window`.
+
+       This example shows using grouped aggregate UDFs with groupby:
+
+       >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+       >>> df = spark.createDataFrame(
+       ...     [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)],
+       ...     ("id", "v"))
+       >>> @pandas_udf("double", PandasUDFType.GROUPED_AGG)  # doctest: +SKIP
+       ... def mean_udf(v):
+       ...     return v.mean()
+       >>> df.groupby("id").agg(mean_udf(df['v'])).show()  # doctest: +SKIP
+       +---+-----------+
+       | id|mean_udf(v)|
+       +---+-----------+
+       |  1|        1.5|
+       |  2|        6.0|
+       +---+-----------+
+
+       This example shows using grouped aggregate UDFs as window functions. Note that only the
+       unbounded window frame is supported at the moment:
+
+       >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+       >>> from pyspark.sql import Window
+       >>> df = spark.createDataFrame(
+       ...     [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)],
+       ...     ("id", "v"))
+       >>> @pandas_udf("double", PandasUDFType.GROUPED_AGG)  # doctest: +SKIP
+       ... def mean_udf(v):
+       ...     return v.mean()
+       >>> w = Window \\
+       ...     .partitionBy('id') \\
+       ...     .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)
+       >>> df.withColumn('mean_v', mean_udf(df['v']).over(w)).show()  # doctest: +SKIP
+       +---+----+------+
+       | id|   v|mean_v|
+       +---+----+------+
+       |  1| 1.0|   1.5|
+       |  1| 2.0|   1.5|
+       |  2| 3.0|   6.0|
+       |  2| 5.0|   6.0|
+       |  2|10.0|   6.0|
+       +---+----+------+
+
+       .. seealso:: :meth:`pyspark.sql.GroupedData.agg` and :class:`pyspark.sql.Window`
+
+    .. note:: The user-defined functions are considered deterministic by default. Due to
+        optimization, duplicate invocations may be eliminated or the function may even be invoked
+        more times than it is present in the query. If your function is not deterministic, call
+        `asNondeterministic` on the user defined function. E.g.:
+
+    >>> @pandas_udf('double', PandasUDFType.SCALAR)  # doctest: +SKIP
+    ... def random(v):
+    ...     import numpy as np
+    ...     import pandas as pd
+    ...     return pd.Series(np.random.randn(len(v)))
+    >>> random = random.asNondeterministic()  # doctest: +SKIP
+
+    .. note:: The user-defined functions do not support conditional expressions or short circuiting
+        in boolean expressions, and so they end up being executed for all rows internally. If the
+        functions can fail on special rows, the workaround is to incorporate the condition into the
+        functions.
+
+    .. note:: The user-defined functions do not take keyword arguments on the calling side.
+    """
+    # decorator @pandas_udf(returnType, functionType)
+    is_decorator = f is None or isinstance(f, (str, DataType))
+
+    if is_decorator:
+        # If DataType has been passed as a positional argument
+        # for decorator use it as a returnType
+        return_type = f or returnType
+
+        if functionType is not None:
+            # @pandas_udf(dataType, functionType=functionType)
+            # @pandas_udf(returnType=dataType, functionType=functionType)
+            eval_type = functionType
+        elif returnType is not None and isinstance(returnType, int):
+            # @pandas_udf(dataType, functionType)
+            eval_type = returnType
+        else:
+            # @pandas_udf(dataType) or @pandas_udf(returnType=dataType)
+            eval_type = PythonEvalType.SQL_SCALAR_PANDAS_UDF
+    else:
+        return_type = returnType
+
+        if functionType is not None:
+            eval_type = functionType
+        else:
+            eval_type = PythonEvalType.SQL_SCALAR_PANDAS_UDF
+
+    if return_type is None:
+        raise ValueError("Invalid returnType: returnType can not be None")
+
+    if eval_type not in [PythonEvalType.SQL_SCALAR_PANDAS_UDF,
+                         PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF,
+                         PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF]:
+        raise ValueError("Invalid functionType: "
+                         "functionType must be one the values from PandasUDFType")
+
+    if is_decorator:
+        return functools.partial(_create_udf, returnType=return_type, evalType=eval_type)
+    else:
+        return _create_udf(f=f, returnType=return_type, evalType=eval_type)
+
+
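+# A short summary of how pandas_udf resolves its arguments (a sketch of the branches
+# above; behaviour is unchanged):
+#   pandas_udf(f, "double")                                      -> SCALAR eval type
+#   @pandas_udf("double")                                        -> SCALAR eval type
+#   @pandas_udf("id long, v double", PandasUDFType.GROUPED_MAP)  -> GROUPED_MAP eval type
+#   @pandas_udf("double", PandasUDFType.GROUPED_AGG)             -> GROUPED_AGG eval type
+
+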
+blacklist = ['map', 'since', 'ignore_unicode_prefix']
+__all__ = [k for k, v in globals().items()
+           if not k.startswith('_') and k[0].islower() and callable(v) and k not in blacklist]
+__all__ += ["PandasUDFType"]
+__all__.sort()
+
+
+def _test():
+    import doctest
+    from pyspark.sql import Row, SparkSession
+    import pyspark.sql.functions
+    globs = pyspark.sql.functions.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("sql.functions tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    globs['df'] = spark.createDataFrame([Row(name='Alice', age=2), Row(name='Bob', age=5)])
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.functions, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/group.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/group.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc1da8e7c1f72b8c4f64b36af9ce0199cc886c76
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/group.py
@@ -0,0 +1,317 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+from pyspark import since
+from pyspark.rdd import ignore_unicode_prefix, PythonEvalType
+from pyspark.sql.column import Column, _to_seq
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.types import *
+
+__all__ = ["GroupedData"]
+
+
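+# The two decorators below wrap GroupedData methods whose implementation lives entirely
+# on the JVM side: dfapi forwards a no-argument call by name to the underlying Java
+# GroupedData, and df_varargs_api forwards the given column names. The wrapped Python
+# function only contributes its name and docstring.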
+def dfapi(f):
+    def _api(self):
+        name = f.__name__
+        jdf = getattr(self._jgd, name)()
+        return DataFrame(jdf, self.sql_ctx)
+    _api.__name__ = f.__name__
+    _api.__doc__ = f.__doc__
+    return _api
+
+
+def df_varargs_api(f):
+    def _api(self, *cols):
+        name = f.__name__
+        jdf = getattr(self._jgd, name)(_to_seq(self.sql_ctx._sc, cols))
+        return DataFrame(jdf, self.sql_ctx)
+    _api.__name__ = f.__name__
+    _api.__doc__ = f.__doc__
+    return _api
+
+
+class GroupedData(object):
+    """
+    A set of methods for aggregations on a :class:`DataFrame`,
+    created by :func:`DataFrame.groupBy`.
+
+    .. note:: Experimental
+
+    .. versionadded:: 1.3
+    """
+
+    def __init__(self, jgd, df):
+        self._jgd = jgd
+        self._df = df
+        self.sql_ctx = df.sql_ctx
+
+    @ignore_unicode_prefix
+    @since(1.3)
+    def agg(self, *exprs):
+        """Compute aggregates and returns the result as a :class:`DataFrame`.
+
+        The available aggregate functions can be:
+
+        1. built-in aggregation functions, such as `avg`, `max`, `min`, `sum`, `count`
+
+        2. group aggregate pandas UDFs, created with :func:`pyspark.sql.functions.pandas_udf`
+
+           .. note:: There is no partial aggregation with group aggregate UDFs, i.e.,
+               a full shuffle is required. Also, all the data of a group will be loaded into
+               memory, so the user should be aware of the potential OOM risk if data is skewed
+               and certain groups are too large to fit in memory.
+
+           .. seealso:: :func:`pyspark.sql.functions.pandas_udf`
+
+        If ``exprs`` is a single :class:`dict` mapping from string to string, then the key
+        is the column to perform aggregation on, and the value is the aggregate function.
+
+        Alternatively, ``exprs`` can also be a list of aggregate :class:`Column` expressions.
+
+        .. note:: Built-in aggregation functions and group aggregate pandas UDFs cannot be mixed
+            in a single call to this function.
+
+        :param exprs: a dict mapping from column name (string) to aggregate functions (string),
+            or a list of :class:`Column`.
+
+        >>> gdf = df.groupBy(df.name)
+        >>> sorted(gdf.agg({"*": "count"}).collect())
+        [Row(name=u'Alice', count(1)=1), Row(name=u'Bob', count(1)=1)]
+
+        >>> from pyspark.sql import functions as F
+        >>> sorted(gdf.agg(F.min(df.age)).collect())
+        [Row(name=u'Alice', min(age)=2), Row(name=u'Bob', min(age)=5)]
+
+        >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+        >>> @pandas_udf('int', PandasUDFType.GROUPED_AGG)  # doctest: +SKIP
+        ... def min_udf(v):
+        ...     return v.min()
+        >>> sorted(gdf.agg(min_udf(df.age)).collect())  # doctest: +SKIP
+        [Row(name=u'Alice', min_udf(age)=2), Row(name=u'Bob', min_udf(age)=5)]
+        """
+        assert exprs, "exprs should not be empty"
+        if len(exprs) == 1 and isinstance(exprs[0], dict):
+            jdf = self._jgd.agg(exprs[0])
+        else:
+            # Columns
+            assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column"
+            jdf = self._jgd.agg(exprs[0]._jc,
+                                _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]]))
+        return DataFrame(jdf, self.sql_ctx)
+
+    @dfapi
+    @since(1.3)
+    def count(self):
+        """Counts the number of records for each group.
+
+        >>> sorted(df.groupBy(df.age).count().collect())
+        [Row(age=2, count=1), Row(age=5, count=1)]
+        """
+
+    @df_varargs_api
+    @since(1.3)
+    def mean(self, *cols):
+        """Computes average values for each numeric columns for each group.
+
+        :func:`mean` is an alias for :func:`avg`.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df.groupBy().mean('age').collect()
+        [Row(avg(age)=3.5)]
+        >>> df3.groupBy().mean('age', 'height').collect()
+        [Row(avg(age)=3.5, avg(height)=82.5)]
+        """
+
+    @df_varargs_api
+    @since(1.3)
+    def avg(self, *cols):
+        """Computes average values for each numeric columns for each group.
+
+        :func:`mean` is an alias for :func:`avg`.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df.groupBy().avg('age').collect()
+        [Row(avg(age)=3.5)]
+        >>> df3.groupBy().avg('age', 'height').collect()
+        [Row(avg(age)=3.5, avg(height)=82.5)]
+        """
+
+    @df_varargs_api
+    @since(1.3)
+    def max(self, *cols):
+        """Computes the max value for each numeric columns for each group.
+
+        >>> df.groupBy().max('age').collect()
+        [Row(max(age)=5)]
+        >>> df3.groupBy().max('age', 'height').collect()
+        [Row(max(age)=5, max(height)=85)]
+        """
+
+    @df_varargs_api
+    @since(1.3)
+    def min(self, *cols):
+        """Computes the min value for each numeric column for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df.groupBy().min('age').collect()
+        [Row(min(age)=2)]
+        >>> df3.groupBy().min('age', 'height').collect()
+        [Row(min(age)=2, min(height)=80)]
+        """
+
+    @df_varargs_api
+    @since(1.3)
+    def sum(self, *cols):
+        """Compute the sum for each numeric columns for each group.
+
+        :param cols: list of column names (string). Non-numeric columns are ignored.
+
+        >>> df.groupBy().sum('age').collect()
+        [Row(sum(age)=7)]
+        >>> df3.groupBy().sum('age', 'height').collect()
+        [Row(sum(age)=7, sum(height)=165)]
+        """
+
+    @since(1.6)
+    def pivot(self, pivot_col, values=None):
+        """
+        Pivots a column of the current :class:`DataFrame` and performs the specified aggregation.
+        There are two versions of the pivot function: one that requires the caller to specify the
+        list of distinct values to pivot on, and one that does not. The latter is more concise but
+        less efficient, because Spark needs to first compute the list of distinct values internally.
+
+        :param pivot_col: Name of the column to pivot.
+        :param values: List of values that will be translated to columns in the output DataFrame.
+
+        # Compute the sum of earnings for each year by course with each course as a separate column
+
+        >>> df4.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings").collect()
+        [Row(year=2012, dotNET=15000, Java=20000), Row(year=2013, dotNET=48000, Java=30000)]
+
+        # Or without specifying column values (less efficient)
+
+        >>> df4.groupBy("year").pivot("course").sum("earnings").collect()
+        [Row(year=2012, Java=20000, dotNET=15000), Row(year=2013, Java=30000, dotNET=48000)]
+        >>> df5.groupBy("sales.year").pivot("sales.course").sum("sales.earnings").collect()
+        [Row(year=2012, Java=20000, dotNET=15000), Row(year=2013, Java=30000, dotNET=48000)]
+        """
+        if values is None:
+            jgd = self._jgd.pivot(pivot_col)
+        else:
+            jgd = self._jgd.pivot(pivot_col, values)
+        return GroupedData(jgd, self._df)
+
+    @since(2.3)
+    def apply(self, udf):
+        """
+        Maps each group of the current :class:`DataFrame` using a pandas udf and returns the result
+        as a `DataFrame`.
+
+        The user-defined function should take a `pandas.DataFrame` and return another
+        `pandas.DataFrame`. For each group, all columns are passed together as a `pandas.DataFrame`
+        to the user function, and the returned `pandas.DataFrame` objects are combined as a
+        :class:`DataFrame`.
+
+        The returned `pandas.DataFrame` can be of arbitrary length and its schema must match the
+        returnType of the pandas udf.
+
+        .. note:: This function requires a full shuffle. All the data of a group will be loaded
+            into memory, so the user should be aware of the potential OOM risk if data is skewed
+            and certain groups are too large to fit in memory.
+
+        .. note:: Experimental
+
+        :param udf: a grouped map user-defined function returned by
+            :func:`pyspark.sql.functions.pandas_udf`.
+
+        >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+        >>> df = spark.createDataFrame(
+        ...     [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)],
+        ...     ("id", "v"))
+        >>> @pandas_udf("id long, v double", PandasUDFType.GROUPED_MAP)  # doctest: +SKIP
+        ... def normalize(pdf):
+        ...     v = pdf.v
+        ...     return pdf.assign(v=(v - v.mean()) / v.std())
+        >>> df.groupby("id").apply(normalize).show()  # doctest: +SKIP
+        +---+-------------------+
+        | id|                  v|
+        +---+-------------------+
+        |  1|-0.7071067811865475|
+        |  1| 0.7071067811865475|
+        |  2|-0.8320502943378437|
+        |  2|-0.2773500981126146|
+        |  2| 1.1094003924504583|
+        +---+-------------------+
+
+        .. seealso:: :meth:`pyspark.sql.functions.pandas_udf`
+
+        """
+        # Columns are special because hasattr always returns True
+        if isinstance(udf, Column) or not hasattr(udf, 'func') \
+           or udf.evalType != PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
+            raise ValueError("Invalid udf: the udf argument must be a pandas_udf of type "
+                             "GROUPED_MAP.")
+        df = self._df
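+        # Build the UDF column over every column of the DataFrame,
+        # e.g. udf(df['id'], df['v']) for the example above.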
+        udf_column = udf(*[df[col] for col in df.columns])
+        jdf = self._jgd.flatMapGroupsInPandas(udf_column._jc.expr())
+        return DataFrame(jdf, self.sql_ctx)
+
+
+def _test():
+    import doctest
+    from pyspark.sql import Row, SparkSession
+    import pyspark.sql.group
+    globs = pyspark.sql.group.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("sql.group tests")\
+        .getOrCreate()
+    sc = spark.sparkContext
+    globs['sc'] = sc
+    globs['spark'] = spark
+    globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \
+        .toDF(StructType([StructField('age', IntegerType()),
+                          StructField('name', StringType())]))
+    globs['df3'] = sc.parallelize([Row(name='Alice', age=2, height=80),
+                                   Row(name='Bob', age=5, height=85)]).toDF()
+    globs['df4'] = sc.parallelize([Row(course="dotNET", year=2012, earnings=10000),
+                                   Row(course="Java",   year=2012, earnings=20000),
+                                   Row(course="dotNET", year=2012, earnings=5000),
+                                   Row(course="dotNET", year=2013, earnings=48000),
+                                   Row(course="Java",   year=2013, earnings=30000)]).toDF()
+    globs['df5'] = sc.parallelize([
+        Row(training="expert", sales=Row(course="dotNET", year=2012, earnings=10000)),
+        Row(training="junior", sales=Row(course="Java",   year=2012, earnings=20000)),
+        Row(training="expert", sales=Row(course="dotNET", year=2012, earnings=5000)),
+        Row(training="junior", sales=Row(course="dotNET", year=2013, earnings=48000)),
+        Row(training="expert", sales=Row(course="Java",   year=2013, earnings=30000))]).toDF()
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.group, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/readwriter.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/readwriter.py
new file mode 100644
index 0000000000000000000000000000000000000000..690b13072244bc080e96bb1a4fc0389e734cfbdd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/readwriter.py
@@ -0,0 +1,1021 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+if sys.version >= '3':
+    basestring = unicode = str
+
+from py4j.java_gateway import JavaClass
+
+from pyspark import RDD, since
+from pyspark.rdd import ignore_unicode_prefix
+from pyspark.sql.column import _to_seq
+from pyspark.sql.types import *
+from pyspark.sql import utils
+
+__all__ = ["DataFrameReader", "DataFrameWriter"]
+
+
+def to_str(value):
+    """
+    A wrapper over str(), but converts bool values to lower case strings.
+    If None is given, just returns None, instead of converting it to string "None".
+    """
+    if isinstance(value, bool):
+        return str(value).lower()
+    elif value is None:
+        return value
+    else:
+        return str(value)
+
+
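+# For illustration: to_str(True) gives 'true', to_str(None) gives None, and
+# to_str(1) gives '1'.
+
+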
+class OptionUtils(object):
+
+    def _set_opts(self, schema=None, **options):
+        """
+        Set named options (filtering out those whose value is None).
+        """
+        if schema is not None:
+            self.schema(schema)
+        for k, v in options.items():
+            if v is not None:
+                self.option(k, v)
+
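+# For example, a hypothetical call self._set_opts(schema=None, multiLine=True, lineSep=None)
+# would forward only multiLine to self.option(), since None-valued options (and a None
+# schema) are skipped.
+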
+
+class DataFrameReader(OptionUtils):
+    """
+    Interface used to load a :class:`DataFrame` from external storage systems
+    (e.g. file systems, key-value stores, etc). Use :func:`spark.read`
+    to access this.
+
+    .. versionadded:: 1.4
+    """
+
+    def __init__(self, spark):
+        self._jreader = spark._ssql_ctx.read()
+        self._spark = spark
+
+    def _df(self, jdf):
+        from pyspark.sql.dataframe import DataFrame
+        return DataFrame(jdf, self._spark)
+
+    @since(1.4)
+    def format(self, source):
+        """Specifies the input data source format.
+
+        :param source: string, name of the data source, e.g. 'json', 'parquet'.
+
+        >>> df = spark.read.format('json').load('python/test_support/sql/people.json')
+        >>> df.dtypes
+        [('age', 'bigint'), ('name', 'string')]
+
+        """
+        self._jreader = self._jreader.format(source)
+        return self
+
+    @since(1.4)
+    def schema(self, schema):
+        """Specifies the input schema.
+
+        Some data sources (e.g. JSON) can infer the input schema automatically from data.
+        By specifying the schema here, the underlying data source can skip the schema
+        inference step, and thus speed up data loading.
+
+        :param schema: a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string
+                       (For example ``col0 INT, col1 DOUBLE``).
+
+        >>> s = spark.read.schema("col0 INT, col1 DOUBLE")
+        """
+        from pyspark.sql import SparkSession
+        spark = SparkSession.builder.getOrCreate()
+        if isinstance(schema, StructType):
+            jschema = spark._jsparkSession.parseDataType(schema.json())
+            self._jreader = self._jreader.schema(jschema)
+        elif isinstance(schema, basestring):
+            self._jreader = self._jreader.schema(schema)
+        else:
+            raise TypeError("schema should be StructType or string")
+        return self
+
+    @since(1.5)
+    def option(self, key, value):
+        """Adds an input option for the underlying data source.
+
+        You can set the following option(s) for reading files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to parse timestamps
+                in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
+        """
+        self._jreader = self._jreader.option(key, to_str(value))
+        return self
+
+    @since(1.4)
+    def options(self, **options):
+        """Adds input options for the underlying data source.
+
+        You can set the following option(s) for reading files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to parse timestamps
+                in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
+        """
+        for k in options:
+            self._jreader = self._jreader.option(k, to_str(options[k]))
+        return self
+
+    @since(1.4)
+    def load(self, path=None, format=None, schema=None, **options):
+        """Loads data from a data source and returns it as a :class`DataFrame`.
+
+        :param path: optional string or a list of string for file-system backed data sources.
+        :param format: optional string for format of the data source. Default to 'parquet'.
+        :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema
+                       or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
+        :param options: all other string options
+
+        >>> df = spark.read.format("parquet").load('python/test_support/sql/parquet_partitioned',
+        ...     opt1=True, opt2=1, opt3='str')
+        >>> df.dtypes
+        [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
+
+        >>> df = spark.read.format('json').load(['python/test_support/sql/people.json',
+        ...     'python/test_support/sql/people1.json'])
+        >>> df.dtypes
+        [('age', 'bigint'), ('aka', 'string'), ('name', 'string')]
+        """
+        if format is not None:
+            self.format(format)
+        if schema is not None:
+            self.schema(schema)
+        self.options(**options)
+        if isinstance(path, basestring):
+            return self._df(self._jreader.load(path))
+        elif path is not None:
+            if type(path) != list:
+                path = [path]
+            return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path)))
+        else:
+            return self._df(self._jreader.load())
+
+    @since(1.4)
+    def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
+             allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None,
+             allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None,
+             mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None,
+             multiLine=None, allowUnquotedControlChars=None, lineSep=None, samplingRatio=None,
+             dropFieldIfAllNull=None, encoding=None):
+        """
+        Loads JSON files and returns the results as a :class:`DataFrame`.
+
+        `JSON Lines <http://jsonlines.org/>`_ (newline-delimited JSON) is supported by default.
+        For JSON (one record per file), set the ``multiLine`` parameter to ``true``.
+
+        If the ``schema`` parameter is not specified, this function goes
+        through the input once to determine the input schema.
+
+        :param path: string represents path to the JSON dataset, or a list of paths,
+                     or RDD of Strings storing JSON objects.
+        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema or
+                       a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
+        :param primitivesAsString: infers all primitive values as a string type. If None is set,
+                                   it uses the default value, ``false``.
+        :param prefersDecimal: infers all floating-point values as a decimal type. If the values
+                               do not fit in decimal, then it infers them as doubles. If None is
+                               set, it uses the default value, ``false``.
+        :param allowComments: ignores Java/C++ style comment in JSON records. If None is set,
+                              it uses the default value, ``false``.
+        :param allowUnquotedFieldNames: allows unquoted JSON field names. If None is set,
+                                        it uses the default value, ``false``.
+        :param allowSingleQuotes: allows single quotes in addition to double quotes. If None is
+                                        set, it uses the default value, ``true``.
+        :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is
+                                        set, it uses the default value, ``false``.
+        :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all characters
+                                                   using the backslash quoting mechanism. If None
+                                                   is set, it uses the default value, ``false``.
+        :param mode: allows a mode for dealing with corrupt records during parsing. If None is
+                     set, it uses the default value, ``PERMISSIVE``.
+
+                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                  into a field configured by ``columnNameOfCorruptRecord``, and sets other \
+                  fields to ``null``. To keep corrupt records, a user can set a string type \
+                  field named ``columnNameOfCorruptRecord`` in a user-defined schema. If a \
+                  schema does not have the field, it drops corrupt records during parsing. \
+                  When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \
+                  field in an output schema.
+                *  ``DROPMALFORMED`` : ignores the whole corrupted records.
+                *  ``FAILFAST`` : throws an exception when it meets corrupted records.
+
+        :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
+                                          created by ``PERMISSIVE`` mode. This overrides
+                                          ``spark.sql.columnNameOfCorruptRecord``. If None is set,
+                                          it uses the value specified in
+                                          ``spark.sql.columnNameOfCorruptRecord``.
+        :param dateFormat: sets the string that indicates a date format. Custom date formats
+                           follow the formats at ``java.text.SimpleDateFormat``. This
+                           applies to date type. If None is set, it uses the
+                           default value, ``yyyy-MM-dd``.
+        :param timestampFormat: sets the string that indicates a timestamp format. Custom date
+                                formats follow the formats at ``java.text.SimpleDateFormat``.
+                                This applies to timestamp type. If None is set, it uses the
+                                default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.
+        :param multiLine: parse one record, which may span multiple lines, per file. If None is
+                          set, it uses the default value, ``false``.
+        :param allowUnquotedControlChars: whether to allow JSON strings to contain unquoted
+                                          control characters (ASCII characters with value less
+                                          than 32, including tab and line feed characters).
+        :param encoding: allows forcibly setting one of the standard basic or extended encodings
+                         for the JSON files, for example UTF-16BE or UTF-32LE. If None is set,
+                         the encoding of the input JSON will be detected automatically
+                         when the multiLine option is set to ``true``.
+        :param lineSep: defines the line separator that should be used for parsing. If None is
+                        set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
+        :param samplingRatio: defines the fraction of input JSON objects used for schema
+                              inference. If None is set, it uses the default value, ``1.0``.
+        :param dropFieldIfAllNull: whether to ignore columns of all null values or empty
+                                   arrays/structs during schema inference. If None is set, it
+                                   uses the default value, ``false``.
+
+        >>> df1 = spark.read.json('python/test_support/sql/people.json')
+        >>> df1.dtypes
+        [('age', 'bigint'), ('name', 'string')]
+        >>> rdd = sc.textFile('python/test_support/sql/people.json')
+        >>> df2 = spark.read.json(rdd)
+        >>> df2.dtypes
+        [('age', 'bigint'), ('name', 'string')]
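+
+        A schema can also be given explicitly, either as a
+        :class:`pyspark.sql.types.StructType` or as a DDL-formatted string; the sketch below
+        assumes the column names match the file used above, and is therefore skipped.
+
+        >>> df3 = spark.read.json(rdd, schema='age BIGINT, name STRING')  # doctest: +SKIP
+        >>> df3.dtypes  # doctest: +SKIP
+        [('age', 'bigint'), ('name', 'string')]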
+
+        """
+        self._set_opts(
+            schema=schema, primitivesAsString=primitivesAsString, prefersDecimal=prefersDecimal,
+            allowComments=allowComments, allowUnquotedFieldNames=allowUnquotedFieldNames,
+            allowSingleQuotes=allowSingleQuotes, allowNumericLeadingZero=allowNumericLeadingZero,
+            allowBackslashEscapingAnyCharacter=allowBackslashEscapingAnyCharacter,
+            mode=mode, columnNameOfCorruptRecord=columnNameOfCorruptRecord, dateFormat=dateFormat,
+            timestampFormat=timestampFormat, multiLine=multiLine,
+            allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep,
+            samplingRatio=samplingRatio, dropFieldIfAllNull=dropFieldIfAllNull, encoding=encoding)
+        if isinstance(path, basestring):
+            path = [path]
+        if type(path) == list:
+            return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))
+        elif isinstance(path, RDD):
+            def func(iterator):
+                for x in iterator:
+                    if not isinstance(x, basestring):
+                        x = unicode(x)
+                    if isinstance(x, unicode):
+                        x = x.encode("utf-8")
+                    yield x
+            keyed = path.mapPartitions(func)
+            keyed._bypass_serializer = True
+            jrdd = keyed._jrdd.map(self._spark._jvm.BytesToString())
+            return self._df(self._jreader.json(jrdd))
+        else:
+            raise TypeError("path can be only string, list or RDD")
+
+    @since(1.4)
+    def table(self, tableName):
+        """Returns the specified table as a :class:`DataFrame`.
+
+        :param tableName: string, name of the table.
+
+        >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned')
+        >>> df.createOrReplaceTempView('tmpTable')
+        >>> spark.read.table('tmpTable').dtypes
+        [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
+        """
+        return self._df(self._jreader.table(tableName))
+
+    @since(1.4)
+    def parquet(self, *paths):
+        """Loads Parquet files, returning the result as a :class:`DataFrame`.
+
+        You can set the following Parquet-specific option(s) for reading Parquet files:
+            * ``mergeSchema``: sets whether we should merge schemas collected from all \
+                Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``. \
+                The default value is specified in ``spark.sql.parquet.mergeSchema``.
+
+        >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned')
+        >>> df.dtypes
+        [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
+        """
+        return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths)))
+
+    @ignore_unicode_prefix
+    @since(1.6)
+    def text(self, paths, wholetext=False, lineSep=None):
+        """
+        Loads text files and returns a :class:`DataFrame` whose schema starts with a
+        string column named "value", followed by partitioned columns if there
+        are any.
+
+        By default, each line in the text file is a new row in the resulting DataFrame.
+
+        :param paths: string, or list of strings, for input path(s).
+        :param wholetext: if true, read each file from input path(s) as a single row.
+        :param lineSep: defines the line separator that should be used for parsing. If None is
+                        set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
+
+        >>> df = spark.read.text('python/test_support/sql/text-test.txt')
+        >>> df.collect()
+        [Row(value=u'hello'), Row(value=u'this')]
+        >>> df = spark.read.text('python/test_support/sql/text-test.txt', wholetext=True)
+        >>> df.collect()
+        [Row(value=u'hello\\nthis')]
+        """
+        self._set_opts(wholetext=wholetext, lineSep=lineSep)
+        if isinstance(paths, basestring):
+            paths = [paths]
+        return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths)))
+
+    @since(2.0)
+    def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=None,
+            comment=None, header=None, inferSchema=None, ignoreLeadingWhiteSpace=None,
+            ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None,
+            negativeInf=None, dateFormat=None, timestampFormat=None, maxColumns=None,
+            maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None,
+            columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None,
+            samplingRatio=None, enforceSchema=None, emptyValue=None):
+        r"""Loads a CSV file and returns the result as a  :class:`DataFrame`.
+
+        This function will go through the input once to determine the input schema if
+        ``inferSchema`` is enabled. To avoid going through the entire data once, disable the
+        ``inferSchema`` option or specify the schema explicitly using ``schema``.
+
+        :param path: string, or list of strings, for input path(s),
+                     or RDD of Strings storing CSV rows.
+        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema
+                       or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
+        :param sep: sets a single character as a separator for each field and value.
+                    If None is set, it uses the default value, ``,``.
+        :param encoding: decodes the CSV files by the given encoding type. If None is set,
+                         it uses the default value, ``UTF-8``.
+        :param quote: sets a single character used for escaping quoted values where the
+                      separator can be part of the value. If None is set, it uses the default
+                      value, ``"``. If you would like to turn off quotations, you need to set an
+                      empty string.
+        :param escape: sets a single character used for escaping quotes inside an already
+                       quoted value. If None is set, it uses the default value, ``\``.
+        :param comment: sets a single character used for skipping lines beginning with this
+                        character. By default (None), it is disabled.
+        :param header: uses the first line as names of columns. If None is set, it uses the
+                       default value, ``false``.
+        :param inferSchema: infers the input schema automatically from data. It requires one extra
+                       pass over the data. If None is set, it uses the default value, ``false``.
+        :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be
+                              forcibly applied to datasource files, and headers in CSV files will be
+                              ignored. If the option is set to ``false``, the schema will be
+                              validated against all headers in CSV files or the first header in RDD
+                              if the ``header`` option is set to ``true``. Field names in the schema
+                              and column names in CSV headers are checked by their positions
+                              taking into account ``spark.sql.caseSensitive``. If None is set,
+                              ``true`` is used by default. Though the default value is ``true``,
+                              it is recommended to disable the ``enforceSchema`` option
+                              to avoid incorrect results.
+        :param ignoreLeadingWhiteSpace: A flag indicating whether or not leading whitespaces from
+                                        values being read should be skipped. If None is set, it
+                                        uses the default value, ``false``.
+        :param ignoreTrailingWhiteSpace: A flag indicating whether or not trailing whitespaces from
+                                         values being read should be skipped. If None is set, it
+                                         uses the default value, ``false``.
+        :param nullValue: sets the string representation of a null value. If None is set, it uses
+                          the default value, empty string. Since 2.0.1, this ``nullValue`` param
+                          applies to all supported types including the string type.
+        :param nanValue: sets the string representation of a non-number value. If None is set, it
+                         uses the default value, ``NaN``.
+        :param positiveInf: sets the string representation of a positive infinity value. If None
+                            is set, it uses the default value, ``Inf``.
+        :param negativeInf: sets the string representation of a negative infinity value. If None
+                            is set, it uses the default value, ``-Inf``.
+        :param dateFormat: sets the string that indicates a date format. Custom date formats
+                           follow the formats at ``java.text.SimpleDateFormat``. This
+                           applies to date type. If None is set, it uses the
+                           default value, ``yyyy-MM-dd``.
+        :param timestampFormat: sets the string that indicates a timestamp format. Custom date
+                                formats follow the formats at ``java.text.SimpleDateFormat``.
+                                This applies to timestamp type. If None is set, it uses the
+                                default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.
+        :param maxColumns: defines a hard limit of how many columns a record can have. If None is
+                           set, it uses the default value, ``20480``.
+        :param maxCharsPerColumn: defines the maximum number of characters allowed for any given
+                                  value being read. If None is set, it uses the default value,
+                                  ``-1`` meaning unlimited length.
+        :param maxMalformedLogPerPartition: this parameter is no longer used since Spark 2.2.0.
+                                            If specified, it is ignored.
+        :param mode: allows a mode for dealing with corrupt records during parsing. If None is
+                     set, it uses the default value, ``PERMISSIVE``.
+
+                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                  into a field configured by ``columnNameOfCorruptRecord``, and sets other \
+                  fields to ``null``. To keep corrupt records, a user can set a string type \
+                  field named ``columnNameOfCorruptRecord`` in a user-defined schema. If a \
+                  schema does not have the field, it drops corrupt records during parsing. \
+                  A record with fewer or more tokens than the schema is not a corrupted record \
+                  to CSV. When it meets a record having fewer tokens than the length of the \
+                  schema, it sets ``null`` to the extra fields. When the record has more tokens \
+                  than the length of the schema, it drops the extra tokens.
+                * ``DROPMALFORMED`` : ignores the whole corrupted records.
+                * ``FAILFAST`` : throws an exception when it meets corrupted records.
+
+        :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
+                                          created by ``PERMISSIVE`` mode. This overrides
+                                          ``spark.sql.columnNameOfCorruptRecord``. If None is set,
+                                          it uses the value specified in
+                                          ``spark.sql.columnNameOfCorruptRecord``.
+        :param multiLine: parse records, which may span multiple lines. If None is
+                          set, it uses the default value, ``false``.
+        :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for
+                                          the quote character. If None is set, the default value is
+                                          escape character when escape and quote characters are
+                                          different, ``\0`` otherwise.
+        :param samplingRatio: defines fraction of rows used for schema inferring.
+                              If None is set, it uses the default value, ``1.0``.
+        :param emptyValue: sets the string representation of an empty value. If None is set, it uses
+                           the default value, empty string.
+
+        >>> df = spark.read.csv('python/test_support/sql/ages.csv')
+        >>> df.dtypes
+        [('_c0', 'string'), ('_c1', 'string')]
+        >>> rdd = sc.textFile('python/test_support/sql/ages.csv')
+        >>> df2 = spark.read.csv(rdd)
+        >>> df2.dtypes
+        [('_c0', 'string'), ('_c1', 'string')]
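+
+        A DDL-formatted string can also be passed as ``schema`` instead of relying on
+        ``inferSchema``; the sketch below assumes the two columns hold a name and an age,
+        so it is skipped.
+
+        >>> df3 = spark.read.csv(rdd, schema='name STRING, age INT')  # doctest: +SKIP
+        >>> df3.dtypes  # doctest: +SKIP
+        [('name', 'string'), ('age', 'int')]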
+        """
+        self._set_opts(
+            schema=schema, sep=sep, encoding=encoding, quote=quote, escape=escape, comment=comment,
+            header=header, inferSchema=inferSchema, ignoreLeadingWhiteSpace=ignoreLeadingWhiteSpace,
+            ignoreTrailingWhiteSpace=ignoreTrailingWhiteSpace, nullValue=nullValue,
+            nanValue=nanValue, positiveInf=positiveInf, negativeInf=negativeInf,
+            dateFormat=dateFormat, timestampFormat=timestampFormat, maxColumns=maxColumns,
+            maxCharsPerColumn=maxCharsPerColumn,
+            maxMalformedLogPerPartition=maxMalformedLogPerPartition, mode=mode,
+            columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine,
+            charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, samplingRatio=samplingRatio,
+            enforceSchema=enforceSchema, emptyValue=emptyValue)
+        if isinstance(path, basestring):
+            path = [path]
+        if type(path) == list:
+            return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path)))
+        elif isinstance(path, RDD):
+            def func(iterator):
+                for x in iterator:
+                    if not isinstance(x, basestring):
+                        x = unicode(x)
+                    if isinstance(x, unicode):
+                        x = x.encode("utf-8")
+                    yield x
+            keyed = path.mapPartitions(func)
+            keyed._bypass_serializer = True
+            jrdd = keyed._jrdd.map(self._spark._jvm.BytesToString())
+            # see SPARK-22112
+            # There isn't any JVM API for creating a DataFrame from an RDD storing CSV.
+            # We can do it by first creating a JVM dataset and then using the JVM API
+            # for creating a DataFrame from a dataset storing CSV.
+            jdataset = self._spark._ssql_ctx.createDataset(
+                jrdd.rdd(),
+                self._spark._jvm.Encoders.STRING())
+            return self._df(self._jreader.csv(jdataset))
+        else:
+            raise TypeError("path can be only string, list or RDD")
+
+    @since(1.5)
+    def orc(self, path):
+        """Loads ORC files, returning the result as a :class:`DataFrame`.
+
+        .. note:: Currently ORC support is only available together with Hive support.
+
+        >>> df = spark.read.orc('python/test_support/sql/orc_partitioned')
+        >>> df.dtypes
+        [('a', 'bigint'), ('b', 'int'), ('c', 'int')]
+        """
+        if isinstance(path, basestring):
+            path = [path]
+        return self._df(self._jreader.orc(_to_seq(self._spark._sc, path)))
+
+    @since(1.4)
+    def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPartitions=None,
+             predicates=None, properties=None):
+        """
+        Construct a :class:`DataFrame` representing the database table named ``table``
+        accessible via JDBC URL ``url`` and connection ``properties``.
+
+        Partitions of the table will be retrieved in parallel if either ``column`` or
+        ``predicates`` is specified. ``lowerBound``, ``upperBound`` and ``numPartitions``
+        are needed when ``column`` is specified.
+
+        If both ``column`` and ``predicates`` are specified, ``column`` will be used.
+
+        .. note:: Don't create too many partitions in parallel on a large cluster;
+            otherwise Spark might crash your external database systems.
+
+        :param url: a JDBC URL of the form ``jdbc:subprotocol:subname``
+        :param table: the name of the table
+        :param column: the name of an integer column that will be used for partitioning;
+                       if this parameter is specified, then ``numPartitions``, ``lowerBound``
+                       (inclusive), and ``upperBound`` (exclusive) will form partition strides
+                       for generated WHERE clause expressions used to split the column
+                       ``column`` evenly
+        :param lowerBound: the minimum value of ``column`` used to decide partition stride
+        :param upperBound: the maximum value of ``column`` used to decide partition stride
+        :param numPartitions: the number of partitions
+        :param predicates: a list of expressions suitable for inclusion in WHERE clauses;
+                           each one defines one partition of the :class:`DataFrame`
+        :param properties: a dictionary of JDBC database connection arguments. Normally it should
+                           include at least the "user" and "password" properties, for example
+                           ``{ 'user' : 'SYSTEM', 'password' : 'mypassword' }``.
+        :return: a DataFrame
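+
+        A minimal, illustrative sketch; the JDBC URL, table name and credentials below are
+        hypothetical, so the example is skipped.
+
+        >>> people_df = spark.read.jdbc(  # doctest: +SKIP
+        ...     url="jdbc:postgresql://localhost:5432/testdb", table="people",
+        ...     column="id", lowerBound=1, upperBound=1000, numPartitions=4,
+        ...     properties={"user": "SYSTEM", "password": "mypassword"})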
+        """
+        if properties is None:
+            properties = dict()
+        jprop = JavaClass("java.util.Properties", self._spark._sc._gateway._gateway_client)()
+        for k in properties:
+            jprop.setProperty(k, properties[k])
+        if column is not None:
+            assert lowerBound is not None, "lowerBound can not be None when ``column`` is specified"
+            assert upperBound is not None, "upperBound can not be None when ``column`` is specified"
+            assert numPartitions is not None, \
+                "numPartitions can not be None when ``column`` is specified"
+            return self._df(self._jreader.jdbc(url, table, column, int(lowerBound), int(upperBound),
+                                               int(numPartitions), jprop))
+        if predicates is not None:
+            gateway = self._spark._sc._gateway
+            jpredicates = utils.toJArray(gateway, gateway.jvm.java.lang.String, predicates)
+            return self._df(self._jreader.jdbc(url, table, jpredicates, jprop))
+        return self._df(self._jreader.jdbc(url, table, jprop))
+
+
+class DataFrameWriter(OptionUtils):
+    """
+    Interface used to write a :class:`DataFrame` to external storage systems
+    (e.g. file systems, key-value stores, etc). Use :func:`DataFrame.write`
+    to access this.
+
+    .. versionadded:: 1.4
+    """
+    def __init__(self, df):
+        self._df = df
+        self._spark = df.sql_ctx
+        self._jwrite = df._jdf.write()
+
+    def _sq(self, jsq):
+        from pyspark.sql.streaming import StreamingQuery
+        return StreamingQuery(jsq)
+
+    @since(1.4)
+    def mode(self, saveMode):
+        """Specifies the behavior when data or table already exists.
+
+        Options include:
+
+        * `append`: Append contents of this :class:`DataFrame` to existing data.
+        * `overwrite`: Overwrite existing data.
+        * `error` or `errorifexists`: Throw an exception if data already exists.
+        * `ignore`: Silently ignore this operation if data already exists.
+
+        >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        # At the JVM side, the default value of mode is already set to "error".
+        # So, if the given saveMode is None, we will not call JVM-side's mode method.
+        if saveMode is not None:
+            self._jwrite = self._jwrite.mode(saveMode)
+        return self
+
+    @since(1.4)
+    def format(self, source):
+        """Specifies the underlying output data source.
+
+        :param source: string, name of the data source, e.g. 'json', 'parquet'.
+
+        >>> df.write.format('json').save(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        self._jwrite = self._jwrite.format(source)
+        return self
+
+    @since(1.5)
+    def option(self, key, value):
+        """Adds an output option for the underlying data source.
+
+        You can set the following option(s) for writing files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to format
+                timestamps in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
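+
+        For example (an illustrative sketch; the option value shown is arbitrary):
+
+        >>> df.write.option("timeZone", "GMT").parquet(os.path.join(tempfile.mkdtemp(), 'data'))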
+        """
+        self._jwrite = self._jwrite.option(key, to_str(value))
+        return self
+
+    @since(1.4)
+    def options(self, **options):
+        """Adds output options for the underlying data source.
+
+        You can set the following option(s) for writing files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to format
+                timestamps in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
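+
+        For example (an illustrative sketch; the option values shown are arbitrary):
+
+        >>> (df.write.options(timeZone="GMT", compression="gzip")  # doctest: +SKIP
+        ...     .parquet(os.path.join(tempfile.mkdtemp(), 'data')))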
+        """
+        for k in options:
+            self._jwrite = self._jwrite.option(k, to_str(options[k]))
+        return self
+
+    @since(1.4)
+    def partitionBy(self, *cols):
+        """Partitions the output by the given columns on the file system.
+
+        If specified, the output is laid out on the file system similar
+        to Hive's partitioning scheme.
+
+        :param cols: name of columns
+
+        >>> df.write.partitionBy('year', 'month').parquet(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
+            cols = cols[0]
+        self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols))
+        return self
+
+    @since(2.3)
+    def bucketBy(self, numBuckets, col, *cols):
+        """Buckets the output by the given columns.If specified,
+        the output is laid out on the file system similar to Hive's bucketing scheme.
+
+        :param numBuckets: the number of buckets to save
+        :param col: a name of a column, or a list of names.
+        :param cols: additional names (optional). If `col` is a list it should be empty.
+
+        .. note:: Applicable for file-based data sources in combination with
+                  :py:meth:`DataFrameWriter.saveAsTable`.
+
+        >>> (df.write.format('parquet')  # doctest: +SKIP
+        ...     .bucketBy(100, 'year', 'month')
+        ...     .mode("overwrite")
+        ...     .saveAsTable('bucketed_table'))
+        """
+        if not isinstance(numBuckets, int):
+            raise TypeError("numBuckets should be an int, got {0}.".format(type(numBuckets)))
+
+        if isinstance(col, (list, tuple)):
+            if cols:
+                raise ValueError("col is a {0} but cols are not empty".format(type(col)))
+
+            col, cols = col[0], col[1:]
+
+        if not all(isinstance(c, basestring) for c in cols) or not(isinstance(col, basestring)):
+            raise TypeError("all names should be `str`")
+
+        self._jwrite = self._jwrite.bucketBy(numBuckets, col, _to_seq(self._spark._sc, cols))
+        return self
+
+    @since(2.3)
+    def sortBy(self, col, *cols):
+        """Sorts the output in each bucket by the given columns on the file system.
+
+        :param col: a name of a column, or a list of names.
+        :param cols: additional names (optional). If `col` is a list it should be empty.
+
+        >>> (df.write.format('parquet')  # doctest: +SKIP
+        ...     .bucketBy(100, 'year', 'month')
+        ...     .sortBy('day')
+        ...     .mode("overwrite")
+        ...     .saveAsTable('sorted_bucketed_table'))
+        """
+        if isinstance(col, (list, tuple)):
+            if cols:
+                raise ValueError("col is a {0} but cols are not empty".format(type(col)))
+
+            col, cols = col[0], col[1:]
+
+        if not all(isinstance(c, basestring) for c in cols) or not(isinstance(col, basestring)):
+            raise TypeError("all names should be `str`")
+
+        self._jwrite = self._jwrite.sortBy(col, _to_seq(self._spark._sc, cols))
+        return self
+
+    @since(1.4)
+    def save(self, path=None, format=None, mode=None, partitionBy=None, **options):
+        """Saves the contents of the :class:`DataFrame` to a data source.
+
+        The data source is specified by the ``format`` and a set of ``options``.
+        If ``format`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        :param path: the path in a Hadoop supported file system
+        :param format: the format used to save
+        :param mode: specifies the behavior of the save operation when data already exists.
+
+            * ``append``: Append contents of this :class:`DataFrame` to existing data.
+            * ``overwrite``: Overwrite existing data.
+            * ``ignore``: Silently ignore this operation if data already exists.
+            * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
+                exists.
+        :param partitionBy: names of partitioning columns
+        :param options: all other string options
+
+        >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
+        if format is not None:
+            self.format(format)
+        if path is None:
+            self._jwrite.save()
+        else:
+            self._jwrite.save(path)
+
+    @since(1.4)
+    def insertInto(self, tableName, overwrite=False):
+        """Inserts the content of the :class:`DataFrame` to the specified table.
+
+        It requires that the schema of the :class:`DataFrame` is the same as the
+        schema of the table. If ``overwrite`` is set to ``True``, any existing data
+        in the table is overwritten.
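+
+        A minimal, illustrative sketch (``people`` is a hypothetical, already existing table,
+        so the example is skipped):
+
+        >>> df.write.insertInto('people')  # doctest: +SKIP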
+        """
+        self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName)
+
+    @since(1.4)
+    def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options):
+        """Saves the content of the :class:`DataFrame` as the specified table.
+
+        In the case the table already exists, the behavior of this function depends on the
+        save mode, specified by the `mode` function (defaulting to throwing an exception).
+        When `mode` is `Overwrite`, the schema of the :class:`DataFrame` does not need to be
+        the same as that of the existing table.
+
+        * `append`: Append contents of this :class:`DataFrame` to existing data.
+        * `overwrite`: Overwrite existing data.
+        * `error` or `errorifexists`: Throw an exception if data already exists.
+        * `ignore`: Silently ignore this operation if data already exists.
+
+        :param name: the table name
+        :param format: the format used to save
+        :param mode: one of `append`, `overwrite`, `error`, `errorifexists`, `ignore` \
+                     (default: error)
+        :param partitionBy: names of partitioning columns
+        :param options: all other string options
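+
+        A minimal, illustrative sketch (the table name is arbitrary, so the example is skipped):
+
+        >>> df.write.mode('overwrite').saveAsTable('people_table')  # doctest: +SKIP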
+        """
+        self.mode(mode).options(**options)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
+        if format is not None:
+            self.format(format)
+        self._jwrite.saveAsTable(name)
+
+    @since(1.4)
+    def json(self, path, mode=None, compression=None, dateFormat=None, timestampFormat=None,
+             lineSep=None, encoding=None):
+        """Saves the content of the :class:`DataFrame` in JSON format
+        (`JSON Lines text format or newline-delimited JSON <http://jsonlines.org/>`_) at the
+        specified path.
+
+        :param path: the path in any Hadoop supported file system
+        :param mode: specifies the behavior of the save operation when data already exists.
+
+            * ``append``: Append contents of this :class:`DataFrame` to existing data.
+            * ``overwrite``: Overwrite existing data.
+            * ``ignore``: Silently ignore this operation if data already exists.
+            * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
+                exists.
+        :param compression: compression codec to use when saving to file. This can be one of the
+                            known case-insensitive shortened names (none, bzip2, gzip, lz4,
+                            snappy and deflate).
+        :param dateFormat: sets the string that indicates a date format. Custom date formats
+                           follow the formats at ``java.text.SimpleDateFormat``. This
+                           applies to date type. If None is set, it uses the
+                           default value, ``yyyy-MM-dd``.
+        :param timestampFormat: sets the string that indicates a timestamp format. Custom date
+                                formats follow the formats at ``java.text.SimpleDateFormat``.
+                                This applies to timestamp type. If None is set, it uses the
+                                default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.
+        :param encoding: specifies encoding (charset) of saved json files. If None is set,
+                        the default UTF-8 charset will be used.
+        :param lineSep: defines the line separator that should be used for writing. If None is
+                        set, it uses the default value, ``\\n``.
+
+        >>> df.write.json(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        self.mode(mode)
+        self._set_opts(
+            compression=compression, dateFormat=dateFormat, timestampFormat=timestampFormat,
+            lineSep=lineSep, encoding=encoding)
+        self._jwrite.json(path)
+
+    @since(1.4)
+    def parquet(self, path, mode=None, partitionBy=None, compression=None):
+        """Saves the content of the :class:`DataFrame` in Parquet format at the specified path.
+
+        :param path: the path in any Hadoop supported file system
+        :param mode: specifies the behavior of the save operation when data already exists.
+
+            * ``append``: Append contents of this :class:`DataFrame` to existing data.
+            * ``overwrite``: Overwrite existing data.
+            * ``ignore``: Silently ignore this operation if data already exists.
+            * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
+                exists.
+        :param partitionBy: names of partitioning columns
+        :param compression: compression codec to use when saving to file. This can be one of the
+                            known case-insensitive shortened names (none, uncompressed, snappy, gzip,
+                            lzo, brotli, lz4, and zstd). This will override
+                            ``spark.sql.parquet.compression.codec``. If None is set, it uses the
+                            value specified in ``spark.sql.parquet.compression.codec``.
+
+        >>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        self.mode(mode)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
+        self._set_opts(compression=compression)
+        self._jwrite.parquet(path)
+
+    @since(1.6)
+    def text(self, path, compression=None, lineSep=None):
+        """Saves the content of the DataFrame in a text file at the specified path.
+
+        :param path: the path in any Hadoop supported file system
+        :param compression: compression codec to use when saving to file. This can be one of the
+                            known case-insensitive shortened names (none, bzip2, gzip, lz4,
+                            snappy and deflate).
+        :param lineSep: defines the line separator that should be used for writing. If None is
+                        set, it uses the default value, ``\\n``.
+
+        The DataFrame must have only one column that is of string type.
+        Each row becomes a new line in the output file.
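+
+        For example (a minimal sketch that writes out the single string column ``name`` of the
+        example DataFrame):
+
+        >>> df.select(df.name).write.text(os.path.join(tempfile.mkdtemp(), 'data'))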
+        """
+        self._set_opts(compression=compression, lineSep=lineSep)
+        self._jwrite.text(path)
+
+    @since(2.0)
+    def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=None,
+            header=None, nullValue=None, escapeQuotes=None, quoteAll=None, dateFormat=None,
+            timestampFormat=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None,
+            charToEscapeQuoteEscaping=None, encoding=None, emptyValue=None):
+        r"""Saves the content of the :class:`DataFrame` in CSV format at the specified path.
+
+        :param path: the path in any Hadoop supported file system
+        :param mode: specifies the behavior of the save operation when data already exists.
+
+            * ``append``: Append contents of this :class:`DataFrame` to existing data.
+            * ``overwrite``: Overwrite existing data.
+            * ``ignore``: Silently ignore this operation if data already exists.
+            * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
+                exists.
+
+        :param compression: compression codec to use when saving to file. This can be one of the
+                            known case-insensitive shortened names (none, bzip2, gzip, lz4,
+                            snappy and deflate).
+        :param sep: sets a single character as a separator for each field and value. If None is
+                    set, it uses the default value, ``,``.
+        :param quote: sets a single character used for escaping quoted values where the
+                      separator can be part of the value. If None is set, it uses the default
+                      value, ``"``. If an empty string is set, it uses ``u0000`` (null character).
+        :param escape: sets a single character used for escaping quotes inside an already
+                       quoted value. If None is set, it uses the default value, ``\``
+        :param escapeQuotes: a flag indicating whether values containing quotes should always
+                             be enclosed in quotes. If None is set, it uses the default value
+                             ``true``, escaping all values containing a quote character.
+        :param quoteAll: a flag indicating whether all values should always be enclosed in
+                          quotes. If None is set, it uses the default value ``false``,
+                          only escaping values containing a quote character.
+        :param header: writes the names of columns as the first line. If None is set, it uses
+                       the default value, ``false``.
+        :param nullValue: sets the string representation of a null value. If None is set, it uses
+                          the default value, empty string.
+        :param dateFormat: sets the string that indicates a date format. Custom date formats
+                           follow the formats at ``java.text.SimpleDateFormat``. This
+                           applies to date type. If None is set, it uses the
+                           default value, ``yyyy-MM-dd``.
+        :param timestampFormat: sets the string that indicates a timestamp format. Custom date
+                                formats follow the formats at ``java.text.SimpleDateFormat``.
+                                This applies to timestamp type. If None is set, it uses the
+                                default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.
+        :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from
+                                        values being written should be skipped. If None is set, it
+                                        uses the default value, ``true``.
+        :param ignoreTrailingWhiteSpace: a flag indicating whether or not trailing whitespaces from
+                                         values being written should be skipped. If None is set, it
+                                         uses the default value, ``true``.
+        :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for
+                                          the quote character. If None is set, the default value is
+                                          escape character when escape and quote characters are
+                                          different, ``\0`` otherwise.
+        :param encoding: sets the encoding (charset) of saved csv files. If None is set,
+                         the default UTF-8 charset will be used.
+        :param emptyValue: sets the string representation of an empty value. If None is set, it uses
+                           the default value, ``""``.
+
+        >>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        self.mode(mode)
+        self._set_opts(compression=compression, sep=sep, quote=quote, escape=escape, header=header,
+                       nullValue=nullValue, escapeQuotes=escapeQuotes, quoteAll=quoteAll,
+                       dateFormat=dateFormat, timestampFormat=timestampFormat,
+                       ignoreLeadingWhiteSpace=ignoreLeadingWhiteSpace,
+                       ignoreTrailingWhiteSpace=ignoreTrailingWhiteSpace,
+                       charToEscapeQuoteEscaping=charToEscapeQuoteEscaping,
+                       encoding=encoding, emptyValue=emptyValue)
+        self._jwrite.csv(path)
+
+    @since(1.5)
+    def orc(self, path, mode=None, partitionBy=None, compression=None):
+        """Saves the content of the :class:`DataFrame` in ORC format at the specified path.
+
+        .. note:: Currently ORC support is only available together with Hive support.
+
+        :param path: the path in any Hadoop supported file system
+        :param mode: specifies the behavior of the save operation when data already exists.
+
+            * ``append``: Append contents of this :class:`DataFrame` to existing data.
+            * ``overwrite``: Overwrite existing data.
+            * ``ignore``: Silently ignore this operation if data already exists.
+            * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
+                exists.
+        :param partitionBy: names of partitioning columns
+        :param compression: compression codec to use when saving to file. This can be one of the
+                            known case-insensitive shortened names (none, snappy, zlib, and lzo).
+                            This will override ``orc.compress`` and
+                            ``spark.sql.orc.compression.codec``. If None is set, it uses the value
+                            specified in ``spark.sql.orc.compression.codec``.
+
+        >>> orc_df = spark.read.orc('python/test_support/sql/orc_partitioned')
+        >>> orc_df.write.orc(os.path.join(tempfile.mkdtemp(), 'data'))
+        """
+        self.mode(mode)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
+        self._set_opts(compression=compression)
+        self._jwrite.orc(path)
+
+    @since(1.4)
+    def jdbc(self, url, table, mode=None, properties=None):
+        """Saves the content of the :class:`DataFrame` to an external database table via JDBC.
+
+        .. note:: Don't create too many partitions in parallel on a large cluster;
+            otherwise Spark might crash your external database systems.
+
+        :param url: a JDBC URL of the form ``jdbc:subprotocol:subname``
+        :param table: Name of the table in the external database.
+        :param mode: specifies the behavior of the save operation when data already exists.
+
+            * ``append``: Append contents of this :class:`DataFrame` to existing data.
+            * ``overwrite``: Overwrite existing data.
+            * ``ignore``: Silently ignore this operation if data already exists.
+            * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
+                exists.
+        :param properties: a dictionary of JDBC database connection arguments. Normally it should
+                           include at least the "user" and "password" properties, for example
+                           ``{ 'user' : 'SYSTEM', 'password' : 'mypassword' }``.
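+
+        A minimal, illustrative sketch; the JDBC URL, table name and credentials below are
+        hypothetical, so the example is skipped.
+
+        >>> df.write.jdbc(  # doctest: +SKIP
+        ...     url="jdbc:postgresql://localhost:5432/testdb", table="people", mode="append",
+        ...     properties={"user": "SYSTEM", "password": "mypassword"})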
+        """
+        if properties is None:
+            properties = dict()
+        jprop = JavaClass("java.util.Properties", self._spark._sc._gateway._gateway_client)()
+        for k in properties:
+            jprop.setProperty(k, properties[k])
+        self.mode(mode)._jwrite.jdbc(url, table, jprop)
+
+
+def _test():
+    import doctest
+    import os
+    import tempfile
+    import py4j
+    from pyspark.context import SparkContext
+    from pyspark.sql import SparkSession, Row
+    import pyspark.sql.readwriter
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.sql.readwriter.__dict__.copy()
+    sc = SparkContext('local[4]', 'PythonTest')
+    try:
+        spark = SparkSession.builder.getOrCreate()
+    except py4j.protocol.Py4JError:
+        spark = SparkSession(sc)
+
+    globs['tempfile'] = tempfile
+    globs['os'] = os
+    globs['sc'] = sc
+    globs['spark'] = spark
+    globs['df'] = spark.read.parquet('python/test_support/sql/parquet_partitioned')
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.readwriter, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
+    sc.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/session.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/session.py
new file mode 100644
index 0000000000000000000000000000000000000000..51a38ebfd19ff57cec6f5f3d907e273628c29821
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/session.py
@@ -0,0 +1,871 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import sys
+import warnings
+from functools import reduce
+from threading import RLock
+
+if sys.version >= '3':
+    basestring = unicode = str
+    xrange = range
+else:
+    from itertools import izip as zip, imap as map
+
+from pyspark import since
+from pyspark.rdd import RDD, ignore_unicode_prefix
+from pyspark.sql.conf import RuntimeConfig
+from pyspark.sql.dataframe import DataFrame
+from pyspark.sql.readwriter import DataFrameReader
+from pyspark.sql.streaming import DataStreamReader
+from pyspark.sql.types import Row, DataType, StringType, StructType, TimestampType, \
+    _make_type_verifier, _infer_schema, _has_nulltype, _merge_type, _create_converter, \
+    _parse_datatype_string
+from pyspark.sql.utils import install_exception_handler
+
+__all__ = ["SparkSession"]
+
+
+def _monkey_patch_RDD(sparkSession):
+    def toDF(self, schema=None, sampleRatio=None):
+        """
+        Converts current :class:`RDD` into a :class:`DataFrame`
+
+        This is a shorthand for ``spark.createDataFrame(rdd, schema, sampleRatio)``
+
+        :param schema: a :class:`pyspark.sql.types.StructType` or list of names of columns
+        :param sampleRatio: the sample ratio of rows used for inferring the schema
+        :return: a DataFrame
+
+        >>> rdd.toDF().collect()
+        [Row(name=u'Alice', age=1)]
+        """
+        return sparkSession.createDataFrame(self, schema, sampleRatio)
+
+    RDD.toDF = toDF
+
+
+class SparkSession(object):
+    """The entry point to programming Spark with the Dataset and DataFrame API.
+
+    A SparkSession can be used to create :class:`DataFrame`, register :class:`DataFrame` as
+    tables, execute SQL over tables, cache tables, and read parquet files.
+    To create a SparkSession, use the following builder pattern:
+
+    >>> spark = SparkSession.builder \\
+    ...     .master("local") \\
+    ...     .appName("Word Count") \\
+    ...     .config("spark.some.config.option", "some-value") \\
+    ...     .getOrCreate()
+
+    .. autoattribute:: builder
+       :annotation:
+    """
+
+    class Builder(object):
+        """Builder for :class:`SparkSession`.
+        """
+
+        _lock = RLock()
+        _options = {}
+
+        @since(2.0)
+        def config(self, key=None, value=None, conf=None):
+            """Sets a config option. Options set using this method are automatically propagated to
+            both :class:`SparkConf` and :class:`SparkSession`'s own configuration.
+
+            For an existing SparkConf, use `conf` parameter.
+
+            >>> from pyspark.conf import SparkConf
+            >>> SparkSession.builder.config(conf=SparkConf())
+            <pyspark.sql.session...
+
+            For a (key, value) pair, you can omit parameter names.
+
+            >>> SparkSession.builder.config("spark.some.config.option", "some-value")
+            <pyspark.sql.session...
+
+            :param key: a key name string for configuration property
+            :param value: a value for configuration property
+            :param conf: an instance of :class:`SparkConf`
+            """
+            with self._lock:
+                if conf is None:
+                    self._options[key] = str(value)
+                else:
+                    for (k, v) in conf.getAll():
+                        self._options[k] = v
+                return self
+
+        @since(2.0)
+        def master(self, master):
+            """Sets the Spark master URL to connect to, such as "local" to run locally, "local[4]"
+            to run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone
+            cluster.
+
+            :param master: a URL for the Spark master
+            """
+            return self.config("spark.master", master)
+
+        @since(2.0)
+        def appName(self, name):
+            """Sets a name for the application, which will be shown in the Spark web UI.
+
+            If no application name is set, a randomly generated name will be used.
+
+            :param name: an application name
+            """
+            return self.config("spark.app.name", name)
+
+        @since(2.0)
+        def enableHiveSupport(self):
+            """Enables Hive support, including connectivity to a persistent Hive metastore, support
+            for Hive serdes, and Hive user-defined functions.
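+
+            A minimal, illustrative sketch (skipped, since it requires Hive classes on the
+            classpath):
+
+            >>> spark = SparkSession.builder.enableHiveSupport().getOrCreate()  # doctest: +SKIP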
+            """
+            return self.config("spark.sql.catalogImplementation", "hive")
+
+        @since(2.0)
+        def getOrCreate(self):
+            """Gets an existing :class:`SparkSession` or, if there is no existing one, creates a
+            new one based on the options set in this builder.
+
+            This method first checks whether there is a valid global default SparkSession, and if
+            yes, returns that one. If no valid global default SparkSession exists, the method
+            creates a new SparkSession and assigns the newly created SparkSession as the global
+            default.
+
+            >>> s1 = SparkSession.builder.config("k1", "v1").getOrCreate()
+            >>> s1.conf.get("k1") == s1.sparkContext.getConf().get("k1") == "v1"
+            True
+
+            In case an existing SparkSession is returned, the config options specified
+            in this builder will be applied to the existing SparkSession.
+
+            >>> s2 = SparkSession.builder.config("k2", "v2").getOrCreate()
+            >>> s1.conf.get("k1") == s2.conf.get("k1")
+            True
+            >>> s1.conf.get("k2") == s2.conf.get("k2")
+            True
+            """
+            with self._lock:
+                from pyspark.context import SparkContext
+                from pyspark.conf import SparkConf
+                session = SparkSession._instantiatedSession
+                if session is None or session._sc._jsc is None:
+                    sparkConf = SparkConf()
+                    for key, value in self._options.items():
+                        sparkConf.set(key, value)
+                    sc = SparkContext.getOrCreate(sparkConf)
+                    # This SparkContext may be an existing one.
+                    for key, value in self._options.items():
+                        # we need to propagate the confs
+                        # before we create the SparkSession. Otherwise, confs like
+                        # warehouse path and metastore url will not be set correctly (
+                        # these confs cannot be changed once the SparkSession is created).
+                        sc._conf.set(key, value)
+                    session = SparkSession(sc)
+                for key, value in self._options.items():
+                    session._jsparkSession.sessionState().conf().setConfString(key, value)
+                for key, value in self._options.items():
+                    session.sparkContext._conf.set(key, value)
+                return session
+
+    builder = Builder()
+    """A class attribute having a :class:`Builder` to construct :class:`SparkSession` instances"""
+
+    _instantiatedSession = None
+
+    @ignore_unicode_prefix
+    def __init__(self, sparkContext, jsparkSession=None):
+        """Creates a new SparkSession.
+
+        >>> from datetime import datetime
+        >>> spark = SparkSession(sc)
+        >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1,
+        ...     b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1),
+        ...     time=datetime(2014, 8, 1, 14, 1, 5))])
+        >>> df = allTypes.toDF()
+        >>> df.createOrReplaceTempView("allTypes")
+        >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a '
+        ...            'from allTypes where b and i > 0').collect()
+        [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \
+            dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)]
+        >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect()
+        [(1, u'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])]
+        """
+        from pyspark.sql.context import SQLContext
+        self._sc = sparkContext
+        self._jsc = self._sc._jsc
+        self._jvm = self._sc._jvm
+        if jsparkSession is None:
+            if self._jvm.SparkSession.getDefaultSession().isDefined() \
+                    and not self._jvm.SparkSession.getDefaultSession().get() \
+                        .sparkContext().isStopped():
+                jsparkSession = self._jvm.SparkSession.getDefaultSession().get()
+            else:
+                jsparkSession = self._jvm.SparkSession(self._jsc.sc())
+        self._jsparkSession = jsparkSession
+        self._jwrapped = self._jsparkSession.sqlContext()
+        self._wrapped = SQLContext(self._sc, self, self._jwrapped)
+        _monkey_patch_RDD(self)
+        install_exception_handler()
+        # If we had an instantiated SparkSession attached with a SparkContext
+        # which is stopped now, we need to renew the instantiated SparkSession.
+        # Otherwise, we will use invalid SparkSession when we call Builder.getOrCreate.
+        if SparkSession._instantiatedSession is None \
+                or SparkSession._instantiatedSession._sc._jsc is None:
+            SparkSession._instantiatedSession = self
+            self._jvm.SparkSession.setDefaultSession(self._jsparkSession)
+
+    def _repr_html_(self):
+        return """
+            <div>
+                <p><b>SparkSession - {catalogImplementation}</b></p>
+                {sc_HTML}
+            </div>
+        """.format(
+            catalogImplementation=self.conf.get("spark.sql.catalogImplementation"),
+            sc_HTML=self.sparkContext._repr_html_()
+        )
+
+    @since(2.0)
+    def newSession(self):
+        """
+        Returns a new SparkSession as a new session, which has separate SQLConf,
+        registered temporary views and UDFs, but shares the underlying SparkContext and
+        table cache.
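+
+        A minimal, illustrative sketch showing that the new session shares the same
+        :class:`SparkContext`:
+
+        >>> spark2 = spark.newSession()  # doctest: +SKIP
+        >>> spark2.sparkContext is spark.sparkContext  # doctest: +SKIP
+        True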
+        """
+        return self.__class__(self._sc, self._jsparkSession.newSession())
+
+    @property
+    @since(2.0)
+    def sparkContext(self):
+        """Returns the underlying :class:`SparkContext`."""
+        return self._sc
+
+    @property
+    @since(2.0)
+    def version(self):
+        """The version of Spark on which this application is running."""
+        return self._jsparkSession.version()
+
+    @property
+    @since(2.0)
+    def conf(self):
+        """Runtime configuration interface for Spark.
+
+        This is the interface through which the user can get and set all Spark and Hadoop
+        configurations that are relevant to Spark SQL. When getting the value of a config,
+        this defaults to the value set in the underlying :class:`SparkContext`, if any.
+        """
+        if not hasattr(self, "_conf"):
+            self._conf = RuntimeConfig(self._jsparkSession.conf())
+        return self._conf
+
+    @property
+    @since(2.0)
+    def catalog(self):
+        """Interface through which the user may create, drop, alter or query underlying
+        databases, tables, functions etc.
+
+        :return: :class:`Catalog`
+        """
+        from pyspark.sql.catalog import Catalog
+        if not hasattr(self, "_catalog"):
+            self._catalog = Catalog(self)
+        return self._catalog
+
+    @property
+    @since(2.0)
+    def udf(self):
+        """Returns a :class:`UDFRegistration` for UDF registration.
+
+        :return: :class:`UDFRegistration`
+        """
+        from pyspark.sql.udf import UDFRegistration
+        return UDFRegistration(self)
+
+    @since(2.0)
+    def range(self, start, end=None, step=1, numPartitions=None):
+        """
+        Create a :class:`DataFrame` with single :class:`pyspark.sql.types.LongType` column named
+        ``id``, containing elements in a range from ``start`` to ``end`` (exclusive) with
+        step value ``step``.
+
+        :param start: the start value
+        :param end: the end value (exclusive)
+        :param step: the incremental step (default: 1)
+        :param numPartitions: the number of partitions of the DataFrame
+        :return: :class:`DataFrame`
+
+        >>> spark.range(1, 7, 2).collect()
+        [Row(id=1), Row(id=3), Row(id=5)]
+
+        If only one argument is specified, it will be used as the end value.
+
+        >>> spark.range(3).collect()
+        [Row(id=0), Row(id=1), Row(id=2)]
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+
+        if end is None:
+            jdf = self._jsparkSession.range(0, int(start), int(step), int(numPartitions))
+        else:
+            jdf = self._jsparkSession.range(int(start), int(end), int(step), int(numPartitions))
+
+        return DataFrame(jdf, self._wrapped)
+
+    def _inferSchemaFromList(self, data, names=None):
+        """
+        Infer schema from list of Row or tuple.
+
+        :param data: list of Row or tuple
+        :param names: list of column names
+        :return: :class:`pyspark.sql.types.StructType`
+        """
+        if not data:
+            raise ValueError("can not infer schema from empty dataset")
+        first = data[0]
+        if type(first) is dict:
+            warnings.warn("inferring schema from dict is deprecated,"
+                          "please use pyspark.sql.Row instead")
+        schema = reduce(_merge_type, (_infer_schema(row, names) for row in data))
+        if _has_nulltype(schema):
+            raise ValueError("Some of types cannot be determined after inferring")
+        return schema
+
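+    # Rough sketch of what inference from a local list produces; the types shown
+    # are what Spark would typically infer for Python ints and strs:
+    #
+    #   spark._inferSchemaFromList([(1, "a"), (2, "b")], names=["id", "label"])
+    #   # StructType(List(StructField(id,LongType,true),StructField(label,StringType,true)))
+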
+    def _inferSchema(self, rdd, samplingRatio=None, names=None):
+        """
+        Infer schema from an RDD of Row or tuple.
+
+        :param rdd: an RDD of Row or tuple
+        :param samplingRatio: sampling ratio, or no sampling (default)
+        :return: :class:`pyspark.sql.types.StructType`
+        """
+        first = rdd.first()
+        if not first:
+            raise ValueError("The first row in RDD is empty, "
+                             "can not infer schema")
+        if type(first) is dict:
+            warnings.warn("Using RDD of dict to inferSchema is deprecated. "
+                          "Use pyspark.sql.Row instead")
+
+        if samplingRatio is None:
+            schema = _infer_schema(first, names=names)
+            if _has_nulltype(schema):
+                for row in rdd.take(100)[1:]:
+                    schema = _merge_type(schema, _infer_schema(row, names=names))
+                    if not _has_nulltype(schema):
+                        break
+                else:
+                    raise ValueError("Some of the types cannot be determined by the "
+                                     "first 100 rows, please try again with sampling")
+        else:
+            if samplingRatio < 0.99:
+                rdd = rdd.sample(False, float(samplingRatio))
+            schema = rdd.map(lambda row: _infer_schema(row, names)).reduce(_merge_type)
+        return schema
+
+    def _createFromRDD(self, rdd, schema, samplingRatio):
+        """
+        Create an RDD for DataFrame from an existing RDD, returns the RDD and schema.
+        """
+        if schema is None or isinstance(schema, (list, tuple)):
+            struct = self._inferSchema(rdd, samplingRatio, names=schema)
+            converter = _create_converter(struct)
+            rdd = rdd.map(converter)
+            if isinstance(schema, (list, tuple)):
+                for i, name in enumerate(schema):
+                    struct.fields[i].name = name
+                    struct.names[i] = name
+            schema = struct
+
+        elif not isinstance(schema, StructType):
+            raise TypeError("schema should be StructType or list or None, but got: %s" % schema)
+
+        # convert python objects to sql data
+        rdd = rdd.map(schema.toInternal)
+        return rdd, schema
+
+    def _createFromLocal(self, data, schema):
+        """
+        Create an RDD for DataFrame from a list or pandas.DataFrame, returns
+        the RDD and schema.
+        """
+        # make sure data can be consumed multiple times
+        if not isinstance(data, list):
+            data = list(data)
+
+        if schema is None or isinstance(schema, (list, tuple)):
+            struct = self._inferSchemaFromList(data, names=schema)
+            converter = _create_converter(struct)
+            data = map(converter, data)
+            if isinstance(schema, (list, tuple)):
+                for i, name in enumerate(schema):
+                    struct.fields[i].name = name
+                    struct.names[i] = name
+            schema = struct
+
+        elif not isinstance(schema, StructType):
+            raise TypeError("schema should be StructType or list or None, but got: %s" % schema)
+
+        # convert python objects to sql data
+        data = [schema.toInternal(row) for row in data]
+        return self._sc.parallelize(data), schema
+
+    def _get_numpy_record_dtype(self, rec):
+        """
+        Used when converting a pandas.DataFrame to Spark using to_records(), this will correct
+        the dtypes of fields in a record so they can be properly loaded into Spark.
+        :param rec: a numpy record to check field dtypes
+        :return: corrected dtype for a numpy.record, or None if no correction is needed
+        """
+        import numpy as np
+        cur_dtypes = rec.dtype
+        col_names = cur_dtypes.names
+        record_type_list = []
+        has_rec_fix = False
+        for i in xrange(len(cur_dtypes)):
+            curr_type = cur_dtypes[i]
+            # If type is a datetime64 timestamp, convert to microseconds
+            # NOTE: if dtype is datetime[ns] then np.record.tolist() will output values as longs,
+            # conversion from [us] or lower will lead to py datetime objects, see SPARK-22417
+            if curr_type == np.dtype('datetime64[ns]'):
+                curr_type = 'datetime64[us]'
+                has_rec_fix = True
+            record_type_list.append((str(col_names[i]), curr_type))
+        return np.dtype(record_type_list) if has_rec_fix else None
+
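+    # Sketch of the correction this helper applies: nanosecond timestamp fields in
+    # a record dtype are narrowed to microseconds so tolist() yields datetimes:
+    #
+    #   import numpy as np
+    #   rec = np.zeros(1, dtype=[("ts", "datetime64[ns]"), ("x", "int64")])[0]
+    #   spark._get_numpy_record_dtype(rec)
+    #   # dtype([('ts', '<M8[us]'), ('x', '<i8')])
+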
+    def _convert_from_pandas(self, pdf, schema, timezone):
+        """
+        Convert a pandas.DataFrame to a list of records that can be used to make a DataFrame.
+
+        :return: list of records
+        """
+        if timezone is not None:
+            from pyspark.sql.types import _check_series_convert_timestamps_tz_local
+            copied = False
+            if isinstance(schema, StructType):
+                for field in schema:
+                    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+                    if isinstance(field.dataType, TimestampType):
+                        s = _check_series_convert_timestamps_tz_local(pdf[field.name], timezone)
+                        if s is not pdf[field.name]:
+                            if not copied:
+                                # Copy once if the series is modified to prevent the original
+                                # Pandas DataFrame from being updated
+                                pdf = pdf.copy()
+                                copied = True
+                            pdf[field.name] = s
+            else:
+                for column, series in pdf.iteritems():
+                    s = _check_series_convert_timestamps_tz_local(series, timezone)
+                    if s is not series:
+                        if not copied:
+                            # Copy once if the series is modified to prevent the original
+                            # Pandas DataFrame from being updated
+                            pdf = pdf.copy()
+                            copied = True
+                        pdf[column] = s
+
+        # Convert pandas.DataFrame to list of numpy records
+        np_records = pdf.to_records(index=False)
+
+        # Check if any columns need to be fixed for Spark to infer properly
+        if len(np_records) > 0:
+            record_dtype = self._get_numpy_record_dtype(np_records[0])
+            if record_dtype is not None:
+                return [r.astype(record_dtype).tolist() for r in np_records]
+
+        # Convert list of numpy records to python lists
+        return [r.tolist() for r in np_records]
+
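+    # Rough sketch of the non-Arrow conversion path (assumes pandas is installed
+    # and an active ``spark`` session): the DataFrame becomes a list of plain
+    # Python tuples that createDataFrame can parallelize.
+    #
+    #   import pandas as pd
+    #   pdf = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
+    #   spark._convert_from_pandas(pdf, schema=None, timezone=None)
+    #   # [(1, 'x'), (2, 'y')]
+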
+    def _create_from_pandas_with_arrow(self, pdf, schema, timezone):
+        """
+        Create a DataFrame from a given pandas.DataFrame by slicing it into partitions, converting
+        to Arrow data, then sending it to the JVM to parallelize. If a schema is passed in, the
+        data types will be used to coerce the data during the Pandas-to-Arrow conversion.
+        """
+        from pyspark.serializers import ArrowStreamSerializer, _create_batch
+        from pyspark.sql.types import from_arrow_schema, to_arrow_type, TimestampType
+        from pyspark.sql.utils import require_minimum_pandas_version, \
+            require_minimum_pyarrow_version
+
+        require_minimum_pandas_version()
+        require_minimum_pyarrow_version()
+
+        from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype
+
+        # Determine arrow types to coerce data when creating batches
+        if isinstance(schema, StructType):
+            arrow_types = [to_arrow_type(f.dataType) for f in schema.fields]
+        elif isinstance(schema, DataType):
+            raise ValueError("Single data type %s is not supported with Arrow" % str(schema))
+        else:
+            # Any timestamps must be coerced to be compatible with Spark
+            arrow_types = [to_arrow_type(TimestampType())
+                           if is_datetime64_dtype(t) or is_datetime64tz_dtype(t) else None
+                           for t in pdf.dtypes]
+
+        # Slice the DataFrame to be batched
+        step = -(-len(pdf) // self.sparkContext.defaultParallelism)  # round int up
+        pdf_slices = (pdf[start:start + step] for start in xrange(0, len(pdf), step))
+
+        # Create Arrow record batches
+        batches = [_create_batch([(c, t) for (_, c), t in zip(pdf_slice.iteritems(), arrow_types)],
+                                 timezone)
+                   for pdf_slice in pdf_slices]
+
+        # Create the Spark schema from the first Arrow batch (always at least 1 batch after slicing)
+        if isinstance(schema, (list, tuple)):
+            struct = from_arrow_schema(batches[0].schema)
+            for i, name in enumerate(schema):
+                struct.fields[i].name = name
+                struct.names[i] = name
+            schema = struct
+
+        jsqlContext = self._wrapped._jsqlContext
+
+        def reader_func(temp_filename):
+            return self._jvm.PythonSQLUtils.readArrowStreamFromFile(jsqlContext, temp_filename)
+
+        def create_RDD_server():
+            return self._jvm.ArrowRDDServer(jsqlContext)
+
+        # Create Spark DataFrame from Arrow stream file, using one batch per partition
+        jrdd = self._sc._serialize_to_jvm(batches, ArrowStreamSerializer(), reader_func,
+                                          create_RDD_server)
+        jdf = self._jvm.PythonSQLUtils.toDataFrame(jrdd, schema.json(), jsqlContext)
+        df = DataFrame(jdf, self._wrapped)
+        df._schema = schema
+        return df
+
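+    # Sketch of how this path is normally exercised (assumes pandas and pyarrow
+    # are installed; the config key is the one checked in createDataFrame below):
+    #
+    #   spark.conf.set("spark.sql.execution.arrow.enabled", "true")
+    #   import pandas as pd
+    #   pdf = pd.DataFrame({"id": [1, 2, 3]})
+    #   spark.createDataFrame(pdf).count()    # 3, built from Arrow record batches
+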
+    @staticmethod
+    def _create_shell_session():
+        """
+        Initialize a SparkSession for a pyspark shell session. This is called from shell.py
+        to make error handling simpler without needing to declare local variables in that
+        script, which would expose those to users.
+        """
+        import py4j
+        from pyspark.conf import SparkConf
+        from pyspark.context import SparkContext
+        try:
+            # Try to access HiveConf, it will raise exception if Hive is not added
+            conf = SparkConf()
+            if conf.get('spark.sql.catalogImplementation', 'hive').lower() == 'hive':
+                SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf()
+                return SparkSession.builder\
+                    .enableHiveSupport()\
+                    .getOrCreate()
+            else:
+                return SparkSession.builder.getOrCreate()
+        except (py4j.protocol.Py4JError, TypeError):
+            if conf.get('spark.sql.catalogImplementation', '').lower() == 'hive':
+                warnings.warn("Fall back to non-hive support because failing to access HiveConf, "
+                              "please make sure you build spark with hive")
+
+        return SparkSession.builder.getOrCreate()
+
+    @since(2.0)
+    @ignore_unicode_prefix
+    def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=True):
+        """
+        Creates a :class:`DataFrame` from an :class:`RDD`, a list or a :class:`pandas.DataFrame`.
+
+        When ``schema`` is a list of column names, the type of each column
+        will be inferred from ``data``.
+
+        When ``schema`` is ``None``, it will try to infer the schema (column names and types)
+        from ``data``, which should be an RDD of :class:`Row`,
+        or :class:`namedtuple`, or :class:`dict`.
+
+        When ``schema`` is :class:`pyspark.sql.types.DataType` or a datatype string, it must match
+        the real data, or an exception will be thrown at runtime. If the given schema is not
+        :class:`pyspark.sql.types.StructType`, it will be wrapped into a
+        :class:`pyspark.sql.types.StructType` as its only field, and the field name will be "value".
+        Each record will also be wrapped into a tuple, which can be converted to a row later.
+
+        If schema inference is needed, ``samplingRatio`` is used to determine the ratio of
+        rows used for schema inference. The first row will be used if ``samplingRatio`` is ``None``.
+
+        :param data: an RDD of any kind of SQL data representation (e.g. row, tuple, int, boolean,
+            etc.), or :class:`list`, or :class:`pandas.DataFrame`.
+        :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of
+            column names, default is ``None``.  The data type string format equals to
+            :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can
+            omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use
+            ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. We can also use
+            ``int`` as a short name for ``IntegerType``.
+        :param samplingRatio: the sample ratio of rows used for inferring
+        :param verifySchema: verify data types of every row against schema.
+        :return: :class:`DataFrame`
+
+        .. versionchanged:: 2.1
+           Added verifySchema.
+
+        .. note:: Usage with spark.sql.execution.arrow.enabled=True is experimental.
+
+        >>> l = [('Alice', 1)]
+        >>> spark.createDataFrame(l).collect()
+        [Row(_1=u'Alice', _2=1)]
+        >>> spark.createDataFrame(l, ['name', 'age']).collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> d = [{'name': 'Alice', 'age': 1}]
+        >>> spark.createDataFrame(d).collect()
+        [Row(age=1, name=u'Alice')]
+
+        >>> rdd = sc.parallelize(l)
+        >>> spark.createDataFrame(rdd).collect()
+        [Row(_1=u'Alice', _2=1)]
+        >>> df = spark.createDataFrame(rdd, ['name', 'age'])
+        >>> df.collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> from pyspark.sql import Row
+        >>> Person = Row('name', 'age')
+        >>> person = rdd.map(lambda r: Person(*r))
+        >>> df2 = spark.createDataFrame(person)
+        >>> df2.collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> from pyspark.sql.types import *
+        >>> schema = StructType([
+        ...    StructField("name", StringType(), True),
+        ...    StructField("age", IntegerType(), True)])
+        >>> df3 = spark.createDataFrame(rdd, schema)
+        >>> df3.collect()
+        [Row(name=u'Alice', age=1)]
+
+        >>> spark.createDataFrame(df.toPandas()).collect()  # doctest: +SKIP
+        [Row(name=u'Alice', age=1)]
+        >>> spark.createDataFrame(pandas.DataFrame([[1, 2]])).collect()  # doctest: +SKIP
+        [Row(0=1, 1=2)]
+
+        >>> spark.createDataFrame(rdd, "a: string, b: int").collect()
+        [Row(a=u'Alice', b=1)]
+        >>> rdd = rdd.map(lambda row: row[1])
+        >>> spark.createDataFrame(rdd, "int").collect()
+        [Row(value=1)]
+        >>> spark.createDataFrame(rdd, "boolean").collect() # doctest: +IGNORE_EXCEPTION_DETAIL
+        Traceback (most recent call last):
+            ...
+        Py4JJavaError: ...
+        """
+        if isinstance(data, DataFrame):
+            raise TypeError("data is already a DataFrame")
+
+        if isinstance(schema, basestring):
+            schema = _parse_datatype_string(schema)
+        elif isinstance(schema, (list, tuple)):
+            # Must re-encode any unicode strings to be consistent with StructField names
+            schema = [x.encode('utf-8') if not isinstance(x, str) else x for x in schema]
+
+        try:
+            import pandas
+            has_pandas = True
+        except Exception:
+            has_pandas = False
+        if has_pandas and isinstance(data, pandas.DataFrame):
+            from pyspark.sql.utils import require_minimum_pandas_version
+            require_minimum_pandas_version()
+
+            if self._wrapped._conf.pandasRespectSessionTimeZone():
+                timezone = self._wrapped._conf.sessionLocalTimeZone()
+            else:
+                timezone = None
+
+            # If no schema supplied by user then get the names of columns only
+            if schema is None:
+                schema = [str(x) if not isinstance(x, basestring) else
+                          (x.encode('utf-8') if not isinstance(x, str) else x)
+                          for x in data.columns]
+
+            if self._wrapped._conf.arrowEnabled() and len(data) > 0:
+                try:
+                    return self._create_from_pandas_with_arrow(data, schema, timezone)
+                except Exception as e:
+                    from pyspark.util import _exception_message
+
+                    if self._wrapped._conf.arrowFallbackEnabled():
+                        msg = (
+                            "createDataFrame attempted Arrow optimization because "
+                            "'spark.sql.execution.arrow.enabled' is set to true; however, "
+                            "failed by the reason below:\n  %s\n"
+                            "Attempting non-optimization as "
+                            "'spark.sql.execution.arrow.fallback.enabled' is set to "
+                            "true." % _exception_message(e))
+                        warnings.warn(msg)
+                    else:
+                        msg = (
+                            "createDataFrame attempted Arrow optimization because "
+                            "'spark.sql.execution.arrow.enabled' is set to true, but has reached "
+                            "the error below and will not continue because automatic fallback "
+                            "with 'spark.sql.execution.arrow.fallback.enabled' has been set to "
+                            "false.\n  %s" % _exception_message(e))
+                        warnings.warn(msg)
+                        raise
+            data = self._convert_from_pandas(data, schema, timezone)
+
+        if isinstance(schema, StructType):
+            verify_func = _make_type_verifier(schema) if verifySchema else lambda _: True
+
+            def prepare(obj):
+                verify_func(obj)
+                return obj
+        elif isinstance(schema, DataType):
+            dataType = schema
+            schema = StructType().add("value", schema)
+
+            verify_func = _make_type_verifier(
+                dataType, name="field value") if verifySchema else lambda _: True
+
+            def prepare(obj):
+                verify_func(obj)
+                return obj,
+        else:
+            prepare = lambda obj: obj
+
+        if isinstance(data, RDD):
+            rdd, schema = self._createFromRDD(data.map(prepare), schema, samplingRatio)
+        else:
+            rdd, schema = self._createFromLocal(map(prepare, data), schema)
+        jrdd = self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd())
+        jdf = self._jsparkSession.applySchemaToPythonRDD(jrdd.rdd(), schema.json())
+        df = DataFrame(jdf, self._wrapped)
+        df._schema = schema
+        return df
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def sql(self, sqlQuery):
+        """Returns a :class:`DataFrame` representing the result of the given query.
+
+        :return: :class:`DataFrame`
+
+        >>> df.createOrReplaceTempView("table1")
+        >>> df2 = spark.sql("SELECT field1 AS f1, field2 as f2 from table1")
+        >>> df2.collect()
+        [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
+        """
+        return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
+
+    @since(2.0)
+    def table(self, tableName):
+        """Returns the specified table as a :class:`DataFrame`.
+
+        :return: :class:`DataFrame`
+
+        >>> df.createOrReplaceTempView("table1")
+        >>> df2 = spark.table("table1")
+        >>> sorted(df.collect()) == sorted(df2.collect())
+        True
+        """
+        return DataFrame(self._jsparkSession.table(tableName), self._wrapped)
+
+    @property
+    @since(2.0)
+    def read(self):
+        """
+        Returns a :class:`DataFrameReader` that can be used to read data
+        in as a :class:`DataFrame`.
+
+        :return: :class:`DataFrameReader`
+        """
+        return DataFrameReader(self._wrapped)
+
+    @property
+    @since(2.0)
+    def readStream(self):
+        """
+        Returns a :class:`DataStreamReader` that can be used to read data streams
+        as a streaming :class:`DataFrame`.
+
+        .. note:: Evolving.
+
+        :return: :class:`DataStreamReader`
+        """
+        return DataStreamReader(self._wrapped)
+
+    @property
+    @since(2.0)
+    def streams(self):
+        """Returns a :class:`StreamingQueryManager` that allows managing all the
+        :class:`StreamingQuery` instances active on `this` context.
+
+        .. note:: Evolving.
+
+        :return: :class:`StreamingQueryManager`
+        """
+        from pyspark.sql.streaming import StreamingQueryManager
+        return StreamingQueryManager(self._jsparkSession.streams())
+
+    @since(2.0)
+    def stop(self):
+        """Stop the underlying :class:`SparkContext`.
+        """
+        self._sc.stop()
+        # We should clean the default session up. See SPARK-23228.
+        self._jvm.SparkSession.clearDefaultSession()
+        SparkSession._instantiatedSession = None
+
+    @since(2.0)
+    def __enter__(self):
+        """
+        Enable 'with SparkSession.builder.(...).getOrCreate() as session: app' syntax.
+        """
+        return self
+
+    @since(2.0)
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """
+        Enable 'with SparkSession.builder.(...).getOrCreate() as session: app' syntax.
+
+        Specifically stop the SparkSession on exit of the with block.
+        """
+        self.stop()
+
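+    # Context-manager sketch: the underlying SparkContext is stopped when the
+    # ``with`` block exits, normally or via an exception.
+    #
+    #   with SparkSession.builder.master("local[2]").getOrCreate() as session:
+    #       session.range(10).count()    # 10
+    #   # the session is stopped here; creating DataFrames now would fail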
+
+def _test():
+    import os
+    import doctest
+    from pyspark.context import SparkContext
+    from pyspark.sql import Row
+    import pyspark.sql.session
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.sql.session.__dict__.copy()
+    sc = SparkContext('local[4]', 'PythonTest')
+    globs['sc'] = sc
+    globs['spark'] = SparkSession(sc)
+    globs['rdd'] = rdd = sc.parallelize(
+        [Row(field1=1, field2="row1"),
+         Row(field1=2, field2="row2"),
+         Row(field1=3, field2="row3")])
+    globs['df'] = rdd.toDF()
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.session, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/streaming.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/streaming.py
new file mode 100644
index 0000000000000000000000000000000000000000..b18453b2a4f96c7e5f19cce76940905fae93cd5d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/streaming.py
@@ -0,0 +1,1145 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import json
+
+if sys.version >= '3':
+    basestring = str
+
+from py4j.java_gateway import java_import
+
+from pyspark import since, keyword_only
+from pyspark.rdd import ignore_unicode_prefix
+from pyspark.sql.column import _to_seq
+from pyspark.sql.readwriter import OptionUtils, to_str
+from pyspark.sql.types import *
+from pyspark.sql.utils import ForeachBatchFunction, StreamingQueryException
+
+__all__ = ["StreamingQuery", "StreamingQueryManager", "DataStreamReader", "DataStreamWriter"]
+
+
+class StreamingQuery(object):
+    """
+    A handle to a query that is executing continuously in the background as new data arrives.
+    All these methods are thread-safe.
+
+    .. note:: Evolving
+
+    .. versionadded:: 2.0
+    """
+
+    def __init__(self, jsq):
+        self._jsq = jsq
+
+    @property
+    @since(2.0)
+    def id(self):
+        """Returns the unique id of this query that persists across restarts from checkpoint data.
+        That is, this id is generated when a query is started for the first time, and
+        will be the same every time it is restarted from checkpoint data.
+        There can only be one query with the same id active in a Spark cluster.
+        Also see, `runId`.
+        """
+        return self._jsq.id().toString()
+
+    @property
+    @since(2.1)
+    def runId(self):
+        """Returns the unique id of this query that does not persist across restarts. That is, every
+        query that is started (or restarted from checkpoint) will have a different runId.
+        """
+        return self._jsq.runId().toString()
+
+    @property
+    @since(2.0)
+    def name(self):
+        """Returns the user-specified name of the query, or null if not specified.
+        This name can be specified in the `org.apache.spark.sql.streaming.DataStreamWriter`
+        as `dataframe.writeStream.queryName("query").start()`.
+        This name, if set, must be unique across all active queries.
+        """
+        return self._jsq.name()
+
+    @property
+    @since(2.0)
+    def isActive(self):
+        """Whether this streaming query is currently active or not.
+        """
+        return self._jsq.isActive()
+
+    @since(2.0)
+    def awaitTermination(self, timeout=None):
+        """Waits for the termination of `this` query, either by :func:`query.stop()` or by an
+        exception. If the query has terminated with an exception, then the exception will be thrown.
+        If `timeout` is set, it returns whether the query has terminated or not within the
+        `timeout` seconds.
+
+        If the query has terminated, then all subsequent calls to this method will either return
+        immediately (if the query was terminated by :func:`stop()`), or throw the exception
+        immediately (if the query has terminated with exception).
+
+        throws :class:`StreamingQueryException`, if `this` query has terminated with an exception
+        """
+        if timeout is not None:
+            if not isinstance(timeout, (int, float)) or timeout < 0:
+                raise ValueError("timeout must be a positive integer or float. Got %s" % timeout)
+            return self._jsq.awaitTermination(int(timeout * 1000))
+        else:
+            return self._jsq.awaitTermination()
+
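+    # Usage sketch (``query`` stands for any started StreamingQuery):
+    #
+    #   query.awaitTermination(timeout=5)   # returns True/False within ~5 seconds
+    #   query.awaitTermination()            # blocks until stop() or a failure
+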
+    @property
+    @since(2.1)
+    def status(self):
+        """
+        Returns the current status of the query.
+        """
+        return json.loads(self._jsq.status().json())
+
+    @property
+    @since(2.1)
+    def recentProgress(self):
+        """Returns an array of the most recent [[StreamingQueryProgress]] updates for this query.
+        The number of progress updates retained for each stream is configured by Spark session
+        configuration `spark.sql.streaming.numRecentProgressUpdates`.
+        """
+        return [json.loads(p.json()) for p in self._jsq.recentProgress()]
+
+    @property
+    @since(2.1)
+    def lastProgress(self):
+        """
+        Returns the most recent :class:`StreamingQueryProgress` update of this streaming query,
+        or None if there were no progress updates.
+
+        :return: a map
+        """
+        lastProgress = self._jsq.lastProgress()
+        if lastProgress:
+            return json.loads(lastProgress.json())
+        else:
+            return None
+
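+    # Usage sketch (``query`` stands for any started StreamingQuery); the exact
+    # keys depend on the source/sink, but these are commonly present:
+    #
+    #   p = query.lastProgress
+    #   p is None or ("batchId" in p and "numInputRows" in p)   # True
+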
+    @since(2.0)
+    def processAllAvailable(self):
+        """Blocks until all available data in the source has been processed and committed to the
+        sink. This method is intended for testing.
+
+        .. note:: In the case of continually arriving data, this method may block forever.
+            Additionally, this method is only guaranteed to block until data that has been
+            synchronously appended to a stream source prior to invocation has been processed
+            (i.e. `getOffset` must immediately reflect the addition).
+        """
+        return self._jsq.processAllAvailable()
+
+    @since(2.0)
+    def stop(self):
+        """Stop this streaming query.
+        """
+        self._jsq.stop()
+
+    @since(2.1)
+    def explain(self, extended=False):
+        """Prints the (logical and physical) plans to the console for debugging purpose.
+
+        :param extended: boolean, default ``False``. If ``False``, prints only the physical plan.
+
+        >>> sq = sdf.writeStream.format('memory').queryName('query_explain').start()
+        >>> sq.processAllAvailable() # Wait a bit to generate the runtime plans.
+        >>> sq.explain()
+        == Physical Plan ==
+        ...
+        >>> sq.explain(True)
+        == Parsed Logical Plan ==
+        ...
+        == Analyzed Logical Plan ==
+        ...
+        == Optimized Logical Plan ==
+        ...
+        == Physical Plan ==
+        ...
+        >>> sq.stop()
+        """
+        # Cannot call `_jsq.explain(...)` because it will print in the JVM process.
+        # We should print it in the Python process.
+        print(self._jsq.explainInternal(extended))
+
+    @since(2.1)
+    def exception(self):
+        """
+        :return: the StreamingQueryException if the query was terminated by an exception, or None.
+        """
+        if self._jsq.exception().isDefined():
+            je = self._jsq.exception().get()
+            msg = je.toString().split(': ', 1)[1]  # Drop the Java StreamingQueryException type info
+            stackTrace = '\n\t at '.join(map(lambda x: x.toString(), je.getStackTrace()))
+            return StreamingQueryException(msg, stackTrace)
+        else:
+            return None
+
+
+class StreamingQueryManager(object):
+    """A class to manage all the :class:`StreamingQuery` StreamingQueries active.
+
+    .. note:: Evolving
+
+    .. versionadded:: 2.0
+    """
+
+    def __init__(self, jsqm):
+        self._jsqm = jsqm
+
+    @property
+    @ignore_unicode_prefix
+    @since(2.0)
+    def active(self):
+        """Returns a list of active queries associated with this SQLContext
+
+        >>> sq = sdf.writeStream.format('memory').queryName('this_query').start()
+        >>> sqm = spark.streams
+        >>> # get the list of active streaming queries
+        >>> [q.name for q in sqm.active]
+        [u'this_query']
+        >>> sq.stop()
+        """
+        return [StreamingQuery(jsq) for jsq in self._jsqm.active()]
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def get(self, id):
+        """Returns an active query from this SQLContext or throws exception if an active query
+        with this name doesn't exist.
+
+        >>> sq = sdf.writeStream.format('memory').queryName('this_query').start()
+        >>> sq.name
+        u'this_query'
+        >>> sq = spark.streams.get(sq.id)
+        >>> sq.isActive
+        True
+        >>> sq = sqlContext.streams.get(sq.id)
+        >>> sq.isActive
+        True
+        >>> sq.stop()
+        """
+        return StreamingQuery(self._jsqm.get(id))
+
+    @since(2.0)
+    def awaitAnyTermination(self, timeout=None):
+        """Wait until any of the queries on the associated SQLContext has terminated since the
+        creation of the context, or since :func:`resetTerminated()` was called. If any query was
+        terminated with an exception, then the exception will be thrown.
+        If `timeout` is set, it returns whether the query has terminated or not within the
+        `timeout` seconds.
+
+        If a query has terminated, then subsequent calls to :func:`awaitAnyTermination()` will
+        either return immediately (if the query was terminated by :func:`query.stop()`),
+        or throw the exception immediately (if the query was terminated with exception). Use
+        :func:`resetTerminated()` to clear past terminations and wait for new terminations.
+
+        In the case where multiple queries have terminated since :func:`resetTerminated()`
+        was called, if any query has terminated with an exception, then :func:`awaitAnyTermination()`
+        will throw any of the exceptions. To correctly document exceptions across multiple
+        queries, users need to stop all of them after any of them terminates with an exception, and
+        then check `query.exception()` for each query.
+
+        throws :class:`StreamingQueryException`, if `this` query has terminated with an exception
+        """
+        if timeout is not None:
+            if not isinstance(timeout, (int, float)) or timeout < 0:
+                raise ValueError("timeout must be a positive integer or float. Got %s" % timeout)
+            return self._jsqm.awaitAnyTermination(int(timeout * 1000))
+        else:
+            return self._jsqm.awaitAnyTermination()
+
+    @since(2.0)
+    def resetTerminated(self):
+        """Forget about past terminated queries so that :func:`awaitAnyTermination()` can be used
+        again to wait for new terminations.
+
+        >>> spark.streams.resetTerminated()
+        """
+        self._jsqm.resetTerminated()
+
+
+class DataStreamReader(OptionUtils):
+    """
+    Interface used to load a streaming :class:`DataFrame` from external storage systems
+    (e.g. file systems, key-value stores, etc). Use :func:`spark.readStream`
+    to access this.
+
+    .. note:: Evolving.
+
+    .. versionadded:: 2.0
+    """
+
+    def __init__(self, spark):
+        self._jreader = spark._ssql_ctx.readStream()
+        self._spark = spark
+
+    def _df(self, jdf):
+        from pyspark.sql.dataframe import DataFrame
+        return DataFrame(jdf, self._spark)
+
+    @since(2.0)
+    def format(self, source):
+        """Specifies the input data source format.
+
+        .. note:: Evolving.
+
+        :param source: string, name of the data source, e.g. 'json', 'parquet'.
+
+        >>> s = spark.readStream.format("text")
+        """
+        self._jreader = self._jreader.format(source)
+        return self
+
+    @since(2.0)
+    def schema(self, schema):
+        """Specifies the input schema.
+
+        Some data sources (e.g. JSON) can infer the input schema automatically from data.
+        By specifying the schema here, the underlying data source can skip the schema
+        inference step, and thus speed up data loading.
+
+        .. note:: Evolving.
+
+        :param schema: a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string
+                       (For example ``col0 INT, col1 DOUBLE``).
+
+        >>> s = spark.readStream.schema(sdf_schema)
+        >>> s = spark.readStream.schema("col0 INT, col1 DOUBLE")
+        """
+        from pyspark.sql import SparkSession
+        spark = SparkSession.builder.getOrCreate()
+        if isinstance(schema, StructType):
+            jschema = spark._jsparkSession.parseDataType(schema.json())
+            self._jreader = self._jreader.schema(jschema)
+        elif isinstance(schema, basestring):
+            self._jreader = self._jreader.schema(schema)
+        else:
+            raise TypeError("schema should be StructType or string")
+        return self
+
+    @since(2.0)
+    def option(self, key, value):
+        """Adds an input option for the underlying data source.
+
+        You can set the following option(s) for reading files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to parse timestamps
+                in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
+
+        .. note:: Evolving.
+
+        >>> s = spark.readStream.option("x", 1)
+        """
+        self._jreader = self._jreader.option(key, to_str(value))
+        return self
+
+    @since(2.0)
+    def options(self, **options):
+        """Adds input options for the underlying data source.
+
+        You can set the following option(s) for reading files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to parse timestamps
+                in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
+
+        .. note:: Evolving.
+
+        >>> s = spark.readStream.options(x="1", y=2)
+        """
+        for k in options:
+            self._jreader = self._jreader.option(k, to_str(options[k]))
+        return self
+
+    @since(2.0)
+    def load(self, path=None, format=None, schema=None, **options):
+        """Loads a data stream from a data source and returns it as a :class`DataFrame`.
+
+        .. note:: Evolving.
+
+        :param path: optional string for file-system backed data sources.
+        :param format: optional string for format of the data source. Default to 'parquet'.
+        :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema
+                       or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
+        :param options: all other string options
+
+        >>> json_sdf = spark.readStream.format("json") \\
+        ...     .schema(sdf_schema) \\
+        ...     .load(tempfile.mkdtemp())
+        >>> json_sdf.isStreaming
+        True
+        >>> json_sdf.schema == sdf_schema
+        True
+        """
+        if format is not None:
+            self.format(format)
+        if schema is not None:
+            self.schema(schema)
+        self.options(**options)
+        if path is not None:
+            if type(path) != str or len(path.strip()) == 0:
+                raise ValueError("If the path is provided for stream, it needs to be a " +
+                                 "non-empty string. List of paths are not supported.")
+            return self._df(self._jreader.load(path))
+        else:
+            return self._df(self._jreader.load())
+
+    @since(2.0)
+    def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
+             allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None,
+             allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None,
+             mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None,
+             multiLine=None, allowUnquotedControlChars=None, lineSep=None):
+        """
+        Loads a JSON file stream and returns the results as a :class:`DataFrame`.
+
+        `JSON Lines <http://jsonlines.org/>`_ (newline-delimited JSON) is supported by default.
+        For JSON (one record per file), set the ``multiLine`` parameter to ``true``.
+
+        If the ``schema`` parameter is not specified, this function goes
+        through the input once to determine the input schema.
+
+        .. note:: Evolving.
+
+        :param path: string represents path to the JSON dataset,
+                     or RDD of Strings storing JSON objects.
+        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema
+                       or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
+        :param primitivesAsString: infers all primitive values as a string type. If None is set,
+                                   it uses the default value, ``false``.
+        :param prefersDecimal: infers all floating-point values as a decimal type. If the values
+                               do not fit in decimal, then it infers them as doubles. If None is
+                               set, it uses the default value, ``false``.
+        :param allowComments: ignores Java/C++ style comment in JSON records. If None is set,
+                              it uses the default value, ``false``.
+        :param allowUnquotedFieldNames: allows unquoted JSON field names. If None is set,
+                                        it uses the default value, ``false``.
+        :param allowSingleQuotes: allows single quotes in addition to double quotes. If None is
+                                        set, it uses the default value, ``true``.
+        :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is
+                                        set, it uses the default value, ``false``.
+        :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character
+                                                   using backslash quoting mechanism. If None is
+                                                   set, it uses the default value, ``false``.
+        :param mode: allows a mode for dealing with corrupt records during parsing. If None is
+                     set, it uses the default value, ``PERMISSIVE``.
+
+                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                  into a field configured by ``columnNameOfCorruptRecord``, and sets other \
+                  fields to ``null``. To keep corrupt records, a user can set a string type \
+                  field named ``columnNameOfCorruptRecord`` in a user-defined schema. If a \
+                  schema does not have the field, it drops corrupt records during parsing. \
+                  When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \
+                  field in an output schema.
+                *  ``DROPMALFORMED`` : ignores the whole corrupted records.
+                *  ``FAILFAST`` : throws an exception when it meets corrupted records.
+
+        :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
+                                          created by ``PERMISSIVE`` mode. This overrides
+                                          ``spark.sql.columnNameOfCorruptRecord``. If None is set,
+                                          it uses the value specified in
+                                          ``spark.sql.columnNameOfCorruptRecord``.
+        :param dateFormat: sets the string that indicates a date format. Custom date formats
+                           follow the formats at ``java.text.SimpleDateFormat``. This
+                           applies to date type. If None is set, it uses the
+                           default value, ``yyyy-MM-dd``.
+        :param timestampFormat: sets the string that indicates a timestamp format. Custom date
+                                formats follow the formats at ``java.text.SimpleDateFormat``.
+                                This applies to timestamp type. If None is set, it uses the
+                                default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.
+        :param multiLine: parse one record, which may span multiple lines, per file. If None is
+                          set, it uses the default value, ``false``.
+        :param allowUnquotedControlChars: allows JSON Strings to contain unquoted control
+                                          characters (ASCII characters with value less than 32,
+                                          including tab and line feed characters) or not.
+        :param lineSep: defines the line separator that should be used for parsing. If None is
+                        set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
+
+        >>> json_sdf = spark.readStream.json(tempfile.mkdtemp(), schema = sdf_schema)
+        >>> json_sdf.isStreaming
+        True
+        >>> json_sdf.schema == sdf_schema
+        True
+        """
+        self._set_opts(
+            schema=schema, primitivesAsString=primitivesAsString, prefersDecimal=prefersDecimal,
+            allowComments=allowComments, allowUnquotedFieldNames=allowUnquotedFieldNames,
+            allowSingleQuotes=allowSingleQuotes, allowNumericLeadingZero=allowNumericLeadingZero,
+            allowBackslashEscapingAnyCharacter=allowBackslashEscapingAnyCharacter,
+            mode=mode, columnNameOfCorruptRecord=columnNameOfCorruptRecord, dateFormat=dateFormat,
+            timestampFormat=timestampFormat, multiLine=multiLine,
+            allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep)
+        if isinstance(path, basestring):
+            return self._df(self._jreader.json(path))
+        else:
+            raise TypeError("path can be only a single string")
+
+    @since(2.3)
+    def orc(self, path):
+        """Loads a ORC file stream, returning the result as a :class:`DataFrame`.
+
+        .. note:: Evolving.
+
+        >>> orc_sdf = spark.readStream.schema(sdf_schema).orc(tempfile.mkdtemp())
+        >>> orc_sdf.isStreaming
+        True
+        >>> orc_sdf.schema == sdf_schema
+        True
+        """
+        if isinstance(path, basestring):
+            return self._df(self._jreader.orc(path))
+        else:
+            raise TypeError("path can be only a single string")
+
+    @since(2.0)
+    def parquet(self, path):
+        """Loads a Parquet file stream, returning the result as a :class:`DataFrame`.
+
+        You can set the following Parquet-specific option(s) for reading Parquet files:
+            * ``mergeSchema``: sets whether we should merge schemas collected from all \
+                Parquet part-files. This will override ``spark.sql.parquet.mergeSchema``. \
+                The default value is specified in ``spark.sql.parquet.mergeSchema``.
+
+        .. note:: Evolving.
+
+        >>> parquet_sdf = spark.readStream.schema(sdf_schema).parquet(tempfile.mkdtemp())
+        >>> parquet_sdf.isStreaming
+        True
+        >>> parquet_sdf.schema == sdf_schema
+        True
+        """
+        if isinstance(path, basestring):
+            return self._df(self._jreader.parquet(path))
+        else:
+            raise TypeError("path can be only a single string")
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def text(self, path, wholetext=False, lineSep=None):
+        """
+        Loads a text file stream and returns a :class:`DataFrame` whose schema starts with a
+        string column named "value", and followed by partitioned columns if there
+        are any.
+
+        By default, each line in the text file is a new row in the resulting DataFrame.
+
+        .. note:: Evolving.
+
+        :param path: string, or list of strings, for input path(s).
+        :param wholetext: if true, read each file from input path(s) as a single row.
+        :param lineSep: defines the line separator that should be used for parsing. If None is
+                        set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
+
+        >>> text_sdf = spark.readStream.text(tempfile.mkdtemp())
+        >>> text_sdf.isStreaming
+        True
+        >>> "value" in str(text_sdf.schema)
+        True
+        """
+        self._set_opts(wholetext=wholetext, lineSep=lineSep)
+        if isinstance(path, basestring):
+            return self._df(self._jreader.text(path))
+        else:
+            raise TypeError("path can be only a single string")
+
+    @since(2.0)
+    def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=None,
+            comment=None, header=None, inferSchema=None, ignoreLeadingWhiteSpace=None,
+            ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None,
+            negativeInf=None, dateFormat=None, timestampFormat=None, maxColumns=None,
+            maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None,
+            columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None,
+            enforceSchema=None, emptyValue=None):
+        r"""Loads a CSV file stream and returns the result as a :class:`DataFrame`.
+
+        This function will go through the input once to determine the input schema if
+        ``inferSchema`` is enabled. To avoid going through the entire data once, disable
+        ``inferSchema`` option or specify the schema explicitly using ``schema``.
+
+        .. note:: Evolving.
+
+        :param path: string, or list of strings, for input path(s).
+        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema
+                       or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
+        :param sep: sets a single character as a separator for each field and value.
+                    If None is set, it uses the default value, ``,``.
+        :param encoding: decodes the CSV files by the given encoding type. If None is set,
+                         it uses the default value, ``UTF-8``.
+        :param quote: sets a single character used for escaping quoted values where the
+                      separator can be part of the value. If None is set, it uses the default
+                      value, ``"``. If you would like to turn off quotations, you need to set an
+                      empty string.
+        :param escape: sets a single character used for escaping quotes inside an already
+                       quoted value. If None is set, it uses the default value, ``\``.
+        :param comment: sets a single character used for skipping lines beginning with this
+                        character. By default (None), it is disabled.
+        :param header: uses the first line as names of columns. If None is set, it uses the
+                       default value, ``false``.
+        :param inferSchema: infers the input schema automatically from data. It requires one extra
+                       pass over the data. If None is set, it uses the default value, ``false``.
+        :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be
+                              forcibly applied to datasource files, and headers in CSV files will be
+                              ignored. If the option is set to ``false``, the schema will be
+                              validated against all headers in CSV files or the first header in RDD
+                              if the ``header`` option is set to ``true``. Field names in the schema
+                              and column names in CSV headers are checked by their positions
+                              taking into account ``spark.sql.caseSensitive``. If None is set,
+                              ``true`` is used by default. Though the default value is ``true``,
+                              it is recommended to disable the ``enforceSchema`` option
+                              to avoid incorrect results.
+        :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from
+                                        values being read should be skipped. If None is set, it
+                                        uses the default value, ``false``.
+        :param ignoreTrailingWhiteSpace: a flag indicating whether or not trailing whitespaces from
+                                         values being read should be skipped. If None is set, it
+                                         uses the default value, ``false``.
+        :param nullValue: sets the string representation of a null value. If None is set, it uses
+                          the default value, empty string. Since 2.0.1, this ``nullValue`` param
+                          applies to all supported types including the string type.
+        :param nanValue: sets the string representation of a non-number value. If None is set, it
+                         uses the default value, ``NaN``.
+        :param positiveInf: sets the string representation of a positive infinity value. If None
+                            is set, it uses the default value, ``Inf``.
+        :param negativeInf: sets the string representation of a negative infinity value. If None
+                            is set, it uses the default value, ``Inf``.
+        :param dateFormat: sets the string that indicates a date format. Custom date formats
+                           follow the formats at ``java.text.SimpleDateFormat``. This
+                           applies to date type. If None is set, it uses the
+                           default value, ``yyyy-MM-dd``.
+        :param timestampFormat: sets the string that indicates a timestamp format. Custom date
+                                formats follow the formats at ``java.text.SimpleDateFormat``.
+                                This applies to timestamp type. If None is set, it uses the
+                                default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.
+        :param maxColumns: defines a hard limit of how many columns a record can have. If None is
+                           set, it uses the default value, ``20480``.
+        :param maxCharsPerColumn: defines the maximum number of characters allowed for any given
+                                  value being read. If None is set, it uses the default value,
+                                  ``-1`` meaning unlimited length.
+        :param maxMalformedLogPerPartition: this parameter is no longer used since Spark 2.2.0.
+                                            If specified, it is ignored.
+        :param mode: allows a mode for dealing with corrupt records during parsing. If None is
+                     set, it uses the default value, ``PERMISSIVE``.
+
+                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                  into a field configured by ``columnNameOfCorruptRecord``, and sets other \
+                  fields to ``null``. To keep corrupt records, a user can set a string type \
+                  field named ``columnNameOfCorruptRecord`` in a user-defined schema. If a \
+                  schema does not have the field, it drops corrupt records during parsing. \
+                  A record with less/more tokens than schema is not a corrupted record to CSV. \
+                  When it meets a record having fewer tokens than the length of the schema, \
+                  sets ``null`` to extra fields. When the record has more tokens than the \
+                  length of the schema, it drops extra tokens.
+                * ``DROPMALFORMED`` : ignores the whole corrupted records.
+                * ``FAILFAST`` : throws an exception when it meets corrupted records.
+
+        :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
+                                          created by ``PERMISSIVE`` mode. This overrides
+                                          ``spark.sql.columnNameOfCorruptRecord``. If None is set,
+                                          it uses the value specified in
+                                          ``spark.sql.columnNameOfCorruptRecord``.
+        :param multiLine: parse one record, which may span multiple lines. If None is
+                          set, it uses the default value, ``false``.
+        :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for
+                                          the quote character. If None is set, the default value is
+                                          the escape character when escape and quote characters are
+                                          different, ``\0`` otherwise.
+        :param emptyValue: sets the string representation of an empty value. If None is set, it uses
+                           the default value, empty string.
+
+        >>> csv_sdf = spark.readStream.csv(tempfile.mkdtemp(), schema=sdf_schema)
+        >>> csv_sdf.isStreaming
+        True
+        >>> csv_sdf.schema == sdf_schema
+        True
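+
+        A sketch of keeping corrupt records in ``PERMISSIVE`` mode; the path and the schema below
+        are illustrative placeholders, not part of the test data:
+
+        >>> corrupt_schema = StructType([  # doctest: +SKIP
+        ...     StructField("name", StringType(), True),
+        ...     StructField("age", IntegerType(), True),
+        ...     StructField("_corrupt_record", StringType(), True)])
+        >>> corrupt_sdf = spark.readStream.csv(  # doctest: +SKIP
+        ...     "/data/csv", schema=corrupt_schema, mode="PERMISSIVE",
+        ...     columnNameOfCorruptRecord="_corrupt_record")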
+        """
+        self._set_opts(
+            schema=schema, sep=sep, encoding=encoding, quote=quote, escape=escape, comment=comment,
+            header=header, inferSchema=inferSchema, ignoreLeadingWhiteSpace=ignoreLeadingWhiteSpace,
+            ignoreTrailingWhiteSpace=ignoreTrailingWhiteSpace, nullValue=nullValue,
+            nanValue=nanValue, positiveInf=positiveInf, negativeInf=negativeInf,
+            dateFormat=dateFormat, timestampFormat=timestampFormat, maxColumns=maxColumns,
+            maxCharsPerColumn=maxCharsPerColumn,
+            maxMalformedLogPerPartition=maxMalformedLogPerPartition, mode=mode,
+            columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine,
+            charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, enforceSchema=enforceSchema,
+            emptyValue=emptyValue)
+        if isinstance(path, basestring):
+            return self._df(self._jreader.csv(path))
+        else:
+            raise TypeError("path can be only a single string")
+
+
+class DataStreamWriter(object):
+    """
+    Interface used to write a streaming :class:`DataFrame` to external storage systems
+    (e.g. file systems, key-value stores, etc). Use :func:`DataFrame.writeStream`
+    to access this.
+
+    .. note:: Evolving.
+
+    .. versionadded:: 2.0
+    """
+
+    def __init__(self, df):
+        self._df = df
+        self._spark = df.sql_ctx
+        self._jwrite = df._jdf.writeStream()
+
+    def _sq(self, jsq):
+        from pyspark.sql.streaming import StreamingQuery
+        return StreamingQuery(jsq)
+
+    @since(2.0)
+    def outputMode(self, outputMode):
+        """Specifies how data of a streaming DataFrame/Dataset is written to a streaming sink.
+
+        Options include:
+
+        * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to
+          the sink.
+        * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the sink
+          every time there are some updates.
+        * `update`: Only the rows that were updated in the streaming DataFrame/Dataset will be
+          written to the sink every time there are some updates. If the query doesn't contain
+          aggregations, it will be equivalent to `append` mode.
+
+        .. note:: Evolving.
+
+        >>> writer = sdf.writeStream.outputMode('append')
+        """
+        if not outputMode or type(outputMode) != str or len(outputMode.strip()) == 0:
+            raise ValueError('The output mode must be a non-empty string. Got: %s' % outputMode)
+        self._jwrite = self._jwrite.outputMode(outputMode)
+        return self
+
+    @since(2.0)
+    def format(self, source):
+        """Specifies the underlying output data source.
+
+        .. note:: Evolving.
+
+        :param source: string, name of the data source, e.g. 'json', 'parquet'.
+
+        >>> writer = sdf.writeStream.format('json')
+        """
+        self._jwrite = self._jwrite.format(source)
+        return self
+
+    @since(2.0)
+    def option(self, key, value):
+        """Adds an output option for the underlying data source.
+
+        You can set the following option(s) for writing files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to format
+                timestamps in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
+
+        .. note:: Evolving.
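+
+        A minimal illustration (the checkpoint path below is a placeholder):
+
+        >>> writer = sdf.writeStream.option(  # doctest: +SKIP
+        ...     "checkpointLocation", "/tmp/checkpoint")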
+        """
+        self._jwrite = self._jwrite.option(key, to_str(value))
+        return self
+
+    @since(2.0)
+    def options(self, **options):
+        """Adds output options for the underlying data source.
+
+        You can set the following option(s) for writing files:
+            * ``timeZone``: sets the string that indicates a timezone to be used to format
+                timestamps in the JSON/CSV datasources or partition values.
+                If it isn't set, it uses the default value, session local timezone.
+
+        .. note:: Evolving.
+        """
+        for k in options:
+            self._jwrite = self._jwrite.option(k, to_str(options[k]))
+        return self
+
+    @since(2.0)
+    def partitionBy(self, *cols):
+        """Partitions the output by the given columns on the file system.
+
+        If specified, the output is laid out on the file system similar
+        to Hive's partitioning scheme.
+
+        .. note:: Evolving.
+
+        :param cols: name of columns
+
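+        A sketch (the column names below are placeholders for this streaming DataFrame):
+
+        >>> writer = sdf.writeStream.partitionBy('year', 'month')  # doctest: +SKIP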
+        """
+        if len(cols) == 1 and isinstance(cols[0], (list, tuple)):
+            cols = cols[0]
+        self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols))
+        return self
+
+    @since(2.0)
+    def queryName(self, queryName):
+        """Specifies the name of the :class:`StreamingQuery` that can be started with
+        :func:`start`. This name must be unique among all the currently active queries
+        in the associated SparkSession.
+
+        .. note:: Evolving.
+
+        :param queryName: unique name for the query
+
+        >>> writer = sdf.writeStream.queryName('streaming_query')
+        """
+        if not queryName or type(queryName) != str or len(queryName.strip()) == 0:
+            raise ValueError('The queryName must be a non-empty string. Got: %s' % queryName)
+        self._jwrite = self._jwrite.queryName(queryName)
+        return self
+
+    @keyword_only
+    @since(2.0)
+    def trigger(self, processingTime=None, once=None, continuous=None):
+        """Set the trigger for the stream query. If this is not set it will run the query as fast
+        as possible, which is equivalent to setting the trigger to ``processingTime='0 seconds'``.
+
+        .. note:: Evolving.
+
+        :param processingTime: a processing time interval as a string, e.g. '5 seconds', '1 minute'.
+                               Set a trigger that runs a query periodically based on the processing
+                               time. Only one trigger can be set.
+        :param once: if set to True, set a trigger that processes only one batch of data in a
+                     streaming query then terminates the query. Only one trigger can be set.
+        :param continuous: a time interval as a string, e.g. '5 seconds', '1 minute'.
+                           Set a trigger that runs a continuous query with a given checkpoint
+                           interval. Only one trigger can be set.
+
+        >>> # trigger the query for execution every 5 seconds
+        >>> writer = sdf.writeStream.trigger(processingTime='5 seconds')
+        >>> # trigger the query for just one batch of data
+        >>> writer = sdf.writeStream.trigger(once=True)
+        >>> # trigger the query to run continuously with a checkpoint interval of 5 seconds
+        >>> writer = sdf.writeStream.trigger(continuous='5 seconds')
+        """
+        params = [processingTime, once, continuous]
+
+        if params.count(None) == 3:
+            raise ValueError('No trigger provided')
+        elif params.count(None) < 2:
+            raise ValueError('Multiple triggers not allowed.')
+
+        jTrigger = None
+        if processingTime is not None:
+            if type(processingTime) != str or len(processingTime.strip()) == 0:
+                raise ValueError('Value for processingTime must be a non empty string. Got: %s' %
+                                 processingTime)
+            interval = processingTime.strip()
+            jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.ProcessingTime(
+                interval)
+
+        elif once is not None:
+            if once is not True:
+                raise ValueError('Value for once must be True. Got: %s' % once)
+            jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.Once()
+
+        else:
+            if type(continuous) != str or len(continuous.strip()) == 0:
+                raise ValueError('Value for continuous must be a non empty string. Got: %s' %
+                                 continuous)
+            interval = continuous.strip()
+            jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.Continuous(
+                interval)
+
+        self._jwrite = self._jwrite.trigger(jTrigger)
+        return self
+
+    @since(2.4)
+    def foreach(self, f):
+        """
+        Sets the output of the streaming query to be processed using the provided writer ``f``.
+        This is often used to write the output of a streaming query to arbitrary storage systems.
+        The processing logic can be specified in two ways.
+
+        #. A **function** that takes a row as input.
+            This is a simple way to express your processing logic. Note that this does
+            not allow you to deduplicate generated data when failures cause reprocessing of
+            some input data. That would require you to specify the processing logic using the
+            second approach below.
+
+        #. An **object** with a ``process`` method and optional ``open`` and ``close`` methods.
+            The object can have the following methods.
+
+            * ``open(partition_id, epoch_id)``: *Optional* method that initializes the processing
+                (for example, open a connection, start a transaction, etc). Additionally, you can
+                use the `partition_id` and `epoch_id` to deduplicate regenerated data
+                (discussed later).
+
+            * ``process(row)``: *Non-optional* method that processes each :class:`Row`.
+
+            * ``close(error)``: *Optional* method that finalizes and cleans up (for example,
+                close connection, commit transaction, etc.) after all rows have been processed.
+
+            The object will be used by Spark in the following way.
+
+            * A single copy of this object is responsible for all the data generated by a
+                single task in a query. In other words, one instance is responsible for
+                processing one partition of the data generated in a distributed manner.
+
+            * This object must be serializable because each task will get a fresh
+                serialized-deserialized copy of the provided object. Hence, it is strongly
+                recommended that any initialization for writing data (e.g. opening a
+                connection or starting a transaction) is done after the `open(...)`
+                method has been called, which signifies that the task is ready to generate data.
+
+            * The lifecycle of the methods is as follows.
+
+                For each partition with ``partition_id``:
+
+                ... For each batch/epoch of streaming data with ``epoch_id``:
+
+                ....... Method ``open(partitionId, epochId)`` is called.
+
+                ....... If ``open(...)`` returns true, for each row in the partition and
+                        batch/epoch, method ``process(row)`` is called.
+
+                ....... Method ``close(errorOrNull)`` is called with error (if any) seen while
+                        processing rows.
+
+            Important points to note:
+
+            * The `partitionId` and `epochId` can be used to deduplicate generated data when
+                failures cause reprocessing of some input data. This depends on the execution
+                mode of the query. If the streaming query is being executed in the micro-batch
+                mode, then every partition represented by a unique tuple (partition_id, epoch_id)
+                is guaranteed to have the same data. Hence, (partition_id, epoch_id) can be used
+                to deduplicate and/or transactionally commit data and achieve exactly-once
+                guarantees. However, if the streaming query is being executed in the continuous
+                mode, then this guarantee does not hold and therefore (partition_id, epoch_id)
+                should not be used for deduplication.
+
+            * The ``close()`` method (if it exists) will be called if the ``open()`` method exists
+                and returns successfully (irrespective of the return value), except if the Python
+                process crashes in the middle.
+
+        .. note:: Evolving.
+
+        >>> # Print every row using a function
+        >>> def print_row(row):
+        ...     print(row)
+        ...
+        >>> writer = sdf.writeStream.foreach(print_row)
+        >>> # Print every row using a object with process() method
+        >>> class RowPrinter:
+        ...     def open(self, partition_id, epoch_id):
+        ...         print("Opened %d, %d" % (partition_id, epoch_id))
+        ...         return True
+        ...     def process(self, row):
+        ...         print(row)
+        ...     def close(self, error):
+        ...         print("Closed with error: %s" % str(error))
+        ...
+        >>> writer = sdf.writeStream.foreach(RowPrinter())
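+
+        As a further sketch only (not an executable test): the writer below deduplicates retried
+        batches by overwriting whatever was stored under ``(partition_id, epoch_id)``;
+        ``kv_store`` stands for a hypothetical idempotent key-value sink, not a PySpark API.
+
+        >>> class IdempotentWriter:  # doctest: +SKIP
+        ...     def open(self, partition_id, epoch_id):
+        ...         self.batch_key = (partition_id, epoch_id)  # identifies a retriable unit
+        ...         self.rows = []
+        ...         return True
+        ...     def process(self, row):
+        ...         self.rows.append(row.asDict())
+        ...     def close(self, error):
+        ...         if error is None:
+        ...             kv_store.put(self.batch_key, self.rows)  # hypothetical sink, overwrite = dedup
+        ...
+        >>> writer = sdf.writeStream.foreach(IdempotentWriter())  # doctest: +SKIP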
+        """
+
+        from pyspark.rdd import _wrap_function
+        from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
+        from pyspark.taskcontext import TaskContext
+
+        if callable(f):
+            # The provided object is a callable function that is supposed to be called on each row.
+            # Construct a function that takes an iterator and calls the provided function on each
+            # row.
+            def func_without_process(_, iterator):
+                for x in iterator:
+                    f(x)
+                return iter([])
+
+            func = func_without_process
+
+        else:
+            # The provided object is not a callable function. Then it is expected to have a
+            # 'process(row)' method, and optional 'open(partition_id, epoch_id)' and
+            # 'close(error)' methods.
+
+            if not hasattr(f, 'process'):
+                raise Exception("Provided object does not have a 'process' method")
+
+            if not callable(getattr(f, 'process')):
+                raise Exception("Attribute 'process' in provided object is not callable")
+
+            def doesMethodExist(method_name):
+                exists = hasattr(f, method_name)
+                if exists and not callable(getattr(f, method_name)):
+                    raise Exception(
+                        "Attribute '%s' in provided object is not callable" % method_name)
+                return exists
+
+            open_exists = doesMethodExist('open')
+            close_exists = doesMethodExist('close')
+
+            def func_with_open_process_close(partition_id, iterator):
+                epoch_id = TaskContext.get().getLocalProperty('streaming.sql.batchId')
+                if epoch_id:
+                    epoch_id = int(epoch_id)
+                else:
+                    raise Exception("Could not get batch id from TaskContext")
+
+                # Check if the data should be processed
+                should_process = True
+                if open_exists:
+                    should_process = f.open(partition_id, epoch_id)
+
+                error = None
+
+                try:
+                    if should_process:
+                        for x in iterator:
+                            f.process(x)
+                except Exception as ex:
+                    error = ex
+                finally:
+                    if close_exists:
+                        f.close(error)
+                    if error:
+                        raise error
+
+                return iter([])
+
+            func = func_with_open_process_close
+
+        serializer = AutoBatchedSerializer(PickleSerializer())
+        wrapped_func = _wrap_function(self._spark._sc, func, serializer, serializer)
+        jForeachWriter = \
+            self._spark._sc._jvm.org.apache.spark.sql.execution.python.PythonForeachWriter(
+                wrapped_func, self._df._jdf.schema())
+        self._jwrite.foreach(jForeachWriter)
+        return self
+
+    @since(2.4)
+    def foreachBatch(self, func):
+        """
+        Sets the output of the streaming query to be processed using the provided
+        function. This is supported only in the micro-batch execution mode (that is, when the
+        trigger is not continuous). In every micro-batch, the provided function will be called
+        with (i) the output rows as a DataFrame and (ii) the batch identifier.
+        The batchId can be used to deduplicate and transactionally write the output
+        (that is, the provided Dataset) to external systems. The output DataFrame is guaranteed
+        to be exactly the same for the same batchId (assuming all operations are deterministic
+        in the query).
+
+        .. note:: Evolving.
+
+        >>> def func(batch_df, batch_id):
+        ...     batch_df.collect()
+        ...
+        >>> writer = sdf.writeStream.foreachBatch(func)
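+
+        A sketch of idempotent output keyed by the batch identifier; the output path below is a
+        placeholder, not part of the test data:
+
+        >>> def write_batch(batch_df, batch_id):  # doctest: +SKIP
+        ...     batch_df.write.mode("overwrite").parquet("/tmp/out/batch_id=%d" % batch_id)
+        ...
+        >>> writer = sdf.writeStream.foreachBatch(write_batch)  # doctest: +SKIP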
+        """
+
+        from pyspark.java_gateway import ensure_callback_server_started
+        gw = self._spark._sc._gateway
+        java_import(gw.jvm, "org.apache.spark.sql.execution.streaming.sources.*")
+
+        wrapped_func = ForeachBatchFunction(self._spark, func)
+        gw.jvm.PythonForeachBatchHelper.callForeachBatch(self._jwrite, wrapped_func)
+        ensure_callback_server_started(gw)
+        return self
+
+    @ignore_unicode_prefix
+    @since(2.0)
+    def start(self, path=None, format=None, outputMode=None, partitionBy=None, queryName=None,
+              **options):
+        """Streams the contents of the :class:`DataFrame` to a data source.
+
+        The data source is specified by the ``format`` and a set of ``options``.
+        If ``format`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        .. note:: Evolving.
+
+        :param path: the path in a Hadoop supported file system
+        :param format: the format used to save
+        :param outputMode: specifies how data of a streaming DataFrame/Dataset is written to a
+                           streaming sink.
+
+            * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to
+              the sink.
+            * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the
+              sink every time there are some updates.
+            * `update`: Only the rows that were updated in the streaming DataFrame/Dataset will be
+              written to the sink every time there are some updates. If the query doesn't contain
+              aggregations, it will be equivalent to `append` mode.
+        :param partitionBy: names of partitioning columns
+        :param queryName: unique name for the query
+        :param options: All other string options. You may want to provide a `checkpointLocation`
+                        for most streams; however, it is not required for a `memory` stream.
+
+        >>> sq = sdf.writeStream.format('memory').queryName('this_query').start()
+        >>> sq.isActive
+        True
+        >>> sq.name
+        u'this_query'
+        >>> sq.stop()
+        >>> sq.isActive
+        False
+        >>> sq = sdf.writeStream.trigger(processingTime='5 seconds').start(
+        ...     queryName='that_query', outputMode="append", format='memory')
+        >>> sq.name
+        u'that_query'
+        >>> sq.isActive
+        True
+        >>> sq.stop()
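+
+        A sketch (the paths below are placeholders) of a file sink, which requires a checkpoint
+        location:
+
+        >>> sq = sdf.writeStream.format('parquet').start(  # doctest: +SKIP
+        ...     path='/tmp/out', checkpointLocation='/tmp/checkpoint')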
+        """
+        self.options(**options)
+        if outputMode is not None:
+            self.outputMode(outputMode)
+        if partitionBy is not None:
+            self.partitionBy(partitionBy)
+        if format is not None:
+            self.format(format)
+        if queryName is not None:
+            self.queryName(queryName)
+        if path is None:
+            return self._sq(self._jwrite.start())
+        else:
+            return self._sq(self._jwrite.start(path))
+
+
+def _test():
+    import doctest
+    import os
+    import tempfile
+    import py4j
+    from pyspark.sql import Row, SparkSession, SQLContext
+    import pyspark.sql.streaming
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.sql.streaming.__dict__.copy()
+    try:
+        spark = SparkSession.builder.getOrCreate()
+    except py4j.protocol.Py4JError:
+        spark = SparkSession(sc)
+
+    globs['tempfile'] = tempfile
+    globs['os'] = os
+    globs['spark'] = spark
+    globs['sqlContext'] = SQLContext.getOrCreate(spark.sparkContext)
+    globs['sdf'] = \
+        spark.readStream.format('text').load('python/test_support/sql/streaming')
+    globs['sdf_schema'] = StructType([StructField("data", StringType(), False)])
+    globs['df'] = \
+        globs['spark'].readStream.format('text').load('python/test_support/sql/streaming')
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.streaming, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF)
+    globs['spark'].stop()
+
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/tests.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..b05de54773eb4d93a1e988b4d9b4976aab0d1377
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/tests.py
@@ -0,0 +1,6690 @@
+# -*- encoding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Unit tests for pyspark.sql; additional tests are implemented as doctests in
+individual modules.
+"""
+import os
+import sys
+import subprocess
+import pydoc
+import shutil
+import tempfile
+import threading
+import pickle
+import functools
+import time
+import datetime
+import array
+import ctypes
+import warnings
+import py4j
+from contextlib import contextmanager
+
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
+
+from pyspark.util import _exception_message
+
+_pandas_requirement_message = None
+try:
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+except ImportError as e:
+    # If Pandas version requirement is not satisfied, skip related tests.
+    _pandas_requirement_message = _exception_message(e)
+
+_pyarrow_requirement_message = None
+try:
+    from pyspark.sql.utils import require_minimum_pyarrow_version
+    require_minimum_pyarrow_version()
+except ImportError as e:
+    # If Arrow version requirement is not satisfied, skip related tests.
+    _pyarrow_requirement_message = _exception_message(e)
+
+_test_not_compiled_message = None
+try:
+    from pyspark.sql.utils import require_test_compiled
+    require_test_compiled()
+except Exception as e:
+    _test_not_compiled_message = _exception_message(e)
+
+_have_pandas = _pandas_requirement_message is None
+_have_pyarrow = _pyarrow_requirement_message is None
+_test_compiled = _test_not_compiled_message is None
+
+from pyspark import SparkContext
+from pyspark.sql import SparkSession, SQLContext, HiveContext, Column, Row
+from pyspark.sql.types import *
+from pyspark.sql.types import UserDefinedType, _infer_type, _make_type_verifier
+from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_type_mappings
+from pyspark.sql.types import _array_unsigned_int_typecode_ctype_mappings
+from pyspark.sql.types import _merge_type
+from pyspark.tests import QuietTest, ReusedPySparkTestCase, PySparkTestCase, SparkSubmitTests
+from pyspark.sql.functions import UserDefinedFunction, sha2, lit
+from pyspark.sql.window import Window
+from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException
+
+
+class UTCOffsetTimezone(datetime.tzinfo):
+    """
+    Specifies timezone in UTC offset
+    """
+
+    def __init__(self, offset=0):
+        self.ZERO = datetime.timedelta(hours=offset)
+
+    def utcoffset(self, dt):
+        return self.ZERO
+
+    def dst(self, dt):
+        return self.ZERO
+
+
+class ExamplePointUDT(UserDefinedType):
+    """
+    User-defined type (UDT) for ExamplePoint.
+    """
+
+    @classmethod
+    def sqlType(self):
+        return ArrayType(DoubleType(), False)
+
+    @classmethod
+    def module(cls):
+        return 'pyspark.sql.tests'
+
+    @classmethod
+    def scalaUDT(cls):
+        return 'org.apache.spark.sql.test.ExamplePointUDT'
+
+    def serialize(self, obj):
+        return [obj.x, obj.y]
+
+    def deserialize(self, datum):
+        return ExamplePoint(datum[0], datum[1])
+
+
+class ExamplePoint:
+    """
+    An example class to demonstrate UDT in Scala, Java, and Python.
+    """
+
+    __UDT__ = ExamplePointUDT()
+
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+    def __repr__(self):
+        return "ExamplePoint(%s,%s)" % (self.x, self.y)
+
+    def __str__(self):
+        return "(%s,%s)" % (self.x, self.y)
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__) and \
+            other.x == self.x and other.y == self.y
+
+
+class PythonOnlyUDT(UserDefinedType):
+    """
+    User-defined type (UDT) for ExamplePoint.
+    """
+
+    @classmethod
+    def sqlType(self):
+        return ArrayType(DoubleType(), False)
+
+    @classmethod
+    def module(cls):
+        return '__main__'
+
+    def serialize(self, obj):
+        return [obj.x, obj.y]
+
+    def deserialize(self, datum):
+        return PythonOnlyPoint(datum[0], datum[1])
+
+    @staticmethod
+    def foo():
+        pass
+
+    @property
+    def props(self):
+        return {}
+
+
+class PythonOnlyPoint(ExamplePoint):
+    """
+    An example class to demonstrate UDT in only Python
+    """
+    __UDT__ = PythonOnlyUDT()
+
+
+class MyObject(object):
+    def __init__(self, key, value):
+        self.key = key
+        self.value = value
+
+
+class SQLTestUtils(object):
+    """
+    This util assumes the instance using it has a 'spark' attribute holding a Spark session.
+    It is usually mixed into 'ReusedSQLTestCase', but it can be used anywhere you are sure
+    that the implementing class has a 'spark' attribute.
+    """
+
+    @contextmanager
+    def sql_conf(self, pairs):
+        """
+        A convenient context manager to test configuration-specific logic. It sets each
+        configuration `key` to the given `value` and restores the previous value on exit.
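+
+        Illustrative usage (any configuration pair works the same way)::
+
+            with self.sql_conf({"spark.sql.crossJoin.enabled": True}):
+                ...  # code that should see the temporary setting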
+        """
+        assert isinstance(pairs, dict), "pairs should be a dictionary."
+        assert hasattr(self, "spark"), "it should have 'spark' attribute, having a spark session."
+
+        keys = pairs.keys()
+        new_values = pairs.values()
+        old_values = [self.spark.conf.get(key, None) for key in keys]
+        for key, new_value in zip(keys, new_values):
+            self.spark.conf.set(key, new_value)
+        try:
+            yield
+        finally:
+            for key, old_value in zip(keys, old_values):
+                if old_value is None:
+                    self.spark.conf.unset(key)
+                else:
+                    self.spark.conf.set(key, old_value)
+
+
+class ReusedSQLTestCase(ReusedPySparkTestCase, SQLTestUtils):
+    @classmethod
+    def setUpClass(cls):
+        super(ReusedSQLTestCase, cls).setUpClass()
+        cls.spark = SparkSession(cls.sc)
+
+    @classmethod
+    def tearDownClass(cls):
+        super(ReusedSQLTestCase, cls).tearDownClass()
+        cls.spark.stop()
+
+    def assertPandasEqual(self, expected, result):
+        msg = ("DataFrames are not equal: " +
+               "\n\nExpected:\n%s\n%s" % (expected, expected.dtypes) +
+               "\n\nResult:\n%s\n%s" % (result, result.dtypes))
+        self.assertTrue(expected.equals(result), msg=msg)
+
+
+class DataTypeTests(unittest.TestCase):
+    # regression test for SPARK-6055
+    def test_data_type_eq(self):
+        lt = LongType()
+        lt2 = pickle.loads(pickle.dumps(LongType()))
+        self.assertEqual(lt, lt2)
+
+    # regression test for SPARK-7978
+    def test_decimal_type(self):
+        t1 = DecimalType()
+        t2 = DecimalType(10, 2)
+        self.assertTrue(t2 is not t1)
+        self.assertNotEqual(t1, t2)
+        t3 = DecimalType(8)
+        self.assertNotEqual(t2, t3)
+
+    # regression test for SPARK-10392
+    def test_datetype_equal_zero(self):
+        dt = DateType()
+        self.assertEqual(dt.fromInternal(0), datetime.date(1970, 1, 1))
+
+    # regression test for SPARK-17035
+    def test_timestamp_microsecond(self):
+        tst = TimestampType()
+        self.assertEqual(tst.toInternal(datetime.datetime.max) % 1000000, 999999)
+
+    def test_empty_row(self):
+        row = Row()
+        self.assertEqual(len(row), 0)
+
+    def test_struct_field_type_name(self):
+        struct_field = StructField("a", IntegerType())
+        self.assertRaises(TypeError, struct_field.typeName)
+
+    def test_invalid_create_row(self):
+        row_class = Row("c1", "c2")
+        self.assertRaises(ValueError, lambda: row_class(1, 2, 3))
+
+
+class SQLTests(ReusedSQLTestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        ReusedSQLTestCase.setUpClass()
+        cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
+        os.unlink(cls.tempdir.name)
+        cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
+        cls.df = cls.spark.createDataFrame(cls.testData)
+
+    @classmethod
+    def tearDownClass(cls):
+        ReusedSQLTestCase.tearDownClass()
+        shutil.rmtree(cls.tempdir.name, ignore_errors=True)
+
+    def test_sqlcontext_reuses_sparksession(self):
+        sqlContext1 = SQLContext(self.sc)
+        sqlContext2 = SQLContext(self.sc)
+        self.assertTrue(sqlContext1.sparkSession is sqlContext2.sparkSession)
+
+    def tearDown(self):
+        super(SQLTests, self).tearDown()
+
+        # tear down test_bucketed_write state
+        self.spark.sql("DROP TABLE IF EXISTS pyspark_bucket")
+
+    def test_row_should_be_read_only(self):
+        row = Row(a=1, b=2)
+        self.assertEqual(1, row.a)
+
+        def foo():
+            row.a = 3
+        self.assertRaises(Exception, foo)
+
+        row2 = self.spark.range(10).first()
+        self.assertEqual(0, row2.id)
+
+        def foo2():
+            row2.id = 2
+        self.assertRaises(Exception, foo2)
+
+    def test_range(self):
+        self.assertEqual(self.spark.range(1, 1).count(), 0)
+        self.assertEqual(self.spark.range(1, 0, -1).count(), 1)
+        self.assertEqual(self.spark.range(0, 1 << 40, 1 << 39).count(), 2)
+        self.assertEqual(self.spark.range(-2).count(), 0)
+        self.assertEqual(self.spark.range(3).count(), 3)
+
+    def test_duplicated_column_names(self):
+        df = self.spark.createDataFrame([(1, 2)], ["c", "c"])
+        row = df.select('*').first()
+        self.assertEqual(1, row[0])
+        self.assertEqual(2, row[1])
+        self.assertEqual("Row(c=1, c=2)", str(row))
+        # Cannot access columns
+        self.assertRaises(AnalysisException, lambda: df.select(df[0]).first())
+        self.assertRaises(AnalysisException, lambda: df.select(df.c).first())
+        self.assertRaises(AnalysisException, lambda: df.select(df["c"]).first())
+
+    def test_column_name_encoding(self):
+        """Ensure that created columns have `str` type consistently."""
+        columns = self.spark.createDataFrame([('Alice', 1)], ['name', u'age']).columns
+        self.assertEqual(columns, ['name', 'age'])
+        self.assertTrue(isinstance(columns[0], str))
+        self.assertTrue(isinstance(columns[1], str))
+
+    def test_explode(self):
+        from pyspark.sql.functions import explode, explode_outer, posexplode_outer
+        d = [
+            Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"}),
+            Row(a=1, intlist=[], mapfield={}),
+            Row(a=1, intlist=None, mapfield=None),
+        ]
+        rdd = self.sc.parallelize(d)
+        data = self.spark.createDataFrame(rdd)
+
+        result = data.select(explode(data.intlist).alias("a")).select("a").collect()
+        self.assertEqual(result[0][0], 1)
+        self.assertEqual(result[1][0], 2)
+        self.assertEqual(result[2][0], 3)
+
+        result = data.select(explode(data.mapfield).alias("a", "b")).select("a", "b").collect()
+        self.assertEqual(result[0][0], "a")
+        self.assertEqual(result[0][1], "b")
+
+        result = [tuple(x) for x in data.select(posexplode_outer("intlist")).collect()]
+        self.assertEqual(result, [(0, 1), (1, 2), (2, 3), (None, None), (None, None)])
+
+        result = [tuple(x) for x in data.select(posexplode_outer("mapfield")).collect()]
+        self.assertEqual(result, [(0, 'a', 'b'), (None, None, None), (None, None, None)])
+
+        result = [x[0] for x in data.select(explode_outer("intlist")).collect()]
+        self.assertEqual(result, [1, 2, 3, None, None])
+
+        result = [tuple(x) for x in data.select(explode_outer("mapfield")).collect()]
+        self.assertEqual(result, [('a', 'b'), (None, None), (None, None)])
+
+    def test_and_in_expression(self):
+        self.assertEqual(4, self.df.filter((self.df.key <= 10) & (self.df.value <= "2")).count())
+        self.assertRaises(ValueError, lambda: (self.df.key <= 10) and (self.df.value <= "2"))
+        self.assertEqual(14, self.df.filter((self.df.key <= 3) | (self.df.value < "2")).count())
+        self.assertRaises(ValueError, lambda: self.df.key <= 3 or self.df.value < "2")
+        self.assertEqual(99, self.df.filter(~(self.df.key == 1)).count())
+        self.assertRaises(ValueError, lambda: not self.df.key == 1)
+
+    def test_udf_with_callable(self):
+        d = [Row(number=i, squared=i**2) for i in range(10)]
+        rdd = self.sc.parallelize(d)
+        data = self.spark.createDataFrame(rdd)
+
+        class PlusFour:
+            def __call__(self, col):
+                if col is not None:
+                    return col + 4
+
+        call = PlusFour()
+        pudf = UserDefinedFunction(call, LongType())
+        res = data.select(pudf(data['number']).alias('plus_four'))
+        self.assertEqual(res.agg({'plus_four': 'sum'}).collect()[0][0], 85)
+
+    def test_udf_with_partial_function(self):
+        d = [Row(number=i, squared=i**2) for i in range(10)]
+        rdd = self.sc.parallelize(d)
+        data = self.spark.createDataFrame(rdd)
+
+        def some_func(col, param):
+            if col is not None:
+                return col + param
+
+        pfunc = functools.partial(some_func, param=4)
+        pudf = UserDefinedFunction(pfunc, LongType())
+        res = data.select(pudf(data['number']).alias('plus_four'))
+        self.assertEqual(res.agg({'plus_four': 'sum'}).collect()[0][0], 85)
+
+    def test_udf(self):
+        self.spark.catalog.registerFunction("twoArgs", lambda x, y: len(x) + y, IntegerType())
+        [row] = self.spark.sql("SELECT twoArgs('test', 1)").collect()
+        self.assertEqual(row[0], 5)
+
+        # This is to check if a deprecated 'SQLContext.registerFunction' can call its alias.
+        sqlContext = self.spark._wrapped
+        sqlContext.registerFunction("oneArg", lambda x: len(x), IntegerType())
+        [row] = sqlContext.sql("SELECT oneArg('test')").collect()
+        self.assertEqual(row[0], 4)
+
+    def test_udf2(self):
+        self.spark.catalog.registerFunction("strlen", lambda string: len(string), IntegerType())
+        self.spark.createDataFrame(self.sc.parallelize([Row(a="test")]))\
+            .createOrReplaceTempView("test")
+        [res] = self.spark.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1").collect()
+        self.assertEqual(4, res[0])
+
+    def test_udf3(self):
+        two_args = self.spark.catalog.registerFunction(
+            "twoArgs", UserDefinedFunction(lambda x, y: len(x) + y))
+        self.assertEqual(two_args.deterministic, True)
+        [row] = self.spark.sql("SELECT twoArgs('test', 1)").collect()
+        self.assertEqual(row[0], u'5')
+
+    def test_udf_registration_return_type_none(self):
+        two_args = self.spark.catalog.registerFunction(
+            "twoArgs", UserDefinedFunction(lambda x, y: len(x) + y, "integer"), None)
+        self.assertEqual(two_args.deterministic, True)
+        [row] = self.spark.sql("SELECT twoArgs('test', 1)").collect()
+        self.assertEqual(row[0], 5)
+
+    def test_udf_registration_return_type_not_none(self):
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(TypeError, "Invalid returnType"):
+                self.spark.catalog.registerFunction(
+                    "f", UserDefinedFunction(lambda x, y: len(x) + y, StringType()), StringType())
+
+    def test_nondeterministic_udf(self):
+        # Test that nondeterministic UDFs are evaluated only once in chained UDF evaluations
+        from pyspark.sql.functions import udf
+        import random
+        udf_random_col = udf(lambda: int(100 * random.random()), IntegerType()).asNondeterministic()
+        self.assertEqual(udf_random_col.deterministic, False)
+        df = self.spark.createDataFrame([Row(1)]).select(udf_random_col().alias('RAND'))
+        udf_add_ten = udf(lambda rand: rand + 10, IntegerType())
+        [row] = df.withColumn('RAND_PLUS_TEN', udf_add_ten('RAND')).collect()
+        self.assertEqual(row[0] + 10, row[1])
+
+    def test_nondeterministic_udf2(self):
+        import random
+        from pyspark.sql.functions import udf
+        random_udf = udf(lambda: random.randint(6, 6), IntegerType()).asNondeterministic()
+        self.assertEqual(random_udf.deterministic, False)
+        random_udf1 = self.spark.catalog.registerFunction("randInt", random_udf)
+        self.assertEqual(random_udf1.deterministic, False)
+        [row] = self.spark.sql("SELECT randInt()").collect()
+        self.assertEqual(row[0], 6)
+        [row] = self.spark.range(1).select(random_udf1()).collect()
+        self.assertEqual(row[0], 6)
+        [row] = self.spark.range(1).select(random_udf()).collect()
+        self.assertEqual(row[0], 6)
+        # render_doc() reproduces the help() exception without printing output
+        pydoc.render_doc(udf(lambda: random.randint(6, 6), IntegerType()))
+        pydoc.render_doc(random_udf)
+        pydoc.render_doc(random_udf1)
+        pydoc.render_doc(udf(lambda x: x).asNondeterministic)
+
+    def test_nondeterministic_udf3(self):
+        # regression test for SPARK-23233
+        from pyspark.sql.functions import udf
+        f = udf(lambda x: x)
+        # Here we cache the JVM UDF instance.
+        self.spark.range(1).select(f("id"))
+        # This should reset the cache to set the deterministic status correctly.
+        f = f.asNondeterministic()
+        # Check the deterministic status of udf.
+        df = self.spark.range(1).select(f("id"))
+        deterministic = df._jdf.logicalPlan().projectList().head().deterministic()
+        self.assertFalse(deterministic)
+
+    def test_nondeterministic_udf_in_aggregate(self):
+        from pyspark.sql.functions import udf, sum
+        import random
+        udf_random_col = udf(lambda: int(100 * random.random()), 'int').asNondeterministic()
+        df = self.spark.range(10)
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(AnalysisException, "nondeterministic"):
+                df.groupby('id').agg(sum(udf_random_col())).collect()
+            with self.assertRaisesRegexp(AnalysisException, "nondeterministic"):
+                df.agg(sum(udf_random_col())).collect()
+
+    def test_chained_udf(self):
+        self.spark.catalog.registerFunction("double", lambda x: x + x, IntegerType())
+        [row] = self.spark.sql("SELECT double(1)").collect()
+        self.assertEqual(row[0], 2)
+        [row] = self.spark.sql("SELECT double(double(1))").collect()
+        self.assertEqual(row[0], 4)
+        [row] = self.spark.sql("SELECT double(double(1) + 1)").collect()
+        self.assertEqual(row[0], 6)
+
+    def test_single_udf_with_repeated_argument(self):
+        # regression test for SPARK-20685
+        self.spark.catalog.registerFunction("add", lambda x, y: x + y, IntegerType())
+        row = self.spark.sql("SELECT add(1, 1)").first()
+        self.assertEqual(tuple(row), (2, ))
+
+    def test_multiple_udfs(self):
+        self.spark.catalog.registerFunction("double", lambda x: x * 2, IntegerType())
+        [row] = self.spark.sql("SELECT double(1), double(2)").collect()
+        self.assertEqual(tuple(row), (2, 4))
+        [row] = self.spark.sql("SELECT double(double(1)), double(double(2) + 2)").collect()
+        self.assertEqual(tuple(row), (4, 12))
+        self.spark.catalog.registerFunction("add", lambda x, y: x + y, IntegerType())
+        [row] = self.spark.sql("SELECT double(add(1, 2)), add(double(2), 1)").collect()
+        self.assertEqual(tuple(row), (6, 5))
+
+    def test_udf_in_filter_on_top_of_outer_join(self):
+        from pyspark.sql.functions import udf
+        left = self.spark.createDataFrame([Row(a=1)])
+        right = self.spark.createDataFrame([Row(a=1)])
+        df = left.join(right, on='a', how='left_outer')
+        df = df.withColumn('b', udf(lambda x: 'x')(df.a))
+        self.assertEqual(df.filter('b = "x"').collect(), [Row(a=1, b='x')])
+
+    def test_udf_in_filter_on_top_of_join(self):
+        # regression test for SPARK-18589
+        from pyspark.sql.functions import udf
+        left = self.spark.createDataFrame([Row(a=1)])
+        right = self.spark.createDataFrame([Row(b=1)])
+        f = udf(lambda a, b: a == b, BooleanType())
+        df = left.crossJoin(right).filter(f("a", "b"))
+        self.assertEqual(df.collect(), [Row(a=1, b=1)])
+
+    def test_udf_in_join_condition(self):
+        # regression test for SPARK-25314
+        from pyspark.sql.functions import udf
+        left = self.spark.createDataFrame([Row(a=1)])
+        right = self.spark.createDataFrame([Row(b=1)])
+        f = udf(lambda a, b: a == b, BooleanType())
+        df = left.join(right, f("a", "b"))
+        with self.assertRaisesRegexp(AnalysisException, 'Detected implicit cartesian product'):
+            df.collect()
+        with self.sql_conf({"spark.sql.crossJoin.enabled": True}):
+            self.assertEqual(df.collect(), [Row(a=1, b=1)])
+
+    def test_udf_in_left_semi_join_condition(self):
+        # regression test for SPARK-25314
+        from pyspark.sql.functions import udf
+        left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)])
+        right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1)])
+        f = udf(lambda a, b: a == b, BooleanType())
+        df = left.join(right, f("a", "b"), "leftsemi")
+        with self.assertRaisesRegexp(AnalysisException, 'Detected implicit cartesian product'):
+            df.collect()
+        with self.sql_conf({"spark.sql.crossJoin.enabled": True}):
+            self.assertEqual(df.collect(), [Row(a=1, a1=1, a2=1)])
+
+    def test_udf_and_common_filter_in_join_condition(self):
+        # regression test for SPARK-25314
+        # test the complex scenario with both udf and common filter
+        from pyspark.sql.functions import udf
+        left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)])
+        right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1), Row(b=1, b1=3, b2=1)])
+        f = udf(lambda a, b: a == b, BooleanType())
+        df = left.join(right, [f("a", "b"), left.a1 == right.b1])
+        # No need for spark.sql.crossJoin.enabled=true: the udf is not the only join condition.
+        self.assertEqual(df.collect(), [Row(a=1, a1=1, a2=1, b=1, b1=1, b2=1)])
+
+    def test_udf_and_common_filter_in_left_semi_join_condition(self):
+        # regression test for SPARK-25314
+        # test the complex scenario with both udf and common filter
+        from pyspark.sql.functions import udf
+        left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)])
+        right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1), Row(b=1, b1=3, b2=1)])
+        f = udf(lambda a, b: a == b, BooleanType())
+        df = left.join(right, [f("a", "b"), left.a1 == right.b1], "left_semi")
+        # No need for spark.sql.crossJoin.enabled=true: the udf is not the only join condition.
+        self.assertEqual(df.collect(), [Row(a=1, a1=1, a2=1)])
+
+    def test_udf_not_supported_in_join_condition(self):
+        # regression test for SPARK-25314
+        # test that python udf is not supported in join types besides left_semi and inner join.
+        from pyspark.sql.functions import udf
+        left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)])
+        right = self.spark.createDataFrame([Row(b=1, b1=1, b2=1), Row(b=1, b1=3, b2=1)])
+        f = udf(lambda a, b: a == b, BooleanType())
+
+        def runWithJoinType(join_type, type_string):
+            with self.assertRaisesRegexp(
+                    AnalysisException,
+                    'Using PythonUDF.*%s is not supported.' % type_string):
+                left.join(right, [f("a", "b"), left.a1 == right.b1], join_type).collect()
+        runWithJoinType("full", "FullOuter")
+        runWithJoinType("left", "LeftOuter")
+        runWithJoinType("right", "RightOuter")
+        runWithJoinType("leftanti", "LeftAnti")
+
+    def test_udf_without_arguments(self):
+        self.spark.catalog.registerFunction("foo", lambda: "bar")
+        [row] = self.spark.sql("SELECT foo()").collect()
+        self.assertEqual(row[0], "bar")
+
+    def test_udf_with_array_type(self):
+        d = [Row(l=list(range(3)), d={"key": list(range(5))})]
+        rdd = self.sc.parallelize(d)
+        self.spark.createDataFrame(rdd).createOrReplaceTempView("test")
+        self.spark.catalog.registerFunction("copylist", lambda l: list(l), ArrayType(IntegerType()))
+        self.spark.catalog.registerFunction("maplen", lambda d: len(d), IntegerType())
+        [(l1, l2)] = self.spark.sql("select copylist(l), maplen(d) from test").collect()
+        self.assertEqual(list(range(3)), l1)
+        self.assertEqual(1, l2)
+
+    def test_broadcast_in_udf(self):
+        bar = {"a": "aa", "b": "bb", "c": "abc"}
+        foo = self.sc.broadcast(bar)
+        self.spark.catalog.registerFunction("MYUDF", lambda x: foo.value[x] if x else '')
+        [res] = self.spark.sql("SELECT MYUDF('c')").collect()
+        self.assertEqual("abc", res[0])
+        [res] = self.spark.sql("SELECT MYUDF('')").collect()
+        self.assertEqual("", res[0])
+
+    def test_udf_with_filter_function(self):
+        df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
+        from pyspark.sql.functions import udf, col
+        from pyspark.sql.types import BooleanType
+
+        my_filter = udf(lambda a: a < 2, BooleanType())
+        sel = df.select(col("key"), col("value")).filter((my_filter(col("key"))) & (df.value < "2"))
+        self.assertEqual(sel.collect(), [Row(key=1, value='1')])
+
+    def test_udf_with_aggregate_function(self):
+        df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
+        from pyspark.sql.functions import udf, col, sum
+        from pyspark.sql.types import BooleanType
+
+        my_filter = udf(lambda a: a == 1, BooleanType())
+        sel = df.select(col("key")).distinct().filter(my_filter(col("key")))
+        self.assertEqual(sel.collect(), [Row(key=1)])
+
+        my_copy = udf(lambda x: x, IntegerType())
+        my_add = udf(lambda a, b: int(a + b), IntegerType())
+        my_strlen = udf(lambda x: len(x), IntegerType())
+        sel = df.groupBy(my_copy(col("key")).alias("k"))\
+            .agg(sum(my_strlen(col("value"))).alias("s"))\
+            .select(my_add(col("k"), col("s")).alias("t"))
+        self.assertEqual(sel.collect(), [Row(t=4), Row(t=3)])
+
+    def test_udf_in_generate(self):
+        from pyspark.sql.functions import udf, explode
+        df = self.spark.range(5)
+        f = udf(lambda x: list(range(x)), ArrayType(LongType()))
+        row = df.select(explode(f(*df))).groupBy().sum().first()
+        self.assertEqual(row[0], 10)
+
+        df = self.spark.range(3)
+        res = df.select("id", explode(f(df.id))).collect()
+        self.assertEqual(res[0][0], 1)
+        self.assertEqual(res[0][1], 0)
+        self.assertEqual(res[1][0], 2)
+        self.assertEqual(res[1][1], 0)
+        self.assertEqual(res[2][0], 2)
+        self.assertEqual(res[2][1], 1)
+
+        range_udf = udf(lambda value: list(range(value - 1, value + 1)), ArrayType(IntegerType()))
+        res = df.select("id", explode(range_udf(df.id))).collect()
+        self.assertEqual(res[0][0], 0)
+        self.assertEqual(res[0][1], -1)
+        self.assertEqual(res[1][0], 0)
+        self.assertEqual(res[1][1], 0)
+        self.assertEqual(res[2][0], 1)
+        self.assertEqual(res[2][1], 0)
+        self.assertEqual(res[3][0], 1)
+        self.assertEqual(res[3][1], 1)
+
+    def test_udf_with_order_by_and_limit(self):
+        from pyspark.sql.functions import udf
+        my_copy = udf(lambda x: x, IntegerType())
+        df = self.spark.range(10).orderBy("id")
+        res = df.select(df.id, my_copy(df.id).alias("copy")).limit(1)
+        res.explain(True)
+        self.assertEqual(res.collect(), [Row(id=0, copy=0)])
+
+    def test_udf_registration_returns_udf(self):
+        df = self.spark.range(10)
+        add_three = self.spark.udf.register("add_three", lambda x: x + 3, IntegerType())
+
+        self.assertListEqual(
+            df.selectExpr("add_three(id) AS plus_three").collect(),
+            df.select(add_three("id").alias("plus_three")).collect()
+        )
+
+        # This is to check if a 'SQLContext.udf' can call its alias.
+        sqlContext = self.spark._wrapped
+        add_four = sqlContext.udf.register("add_four", lambda x: x + 4, IntegerType())
+
+        self.assertListEqual(
+            df.selectExpr("add_four(id) AS plus_four").collect(),
+            df.select(add_four("id").alias("plus_four")).collect()
+        )
+
+    def test_non_existed_udf(self):
+        spark = self.spark
+        self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf",
+                                lambda: spark.udf.registerJavaFunction("udf1", "non_existed_udf"))
+
+        # This is to check if a deprecated 'SQLContext.registerJavaFunction' can call its alias.
+        sqlContext = spark._wrapped
+        self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf",
+                                lambda: sqlContext.registerJavaFunction("udf1", "non_existed_udf"))
+
+    def test_non_existed_udaf(self):
+        spark = self.spark
+        self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udaf",
+                                lambda: spark.udf.registerJavaUDAF("udaf1", "non_existed_udaf"))
+
+    def test_linesep_text(self):
+        df = self.spark.read.text("python/test_support/sql/ages_newlines.csv", lineSep=",")
+        expected = [Row(value=u'Joe'), Row(value=u'20'), Row(value=u'"Hi'),
+                    Row(value=u'\nI am Jeo"\nTom'), Row(value=u'30'),
+                    Row(value=u'"My name is Tom"\nHyukjin'), Row(value=u'25'),
+                    Row(value=u'"I am Hyukjin\n\nI love Spark!"\n')]
+        self.assertEqual(df.collect(), expected)
+
+        tpath = tempfile.mkdtemp()
+        shutil.rmtree(tpath)
+        try:
+            df.write.text(tpath, lineSep="!")
+            expected = [Row(value=u'Joe!20!"Hi!'), Row(value=u'I am Jeo"'),
+                        Row(value=u'Tom!30!"My name is Tom"'),
+                        Row(value=u'Hyukjin!25!"I am Hyukjin'),
+                        Row(value=u''), Row(value=u'I love Spark!"'),
+                        Row(value=u'!')]
+            readback = self.spark.read.text(tpath)
+            self.assertEqual(readback.collect(), expected)
+        finally:
+            shutil.rmtree(tpath)
+
+    def test_multiline_json(self):
+        people1 = self.spark.read.json("python/test_support/sql/people.json")
+        people_array = self.spark.read.json("python/test_support/sql/people_array.json",
+                                            multiLine=True)
+        self.assertEqual(people1.collect(), people_array.collect())
+
+    def test_encoding_json(self):
+        people_array = self.spark.read\
+            .json("python/test_support/sql/people_array_utf16le.json",
+                  multiLine=True, encoding="UTF-16LE")
+        expected = [Row(age=30, name=u'Andy'), Row(age=19, name=u'Justin')]
+        self.assertEqual(people_array.collect(), expected)
+
+    def test_linesep_json(self):
+        df = self.spark.read.json("python/test_support/sql/people.json", lineSep=",")
+        expected = [Row(_corrupt_record=None, name=u'Michael'),
+                    Row(_corrupt_record=u' "age":30}\n{"name":"Justin"', name=None),
+                    Row(_corrupt_record=u' "age":19}\n', name=None)]
+        self.assertEqual(df.collect(), expected)
+
+        tpath = tempfile.mkdtemp()
+        shutil.rmtree(tpath)
+        try:
+            df = self.spark.read.json("python/test_support/sql/people.json")
+            df.write.json(tpath, lineSep="!!")
+            readback = self.spark.read.json(tpath, lineSep="!!")
+            self.assertEqual(readback.collect(), df.collect())
+        finally:
+            shutil.rmtree(tpath)
+
+    def test_multiline_csv(self):
+        ages_newlines = self.spark.read.csv(
+            "python/test_support/sql/ages_newlines.csv", multiLine=True)
+        expected = [Row(_c0=u'Joe', _c1=u'20', _c2=u'Hi,\nI am Jeo'),
+                    Row(_c0=u'Tom', _c1=u'30', _c2=u'My name is Tom'),
+                    Row(_c0=u'Hyukjin', _c1=u'25', _c2=u'I am Hyukjin\n\nI love Spark!')]
+        self.assertEqual(ages_newlines.collect(), expected)
+
+    def test_ignorewhitespace_csv(self):
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        self.spark.createDataFrame([[" a", "b  ", " c "]]).write.csv(
+            tmpPath,
+            ignoreLeadingWhiteSpace=False,
+            ignoreTrailingWhiteSpace=False)
+
+        expected = [Row(value=u' a,b  , c ')]
+        readback = self.spark.read.text(tmpPath)
+        self.assertEqual(readback.collect(), expected)
+        shutil.rmtree(tmpPath)
+
+    def test_read_multiple_orc_file(self):
+        df = self.spark.read.orc(["python/test_support/sql/orc_partitioned/b=0/c=0",
+                                  "python/test_support/sql/orc_partitioned/b=1/c=1"])
+        self.assertEqual(2, df.count())
+
+    def test_udf_with_input_file_name(self):
+        from pyspark.sql.functions import udf, input_file_name
+        sourceFile = udf(lambda path: path, StringType())
+        filePath = "python/test_support/sql/people1.json"
+        row = self.spark.read.json(filePath).select(sourceFile(input_file_name())).first()
+        self.assertTrue(row[0].find("people1.json") != -1)
+
+    def test_udf_with_input_file_name_for_hadooprdd(self):
+        from pyspark.sql.functions import udf, input_file_name
+
+        def filename(path):
+            return path
+
+        sameText = udf(filename, StringType())
+
+        rdd = self.sc.textFile('python/test_support/sql/people.json')
+        df = self.spark.read.json(rdd).select(input_file_name().alias('file'))
+        row = df.select(sameText(df['file'])).first()
+        self.assertTrue(row[0].find("people.json") != -1)
+
+        rdd2 = self.sc.newAPIHadoopFile(
+            'python/test_support/sql/people.json',
+            'org.apache.hadoop.mapreduce.lib.input.TextInputFormat',
+            'org.apache.hadoop.io.LongWritable',
+            'org.apache.hadoop.io.Text')
+
+        df2 = self.spark.read.json(rdd2).select(input_file_name().alias('file'))
+        row2 = df2.select(sameText(df2['file'])).first()
+        self.assertTrue(row2[0].find("people.json") != -1)
+
+    def test_udf_defers_judf_initialization(self):
+        # This test is kept separate from UDFInitializationTests
+        # to avoid context initialization
+        # when the UDF is called
+
+        from pyspark.sql.functions import UserDefinedFunction
+
+        f = UserDefinedFunction(lambda x: x, StringType())
+
+        self.assertIsNone(
+            f._judf_placeholder,
+            "judf should not be initialized before the first call."
+        )
+
+        self.assertIsInstance(f("foo"), Column, "UDF call should return a Column.")
+
+        self.assertIsNotNone(
+            f._judf_placeholder,
+            "judf should be initialized after UDF has been called."
+        )
+
+    def test_udf_with_string_return_type(self):
+        from pyspark.sql.functions import UserDefinedFunction
+
+        add_one = UserDefinedFunction(lambda x: x + 1, "integer")
+        make_pair = UserDefinedFunction(lambda x: (-x, x), "struct<x:integer,y:integer>")
+        make_array = UserDefinedFunction(
+            lambda x: [float(x) for x in range(x, x + 3)], "array<double>")
+
+        expected = (2, Row(x=-1, y=1), [1.0, 2.0, 3.0])
+        actual = (self.spark.range(1, 2).toDF("x")
+                  .select(add_one("x"), make_pair("x"), make_array("x"))
+                  .first())
+
+        self.assertTupleEqual(expected, actual)
+
+    def test_udf_shouldnt_accept_noncallable_object(self):
+        from pyspark.sql.functions import UserDefinedFunction
+
+        non_callable = None
+        self.assertRaises(TypeError, UserDefinedFunction, non_callable, StringType())
+
+    def test_udf_with_decorator(self):
+        from pyspark.sql.functions import lit, udf
+        from pyspark.sql.types import IntegerType, DoubleType
+
+        @udf(IntegerType())
+        def add_one(x):
+            if x is not None:
+                return x + 1
+
+        @udf(returnType=DoubleType())
+        def add_two(x):
+            if x is not None:
+                return float(x + 2)
+
+        @udf
+        def to_upper(x):
+            if x is not None:
+                return x.upper()
+
+        @udf()
+        def to_lower(x):
+            if x is not None:
+                return x.lower()
+
+        @udf
+        def substr(x, start, end):
+            if x is not None:
+                return x[start:end]
+
+        @udf("long")
+        def trunc(x):
+            return int(x)
+
+        @udf(returnType="double")
+        def as_double(x):
+            return float(x)
+
+        df = (
+            self.spark
+                .createDataFrame(
+                    [(1, "Foo", "foobar", 3.0)], ("one", "Foo", "foobar", "float"))
+                .select(
+                    add_one("one"), add_two("one"),
+                    to_upper("Foo"), to_lower("Foo"),
+                    substr("foobar", lit(0), lit(3)),
+                    trunc("float"), as_double("one")))
+
+        self.assertListEqual(
+            [tpe for _, tpe in df.dtypes],
+            ["int", "double", "string", "string", "string", "bigint", "double"]
+        )
+
+        self.assertListEqual(
+            list(df.first()),
+            [2, 3.0, "FOO", "foo", "foo", 3, 1.0]
+        )
+
+    def test_udf_wrapper(self):
+        from pyspark.sql.functions import udf
+        from pyspark.sql.types import IntegerType
+
+        def f(x):
+            """Identity"""
+            return x
+
+        return_type = IntegerType()
+        f_ = udf(f, return_type)
+
+        self.assertTrue(f.__doc__ in f_.__doc__)
+        self.assertEqual(f, f_.func)
+        self.assertEqual(return_type, f_.returnType)
+
+        class F(object):
+            """Identity"""
+            def __call__(self, x):
+                return x
+
+        f = F()
+        return_type = IntegerType()
+        f_ = udf(f, return_type)
+
+        self.assertTrue(f.__doc__ in f_.__doc__)
+        self.assertEqual(f, f_.func)
+        self.assertEqual(return_type, f_.returnType)
+
+        f = functools.partial(f, x=1)
+        return_type = IntegerType()
+        f_ = udf(f, return_type)
+
+        self.assertTrue(f.__doc__ in f_.__doc__)
+        self.assertEqual(f, f_.func)
+        self.assertEqual(return_type, f_.returnType)
+
+    def test_validate_column_types(self):
+        from pyspark.sql.functions import udf, to_json
+        from pyspark.sql.column import _to_java_column
+
+        self.assertTrue("Column" in _to_java_column("a").getClass().toString())
+        self.assertTrue("Column" in _to_java_column(u"a").getClass().toString())
+        self.assertTrue("Column" in _to_java_column(self.spark.range(1).id).getClass().toString())
+
+        self.assertRaisesRegexp(
+            TypeError,
+            "Invalid argument, not a string or column",
+            lambda: _to_java_column(1))
+
+        class A():
+            pass
+
+        self.assertRaises(TypeError, lambda: _to_java_column(A()))
+        self.assertRaises(TypeError, lambda: _to_java_column([]))
+
+        self.assertRaisesRegexp(
+            TypeError,
+            "Invalid argument, not a string or column",
+            lambda: udf(lambda x: x)(None))
+        self.assertRaises(TypeError, lambda: to_json(1))
+
+    def test_basic_functions(self):
+        rdd = self.sc.parallelize(['{"foo":"bar"}', '{"foo":"baz"}'])
+        df = self.spark.read.json(rdd)
+        df.count()
+        df.collect()
+        df.schema
+
+        # cache and checkpoint
+        self.assertFalse(df.is_cached)
+        df.persist()
+        df.unpersist(True)
+        df.cache()
+        self.assertTrue(df.is_cached)
+        self.assertEqual(2, df.count())
+
+        df.createOrReplaceTempView("temp")
+        df = self.spark.sql("select foo from temp")
+        df.count()
+        df.collect()
+
+    def test_apply_schema_to_row(self):
+        df = self.spark.read.json(self.sc.parallelize(["""{"a":2}"""]))
+        df2 = self.spark.createDataFrame(df.rdd.map(lambda x: x), df.schema)
+        self.assertEqual(df.collect(), df2.collect())
+
+        rdd = self.sc.parallelize(range(10)).map(lambda x: Row(a=x))
+        df3 = self.spark.createDataFrame(rdd, df.schema)
+        self.assertEqual(10, df3.count())
+
+    def test_infer_schema_to_local(self):
+        input = [{"a": 1}, {"b": "coffee"}]
+        rdd = self.sc.parallelize(input)
+        df = self.spark.createDataFrame(input)
+        df2 = self.spark.createDataFrame(rdd, samplingRatio=1.0)
+        self.assertEqual(df.schema, df2.schema)
+
+        rdd = self.sc.parallelize(range(10)).map(lambda x: Row(a=x, b=None))
+        df3 = self.spark.createDataFrame(rdd, df.schema)
+        self.assertEqual(10, df3.count())
+
+    def test_apply_schema_to_dict_and_rows(self):
+        schema = StructType().add("b", StringType()).add("a", IntegerType())
+        input = [{"a": 1}, {"b": "coffee"}]
+        rdd = self.sc.parallelize(input)
+        for verify in [False, True]:
+            df = self.spark.createDataFrame(input, schema, verifySchema=verify)
+            df2 = self.spark.createDataFrame(rdd, schema, verifySchema=verify)
+            self.assertEqual(df.schema, df2.schema)
+
+            rdd = self.sc.parallelize(range(10)).map(lambda x: Row(a=x, b=None))
+            df3 = self.spark.createDataFrame(rdd, schema, verifySchema=verify)
+            self.assertEqual(10, df3.count())
+            input = [Row(a=x, b=str(x)) for x in range(10)]
+            df4 = self.spark.createDataFrame(input, schema, verifySchema=verify)
+            self.assertEqual(10, df4.count())
+
+    def test_create_dataframe_schema_mismatch(self):
+        input = [Row(a=1)]
+        rdd = self.sc.parallelize(range(3)).map(lambda i: Row(a=i))
+        schema = StructType([StructField("a", IntegerType()), StructField("b", StringType())])
+        df = self.spark.createDataFrame(rdd, schema)
+        self.assertRaises(Exception, lambda: df.show())
+
+    def test_serialize_nested_array_and_map(self):
+        d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})]
+        rdd = self.sc.parallelize(d)
+        df = self.spark.createDataFrame(rdd)
+        row = df.head()
+        self.assertEqual(1, len(row.l))
+        self.assertEqual(1, row.l[0].a)
+        self.assertEqual("2", row.d["key"].d)
+
+        l = df.rdd.map(lambda x: x.l).first()
+        self.assertEqual(1, len(l))
+        self.assertEqual('s', l[0].b)
+
+        d = df.rdd.map(lambda x: x.d).first()
+        self.assertEqual(1, len(d))
+        self.assertEqual(1.0, d["key"].c)
+
+        row = df.rdd.map(lambda x: x.d["key"]).first()
+        self.assertEqual(1.0, row.c)
+        self.assertEqual("2", row.d)
+
+    def test_infer_schema(self):
+        d = [Row(l=[], d={}, s=None),
+             Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")}, s="")]
+        rdd = self.sc.parallelize(d)
+        df = self.spark.createDataFrame(rdd)
+        self.assertEqual([], df.rdd.map(lambda r: r.l).first())
+        self.assertEqual([None, ""], df.rdd.map(lambda r: r.s).collect())
+        df.createOrReplaceTempView("test")
+        result = self.spark.sql("SELECT l[0].a from test where d['key'].d = '2'")
+        self.assertEqual(1, result.head()[0])
+
+        df2 = self.spark.createDataFrame(rdd, samplingRatio=1.0)
+        self.assertEqual(df.schema, df2.schema)
+        self.assertEqual({}, df2.rdd.map(lambda r: r.d).first())
+        self.assertEqual([None, ""], df2.rdd.map(lambda r: r.s).collect())
+        df2.createOrReplaceTempView("test2")
+        result = self.spark.sql("SELECT l[0].a from test2 where d['key'].d = '2'")
+        self.assertEqual(1, result.head()[0])
+
+    def test_infer_schema_not_enough_names(self):
+        df = self.spark.createDataFrame([["a", "b"]], ["col1"])
+        self.assertEqual(df.columns, ['col1', '_2'])
+
+    def test_infer_schema_fails(self):
+        with self.assertRaisesRegexp(TypeError, 'field a'):
+            self.spark.createDataFrame(self.spark.sparkContext.parallelize([[1, 1], ["x", 1]]),
+                                       schema=["a", "b"], samplingRatio=0.99)
+
+    def test_infer_nested_schema(self):
+        NestedRow = Row("f1", "f2")
+        nestedRdd1 = self.sc.parallelize([NestedRow([1, 2], {"row1": 1.0}),
+                                          NestedRow([2, 3], {"row2": 2.0})])
+        df = self.spark.createDataFrame(nestedRdd1)
+        self.assertEqual(Row(f1=[1, 2], f2={u'row1': 1.0}), df.collect()[0])
+
+        nestedRdd2 = self.sc.parallelize([NestedRow([[1, 2], [2, 3]], [1, 2]),
+                                          NestedRow([[2, 3], [3, 4]], [2, 3])])
+        df = self.spark.createDataFrame(nestedRdd2)
+        self.assertEqual(Row(f1=[[1, 2], [2, 3]], f2=[1, 2]), df.collect()[0])
+
+        from collections import namedtuple
+        CustomRow = namedtuple('CustomRow', 'field1 field2')
+        rdd = self.sc.parallelize([CustomRow(field1=1, field2="row1"),
+                                   CustomRow(field1=2, field2="row2"),
+                                   CustomRow(field1=3, field2="row3")])
+        df = self.spark.createDataFrame(rdd)
+        self.assertEqual(Row(field1=1, field2=u'row1'), df.first())
+
+    def test_create_dataframe_from_dict_respects_schema(self):
+        df = self.spark.createDataFrame([{'a': 1}], ["b"])
+        self.assertEqual(df.columns, ['b'])
+
+    def test_create_dataframe_from_objects(self):
+        data = [MyObject(1, "1"), MyObject(2, "2")]
+        df = self.spark.createDataFrame(data)
+        self.assertEqual(df.dtypes, [("key", "bigint"), ("value", "string")])
+        self.assertEqual(df.first(), Row(key=1, value="1"))
+
+    def test_select_null_literal(self):
+        df = self.spark.sql("select null as col")
+        self.assertEqual(Row(col=None), df.first())
+
+    def test_apply_schema(self):
+        from datetime import date, datetime
+        rdd = self.sc.parallelize([(127, -128, -32768, 32767, 2147483647, 1.0,
+                                    date(2010, 1, 1), datetime(2010, 1, 1, 1, 1, 1),
+                                    {"a": 1}, (2,), [1, 2, 3], None)])
+        schema = StructType([
+            StructField("byte1", ByteType(), False),
+            StructField("byte2", ByteType(), False),
+            StructField("short1", ShortType(), False),
+            StructField("short2", ShortType(), False),
+            StructField("int1", IntegerType(), False),
+            StructField("float1", FloatType(), False),
+            StructField("date1", DateType(), False),
+            StructField("time1", TimestampType(), False),
+            StructField("map1", MapType(StringType(), IntegerType(), False), False),
+            StructField("struct1", StructType([StructField("b", ShortType(), False)]), False),
+            StructField("list1", ArrayType(ByteType(), False), False),
+            StructField("null1", DoubleType(), True)])
+        df = self.spark.createDataFrame(rdd, schema)
+        results = df.rdd.map(lambda x: (x.byte1, x.byte2, x.short1, x.short2, x.int1, x.float1,
+                             x.date1, x.time1, x.map1["a"], x.struct1.b, x.list1, x.null1))
+        r = (127, -128, -32768, 32767, 2147483647, 1.0, date(2010, 1, 1),
+             datetime(2010, 1, 1, 1, 1, 1), 1, 2, [1, 2, 3], None)
+        self.assertEqual(r, results.first())
+
+        df.createOrReplaceTempView("table2")
+        r = self.spark.sql("SELECT byte1 - 1 AS byte1, byte2 + 1 AS byte2, " +
+                           "short1 + 1 AS short1, short2 - 1 AS short2, int1 - 1 AS int1, " +
+                           "float1 + 1.5 as float1 FROM table2").first()
+
+        self.assertEqual((126, -127, -32767, 32766, 2147483646, 2.5), tuple(r))
+
+    def test_struct_in_map(self):
+        d = [Row(m={Row(i=1): Row(s="")})]
+        df = self.sc.parallelize(d).toDF()
+        k, v = list(df.head().m.items())[0]
+        self.assertEqual(1, k.i)
+        self.assertEqual("", v.s)
+
+    def test_convert_row_to_dict(self):
+        row = Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})
+        self.assertEqual(1, row.asDict()['l'][0].a)
+        df = self.sc.parallelize([row]).toDF()
+        df.createOrReplaceTempView("test")
+        row = self.spark.sql("select l, d from test").head()
+        self.assertEqual(1, row.asDict()["l"][0].a)
+        self.assertEqual(1.0, row.asDict()['d']['key'].c)
+
+    def test_udt(self):
+        from pyspark.sql.types import _parse_datatype_json_string, _infer_type, _make_type_verifier
+        from pyspark.sql.tests import ExamplePointUDT, ExamplePoint
+
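+        # check_datatype verifies that a data type survives both a pickle round-trip
+        # and a Python -> JVM -> Python JSON round-trip unchanged.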
+        def check_datatype(datatype):
+            pickled = pickle.loads(pickle.dumps(datatype))
+            assert datatype == pickled
+            scala_datatype = self.spark._jsparkSession.parseDataType(datatype.json())
+            python_datatype = _parse_datatype_json_string(scala_datatype.json())
+            assert datatype == python_datatype
+
+        check_datatype(ExamplePointUDT())
+        structtype_with_udt = StructType([StructField("label", DoubleType(), False),
+                                          StructField("point", ExamplePointUDT(), False)])
+        check_datatype(structtype_with_udt)
+        p = ExamplePoint(1.0, 2.0)
+        self.assertEqual(_infer_type(p), ExamplePointUDT())
+        _make_type_verifier(ExamplePointUDT())(ExamplePoint(1.0, 2.0))
+        self.assertRaises(ValueError, lambda: _make_type_verifier(ExamplePointUDT())([1.0, 2.0]))
+
+        check_datatype(PythonOnlyUDT())
+        structtype_with_udt = StructType([StructField("label", DoubleType(), False),
+                                          StructField("point", PythonOnlyUDT(), False)])
+        check_datatype(structtype_with_udt)
+        p = PythonOnlyPoint(1.0, 2.0)
+        self.assertEqual(_infer_type(p), PythonOnlyUDT())
+        _make_type_verifier(PythonOnlyUDT())(PythonOnlyPoint(1.0, 2.0))
+        self.assertRaises(
+            ValueError,
+            lambda: _make_type_verifier(PythonOnlyUDT())([1.0, 2.0]))
+
+    def test_simple_udt_in_df(self):
+        schema = StructType().add("key", LongType()).add("val", PythonOnlyUDT())
+        df = self.spark.createDataFrame(
+            [(i % 3, PythonOnlyPoint(float(i), float(i))) for i in range(10)],
+            schema=schema)
+        df.collect()
+
+    def test_nested_udt_in_df(self):
+        schema = StructType().add("key", LongType()).add("val", ArrayType(PythonOnlyUDT()))
+        df = self.spark.createDataFrame(
+            [(i % 3, [PythonOnlyPoint(float(i), float(i))]) for i in range(10)],
+            schema=schema)
+        df.collect()
+
+        schema = StructType().add("key", LongType()).add("val",
+                                                         MapType(LongType(), PythonOnlyUDT()))
+        df = self.spark.createDataFrame(
+            [(i % 3, {i % 3: PythonOnlyPoint(float(i + 1), float(i + 1))}) for i in range(10)],
+            schema=schema)
+        df.collect()
+
+    def test_complex_nested_udt_in_df(self):
+        from pyspark.sql.functions import udf
+
+        schema = StructType().add("key", LongType()).add("val", PythonOnlyUDT())
+        df = self.spark.createDataFrame(
+            [(i % 3, PythonOnlyPoint(float(i), float(i))) for i in range(10)],
+            schema=schema)
+        df.collect()
+
+        gd = df.groupby("key").agg({"val": "collect_list"})
+        gd.collect()
+        udf = udf(lambda k, v: [(k, v[0])], ArrayType(df.schema))
+        gd.select(udf(*gd)).collect()
+
+    def test_udt_with_none(self):
+        df = self.spark.range(0, 10, 1, 1)
+
+        def myudf(x):
+            if x > 0:
+                return PythonOnlyPoint(float(x), float(x))
+
+        self.spark.catalog.registerFunction("udf", myudf, PythonOnlyUDT())
+        rows = [r[0] for r in df.selectExpr("udf(id)").take(2)]
+        self.assertEqual(rows, [None, PythonOnlyPoint(1, 1)])
+
+    def test_nonparam_udf_with_aggregate(self):
+        import pyspark.sql.functions as f
+
+        df = self.spark.createDataFrame([(1, 2), (1, 2)])
+        f_udf = f.udf(lambda: "const_str")
+        rows = df.distinct().withColumn("a", f_udf()).collect()
+        self.assertEqual(rows, [Row(_1=1, _2=2, a=u'const_str')])
+
+    def test_infer_schema_with_udt(self):
+        from pyspark.sql.tests import ExamplePoint, ExamplePointUDT
+        row = Row(label=1.0, point=ExamplePoint(1.0, 2.0))
+        df = self.spark.createDataFrame([row])
+        schema = df.schema
+        field = [f for f in schema.fields if f.name == "point"][0]
+        self.assertEqual(type(field.dataType), ExamplePointUDT)
+        df.createOrReplaceTempView("labeled_point")
+        point = self.spark.sql("SELECT point FROM labeled_point").head().point
+        self.assertEqual(point, ExamplePoint(1.0, 2.0))
+
+        row = Row(label=1.0, point=PythonOnlyPoint(1.0, 2.0))
+        df = self.spark.createDataFrame([row])
+        schema = df.schema
+        field = [f for f in schema.fields if f.name == "point"][0]
+        self.assertEqual(type(field.dataType), PythonOnlyUDT)
+        df.createOrReplaceTempView("labeled_point")
+        point = self.spark.sql("SELECT point FROM labeled_point").head().point
+        self.assertEqual(point, PythonOnlyPoint(1.0, 2.0))
+
+    def test_apply_schema_with_udt(self):
+        from pyspark.sql.tests import ExamplePoint, ExamplePointUDT
+        row = (1.0, ExamplePoint(1.0, 2.0))
+        schema = StructType([StructField("label", DoubleType(), False),
+                             StructField("point", ExamplePointUDT(), False)])
+        df = self.spark.createDataFrame([row], schema)
+        point = df.head().point
+        self.assertEqual(point, ExamplePoint(1.0, 2.0))
+
+        row = (1.0, PythonOnlyPoint(1.0, 2.0))
+        schema = StructType([StructField("label", DoubleType(), False),
+                             StructField("point", PythonOnlyUDT(), False)])
+        df = self.spark.createDataFrame([row], schema)
+        point = df.head().point
+        self.assertEqual(point, PythonOnlyPoint(1.0, 2.0))
+
+    def test_udf_with_udt(self):
+        from pyspark.sql.tests import ExamplePoint, ExamplePointUDT
+        row = Row(label=1.0, point=ExamplePoint(1.0, 2.0))
+        df = self.spark.createDataFrame([row])
+        self.assertEqual(1.0, df.rdd.map(lambda r: r.point.x).first())
+        udf = UserDefinedFunction(lambda p: p.y, DoubleType())
+        self.assertEqual(2.0, df.select(udf(df.point)).first()[0])
+        udf2 = UserDefinedFunction(lambda p: ExamplePoint(p.x + 1, p.y + 1), ExamplePointUDT())
+        self.assertEqual(ExamplePoint(2.0, 3.0), df.select(udf2(df.point)).first()[0])
+
+        row = Row(label=1.0, point=PythonOnlyPoint(1.0, 2.0))
+        df = self.spark.createDataFrame([row])
+        self.assertEqual(1.0, df.rdd.map(lambda r: r.point.x).first())
+        udf = UserDefinedFunction(lambda p: p.y, DoubleType())
+        self.assertEqual(2.0, df.select(udf(df.point)).first()[0])
+        udf2 = UserDefinedFunction(lambda p: PythonOnlyPoint(p.x + 1, p.y + 1), PythonOnlyUDT())
+        self.assertEqual(PythonOnlyPoint(2.0, 3.0), df.select(udf2(df.point)).first()[0])
+
+    def test_parquet_with_udt(self):
+        from pyspark.sql.tests import ExamplePoint, ExamplePointUDT
+        row = Row(label=1.0, point=ExamplePoint(1.0, 2.0))
+        df0 = self.spark.createDataFrame([row])
+        output_dir = os.path.join(self.tempdir.name, "labeled_point")
+        df0.write.parquet(output_dir)
+        df1 = self.spark.read.parquet(output_dir)
+        point = df1.head().point
+        self.assertEqual(point, ExamplePoint(1.0, 2.0))
+
+        row = Row(label=1.0, point=PythonOnlyPoint(1.0, 2.0))
+        df0 = self.spark.createDataFrame([row])
+        df0.write.parquet(output_dir, mode='overwrite')
+        df1 = self.spark.read.parquet(output_dir)
+        point = df1.head().point
+        self.assertEqual(point, PythonOnlyPoint(1.0, 2.0))
+
+    def test_union_with_udt(self):
+        from pyspark.sql.tests import ExamplePoint, ExamplePointUDT
+        row1 = (1.0, ExamplePoint(1.0, 2.0))
+        row2 = (2.0, ExamplePoint(3.0, 4.0))
+        schema = StructType([StructField("label", DoubleType(), False),
+                             StructField("point", ExamplePointUDT(), False)])
+        df1 = self.spark.createDataFrame([row1], schema)
+        df2 = self.spark.createDataFrame([row2], schema)
+
+        result = df1.union(df2).orderBy("label").collect()
+        self.assertEqual(
+            result,
+            [
+                Row(label=1.0, point=ExamplePoint(1.0, 2.0)),
+                Row(label=2.0, point=ExamplePoint(3.0, 4.0))
+            ]
+        )
+
+    def test_cast_to_string_with_udt(self):
+        from pyspark.sql.tests import ExamplePointUDT, ExamplePoint
+        from pyspark.sql.functions import col
+        row = (ExamplePoint(1.0, 2.0), PythonOnlyPoint(3.0, 4.0))
+        schema = StructType([StructField("point", ExamplePointUDT(), False),
+                             StructField("pypoint", PythonOnlyUDT(), False)])
+        df = self.spark.createDataFrame([row], schema)
+
+        result = df.select(col('point').cast('string'), col('pypoint').cast('string')).head()
+        self.assertEqual(result, Row(point=u'(1.0, 2.0)', pypoint=u'[3.0, 4.0]'))
+
+    def test_column_operators(self):
+        ci = self.df.key
+        cs = self.df.value
+        c = ci == cs
+        self.assertTrue(isinstance((- ci - 1 - 2) % 3 * 2.5 / 3.5, Column))
+        rcc = (1 + ci), (1 - ci), (1 * ci), (1 / ci), (1 % ci), (1 ** ci), (ci ** 1)
+        self.assertTrue(all(isinstance(c, Column) for c in rcc))
+        cb = [ci == 5, ci != 0, ci > 3, ci < 4, ci >= 0, ci <= 7]
+        self.assertTrue(all(isinstance(c, Column) for c in cb))
+        cbool = (ci & ci), (ci | ci), (~ci)
+        self.assertTrue(all(isinstance(c, Column) for c in cbool))
+        css = cs.contains('a'), cs.like('a'), cs.rlike('a'), cs.asc(), cs.desc(),\
+            cs.startswith('a'), cs.endswith('a'), ci.eqNullSafe(cs)
+        self.assertTrue(all(isinstance(c, Column) for c in css))
+        self.assertTrue(isinstance(ci.cast(LongType()), Column))
+        self.assertRaisesRegexp(ValueError,
+                                "Cannot apply 'in' operator against a column",
+                                lambda: 1 in cs)
+
+    def test_column_getitem(self):
+        from pyspark.sql.functions import col
+
+        self.assertIsInstance(col("foo")[1:3], Column)
+        self.assertIsInstance(col("foo")[0], Column)
+        self.assertIsInstance(col("foo")["bar"], Column)
+        self.assertRaises(ValueError, lambda: col("foo")[0:10:2])
+
+    def test_column_select(self):
+        df = self.df
+        self.assertEqual(self.testData, df.select("*").collect())
+        self.assertEqual(self.testData, df.select(df.key, df.value).collect())
+        self.assertEqual([Row(value='1')], df.where(df.key == 1).select(df.value).collect())
+
+    def test_freqItems(self):
+        vals = [Row(a=1, b=-2.0) if i % 2 == 0 else Row(a=i, b=i * 1.0) for i in range(100)]
+        df = self.sc.parallelize(vals).toDF()
+        items = df.stat.freqItems(("a", "b"), 0.4).collect()[0]
+        self.assertTrue(1 in items[0])
+        self.assertTrue(-2.0 in items[1])
+
+    def test_aggregator(self):
+        df = self.df
+        g = df.groupBy()
+        self.assertEqual([99, 100], sorted(g.agg({'key': 'max', 'value': 'count'}).collect()[0]))
+        self.assertEqual([Row(**{"AVG(key#0)": 49.5})], g.mean().collect())
+
+        from pyspark.sql import functions
+        self.assertEqual((0, u'99'),
+                         tuple(g.agg(functions.first(df.key), functions.last(df.value)).first()))
+        self.assertTrue(95 < g.agg(functions.approxCountDistinct(df.key)).first()[0])
+        self.assertEqual(100, g.agg(functions.countDistinct(df.value)).first()[0])
+
+    def test_first_last_ignorenulls(self):
+        from pyspark.sql import functions
+        df = self.spark.range(0, 100)
+        df2 = df.select(functions.when(df.id % 3 == 0, None).otherwise(df.id).alias("id"))
+        df3 = df2.select(functions.first(df2.id, False).alias('a'),
+                         functions.first(df2.id, True).alias('b'),
+                         functions.last(df2.id, False).alias('c'),
+                         functions.last(df2.id, True).alias('d'))
+        self.assertEqual([Row(a=None, b=1, c=None, d=98)], df3.collect())
+
+    def test_approxQuantile(self):
+        df = self.sc.parallelize([Row(a=i, b=i+10) for i in range(10)]).toDF()
+        for f in ["a", u"a"]:
+            aq = df.stat.approxQuantile(f, [0.1, 0.5, 0.9], 0.1)
+            self.assertTrue(isinstance(aq, list))
+            self.assertEqual(len(aq), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aq))
+        aqs = df.stat.approxQuantile(["a", u"b"], [0.1, 0.5, 0.9], 0.1)
+        self.assertTrue(isinstance(aqs, list))
+        self.assertEqual(len(aqs), 2)
+        self.assertTrue(isinstance(aqs[0], list))
+        self.assertEqual(len(aqs[0]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqs[0]))
+        self.assertTrue(isinstance(aqs[1], list))
+        self.assertEqual(len(aqs[1]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqs[1]))
+        aqt = df.stat.approxQuantile((u"a", "b"), [0.1, 0.5, 0.9], 0.1)
+        self.assertTrue(isinstance(aqt, list))
+        self.assertEqual(len(aqt), 2)
+        self.assertTrue(isinstance(aqt[0], list))
+        self.assertEqual(len(aqt[0]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqt[0]))
+        self.assertTrue(isinstance(aqt[1], list))
+        self.assertEqual(len(aqt[1]), 3)
+        self.assertTrue(all(isinstance(q, float) for q in aqt[1]))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(123, [0.1, 0.9], 0.1))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(("a", 123), [0.1, 0.9], 0.1))
+        self.assertRaises(ValueError, lambda: df.stat.approxQuantile(["a", 123], [0.1, 0.9], 0.1))
+
+    def test_corr(self):
+        import math
+        df = self.sc.parallelize([Row(a=i, b=math.sqrt(i)) for i in range(10)]).toDF()
+        corr = df.stat.corr(u"a", "b")
+        self.assertTrue(abs(corr - 0.95734012) < 1e-6)
+
+    def test_sampleby(self):
+        df = self.sc.parallelize([Row(a=i, b=(i % 3)) for i in range(10)]).toDF()
+        sampled = df.stat.sampleBy(u"b", fractions={0: 0.5, 1: 0.5}, seed=0)
+        self.assertTrue(sampled.count() == 3)
+
+    def test_cov(self):
+        df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
+        cov = df.stat.cov(u"a", "b")
+        self.assertTrue(abs(cov - 55.0 / 3) < 1e-6)
+
+    def test_crosstab(self):
+        df = self.sc.parallelize([Row(a=i % 3, b=i % 2) for i in range(1, 7)]).toDF()
+        ct = df.stat.crosstab(u"a", "b").collect()
+        ct = sorted(ct, key=lambda x: x[0])
+        for i, row in enumerate(ct):
+            self.assertEqual(row[0], str(i))
+            self.assertEqual(row[1], 1)
+            self.assertEqual(row[2], 1)
+
+    def test_math_functions(self):
+        df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
+        from pyspark.sql import functions
+        import math
+
+        def get_values(l):
+            return [j[0] for j in l]
+
+        def assert_close(a, b):
+            c = get_values(b)
+            diff = [abs(v - c[k]) < 1e-6 for k, v in enumerate(a)]
+            return sum(diff) == len(a)
+        assert_close([math.cos(i) for i in range(10)],
+                     df.select(functions.cos(df.a)).collect())
+        assert_close([math.cos(i) for i in range(10)],
+                     df.select(functions.cos("a")).collect())
+        assert_close([math.sin(i) for i in range(10)],
+                     df.select(functions.sin(df.a)).collect())
+        assert_close([math.sin(i) for i in range(10)],
+                     df.select(functions.sin(df['a'])).collect())
+        assert_close([math.pow(i, 2 * i) for i in range(10)],
+                     df.select(functions.pow(df.a, df.b)).collect())
+        assert_close([math.pow(i, 2) for i in range(10)],
+                     df.select(functions.pow(df.a, 2)).collect())
+        assert_close([math.pow(i, 2) for i in range(10)],
+                     df.select(functions.pow(df.a, 2.0)).collect())
+        assert_close([math.hypot(i, 2 * i) for i in range(10)],
+                     df.select(functions.hypot(df.a, df.b)).collect())
+
+    def test_rand_functions(self):
+        df = self.df
+        from pyspark.sql import functions
+        rnd = df.select('key', functions.rand()).collect()
+        for row in rnd:
+            assert row[1] >= 0.0 and row[1] <= 1.0, "got: %s" % row[1]
+        rndn = df.select('key', functions.randn(5)).collect()
+        for row in rndn:
+            assert row[1] >= -4.0 and row[1] <= 4.0, "got: %s" % row[1]
+
+        # If the specified seed is 0, we should use it.
+        # https://issues.apache.org/jira/browse/SPARK-9691
+        rnd1 = df.select('key', functions.rand(0)).collect()
+        rnd2 = df.select('key', functions.rand(0)).collect()
+        self.assertEqual(sorted(rnd1), sorted(rnd2))
+
+        rndn1 = df.select('key', functions.randn(0)).collect()
+        rndn2 = df.select('key', functions.randn(0)).collect()
+        self.assertEqual(sorted(rndn1), sorted(rndn2))
+
+    def test_string_functions(self):
+        from pyspark.sql.functions import col, lit
+        df = self.spark.createDataFrame([['nick']], schema=['name'])
+        self.assertRaisesRegexp(
+            TypeError,
+            "must be the same type",
+            lambda: df.select(col('name').substr(0, lit(1))))
+        if sys.version_info.major == 2:
+            self.assertRaises(
+                TypeError,
+                lambda: df.select(col('name').substr(long(0), long(1))))
+
+    def test_array_contains_function(self):
+        from pyspark.sql.functions import array_contains
+
+        df = self.spark.createDataFrame([(["1", "2", "3"],), ([],)], ['data'])
+        actual = df.select(array_contains(df.data, "1").alias('b')).collect()
+        self.assertEqual([Row(b=True), Row(b=False)], actual)
+
+    def test_between_function(self):
+        df = self.sc.parallelize([
+            Row(a=1, b=2, c=3),
+            Row(a=2, b=1, c=3),
+            Row(a=4, b=1, c=4)]).toDF()
+        self.assertEqual([Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)],
+                         df.filter(df.a.between(df.b, df.c)).collect())
+
+    def test_struct_type(self):
+        struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
+        struct2 = StructType([StructField("f1", StringType(), True),
+                              StructField("f2", StringType(), True, None)])
+        self.assertEqual(struct1.fieldNames(), struct2.names)
+        self.assertEqual(struct1, struct2)
+
+        struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
+        struct2 = StructType([StructField("f1", StringType(), True)])
+        self.assertNotEqual(struct1.fieldNames(), struct2.names)
+        self.assertNotEqual(struct1, struct2)
+
+        struct1 = (StructType().add(StructField("f1", StringType(), True))
+                   .add(StructField("f2", StringType(), True, None)))
+        struct2 = StructType([StructField("f1", StringType(), True),
+                              StructField("f2", StringType(), True, None)])
+        self.assertEqual(struct1.fieldNames(), struct2.names)
+        self.assertEqual(struct1, struct2)
+
+        struct1 = (StructType().add(StructField("f1", StringType(), True))
+                   .add(StructField("f2", StringType(), True, None)))
+        struct2 = StructType([StructField("f1", StringType(), True)])
+        self.assertNotEqual(struct1.fieldNames(), struct2.names)
+        self.assertNotEqual(struct1, struct2)
+
+        # Catch exception raised during improper construction
+        self.assertRaises(ValueError, lambda: StructType().add("name"))
+
+        struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
+        for field in struct1:
+            self.assertIsInstance(field, StructField)
+
+        struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
+        self.assertEqual(len(struct1), 2)
+
+        struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
+        self.assertIs(struct1["f1"], struct1.fields[0])
+        self.assertIs(struct1[0], struct1.fields[0])
+        self.assertEqual(struct1[0:1], StructType(struct1.fields[0:1]))
+        self.assertRaises(KeyError, lambda: struct1["f9"])
+        self.assertRaises(IndexError, lambda: struct1[9])
+        self.assertRaises(TypeError, lambda: struct1[9.9])
+
+    def test_parse_datatype_string(self):
+        from pyspark.sql.types import _all_atomic_types, _parse_datatype_string
+        for k, t in _all_atomic_types.items():
+            if t != NullType:
+                self.assertEqual(t(), _parse_datatype_string(k))
+        self.assertEqual(IntegerType(), _parse_datatype_string("int"))
+        self.assertEqual(DecimalType(1, 1), _parse_datatype_string("decimal(1  ,1)"))
+        self.assertEqual(DecimalType(10, 1), _parse_datatype_string("decimal( 10,1 )"))
+        self.assertEqual(DecimalType(11, 1), _parse_datatype_string("decimal(11,1)"))
+        self.assertEqual(
+            ArrayType(IntegerType()),
+            _parse_datatype_string("array<int >"))
+        self.assertEqual(
+            MapType(IntegerType(), DoubleType()),
+            _parse_datatype_string("map< int, double  >"))
+        self.assertEqual(
+            StructType([StructField("a", IntegerType()), StructField("c", DoubleType())]),
+            _parse_datatype_string("struct<a:int, c:double >"))
+        self.assertEqual(
+            StructType([StructField("a", IntegerType()), StructField("c", DoubleType())]),
+            _parse_datatype_string("a:int, c:double"))
+        self.assertEqual(
+            StructType([StructField("a", IntegerType()), StructField("c", DoubleType())]),
+            _parse_datatype_string("a INT, c DOUBLE"))
+
+    def test_metadata_null(self):
+        schema = StructType([StructField("f1", StringType(), True, None),
+                             StructField("f2", StringType(), True, {'a': None})])
+        rdd = self.sc.parallelize([["a", "b"], ["c", "d"]])
+        self.spark.createDataFrame(rdd, schema)
+
+    def test_save_and_load(self):
+        df = self.df
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        df.write.json(tmpPath)
+        actual = self.spark.read.json(tmpPath)
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+
+        schema = StructType([StructField("value", StringType(), True)])
+        actual = self.spark.read.json(tmpPath, schema)
+        self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect()))
+
+        df.write.json(tmpPath, "overwrite")
+        actual = self.spark.read.json(tmpPath)
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+
+        df.write.save(format="json", mode="overwrite", path=tmpPath,
+                      noUse="this options will not be used in save.")
+        actual = self.spark.read.load(format="json", path=tmpPath,
+                                      noUse="this options will not be used in load.")
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+
+        defaultDataSourceName = self.spark.conf.get("spark.sql.sources.default",
+                                                    "org.apache.spark.sql.parquet")
+        self.spark.sql("SET spark.sql.sources.default=org.apache.spark.sql.json")
+        actual = self.spark.read.load(path=tmpPath)
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+        self.spark.sql("SET spark.sql.sources.default=" + defaultDataSourceName)
+
+        csvpath = os.path.join(tempfile.mkdtemp(), 'data')
+        df.write.option('quote', None).format('csv').save(csvpath)
+
+        shutil.rmtree(tmpPath)
+
+    def test_save_and_load_builder(self):
+        df = self.df
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        df.write.json(tmpPath)
+        actual = self.spark.read.json(tmpPath)
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+
+        schema = StructType([StructField("value", StringType(), True)])
+        actual = self.spark.read.json(tmpPath, schema)
+        self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect()))
+
+        df.write.mode("overwrite").json(tmpPath)
+        actual = self.spark.read.json(tmpPath)
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+
+        df.write.mode("overwrite").options(noUse="this options will not be used in save.")\
+                .option("noUse", "this option will not be used in save.")\
+                .format("json").save(path=tmpPath)
+        actual =\
+            self.spark.read.format("json")\
+                           .load(path=tmpPath, noUse="this options will not be used in load.")
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+
+        defaultDataSourceName = self.spark.conf.get("spark.sql.sources.default",
+                                                    "org.apache.spark.sql.parquet")
+        self.spark.sql("SET spark.sql.sources.default=org.apache.spark.sql.json")
+        actual = self.spark.read.load(path=tmpPath)
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+        self.spark.sql("SET spark.sql.sources.default=" + defaultDataSourceName)
+
+        shutil.rmtree(tmpPath)
+
+    def test_stream_trigger(self):
+        df = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+
+        # Should take at least one arg
+        try:
+            df.writeStream.trigger()
+        except ValueError:
+            pass
+
+        # Should not take multiple args
+        try:
+            df.writeStream.trigger(once=True, processingTime='5 seconds')
+        except ValueError:
+            pass
+
+        # Should not take multiple args
+        try:
+            df.writeStream.trigger(processingTime='5 seconds', continuous='1 second')
+        except ValueError:
+            pass
+
+        # Should take only keyword args
+        try:
+            df.writeStream.trigger('5 seconds')
+            self.fail("Should have thrown an exception")
+        except TypeError:
+            pass
+
+    def test_stream_read_options(self):
+        schema = StructType([StructField("data", StringType(), False)])
+        df = self.spark.readStream\
+            .format('text')\
+            .option('path', 'python/test_support/sql/streaming')\
+            .schema(schema)\
+            .load()
+        self.assertTrue(df.isStreaming)
+        self.assertEqual(df.schema.simpleString(), "struct<data:string>")
+
+    def test_stream_read_options_overwrite(self):
+        bad_schema = StructType([StructField("test", IntegerType(), False)])
+        schema = StructType([StructField("data", StringType(), False)])
+        df = self.spark.readStream.format('csv').option('path', 'python/test_support/sql/fake') \
+            .schema(bad_schema)\
+            .load(path='python/test_support/sql/streaming', schema=schema, format='text')
+        self.assertTrue(df.isStreaming)
+        self.assertEqual(df.schema.simpleString(), "struct<data:string>")
+
+    def test_stream_save_options(self):
+        df = self.spark.readStream.format('text').load('python/test_support/sql/streaming') \
+            .withColumn('id', lit(1))
+        for q in self.spark._wrapped.streams.active:
+            q.stop()
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        self.assertTrue(df.isStreaming)
+        out = os.path.join(tmpPath, 'out')
+        chk = os.path.join(tmpPath, 'chk')
+        q = df.writeStream.option('checkpointLocation', chk).queryName('this_query') \
+            .format('parquet').partitionBy('id').outputMode('append').option('path', out).start()
+        try:
+            self.assertEqual(q.name, 'this_query')
+            self.assertTrue(q.isActive)
+            q.processAllAvailable()
+            output_files = []
+            for _, _, files in os.walk(out):
+                output_files.extend([f for f in files if not f.startswith('.')])
+            self.assertTrue(len(output_files) > 0)
+            self.assertTrue(len(os.listdir(chk)) > 0)
+        finally:
+            q.stop()
+            shutil.rmtree(tmpPath)
+
+    def test_stream_save_options_overwrite(self):
+        df = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+        for q in self.spark._wrapped.streams.active:
+            q.stop()
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        self.assertTrue(df.isStreaming)
+        out = os.path.join(tmpPath, 'out')
+        chk = os.path.join(tmpPath, 'chk')
+        fake1 = os.path.join(tmpPath, 'fake1')
+        fake2 = os.path.join(tmpPath, 'fake2')
+        q = df.writeStream.option('checkpointLocation', fake1)\
+            .format('memory').option('path', fake2) \
+            .queryName('fake_query').outputMode('append') \
+            .start(path=out, format='parquet', queryName='this_query', checkpointLocation=chk)
+
+        try:
+            self.assertEqual(q.name, 'this_query')
+            self.assertTrue(q.isActive)
+            q.processAllAvailable()
+            output_files = []
+            for _, _, files in os.walk(out):
+                output_files.extend([f for f in files if not f.startswith('.')])
+            self.assertTrue(len(output_files) > 0)
+            self.assertTrue(len(os.listdir(chk)) > 0)
+            self.assertFalse(os.path.isdir(fake1))  # should not have been created
+            self.assertFalse(os.path.isdir(fake2))  # should not have been created
+        finally:
+            q.stop()
+            shutil.rmtree(tmpPath)
+
+    def test_stream_status_and_progress(self):
+        df = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+        for q in self.spark._wrapped.streams.active:
+            q.stop()
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        self.assertTrue(df.isStreaming)
+        out = os.path.join(tmpPath, 'out')
+        chk = os.path.join(tmpPath, 'chk')
+
+        def func(x):
+            time.sleep(1)
+            return x
+
+        from pyspark.sql.functions import col, udf
+        sleep_udf = udf(func)
+
+        # Use "sleep_udf" to delay the progress update so that we can test `lastProgress` when there
+        # were no updates.
+        q = df.select(sleep_udf(col("value")).alias('value')).writeStream \
+            .start(path=out, format='parquet', queryName='this_query', checkpointLocation=chk)
+        try:
+            # "lastProgress" will return None in most cases. However, as it may be flaky when
+            # Jenkins is very slow, we don't assert it. If there is something wrong, "lastProgress"
+            # may throw error with a high chance and make this test flaky, so we should still be
+            # able to detect broken codes.
+            q.lastProgress
+
+            q.processAllAvailable()
+            lastProgress = q.lastProgress
+            recentProgress = q.recentProgress
+            status = q.status
+            self.assertEqual(lastProgress['name'], q.name)
+            self.assertEqual(lastProgress['id'], q.id)
+            self.assertTrue(any(p == lastProgress for p in recentProgress))
+            self.assertTrue(
+                "message" in status and
+                "isDataAvailable" in status and
+                "isTriggerActive" in status)
+        finally:
+            q.stop()
+            shutil.rmtree(tmpPath)
+
+    def test_stream_await_termination(self):
+        df = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+        for q in self.spark._wrapped.streams.active:
+            q.stop()
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        self.assertTrue(df.isStreaming)
+        out = os.path.join(tmpPath, 'out')
+        chk = os.path.join(tmpPath, 'chk')
+        q = df.writeStream\
+            .start(path=out, format='parquet', queryName='this_query', checkpointLocation=chk)
+        try:
+            self.assertTrue(q.isActive)
+            try:
+                q.awaitTermination("hello")
+                self.fail("Expected a value exception")
+            except ValueError:
+                pass
+            now = time.time()
+            # test should take at least 2 seconds
+            res = q.awaitTermination(2.6)
+            duration = time.time() - now
+            self.assertTrue(duration >= 2)
+            self.assertFalse(res)
+        finally:
+            q.stop()
+            shutil.rmtree(tmpPath)
+
+    def test_stream_exception(self):
+        sdf = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+        sq = sdf.writeStream.format('memory').queryName('query_explain').start()
+        try:
+            sq.processAllAvailable()
+            self.assertEqual(sq.exception(), None)
+        finally:
+            sq.stop()
+
+        from pyspark.sql.functions import col, udf
+        from pyspark.sql.utils import StreamingQueryException
+        bad_udf = udf(lambda x: 1 / 0)
+        sq = sdf.select(bad_udf(col("value")))\
+            .writeStream\
+            .format('memory')\
+            .queryName('this_query')\
+            .start()
+        try:
+            # Process some data to fail the query
+            sq.processAllAvailable()
+            self.fail("bad udf should fail the query")
+        except StreamingQueryException as e:
+            # This is expected
+            self.assertTrue("ZeroDivisionError" in e.desc)
+        finally:
+            sq.stop()
+        self.assertTrue(type(sq.exception()) is StreamingQueryException)
+        self.assertTrue("ZeroDivisionError" in sq.exception().desc)
+
+    def test_query_manager_await_termination(self):
+        df = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+        for q in self.spark._wrapped.streams.active:
+            q.stop()
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        self.assertTrue(df.isStreaming)
+        out = os.path.join(tmpPath, 'out')
+        chk = os.path.join(tmpPath, 'chk')
+        q = df.writeStream\
+            .start(path=out, format='parquet', queryName='this_query', checkpointLocation=chk)
+        try:
+            self.assertTrue(q.isActive)
+            try:
+                self.spark._wrapped.streams.awaitAnyTermination("hello")
+                self.fail("Expected a value exception")
+            except ValueError:
+                pass
+            now = time.time()
+            # test should take at least 2 seconds
+            res = self.spark._wrapped.streams.awaitAnyTermination(2.6)
+            duration = time.time() - now
+            self.assertTrue(duration >= 2)
+            self.assertFalse(res)
+        finally:
+            q.stop()
+            shutil.rmtree(tmpPath)
+
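+    # Helper for the streaming foreach() tests below: each writer callback records an
+    # event as a small JSON file, so the tests can read the events back with
+    # spark.read.json and assert on what was opened, processed, and closed.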
+    class ForeachWriterTester:
+
+        def __init__(self, spark):
+            self.spark = spark
+
+        def write_open_event(self, partitionId, epochId):
+            self._write_event(
+                self.open_events_dir,
+                {'partition': partitionId, 'epoch': epochId})
+
+        def write_process_event(self, row):
+            self._write_event(self.process_events_dir, {'value': 'text'})
+
+        def write_close_event(self, error):
+            self._write_event(self.close_events_dir, {'error': str(error)})
+
+        def write_input_file(self):
+            self._write_event(self.input_dir, "text")
+
+        def open_events(self):
+            return self._read_events(self.open_events_dir, 'partition INT, epoch INT')
+
+        def process_events(self):
+            return self._read_events(self.process_events_dir, 'value STRING')
+
+        def close_events(self):
+            return self._read_events(self.close_events_dir, 'error STRING')
+
+        def run_streaming_query_on_writer(self, writer, num_files):
+            self._reset()
+            try:
+                sdf = self.spark.readStream.format('text').load(self.input_dir)
+                sq = sdf.writeStream.foreach(writer).start()
+                for i in range(num_files):
+                    self.write_input_file()
+                    sq.processAllAvailable()
+            finally:
+                self.stop_all()
+
+        def assert_invalid_writer(self, writer, msg=None):
+            self._reset()
+            try:
+                sdf = self.spark.readStream.format('text').load(self.input_dir)
+                sq = sdf.writeStream.foreach(writer).start()
+                self.write_input_file()
+                sq.processAllAvailable()
+                self.fail("invalid writer %s did not fail the query" % str(writer))  # not expected
+            except Exception as e:
+                if msg:
+                    assert msg in str(e), "%s not in %s" % (msg, str(e))
+
+            finally:
+                self.stop_all()
+
+        def stop_all(self):
+            for q in self.spark._wrapped.streams.active:
+                q.stop()
+
+        def _reset(self):
+            self.input_dir = tempfile.mkdtemp()
+            self.open_events_dir = tempfile.mkdtemp()
+            self.process_events_dir = tempfile.mkdtemp()
+            self.close_events_dir = tempfile.mkdtemp()
+
+        def _read_events(self, dir, json):
+            rows = self.spark.read.schema(json).json(dir).collect()
+            dicts = [row.asDict() for row in rows]
+            return dicts
+
+        def _write_event(self, dir, event):
+            import uuid
+            with open(os.path.join(dir, str(uuid.uuid4())), 'w') as f:
+                f.write("%s\n" % str(event))
+
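+        # Only the event directories are pickled so the tester can be shipped inside the
+        # foreach writer to executors; the SparkSession is not picklable and not needed there.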
+        def __getstate__(self):
+            return (self.open_events_dir, self.process_events_dir, self.close_events_dir)
+
+        def __setstate__(self, state):
+            self.open_events_dir, self.process_events_dir, self.close_events_dir = state
+
+    def test_streaming_foreach_with_simple_function(self):
+        tester = self.ForeachWriterTester(self.spark)
+
+        def foreach_func(row):
+            tester.write_process_event(row)
+
+        tester.run_streaming_query_on_writer(foreach_func, 2)
+        self.assertEqual(len(tester.process_events()), 2)
+
+    def test_streaming_foreach_with_basic_open_process_close(self):
+        tester = self.ForeachWriterTester(self.spark)
+
+        class ForeachWriter:
+            def open(self, partitionId, epochId):
+                tester.write_open_event(partitionId, epochId)
+                return True
+
+            def process(self, row):
+                tester.write_process_event(row)
+
+            def close(self, error):
+                tester.write_close_event(error)
+
+        tester.run_streaming_query_on_writer(ForeachWriter(), 2)
+
+        open_events = tester.open_events()
+        self.assertEqual(len(open_events), 2)
+        self.assertSetEqual(set([e['epoch'] for e in open_events]), {0, 1})
+
+        self.assertEqual(len(tester.process_events()), 2)
+
+        close_events = tester.close_events()
+        self.assertEqual(len(close_events), 2)
+        self.assertSetEqual(set([e['error'] for e in close_events]), {'None'})
+
+    def test_streaming_foreach_with_open_returning_false(self):
+        tester = self.ForeachWriterTester(self.spark)
+
+        class ForeachWriter:
+            def open(self, partition_id, epoch_id):
+                tester.write_open_event(partition_id, epoch_id)
+                return False
+
+            def process(self, row):
+                tester.write_process_event(row)
+
+            def close(self, error):
+                tester.write_close_event(error)
+
+        tester.run_streaming_query_on_writer(ForeachWriter(), 2)
+
+        self.assertEqual(len(tester.open_events()), 2)
+
+        self.assertEqual(len(tester.process_events()), 0)  # no row was processed
+
+        close_events = tester.close_events()
+        self.assertEqual(len(close_events), 2)
+        self.assertSetEqual(set([e['error'] for e in close_events]), {'None'})
+
+    def test_streaming_foreach_without_open_method(self):
+        tester = self.ForeachWriterTester(self.spark)
+
+        class ForeachWriter:
+            def process(self, row):
+                tester.write_process_event(row)
+
+            def close(self, error):
+                tester.write_close_event(error)
+
+        tester.run_streaming_query_on_writer(ForeachWriter(), 2)
+        self.assertEqual(len(tester.open_events()), 0)  # no open events
+        self.assertEqual(len(tester.process_events()), 2)
+        self.assertEqual(len(tester.close_events()), 2)
+
+    def test_streaming_foreach_without_close_method(self):
+        tester = self.ForeachWriterTester(self.spark)
+
+        class ForeachWriter:
+            def open(self, partition_id, epoch_id):
+                tester.write_open_event(partition_id, epoch_id)
+                return True
+
+            def process(self, row):
+                tester.write_process_event(row)
+
+        tester.run_streaming_query_on_writer(ForeachWriter(), 2)
+        self.assertEqual(len(tester.open_events()), 2)  # open is still called without close
+        self.assertEqual(len(tester.process_events()), 2)
+        self.assertEqual(len(tester.close_events()), 0)
+
+    def test_streaming_foreach_without_open_and_close_methods(self):
+        tester = self.ForeachWriterTester(self.spark)
+
+        class ForeachWriter:
+            def process(self, row):
+                tester.write_process_event(row)
+
+        tester.run_streaming_query_on_writer(ForeachWriter(), 2)
+        self.assertEqual(len(tester.open_events()), 0)  # no open events
+        self.assertEqual(len(tester.process_events()), 2)
+        self.assertEqual(len(tester.close_events()), 0)
+
+    def test_streaming_foreach_with_process_throwing_error(self):
+        from pyspark.sql.utils import StreamingQueryException
+
+        tester = self.ForeachWriterTester(self.spark)
+
+        class ForeachWriter:
+            def process(self, row):
+                raise Exception("test error")
+
+            def close(self, error):
+                tester.write_close_event(error)
+
+        try:
+            tester.run_streaming_query_on_writer(ForeachWriter(), 1)
+            self.fail("bad writer did not fail the query")  # this is not expected
+        except StreamingQueryException as e:
+            # TODO: Verify whether original error message is inside the exception
+            pass
+
+        self.assertEqual(len(tester.process_events()), 0)  # no row was processed
+        close_events = tester.close_events()
+        self.assertEqual(len(close_events), 1)
+        # TODO: Verify whether original error message is inside the exception
+
+    def test_streaming_foreach_with_invalid_writers(self):
+
+        tester = self.ForeachWriterTester(self.spark)
+
+        def func_with_iterator_input(iter):
+            for x in iter:
+                print(x)
+
+        tester.assert_invalid_writer(func_with_iterator_input)
+
+        class WriterWithoutProcess:
+            def open(self, partition):
+                pass
+
+        tester.assert_invalid_writer(WriterWithoutProcess(), "does not have a 'process'")
+
+        class WriterWithNonCallableProcess():
+            process = True
+
+        tester.assert_invalid_writer(WriterWithNonCallableProcess(),
+                                     "'process' in provided object is not callable")
+
+        class WriterWithNoParamProcess():
+            def process(self):
+                pass
+
+        tester.assert_invalid_writer(WriterWithNoParamProcess())
+
+        # Abstract class for tests below
+        class WithProcess():
+            def process(self, row):
+                pass
+
+        class WriterWithNonCallableOpen(WithProcess):
+            open = True
+
+        tester.assert_invalid_writer(WriterWithNonCallableOpen(),
+                                     "'open' in provided object is not callable")
+
+        class WriterWithNoParamOpen(WithProcess):
+            def open(self):
+                pass
+
+        tester.assert_invalid_writer(WriterWithNoParamOpen())
+
+        class WriterWithNonCallableClose(WithProcess):
+            close = True
+
+        tester.assert_invalid_writer(WriterWithNonCallableClose(),
+                                     "'close' in provided object is not callable")
+
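+    # Illustrative sketch, not exercised by this suite: the test above walks through
+    # writer shapes that foreach() rejects. For contrast, these are the two shapes it
+    # accepts -- a one-argument function, or an object with a callable process(row)
+    # (open/close are optional). The names below are illustrative only.
+    @staticmethod
+    def _example_valid_foreach_writers():
+        def row_function(row):
+            # called once per row; side effects (logging, external writes) go here
+            print(row)
+
+        class RowWriter(object):
+            def process(self, row):
+                print(row)
+
+        return row_function, RowWriter()
+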
+    def test_streaming_foreachBatch(self):
+        q = None
+        collected = dict()
+
+        def collectBatch(batch_df, batch_id):
+            collected[batch_id] = batch_df.collect()
+
+        try:
+            df = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+            q = df.writeStream.foreachBatch(collectBatch).start()
+            q.processAllAvailable()
+            self.assertTrue(0 in collected)
+            self.assertEqual(len(collected[0]), 2)
+        finally:
+            if q:
+                q.stop()
+
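+    # Illustrative sketch, not exercised by this suite: the test above only collects each
+    # micro-batch. The usual reason to use foreachBatch() is that the callback receives a
+    # plain batch DataFrame, so regular batch writers can be reused; the output path below
+    # is a placeholder, not a location used by the suite.
+    @staticmethod
+    def _example_foreach_batch_sink(batch_df, batch_id):
+        # write each micro-batch with the ordinary batch Parquet writer
+        batch_df.write.mode("append").parquet("/tmp/example_foreach_batch_output")
+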
+    def test_streaming_foreachBatch_propagates_python_errors(self):
+        from pyspark.sql.utils import StreamingQueryException
+
+        q = None
+
+        def collectBatch(batch_df, batch_id):
+            raise Exception("this should fail the query")
+
+        try:
+            df = self.spark.readStream.format('text').load('python/test_support/sql/streaming')
+            q = df.writeStream.foreachBatch(collectBatch).start()
+            q.processAllAvailable()
+            self.fail("Expected a failure")
+        except StreamingQueryException as e:
+            self.assertTrue("this should fail" in str(e))
+        finally:
+            if q:
+                q.stop()
+
+    def test_help_command(self):
+        # Regression test for SPARK-5464
+        rdd = self.sc.parallelize(['{"foo":"bar"}', '{"foo":"baz"}'])
+        df = self.spark.read.json(rdd)
+        # render_doc() reproduces the help() exception without printing output
+        pydoc.render_doc(df)
+        pydoc.render_doc(df.foo)
+        pydoc.render_doc(df.take(1))
+
+    def test_access_column(self):
+        df = self.df
+        self.assertTrue(isinstance(df.key, Column))
+        self.assertTrue(isinstance(df['key'], Column))
+        self.assertTrue(isinstance(df[0], Column))
+        self.assertRaises(IndexError, lambda: df[2])
+        self.assertRaises(AnalysisException, lambda: df["bad_key"])
+        self.assertRaises(TypeError, lambda: df[{}])
+
+    def test_column_name_with_non_ascii(self):
+        if sys.version >= '3':
+            columnName = "数量"
+            self.assertTrue(isinstance(columnName, str))
+        else:
+            columnName = unicode("数量", "utf-8")
+            self.assertTrue(isinstance(columnName, unicode))
+        schema = StructType([StructField(columnName, LongType(), True)])
+        df = self.spark.createDataFrame([(1,)], schema)
+        self.assertEqual(schema, df.schema)
+        self.assertEqual("DataFrame[数量: bigint]", str(df))
+        self.assertEqual([("数量", 'bigint')], df.dtypes)
+        self.assertEqual(1, df.select("数量").first()[0])
+        self.assertEqual(1, df.select(df["数量"]).first()[0])
+
+    def test_access_nested_types(self):
+        df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF()
+        self.assertEqual(1, df.select(df.l[0]).first()[0])
+        self.assertEqual(1, df.select(df.l.getItem(0)).first()[0])
+        self.assertEqual(1, df.select(df.r.a).first()[0])
+        self.assertEqual("b", df.select(df.r.getField("b")).first()[0])
+        self.assertEqual("v", df.select(df.d["k"]).first()[0])
+        self.assertEqual("v", df.select(df.d.getItem("k")).first()[0])
+
+    def test_field_accessor(self):
+        df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF()
+        self.assertEqual(1, df.select(df.l[0]).first()[0])
+        self.assertEqual(1, df.select(df.r["a"]).first()[0])
+        self.assertEqual(1, df.select(df["r.a"]).first()[0])
+        self.assertEqual("b", df.select(df.r["b"]).first()[0])
+        self.assertEqual("b", df.select(df["r.b"]).first()[0])
+        self.assertEqual("v", df.select(df.d["k"]).first()[0])
+
+    def test_infer_long_type(self):
+        longrow = [Row(f1='a', f2=100000000000000)]
+        df = self.sc.parallelize(longrow).toDF()
+        self.assertEqual(df.schema.fields[1].dataType, LongType())
+
+        # saving this as Parquet caused issues as well.
+        output_dir = os.path.join(self.tempdir.name, "infer_long_type")
+        df.write.parquet(output_dir)
+        df1 = self.spark.read.parquet(output_dir)
+        self.assertEqual('a', df1.first().f1)
+        self.assertEqual(100000000000000, df1.first().f2)
+
+        self.assertEqual(_infer_type(1), LongType())
+        self.assertEqual(_infer_type(2**10), LongType())
+        self.assertEqual(_infer_type(2**20), LongType())
+        self.assertEqual(_infer_type(2**31 - 1), LongType())
+        self.assertEqual(_infer_type(2**31), LongType())
+        self.assertEqual(_infer_type(2**61), LongType())
+        self.assertEqual(_infer_type(2**71), LongType())
+
+    def test_merge_type(self):
+        self.assertEqual(_merge_type(LongType(), NullType()), LongType())
+        self.assertEqual(_merge_type(NullType(), LongType()), LongType())
+
+        self.assertEqual(_merge_type(LongType(), LongType()), LongType())
+
+        self.assertEqual(_merge_type(
+            ArrayType(LongType()),
+            ArrayType(LongType())
+        ), ArrayType(LongType()))
+        with self.assertRaisesRegexp(TypeError, 'element in array'):
+            _merge_type(ArrayType(LongType()), ArrayType(DoubleType()))
+
+        self.assertEqual(_merge_type(
+            MapType(StringType(), LongType()),
+            MapType(StringType(), LongType())
+        ), MapType(StringType(), LongType()))
+        with self.assertRaisesRegexp(TypeError, 'key of map'):
+            _merge_type(
+                MapType(StringType(), LongType()),
+                MapType(DoubleType(), LongType()))
+        with self.assertRaisesRegexp(TypeError, 'value of map'):
+            _merge_type(
+                MapType(StringType(), LongType()),
+                MapType(StringType(), DoubleType()))
+
+        self.assertEqual(_merge_type(
+            StructType([StructField("f1", LongType()), StructField("f2", StringType())]),
+            StructType([StructField("f1", LongType()), StructField("f2", StringType())])
+        ), StructType([StructField("f1", LongType()), StructField("f2", StringType())]))
+        with self.assertRaisesRegexp(TypeError, 'field f1'):
+            _merge_type(
+                StructType([StructField("f1", LongType()), StructField("f2", StringType())]),
+                StructType([StructField("f1", DoubleType()), StructField("f2", StringType())]))
+
+        self.assertEqual(_merge_type(
+            StructType([StructField("f1", StructType([StructField("f2", LongType())]))]),
+            StructType([StructField("f1", StructType([StructField("f2", LongType())]))])
+        ), StructType([StructField("f1", StructType([StructField("f2", LongType())]))]))
+        with self.assertRaisesRegexp(TypeError, 'field f2 in field f1'):
+            _merge_type(
+                StructType([StructField("f1", StructType([StructField("f2", LongType())]))]),
+                StructType([StructField("f1", StructType([StructField("f2", StringType())]))]))
+
+        self.assertEqual(_merge_type(
+            StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())]),
+            StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())])
+        ), StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())]))
+        with self.assertRaisesRegexp(TypeError, 'element in array field f1'):
+            _merge_type(
+                StructType([
+                    StructField("f1", ArrayType(LongType())),
+                    StructField("f2", StringType())]),
+                StructType([
+                    StructField("f1", ArrayType(DoubleType())),
+                    StructField("f2", StringType())]))
+
+        self.assertEqual(_merge_type(
+            StructType([
+                StructField("f1", MapType(StringType(), LongType())),
+                StructField("f2", StringType())]),
+            StructType([
+                StructField("f1", MapType(StringType(), LongType())),
+                StructField("f2", StringType())])
+        ), StructType([
+            StructField("f1", MapType(StringType(), LongType())),
+            StructField("f2", StringType())]))
+        with self.assertRaisesRegexp(TypeError, 'value of map field f1'):
+            _merge_type(
+                StructType([
+                    StructField("f1", MapType(StringType(), LongType())),
+                    StructField("f2", StringType())]),
+                StructType([
+                    StructField("f1", MapType(StringType(), DoubleType())),
+                    StructField("f2", StringType())]))
+
+        self.assertEqual(_merge_type(
+            StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]),
+            StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))])
+        ), StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]))
+        with self.assertRaisesRegexp(TypeError, 'key of map element in array field f1'):
+            _merge_type(
+                StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]),
+                StructType([StructField("f1", ArrayType(MapType(DoubleType(), LongType())))])
+            )
+
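+    # Illustrative sketch, not exercised by this suite: _merge_type is the internal helper
+    # used when a schema is inferred from several sampled rows, for example when one row
+    # carries a None where another carries a long.
+    def _example_merge_type_in_inference(self):
+        df = self.spark.createDataFrame([Row(a=None), Row(a=1)])
+        # NullType from the first row is merged with LongType from the second
+        self.assertEqual(df.schema.fields[0].dataType, LongType())
+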
+    def test_filter_with_datetime(self):
+        time = datetime.datetime(2015, 4, 17, 23, 1, 2, 3000)
+        date = time.date()
+        row = Row(date=date, time=time)
+        df = self.spark.createDataFrame([row])
+        self.assertEqual(1, df.filter(df.date == date).count())
+        self.assertEqual(1, df.filter(df.time == time).count())
+        self.assertEqual(0, df.filter(df.date > date).count())
+        self.assertEqual(0, df.filter(df.time > time).count())
+
+    def test_filter_with_datetime_timezone(self):
+        dt1 = datetime.datetime(2015, 4, 17, 23, 1, 2, 3000, tzinfo=UTCOffsetTimezone(0))
+        dt2 = datetime.datetime(2015, 4, 17, 23, 1, 2, 3000, tzinfo=UTCOffsetTimezone(1))
+        row = Row(date=dt1)
+        df = self.spark.createDataFrame([row])
+        self.assertEqual(0, df.filter(df.date == dt2).count())
+        self.assertEqual(1, df.filter(df.date > dt2).count())
+        self.assertEqual(0, df.filter(df.date < dt2).count())
+
+    def test_time_with_timezone(self):
+        day = datetime.date.today()
+        now = datetime.datetime.now()
+        ts = time.mktime(now.timetuple())
+        # classes defined in __main__ are not serializable
+        from pyspark.sql.tests import UTCOffsetTimezone
+        utc = UTCOffsetTimezone()
+        utcnow = datetime.datetime.utcfromtimestamp(ts)  # without microseconds
+        # add microseconds to utcnow (keeping year,month,day,hour,minute,second)
+        utcnow = datetime.datetime(*(utcnow.timetuple()[:6] + (now.microsecond, utc)))
+        df = self.spark.createDataFrame([(day, now, utcnow)])
+        day1, now1, utcnow1 = df.first()
+        self.assertEqual(day1, day)
+        self.assertEqual(now, now1)
+        self.assertEqual(now, utcnow1)
+
+    # regression test for SPARK-19561
+    def test_datetime_at_epoch(self):
+        epoch = datetime.datetime.fromtimestamp(0)
+        df = self.spark.createDataFrame([Row(date=epoch)])
+        first = df.select('date', lit(epoch).alias('lit_date')).first()
+        self.assertEqual(first['date'], epoch)
+        self.assertEqual(first['lit_date'], epoch)
+
+    def test_dayofweek(self):
+        from pyspark.sql.functions import dayofweek
+        dt = datetime.datetime(2017, 11, 6)
+        df = self.spark.createDataFrame([Row(date=dt)])
+        row = df.select(dayofweek(df.date)).first()
+        self.assertEqual(row[0], 2)
+
+    def test_decimal(self):
+        from decimal import Decimal
+        schema = StructType([StructField("decimal", DecimalType(10, 5))])
+        df = self.spark.createDataFrame([(Decimal("3.14159"),)], schema)
+        row = df.select(df.decimal + 1).first()
+        self.assertEqual(row[0], Decimal("4.14159"))
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        df.write.parquet(tmpPath)
+        df2 = self.spark.read.parquet(tmpPath)
+        row = df2.first()
+        self.assertEqual(row[0], Decimal("3.14159"))
+
+    def test_dropna(self):
+        schema = StructType([
+            StructField("name", StringType(), True),
+            StructField("age", IntegerType(), True),
+            StructField("height", DoubleType(), True)])
+
+        # shouldn't drop a non-null row
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', 50, 80.1)], schema).dropna().count(),
+            1)
+
+        # dropping rows with a single null value
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', None, 80.1)], schema).dropna().count(),
+            0)
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', None, 80.1)], schema).dropna(how='any').count(),
+            0)
+
+        # if how = 'all', only drop rows if all values are null
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', None, 80.1)], schema).dropna(how='all').count(),
+            1)
+        self.assertEqual(self.spark.createDataFrame(
+            [(None, None, None)], schema).dropna(how='all').count(),
+            0)
+
+        # how and subset
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', 50, None)], schema).dropna(how='any', subset=['name', 'age']).count(),
+            1)
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', None, None)], schema).dropna(how='any', subset=['name', 'age']).count(),
+            0)
+
+        # threshold
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', None, 80.1)], schema).dropna(thresh=2).count(),
+            1)
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', None, None)], schema).dropna(thresh=2).count(),
+            0)
+
+        # threshold and subset
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', 50, None)], schema).dropna(thresh=2, subset=['name', 'age']).count(),
+            1)
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', None, 180.9)], schema).dropna(thresh=2, subset=['name', 'age']).count(),
+            0)
+
+        # thresh should take precedence over how
+        self.assertEqual(self.spark.createDataFrame(
+            [(u'Alice', 50, None)], schema).dropna(
+                how='any', thresh=2, subset=['name', 'age']).count(),
+            1)
+
+    def test_fillna(self):
+        schema = StructType([
+            StructField("name", StringType(), True),
+            StructField("age", IntegerType(), True),
+            StructField("height", DoubleType(), True),
+            StructField("spy", BooleanType(), True)])
+
+        # fillna shouldn't change non-null values
+        row = self.spark.createDataFrame([(u'Alice', 10, 80.1, True)], schema).fillna(50).first()
+        self.assertEqual(row.age, 10)
+
+        # fillna with int
+        row = self.spark.createDataFrame([(u'Alice', None, None, None)], schema).fillna(50).first()
+        self.assertEqual(row.age, 50)
+        self.assertEqual(row.height, 50.0)
+
+        # fillna with double
+        row = self.spark.createDataFrame(
+            [(u'Alice', None, None, None)], schema).fillna(50.1).first()
+        self.assertEqual(row.age, 50)
+        self.assertEqual(row.height, 50.1)
+
+        # fillna with bool
+        row = self.spark.createDataFrame(
+            [(u'Alice', None, None, None)], schema).fillna(True).first()
+        self.assertEqual(row.age, None)
+        self.assertEqual(row.spy, True)
+
+        # fillna with string
+        row = self.spark.createDataFrame([(None, None, None, None)], schema).fillna("hello").first()
+        self.assertEqual(row.name, u"hello")
+        self.assertEqual(row.age, None)
+
+        # fillna with subset specified for numeric cols
+        row = self.spark.createDataFrame(
+            [(None, None, None, None)], schema).fillna(50, subset=['name', 'age']).first()
+        self.assertEqual(row.name, None)
+        self.assertEqual(row.age, 50)
+        self.assertEqual(row.height, None)
+        self.assertEqual(row.spy, None)
+
+        # fillna with subset specified for string cols
+        row = self.spark.createDataFrame(
+            [(None, None, None, None)], schema).fillna("haha", subset=['name', 'age']).first()
+        self.assertEqual(row.name, "haha")
+        self.assertEqual(row.age, None)
+        self.assertEqual(row.height, None)
+        self.assertEqual(row.spy, None)
+
+        # fillna with subset specified for bool cols
+        row = self.spark.createDataFrame(
+            [(None, None, None, None)], schema).fillna(True, subset=['name', 'spy']).first()
+        self.assertEqual(row.name, None)
+        self.assertEqual(row.age, None)
+        self.assertEqual(row.height, None)
+        self.assertEqual(row.spy, True)
+
+        # fillna with dictionary for boolean types
+        row = self.spark.createDataFrame([Row(a=None), Row(a=True)]).fillna({"a": True}).first()
+        self.assertEqual(row.a, True)
+
+    def test_bitwise_operations(self):
+        from pyspark.sql import functions
+        row = Row(a=170, b=75)
+        df = self.spark.createDataFrame([row])
+        result = df.select(df.a.bitwiseAND(df.b)).collect()[0].asDict()
+        self.assertEqual(170 & 75, result['(a & b)'])
+        result = df.select(df.a.bitwiseOR(df.b)).collect()[0].asDict()
+        self.assertEqual(170 | 75, result['(a | b)'])
+        result = df.select(df.a.bitwiseXOR(df.b)).collect()[0].asDict()
+        self.assertEqual(170 ^ 75, result['(a ^ b)'])
+        result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict()
+        self.assertEqual(~75, result['~b'])
+
+    def test_expr(self):
+        from pyspark.sql import functions
+        row = Row(a="length string", b=75)
+        df = self.spark.createDataFrame([row])
+        result = df.select(functions.expr("length(a)")).collect()[0].asDict()
+        self.assertEqual(13, result["length(a)"])
+
+    def test_repartitionByRange_dataframe(self):
+        schema = StructType([
+            StructField("name", StringType(), True),
+            StructField("age", IntegerType(), True),
+            StructField("height", DoubleType(), True)])
+
+        df1 = self.spark.createDataFrame(
+            [(u'Bob', 27, 66.0), (u'Alice', 10, 10.0), (u'Bob', 10, 66.0)], schema)
+        df2 = self.spark.createDataFrame(
+            [(u'Alice', 10, 10.0), (u'Bob', 10, 66.0), (u'Bob', 27, 66.0)], schema)
+
+        # test repartitionByRange(numPartitions, *cols)
+        df3 = df1.repartitionByRange(2, "name", "age")
+        self.assertEqual(df3.rdd.getNumPartitions(), 2)
+        self.assertEqual(df3.rdd.first(), df2.rdd.first())
+        self.assertEqual(df3.rdd.take(3), df2.rdd.take(3))
+
+        # test repartitionByRange(numPartitions, *cols) with a different number of partitions
+        df4 = df1.repartitionByRange(3, "name", "age")
+        self.assertEqual(df4.rdd.getNumPartitions(), 3)
+        self.assertEqual(df4.rdd.first(), df2.rdd.first())
+        self.assertEqual(df4.rdd.take(3), df2.rdd.take(3))
+
+        # test repartitionByRange(*cols)
+        df5 = df1.repartitionByRange("name", "age")
+        self.assertEqual(df5.rdd.first(), df2.rdd.first())
+        self.assertEqual(df5.rdd.take(3), df2.rdd.take(3))
+
+    def test_replace(self):
+        schema = StructType([
+            StructField("name", StringType(), True),
+            StructField("age", IntegerType(), True),
+            StructField("height", DoubleType(), True)])
+
+        # replace with int
+        row = self.spark.createDataFrame([(u'Alice', 10, 10.0)], schema).replace(10, 20).first()
+        self.assertEqual(row.age, 20)
+        self.assertEqual(row.height, 20.0)
+
+        # replace with double
+        row = self.spark.createDataFrame(
+            [(u'Alice', 80, 80.0)], schema).replace(80.0, 82.1).first()
+        self.assertEqual(row.age, 82)
+        self.assertEqual(row.height, 82.1)
+
+        # replace with string
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace(u'Alice', u'Ann').first()
+        self.assertEqual(row.name, u"Ann")
+        self.assertEqual(row.age, 10)
+
+        # replace with subset specified by a string of a column name w/ actual change
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='age').first()
+        self.assertEqual(row.age, 20)
+
+        # replace with subset specified by a string of a column name w/o actual change
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='height').first()
+        self.assertEqual(row.age, 10)
+
+        # replace with subset: one column is replaced, another column not in the subset
+        # stays unchanged.
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 10.0)], schema).replace(10, 20, subset=['name', 'age']).first()
+        self.assertEqual(row.name, u'Alice')
+        self.assertEqual(row.age, 20)
+        self.assertEqual(row.height, 10.0)
+
+        # replace with subset specified but no column will be replaced
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, None)], schema).replace(10, 20, subset=['name', 'height']).first()
+        self.assertEqual(row.name, u'Alice')
+        self.assertEqual(row.age, 10)
+        self.assertEqual(row.height, None)
+
+        # replace with lists
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace([u'Alice'], [u'Ann']).first()
+        self.assertTupleEqual(row, (u'Ann', 10, 80.1))
+
+        # replace with dict
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace({10: 11}).first()
+        self.assertTupleEqual(row, (u'Alice', 11, 80.1))
+
+        # test backward compatibility with dummy value
+        dummy_value = 1
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace({'Alice': 'Bob'}, dummy_value).first()
+        self.assertTupleEqual(row, (u'Bob', 10, 80.1))
+
+        # test dict with mixed numerics
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace({10: -10, 80.1: 90.5}).first()
+        self.assertTupleEqual(row, (u'Alice', -10, 90.5))
+
+        # replace with tuples
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.1)], schema).replace((u'Alice', ), (u'Bob', )).first()
+        self.assertTupleEqual(row, (u'Bob', 10, 80.1))
+
+        # replace multiple columns
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.0)], schema).replace((10, 80.0), (20, 90)).first()
+        self.assertTupleEqual(row, (u'Alice', 20, 90.0))
+
+        # test for mixed numerics
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.0)], schema).replace((10, 80), (20, 90.5)).first()
+        self.assertTupleEqual(row, (u'Alice', 20, 90.5))
+
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.0)], schema).replace({10: 20, 80: 90.5}).first()
+        self.assertTupleEqual(row, (u'Alice', 20, 90.5))
+
+        # replace with boolean
+        row = (self
+               .spark.createDataFrame([(u'Alice', 10, 80.0)], schema)
+               .selectExpr("name = 'Bob'", 'age <= 15')
+               .replace(False, True).first())
+        self.assertTupleEqual(row, (True, True))
+
+        # replace string with None and then drop None rows
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.0)], schema).replace(u'Alice', None).dropna()
+        self.assertEqual(row.count(), 0)
+
+        # replace with number and None
+        row = self.spark.createDataFrame(
+            [(u'Alice', 10, 80.0)], schema).replace([10, 80], [20, None]).first()
+        self.assertTupleEqual(row, (u'Alice', 20, None))
+
+        # should fail if subset is not list, tuple or None
+        with self.assertRaises(ValueError):
+            self.spark.createDataFrame(
+                [(u'Alice', 10, 80.1)], schema).replace({10: 11}, subset=1).first()
+
+        # should fail if to_replace and value have different length
+        with self.assertRaises(ValueError):
+            self.spark.createDataFrame(
+                [(u'Alice', 10, 80.1)], schema).replace(["Alice", "Bob"], ["Eve"]).first()
+
+        # should fail when an unexpected type is received
+        with self.assertRaises(ValueError):
+            from datetime import datetime
+            self.spark.createDataFrame(
+                [(u'Alice', 10, 80.1)], schema).replace(datetime.now(), datetime.now()).first()
+
+        # should fail if provided mixed type replacements
+        with self.assertRaises(ValueError):
+            self.spark.createDataFrame(
+                [(u'Alice', 10, 80.1)], schema).replace(["Alice", 10], ["Eve", 20]).first()
+
+        with self.assertRaises(ValueError):
+            self.spark.createDataFrame(
+                [(u'Alice', 10, 80.1)], schema).replace({u"Alice": u"Bob", 10: 20}).first()
+
+        with self.assertRaisesRegexp(
+                TypeError,
+                'value argument is required when to_replace is not a dictionary.'):
+            self.spark.createDataFrame(
+                [(u'Alice', 10, 80.0)], schema).replace(["Alice", "Bob"]).first()
+
+    def test_capture_analysis_exception(self):
+        self.assertRaises(AnalysisException, lambda: self.spark.sql("select abc"))
+        self.assertRaises(AnalysisException, lambda: self.df.selectExpr("a + b"))
+
+    def test_capture_parse_exception(self):
+        self.assertRaises(ParseException, lambda: self.spark.sql("abc"))
+
+    def test_capture_illegalargument_exception(self):
+        self.assertRaisesRegexp(IllegalArgumentException, "Setting negative mapred.reduce.tasks",
+                                lambda: self.spark.sql("SET mapred.reduce.tasks=-1"))
+        df = self.spark.createDataFrame([(1, 2)], ["a", "b"])
+        self.assertRaisesRegexp(IllegalArgumentException, "1024 is not in the permitted values",
+                                lambda: df.select(sha2(df.a, 1024)).collect())
+        try:
+            df.select(sha2(df.a, 1024)).collect()
+        except IllegalArgumentException as e:
+            self.assertRegexpMatches(e.desc, "1024 is not in the permitted values")
+            self.assertRegexpMatches(e.stackTrace,
+                                     "org.apache.spark.sql.functions")
+
+    def test_with_column_with_existing_name(self):
+        keys = self.df.withColumn("key", self.df.key).select("key").collect()
+        self.assertEqual([r.key for r in keys], list(range(100)))
+
+    # regression test for SPARK-10417
+    def test_column_iterator(self):
+
+        def foo():
+            for x in self.df.key:
+                break
+
+        self.assertRaises(TypeError, foo)
+
+    # test for SPARK-10577 (broadcast join hint)
+    def test_functions_broadcast(self):
+        from pyspark.sql.functions import broadcast
+
+        df1 = self.spark.createDataFrame([(1, "1"), (2, "2")], ("key", "value"))
+        df2 = self.spark.createDataFrame([(1, "1"), (2, "2")], ("key", "value"))
+
+        # equijoin - should be converted into broadcast join
+        plan1 = df1.join(broadcast(df2), "key")._jdf.queryExecution().executedPlan()
+        self.assertEqual(1, plan1.toString().count("BroadcastHashJoin"))
+
+        # no join key -- should not be a broadcast join
+        plan2 = df1.crossJoin(broadcast(df2))._jdf.queryExecution().executedPlan()
+        self.assertEqual(0, plan2.toString().count("BroadcastHashJoin"))
+
+        # planner should not crash without a join
+        broadcast(df1)._jdf.queryExecution().executedPlan()
+
+    def test_generic_hints(self):
+        from pyspark.sql import DataFrame
+
+        df1 = self.spark.range(10e10).toDF("id")
+        df2 = self.spark.range(10e10).toDF("id")
+
+        self.assertIsInstance(df1.hint("broadcast"), DataFrame)
+        self.assertIsInstance(df1.hint("broadcast", []), DataFrame)
+
+        # Dummy rules
+        self.assertIsInstance(df1.hint("broadcast", "foo", "bar"), DataFrame)
+        self.assertIsInstance(df1.hint("broadcast", ["foo", "bar"]), DataFrame)
+
+        plan = df1.join(df2.hint("broadcast"), "id")._jdf.queryExecution().executedPlan()
+        self.assertEqual(1, plan.toString().count("BroadcastHashJoin"))
+
+    def test_sample(self):
+        self.assertRaisesRegexp(
+            TypeError,
+            "should be a bool, float and number",
+            lambda: self.spark.range(1).sample())
+
+        self.assertRaises(
+            TypeError,
+            lambda: self.spark.range(1).sample("a"))
+
+        self.assertRaises(
+            TypeError,
+            lambda: self.spark.range(1).sample(seed="abc"))
+
+        self.assertRaises(
+            IllegalArgumentException,
+            lambda: self.spark.range(1).sample(-1.0))
+
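+    # Illustrative sketch, not exercised by this suite: test_sample above checks the
+    # argument shapes that are rejected; for contrast, these are call forms sample()
+    # accepts in this release.
+    def _example_valid_sample_calls(self):
+        df = self.spark.range(100)
+        df.sample(0.5)                    # fraction only
+        df.sample(fraction=0.5, seed=42)  # fraction with a seed
+        df.sample(True, 0.5, 42)          # withReplacement, fraction, seed
+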
+    def test_toDF_with_schema_string(self):
+        data = [Row(key=i, value=str(i)) for i in range(100)]
+        rdd = self.sc.parallelize(data, 5)
+
+        df = rdd.toDF("key: int, value: string")
+        self.assertEqual(df.schema.simpleString(), "struct<key:int,value:string>")
+        self.assertEqual(df.collect(), data)
+
+        # different but compatible field types can be used.
+        df = rdd.toDF("key: string, value: string")
+        self.assertEqual(df.schema.simpleString(), "struct<key:string,value:string>")
+        self.assertEqual(df.collect(), [Row(key=str(i), value=str(i)) for i in range(100)])
+
+        # field names can differ.
+        df = rdd.toDF(" a: int, b: string ")
+        self.assertEqual(df.schema.simpleString(), "struct<a:int,b:string>")
+        self.assertEqual(df.collect(), data)
+
+        # number of fields must match.
+        self.assertRaisesRegexp(Exception, "Length of object",
+                                lambda: rdd.toDF("key: int").collect())
+
+        # a field type mismatch will cause an exception at runtime.
+        self.assertRaisesRegexp(Exception, "FloatType can not accept",
+                                lambda: rdd.toDF("key: float, value: string").collect())
+
+        # flat schema values will be wrapped into a row.
+        df = rdd.map(lambda row: row.key).toDF("int")
+        self.assertEqual(df.schema.simpleString(), "struct<value:int>")
+        self.assertEqual(df.collect(), [Row(key=i) for i in range(100)])
+
+        # users can use a DataType directly instead of a data type string.
+        df = rdd.map(lambda row: row.key).toDF(IntegerType())
+        self.assertEqual(df.schema.simpleString(), "struct<value:int>")
+        self.assertEqual(df.collect(), [Row(key=i) for i in range(100)])
+
+    def test_join_without_on(self):
+        df1 = self.spark.range(1).toDF("a")
+        df2 = self.spark.range(1).toDF("b")
+
+        with self.sql_conf({"spark.sql.crossJoin.enabled": False}):
+            self.assertRaises(AnalysisException, lambda: df1.join(df2, how="inner").collect())
+
+        with self.sql_conf({"spark.sql.crossJoin.enabled": True}):
+            actual = df1.join(df2, how="inner").collect()
+            expected = [Row(a=0, b=0)]
+            self.assertEqual(actual, expected)
+
+    # Regression test for invalid join methods when on is None (SPARK-14761)
+    def test_invalid_join_method(self):
+        df1 = self.spark.createDataFrame([("Alice", 5), ("Bob", 8)], ["name", "age"])
+        df2 = self.spark.createDataFrame([("Alice", 80), ("Bob", 90)], ["name", "height"])
+        self.assertRaises(IllegalArgumentException, lambda: df1.join(df2, how="invalid-join-type"))
+
+    # Cartesian products require cross join syntax
+    def test_require_cross(self):
+        from pyspark.sql.functions import broadcast
+
+        df1 = self.spark.createDataFrame([(1, "1")], ("key", "value"))
+        df2 = self.spark.createDataFrame([(1, "1")], ("key", "value"))
+
+        # joins without conditions require cross join syntax
+        self.assertRaises(AnalysisException, lambda: df1.join(df2).collect())
+
+        # works with crossJoin
+        self.assertEqual(1, df1.crossJoin(df2).count())
+
+    def test_conf(self):
+        spark = self.spark
+        spark.conf.set("bogo", "sipeo")
+        self.assertEqual(spark.conf.get("bogo"), "sipeo")
+        spark.conf.set("bogo", "ta")
+        self.assertEqual(spark.conf.get("bogo"), "ta")
+        self.assertEqual(spark.conf.get("bogo", "not.read"), "ta")
+        self.assertEqual(spark.conf.get("not.set", "ta"), "ta")
+        self.assertRaisesRegexp(Exception, "not.set", lambda: spark.conf.get("not.set"))
+        spark.conf.unset("bogo")
+        self.assertEqual(spark.conf.get("bogo", "colombia"), "colombia")
+
+        self.assertEqual(spark.conf.get("hyukjin", None), None)
+
+        # This returns 'STATIC' because it's the default value of
+        # 'spark.sql.sources.partitionOverwriteMode', and `defaultValue` in
+        # `spark.conf.get` is unset.
+        self.assertEqual(spark.conf.get("spark.sql.sources.partitionOverwriteMode"), "STATIC")
+
+        # This returns None because 'spark.sql.sources.partitionOverwriteMode' is unset, but
+        # `defaultValue` in `spark.conf.get` is set to None.
+        self.assertEqual(spark.conf.get("spark.sql.sources.partitionOverwriteMode", None), None)
+
+    def test_current_database(self):
+        spark = self.spark
+        spark.catalog._reset()
+        self.assertEquals(spark.catalog.currentDatabase(), "default")
+        spark.sql("CREATE DATABASE some_db")
+        spark.catalog.setCurrentDatabase("some_db")
+        self.assertEquals(spark.catalog.currentDatabase(), "some_db")
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "does_not_exist",
+            lambda: spark.catalog.setCurrentDatabase("does_not_exist"))
+
+    def test_list_databases(self):
+        spark = self.spark
+        spark.catalog._reset()
+        databases = [db.name for db in spark.catalog.listDatabases()]
+        self.assertEquals(databases, ["default"])
+        spark.sql("CREATE DATABASE some_db")
+        databases = [db.name for db in spark.catalog.listDatabases()]
+        self.assertEquals(sorted(databases), ["default", "some_db"])
+
+    def test_list_tables(self):
+        from pyspark.sql.catalog import Table
+        spark = self.spark
+        spark.catalog._reset()
+        spark.sql("CREATE DATABASE some_db")
+        self.assertEquals(spark.catalog.listTables(), [])
+        self.assertEquals(spark.catalog.listTables("some_db"), [])
+        spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab")
+        spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
+        spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT) USING parquet")
+        tables = sorted(spark.catalog.listTables(), key=lambda t: t.name)
+        tablesDefault = sorted(spark.catalog.listTables("default"), key=lambda t: t.name)
+        tablesSomeDb = sorted(spark.catalog.listTables("some_db"), key=lambda t: t.name)
+        self.assertEquals(tables, tablesDefault)
+        self.assertEquals(len(tables), 2)
+        self.assertEquals(len(tablesSomeDb), 2)
+        self.assertEquals(tables[0], Table(
+            name="tab1",
+            database="default",
+            description=None,
+            tableType="MANAGED",
+            isTemporary=False))
+        self.assertEquals(tables[1], Table(
+            name="temp_tab",
+            database=None,
+            description=None,
+            tableType="TEMPORARY",
+            isTemporary=True))
+        self.assertEquals(tablesSomeDb[0], Table(
+            name="tab2",
+            database="some_db",
+            description=None,
+            tableType="MANAGED",
+            isTemporary=False))
+        self.assertEquals(tablesSomeDb[1], Table(
+            name="temp_tab",
+            database=None,
+            description=None,
+            tableType="TEMPORARY",
+            isTemporary=True))
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "does_not_exist",
+            lambda: spark.catalog.listTables("does_not_exist"))
+
+    def test_list_functions(self):
+        from pyspark.sql.catalog import Function
+        spark = self.spark
+        spark.catalog._reset()
+        spark.sql("CREATE DATABASE some_db")
+        functions = dict((f.name, f) for f in spark.catalog.listFunctions())
+        functionsDefault = dict((f.name, f) for f in spark.catalog.listFunctions("default"))
+        self.assertTrue(len(functions) > 200)
+        self.assertTrue("+" in functions)
+        self.assertTrue("like" in functions)
+        self.assertTrue("month" in functions)
+        self.assertTrue("to_date" in functions)
+        self.assertTrue("to_timestamp" in functions)
+        self.assertTrue("to_unix_timestamp" in functions)
+        self.assertTrue("current_database" in functions)
+        self.assertEquals(functions["+"], Function(
+            name="+",
+            description=None,
+            className="org.apache.spark.sql.catalyst.expressions.Add",
+            isTemporary=True))
+        self.assertEquals(functions, functionsDefault)
+        spark.catalog.registerFunction("temp_func", lambda x: str(x))
+        spark.sql("CREATE FUNCTION func1 AS 'org.apache.spark.data.bricks'")
+        spark.sql("CREATE FUNCTION some_db.func2 AS 'org.apache.spark.data.bricks'")
+        newFunctions = dict((f.name, f) for f in spark.catalog.listFunctions())
+        newFunctionsSomeDb = dict((f.name, f) for f in spark.catalog.listFunctions("some_db"))
+        self.assertTrue(set(functions).issubset(set(newFunctions)))
+        self.assertTrue(set(functions).issubset(set(newFunctionsSomeDb)))
+        self.assertTrue("temp_func" in newFunctions)
+        self.assertTrue("func1" in newFunctions)
+        self.assertTrue("func2" not in newFunctions)
+        self.assertTrue("temp_func" in newFunctionsSomeDb)
+        self.assertTrue("func1" not in newFunctionsSomeDb)
+        self.assertTrue("func2" in newFunctionsSomeDb)
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "does_not_exist",
+            lambda: spark.catalog.listFunctions("does_not_exist"))
+
+    def test_list_columns(self):
+        from pyspark.sql.catalog import Column
+        spark = self.spark
+        spark.catalog._reset()
+        spark.sql("CREATE DATABASE some_db")
+        spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet")
+        spark.sql("CREATE TABLE some_db.tab2 (nickname STRING, tolerance FLOAT) USING parquet")
+        columns = sorted(spark.catalog.listColumns("tab1"), key=lambda c: c.name)
+        columnsDefault = sorted(spark.catalog.listColumns("tab1", "default"), key=lambda c: c.name)
+        self.assertEquals(columns, columnsDefault)
+        self.assertEquals(len(columns), 2)
+        self.assertEquals(columns[0], Column(
+            name="age",
+            description=None,
+            dataType="int",
+            nullable=True,
+            isPartition=False,
+            isBucket=False))
+        self.assertEquals(columns[1], Column(
+            name="name",
+            description=None,
+            dataType="string",
+            nullable=True,
+            isPartition=False,
+            isBucket=False))
+        columns2 = sorted(spark.catalog.listColumns("tab2", "some_db"), key=lambda c: c.name)
+        self.assertEquals(len(columns2), 2)
+        self.assertEquals(columns2[0], Column(
+            name="nickname",
+            description=None,
+            dataType="string",
+            nullable=True,
+            isPartition=False,
+            isBucket=False))
+        self.assertEquals(columns2[1], Column(
+            name="tolerance",
+            description=None,
+            dataType="float",
+            nullable=True,
+            isPartition=False,
+            isBucket=False))
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "tab2",
+            lambda: spark.catalog.listColumns("tab2"))
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "does_not_exist",
+            lambda: spark.catalog.listColumns("does_not_exist"))
+
+    def test_cache(self):
+        spark = self.spark
+        spark.createDataFrame([(2, 2), (3, 3)]).createOrReplaceTempView("tab1")
+        spark.createDataFrame([(2, 2), (3, 3)]).createOrReplaceTempView("tab2")
+        self.assertFalse(spark.catalog.isCached("tab1"))
+        self.assertFalse(spark.catalog.isCached("tab2"))
+        spark.catalog.cacheTable("tab1")
+        self.assertTrue(spark.catalog.isCached("tab1"))
+        self.assertFalse(spark.catalog.isCached("tab2"))
+        spark.catalog.cacheTable("tab2")
+        spark.catalog.uncacheTable("tab1")
+        self.assertFalse(spark.catalog.isCached("tab1"))
+        self.assertTrue(spark.catalog.isCached("tab2"))
+        spark.catalog.clearCache()
+        self.assertFalse(spark.catalog.isCached("tab1"))
+        self.assertFalse(spark.catalog.isCached("tab2"))
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "does_not_exist",
+            lambda: spark.catalog.isCached("does_not_exist"))
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "does_not_exist",
+            lambda: spark.catalog.cacheTable("does_not_exist"))
+        self.assertRaisesRegexp(
+            AnalysisException,
+            "does_not_exist",
+            lambda: spark.catalog.uncacheTable("does_not_exist"))
+
+    def test_read_text_file_list(self):
+        df = self.spark.read.text(['python/test_support/sql/text-test.txt',
+                                   'python/test_support/sql/text-test.txt'])
+        count = df.count()
+        self.assertEquals(count, 4)
+
+    def test_BinaryType_serialization(self):
+        # Pyrolite version <= 4.9 could not serialize BinaryType with Python 3 (SPARK-17808).
+        # The empty bytearray is a test for SPARK-21534.
+        schema = StructType([StructField('mybytes', BinaryType())])
+        data = [[bytearray(b'here is my data')],
+                [bytearray(b'and here is some more')],
+                [bytearray(b'')]]
+        df = self.spark.createDataFrame(data, schema=schema)
+        df.collect()
+
+    # test for SPARK-16542
+    def test_array_types(self):
+        # This test needs to make sure that the Scala type selected is at least
+        # as large as Python's types. This is necessary because Python's
+        # array types depend on the C implementation on the machine, so there
+        # is no machine-independent correspondence between Python's array types
+        # and Scala types.
+        # See: https://docs.python.org/2/library/array.html
+
+        def assertCollectSuccess(typecode, value):
+            row = Row(myarray=array.array(typecode, [value]))
+            df = self.spark.createDataFrame([row])
+            self.assertEqual(df.first()["myarray"][0], value)
+
+        # supported string types
+        #
+        # String types in Python's array module are "u" for Py_UNICODE and "c" for char.
+        # "u" will be removed in Python 4, and "c" is not supported in Python 3.
+        supported_string_types = []
+        if sys.version_info[0] < 4:
+            supported_string_types += ['u']
+            # test unicode
+            assertCollectSuccess('u', u'a')
+        if sys.version_info[0] < 3:
+            supported_string_types += ['c']
+            # test string
+            assertCollectSuccess('c', 'a')
+
+        # supported float and double
+        #
+        # Test max, min, and precision for float and double, assuming IEEE 754
+        # floating-point format.
+        supported_fractional_types = ['f', 'd']
+        assertCollectSuccess('f', ctypes.c_float(1e+38).value)
+        assertCollectSuccess('f', ctypes.c_float(1e-38).value)
+        assertCollectSuccess('f', ctypes.c_float(1.123456).value)
+        assertCollectSuccess('d', sys.float_info.max)
+        assertCollectSuccess('d', sys.float_info.min)
+        assertCollectSuccess('d', sys.float_info.epsilon)
+
+        # supported signed int types
+        #
+        # The size of C types changes with the implementation, so we need to make
+        # sure that there is no overflow error on the platform running this test.
+        supported_signed_int_types = list(
+            set(_array_signed_int_typecode_ctype_mappings.keys())
+            .intersection(set(_array_type_mappings.keys())))
+        for t in supported_signed_int_types:
+            ctype = _array_signed_int_typecode_ctype_mappings[t]
+            max_val = 2 ** (ctypes.sizeof(ctype) * 8 - 1)
+            assertCollectSuccess(t, max_val - 1)
+            assertCollectSuccess(t, -max_val)
+
+        # supported unsigned int types
+        #
+        # The JVM does not have unsigned types, so we need to be very careful to
+        # make sure that there is no overflow error.
+        supported_unsigned_int_types = list(
+            set(_array_unsigned_int_typecode_ctype_mappings.keys())
+            .intersection(set(_array_type_mappings.keys())))
+        for t in supported_unsigned_int_types:
+            ctype = _array_unsigned_int_typecode_ctype_mappings[t]
+            assertCollectSuccess(t, 2 ** (ctypes.sizeof(ctype) * 8) - 1)
+
+        # all supported types
+        #
+        # Make sure the types tested above:
+        # 1. are all supported types
+        # 2. cover all supported types
+        supported_types = (supported_string_types +
+                           supported_fractional_types +
+                           supported_signed_int_types +
+                           supported_unsigned_int_types)
+        self.assertEqual(set(supported_types), set(_array_type_mappings.keys()))
+
+        # all unsupported types
+        #
+        # The keys in _array_type_mappings are a complete list of all supported types,
+        # and types not in _array_type_mappings are considered unsupported.
+        # `array.typecodes` is not supported in Python 2.
+        if sys.version_info[0] < 3:
+            all_types = set(['c', 'b', 'B', 'u', 'h', 'H', 'i', 'I', 'l', 'L', 'f', 'd'])
+        else:
+            all_types = set(array.typecodes)
+        unsupported_types = all_types - set(supported_types)
+        # test unsupported types
+        for t in unsupported_types:
+            with self.assertRaises(TypeError):
+                a = array.array(t)
+                self.spark.createDataFrame([Row(myarray=a)]).collect()
+
+    def test_bucketed_write(self):
+        data = [
+            (1, "foo", 3.0), (2, "foo", 5.0),
+            (3, "bar", -1.0), (4, "bar", 6.0),
+        ]
+        df = self.spark.createDataFrame(data, ["x", "y", "z"])
+
+        def count_bucketed_cols(names, table="pyspark_bucket"):
+            """Given a sequence of column names and a table name
+            query the catalog and return number o columns which are
+            used for bucketing
+            """
+            cols = self.spark.catalog.listColumns(table)
+            num = len([c for c in cols if c.name in names and c.isBucket])
+            return num
+
+        # Test write with one bucketing column
+        df.write.bucketBy(3, "x").mode("overwrite").saveAsTable("pyspark_bucket")
+        self.assertEqual(count_bucketed_cols(["x"]), 1)
+        self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
+
+        # Test write two bucketing columns
+        df.write.bucketBy(3, "x", "y").mode("overwrite").saveAsTable("pyspark_bucket")
+        self.assertEqual(count_bucketed_cols(["x", "y"]), 2)
+        self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
+
+        # Test write with bucket and sort
+        df.write.bucketBy(2, "x").sortBy("z").mode("overwrite").saveAsTable("pyspark_bucket")
+        self.assertEqual(count_bucketed_cols(["x"]), 1)
+        self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
+
+        # Test write with a list of columns
+        df.write.bucketBy(3, ["x", "y"]).mode("overwrite").saveAsTable("pyspark_bucket")
+        self.assertEqual(count_bucketed_cols(["x", "y"]), 2)
+        self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
+
+        # Test write with bucket and sort with a list of columns
+        (df.write.bucketBy(2, "x")
+            .sortBy(["y", "z"])
+            .mode("overwrite").saveAsTable("pyspark_bucket"))
+        self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
+
+        # Test write with bucket and sort with multiple columns
+        (df.write.bucketBy(2, "x")
+            .sortBy("y", "z")
+            .mode("overwrite").saveAsTable("pyspark_bucket"))
+        self.assertSetEqual(set(data), set(self.spark.table("pyspark_bucket").collect()))
+
+    def _to_pandas(self):
+        from datetime import datetime, date
+        schema = StructType().add("a", IntegerType()).add("b", StringType())\
+                             .add("c", BooleanType()).add("d", FloatType())\
+                             .add("dt", DateType()).add("ts", TimestampType())
+        data = [
+            (1, "foo", True, 3.0, date(1969, 1, 1), datetime(1969, 1, 1, 1, 1, 1)),
+            (2, "foo", True, 5.0, None, None),
+            (3, "bar", False, -1.0, date(2012, 3, 3), datetime(2012, 3, 3, 3, 3, 3)),
+            (4, "bar", False, 6.0, date(2100, 4, 4), datetime(2100, 4, 4, 4, 4, 4)),
+        ]
+        df = self.spark.createDataFrame(data, schema)
+        return df.toPandas()
+
+    @unittest.skipIf(not _have_pandas, _pandas_requirement_message)
+    def test_to_pandas(self):
+        import numpy as np
+        pdf = self._to_pandas()
+        types = pdf.dtypes
+        self.assertEquals(types[0], np.int32)
+        self.assertEquals(types[1], np.object)
+        self.assertEquals(types[2], np.bool)
+        self.assertEquals(types[3], np.float32)
+        self.assertEquals(types[4], np.object)  # datetime.date
+        self.assertEquals(types[5], 'datetime64[ns]')
+
+    @unittest.skipIf(_have_pandas, "Required Pandas was found.")
+    def test_to_pandas_required_pandas_not_found(self):
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'):
+                self._to_pandas()
+
+    @unittest.skipIf(not _have_pandas, _pandas_requirement_message)
+    def test_to_pandas_avoid_astype(self):
+        import numpy as np
+        schema = StructType().add("a", IntegerType()).add("b", StringType())\
+                             .add("c", IntegerType())
+        data = [(1, "foo", 16777220), (None, "bar", None)]
+        df = self.spark.createDataFrame(data, schema)
+        types = df.toPandas().dtypes
+        self.assertEquals(types[0], np.float64)  # doesn't convert to np.int32 due to NaN value.
+        self.assertEquals(types[1], np.object)
+        self.assertEquals(types[2], np.float64)
+
+    def test_create_dataframe_from_array_of_long(self):
+        import array
+        data = [Row(longarray=array.array('l', [-9223372036854775808, 0, 9223372036854775807]))]
+        df = self.spark.createDataFrame(data)
+        self.assertEqual(df.first(), Row(longarray=[-9223372036854775808, 0, 9223372036854775807]))
+
+    @unittest.skipIf(not _have_pandas, _pandas_requirement_message)
+    def test_create_dataframe_from_pandas_with_timestamp(self):
+        import pandas as pd
+        from datetime import datetime
+        pdf = pd.DataFrame({"ts": [datetime(2017, 10, 31, 1, 1, 1)],
+                            "d": [pd.Timestamp.now().date()]}, columns=["d", "ts"])
+        # test types are inferred correctly without specifying schema
+        df = self.spark.createDataFrame(pdf)
+        self.assertTrue(isinstance(df.schema['ts'].dataType, TimestampType))
+        self.assertTrue(isinstance(df.schema['d'].dataType, DateType))
+        # test with schema will accept pdf as input
+        df = self.spark.createDataFrame(pdf, schema="d date, ts timestamp")
+        self.assertTrue(isinstance(df.schema['ts'].dataType, TimestampType))
+        self.assertTrue(isinstance(df.schema['d'].dataType, DateType))
+
+    @unittest.skipIf(_have_pandas, "Required Pandas was found.")
+    def test_create_dataframe_required_pandas_not_found(self):
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    ImportError,
+                    "(Pandas >= .* must be installed|No module named '?pandas'?)"):
+                import pandas as pd
+                from datetime import datetime
+                pdf = pd.DataFrame({"ts": [datetime(2017, 10, 31, 1, 1, 1)],
+                                    "d": [pd.Timestamp.now().date()]})
+                self.spark.createDataFrame(pdf)
+
+    # Regression test for SPARK-23360
+    @unittest.skipIf(not _have_pandas, _pandas_requirement_message)
+    def test_create_dataframe_from_pandas_with_dst(self):
+        import pandas as pd
+        from datetime import datetime
+
+        pdf = pd.DataFrame({'time': [datetime(2015, 10, 31, 22, 30)]})
+
+        df = self.spark.createDataFrame(pdf)
+        self.assertPandasEqual(pdf, df.toPandas())
+
+        orig_env_tz = os.environ.get('TZ', None)
+        try:
+            tz = 'America/Los_Angeles'
+            os.environ['TZ'] = tz
+            time.tzset()
+            with self.sql_conf({'spark.sql.session.timeZone': tz}):
+                df = self.spark.createDataFrame(pdf)
+                self.assertPandasEqual(pdf, df.toPandas())
+        finally:
+            del os.environ['TZ']
+            if orig_env_tz is not None:
+                os.environ['TZ'] = orig_env_tz
+            time.tzset()
+
+    def test_sort_with_nulls_order(self):
+        from pyspark.sql import functions
+
+        df = self.spark.createDataFrame(
+            [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"])
+        self.assertEquals(
+            df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect(),
+            [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')])
+        self.assertEquals(
+            df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect(),
+            [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)])
+        self.assertEquals(
+            df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect(),
+            [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')])
+        self.assertEquals(
+            df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect(),
+            [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)])
+
+    def test_json_sampling_ratio(self):
+        rdd = self.spark.sparkContext.range(0, 100, 1, 1) \
+            .map(lambda x: '{"a":0.1}' if x == 1 else '{"a":%s}' % str(x))
+        schema = self.spark.read.option('inferSchema', True) \
+            .option('samplingRatio', 0.5) \
+            .json(rdd).schema
+        self.assertEquals(schema, StructType([StructField("a", LongType(), True)]))
+
+    def test_csv_sampling_ratio(self):
+        rdd = self.spark.sparkContext.range(0, 100, 1, 1) \
+            .map(lambda x: '0.1' if x == 1 else str(x))
+        schema = self.spark.read.option('inferSchema', True)\
+            .csv(rdd, samplingRatio=0.5).schema
+        self.assertEquals(schema, StructType([StructField("_c0", IntegerType(), True)]))
+
+    def test_checking_csv_header(self):
+        path = tempfile.mkdtemp()
+        shutil.rmtree(path)
+        try:
+            self.spark.createDataFrame([[1, 1000], [2000, 2]])\
+                .toDF('f1', 'f2').write.option("header", "true").csv(path)
+            schema = StructType([
+                StructField('f2', IntegerType(), nullable=True),
+                StructField('f1', IntegerType(), nullable=True)])
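+            # With enforceSchema=False the CSV header is validated against the given
+            # schema, so the swapped column names must be rejected on read.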
+            df = self.spark.read.option('header', 'true').schema(schema)\
+                .csv(path, enforceSchema=False)
+            self.assertRaisesRegexp(
+                Exception,
+                "CSV header does not conform to the schema",
+                lambda: df.collect())
+        finally:
+            shutil.rmtree(path)
+
+    def test_ignore_column_of_all_nulls(self):
+        path = tempfile.mkdtemp()
+        shutil.rmtree(path)
+        try:
+            df = self.spark.createDataFrame([["""{"a":null, "b":1, "c":3.0}"""],
+                                             ["""{"a":null, "b":null, "c":"string"}"""],
+                                             ["""{"a":null, "b":null, "c":null}"""]])
+            df.write.text(path)
+            schema = StructType([
+                StructField('b', LongType(), nullable=True),
+                StructField('c', StringType(), nullable=True)])
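+            # dropFieldIfAllNull=True should drop column 'a', which is null in every record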
+            readback = self.spark.read.json(path, dropFieldIfAllNull=True)
+            self.assertEquals(readback.schema, schema)
+        finally:
+            shutil.rmtree(path)
+
+    # SPARK-24721
+    @unittest.skipIf(not _test_compiled, _test_not_compiled_message)
+    def test_datasource_with_udf(self):
+        from pyspark.sql.functions import udf, lit, col
+
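+        # Apply Python UDFs in projections and filters against a file source, a
+        # DataSource V1 relation and a DataSource V2 relation, with both literal
+        # and column inputs, and check that all of them return the expected rows.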
+        path = tempfile.mkdtemp()
+        shutil.rmtree(path)
+
+        try:
+            self.spark.range(1).write.mode("overwrite").format('csv').save(path)
+            filesource_df = self.spark.read.option('inferSchema', True).csv(path).toDF('i')
+            datasource_df = self.spark.read \
+                .format("org.apache.spark.sql.sources.SimpleScanSource") \
+                .option('from', 0).option('to', 1).load().toDF('i')
+            datasource_v2_df = self.spark.read \
+                .format("org.apache.spark.sql.sources.v2.SimpleDataSourceV2") \
+                .load().toDF('i', 'j')
+
+            c1 = udf(lambda x: x + 1, 'int')(lit(1))
+            c2 = udf(lambda x: x + 1, 'int')(col('i'))
+
+            f1 = udf(lambda x: False, 'boolean')(lit(1))
+            f2 = udf(lambda x: False, 'boolean')(col('i'))
+
+            for df in [filesource_df, datasource_df, datasource_v2_df]:
+                result = df.withColumn('c', c1)
+                expected = df.withColumn('c', lit(2))
+                self.assertEquals(expected.collect(), result.collect())
+
+            for df in [filesource_df, datasource_df, datasource_v2_df]:
+                result = df.withColumn('c', c2)
+                expected = df.withColumn('c', col('i') + 1)
+                self.assertEquals(expected.collect(), result.collect())
+
+            for df in [filesource_df, datasource_df, datasource_v2_df]:
+                for f in [f1, f2]:
+                    result = df.filter(f)
+                    self.assertEquals(0, result.count())
+        finally:
+            shutil.rmtree(path)
+
+    def test_repr_behaviors(self):
+        import re
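+        # The expected strings below use a leading '|' margin for readability; the
+        # pattern strips that margin before comparing against the actual repr output.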
+        pattern = re.compile(r'^ *\|', re.MULTILINE)
+        df = self.spark.createDataFrame([(1, "1"), (22222, "22222")], ("key", "value"))
+
+        # test when eager evaluation is enabled and _repr_html_ will not be called
+        with self.sql_conf({"spark.sql.repl.eagerEval.enabled": True}):
+            expected1 = """+-----+-----+
+                ||  key|value|
+                |+-----+-----+
+                ||    1|    1|
+                ||22222|22222|
+                |+-----+-----+
+                |"""
+            self.assertEquals(re.sub(pattern, '', expected1), df.__repr__())
+            with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}):
+                expected2 = """+---+-----+
+                ||key|value|
+                |+---+-----+
+                ||  1|    1|
+                ||222|  222|
+                |+---+-----+
+                |"""
+                self.assertEquals(re.sub(pattern, '', expected2), df.__repr__())
+                with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}):
+                    expected3 = """+---+-----+
+                    ||key|value|
+                    |+---+-----+
+                    ||  1|    1|
+                    |+---+-----+
+                    |only showing top 1 row
+                    |"""
+                    self.assertEquals(re.sub(pattern, '', expected3), df.__repr__())
+
+        # test when eager evaluation is enabled and _repr_html_ will be called
+        with self.sql_conf({"spark.sql.repl.eagerEval.enabled": True}):
+            expected1 = """<table border='1'>
+                |<tr><th>key</th><th>value</th></tr>
+                |<tr><td>1</td><td>1</td></tr>
+                |<tr><td>22222</td><td>22222</td></tr>
+                |</table>
+                |"""
+            self.assertEquals(re.sub(pattern, '', expected1), df._repr_html_())
+            with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}):
+                expected2 = """<table border='1'>
+                    |<tr><th>key</th><th>value</th></tr>
+                    |<tr><td>1</td><td>1</td></tr>
+                    |<tr><td>222</td><td>222</td></tr>
+                    |</table>
+                    |"""
+                self.assertEquals(re.sub(pattern, '', expected2), df._repr_html_())
+                with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}):
+                    expected3 = """<table border='1'>
+                        |<tr><th>key</th><th>value</th></tr>
+                        |<tr><td>1</td><td>1</td></tr>
+                        |</table>
+                        |only showing top 1 row
+                        |"""
+                    self.assertEquals(re.sub(pattern, '', expected3), df._repr_html_())
+
+        # test when eager evaluation is disabled and _repr_html_ will be called
+        with self.sql_conf({"spark.sql.repl.eagerEval.enabled": False}):
+            expected = "DataFrame[key: bigint, value: string]"
+            self.assertEquals(None, df._repr_html_())
+            self.assertEquals(expected, df.__repr__())
+            with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}):
+                self.assertEquals(None, df._repr_html_())
+                self.assertEquals(expected, df.__repr__())
+                with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}):
+                    self.assertEquals(None, df._repr_html_())
+                    self.assertEquals(expected, df.__repr__())
+
+    # SPARK-25591
+    def test_same_accumulator_in_udfs(self):
+        from pyspark.sql.functions import udf
+
+        data_schema = StructType([StructField("a", IntegerType(), True),
+                                  StructField("b", IntegerType(), True)])
+        data = self.spark.createDataFrame([[1, 2]], schema=data_schema)
+
+        test_accum = self.sc.accumulator(0)
+
+        def first_udf(x):
+            test_accum.add(1)
+            return x
+
+        def second_udf(x):
+            test_accum.add(100)
+            return x
+
+        func_udf = udf(first_udf, IntegerType())
+        func_udf2 = udf(second_udf, IntegerType())
+        data = data.withColumn("out1", func_udf(data["a"]))
+        data = data.withColumn("out2", func_udf2(data["b"]))
+        data.collect()
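+        # Both UDFs share the same accumulator; evaluating one row through each of
+        # them should yield 1 + 100 = 101.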
+        self.assertEqual(test_accum.value, 101)
+
+
+class HiveSparkSubmitTests(SparkSubmitTests):
+
+    @classmethod
+    def setUpClass(cls):
+        # get a SparkContext to check for availability of Hive
+        sc = SparkContext('local[4]', cls.__name__)
+        cls.hive_available = True
+        try:
+            sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+        except py4j.protocol.Py4JError:
+            cls.hive_available = False
+        except TypeError:
+            cls.hive_available = False
+        finally:
+            # we don't need this SparkContext for the test
+            sc.stop()
+
+    def setUp(self):
+        super(HiveSparkSubmitTests, self).setUp()
+        if not self.hive_available:
+            self.skipTest("Hive is not available.")
+
+    def test_hivecontext(self):
+        # This test checks that HiveContext uses the Hive metastore (SPARK-16224).
+        # It sets a metastore URL and checks whether a Derby directory is created by
+        # the Hive metastore; if that directory exists, HiveContext is indeed using
+        # the Hive metastore.
+        metastore_path = os.path.join(tempfile.mkdtemp(), "spark16224_metastore_db")
+        metastore_URL = "jdbc:derby:;databaseName=" + metastore_path + ";create=true"
+        hive_site_dir = os.path.join(self.programDir, "conf")
+        hive_site_file = self.createTempFile("hive-site.xml", ("""
+            |<configuration>
+            |  <property>
+            |  <name>javax.jdo.option.ConnectionURL</name>
+            |  <value>%s</value>
+            |  </property>
+            |</configuration>
+            """ % metastore_URL).lstrip(), "conf")
+        script = self.createTempFile("test.py", """
+            |import os
+            |
+            |from pyspark.conf import SparkConf
+            |from pyspark.context import SparkContext
+            |from pyspark.sql import HiveContext
+            |
+            |conf = SparkConf()
+            |sc = SparkContext(conf=conf)
+            |hive_context = HiveContext(sc)
+            |print(hive_context.sql("show databases").collect())
+            """)
+        proc = subprocess.Popen(
+            self.sparkSubmit + ["--master", "local-cluster[1,1,1024]",
+                                "--driver-class-path", hive_site_dir, script],
+            stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("default", out.decode('utf-8'))
+        self.assertTrue(os.path.exists(metastore_path))
+
+
+class SQLTests2(ReusedSQLTestCase):
+
+    # This test can't live in SQLTests because it stops the class-level SparkContext,
+    # which would cause the other tests to fail.
+    def test_sparksession_with_stopped_sparkcontext(self):
+        self.sc.stop()
+        sc = SparkContext('local[4]', self.sc.appName)
+        spark = SparkSession.builder.getOrCreate()
+        try:
+            df = spark.createDataFrame([(1, 2)], ["c", "c"])
+            df.collect()
+        finally:
+            spark.stop()
+            sc.stop()
+
+
+class QueryExecutionListenerTests(unittest.TestCase, SQLTestUtils):
+    # These tests are kept separate because they use 'spark.sql.queryExecutionListeners',
+    # which is a static, immutable configuration that can't be set or unset at runtime,
+    # for example via `spark.conf`.
+
+    @classmethod
+    def setUpClass(cls):
+        import glob
+        from pyspark.find_spark_home import _find_spark_home
+
+        SPARK_HOME = _find_spark_home()
+        filename_pattern = (
+            "sql/core/target/scala-*/test-classes/org/apache/spark/sql/"
+            "TestQueryExecutionListener.class")
+        cls.has_listener = bool(glob.glob(os.path.join(SPARK_HOME, filename_pattern)))
+
+        if cls.has_listener:
+            # Note that 'spark.sql.queryExecutionListeners' is a static immutable configuration.
+            cls.spark = SparkSession.builder \
+                .master("local[4]") \
+                .appName(cls.__name__) \
+                .config(
+                    "spark.sql.queryExecutionListeners",
+                    "org.apache.spark.sql.TestQueryExecutionListener") \
+                .getOrCreate()
+
+    def setUp(self):
+        if not self.has_listener:
+            raise self.skipTest(
+                "'org.apache.spark.sql.TestQueryExecutionListener' is not "
+                "available. Will skip the related tests.")
+
+    @classmethod
+    def tearDownClass(cls):
+        if hasattr(cls, "spark"):
+            cls.spark.stop()
+
+    def tearDown(self):
+        self.spark._jvm.OnSuccessCall.clear()
+
+    def test_query_execution_listener_on_collect(self):
+        self.assertFalse(
+            self.spark._jvm.OnSuccessCall.isCalled(),
+            "The callback from the query execution listener should not be called before 'collect'")
+        self.spark.sql("SELECT * FROM range(1)").collect()
+        self.assertTrue(
+            self.spark._jvm.OnSuccessCall.isCalled(),
+            "The callback from the query execution listener should be called after 'collect'")
+
+    @unittest.skipIf(
+        not _have_pandas or not _have_pyarrow,
+        _pandas_requirement_message or _pyarrow_requirement_message)
+    def test_query_execution_listener_on_collect_with_arrow(self):
+        with self.sql_conf({"spark.sql.execution.arrow.enabled": True}):
+            self.assertFalse(
+                self.spark._jvm.OnSuccessCall.isCalled(),
+                "The callback from the query execution listener should not be "
+                "called before 'toPandas'")
+            self.spark.sql("SELECT * FROM range(1)").toPandas()
+            self.assertTrue(
+                self.spark._jvm.OnSuccessCall.isCalled(),
+                "The callback from the query execution listener should be called after 'toPandas'")
+
+
+class SparkSessionTests(PySparkTestCase):
+
+    # This test is kept separate because it is closely tied to the session's start and stop.
+    # See SPARK-23228.
+    def test_set_jvm_default_session(self):
+        spark = SparkSession.builder.getOrCreate()
+        try:
+            self.assertTrue(spark._jvm.SparkSession.getDefaultSession().isDefined())
+        finally:
+            spark.stop()
+            self.assertTrue(spark._jvm.SparkSession.getDefaultSession().isEmpty())
+
+    def test_jvm_default_session_already_set(self):
+        # Here, we assume the default session has already been set in the JVM.
+        jsession = self.sc._jvm.SparkSession(self.sc._jsc.sc())
+        self.sc._jvm.SparkSession.setDefaultSession(jsession)
+
+        spark = SparkSession.builder.getOrCreate()
+        try:
+            self.assertTrue(spark._jvm.SparkSession.getDefaultSession().isDefined())
+            # The session should be the same as the existing one.
+            self.assertTrue(jsession.equals(spark._jvm.SparkSession.getDefaultSession().get()))
+        finally:
+            spark.stop()
+
+
+class UDFInitializationTests(unittest.TestCase):
+    def tearDown(self):
+        if SparkSession._instantiatedSession is not None:
+            SparkSession._instantiatedSession.stop()
+
+        if SparkContext._active_spark_context is not None:
+            SparkContext._active_spark_context.stop()
+
+    def test_udf_init_shouldnt_initialize_context(self):
+        from pyspark.sql.functions import UserDefinedFunction
+
+        UserDefinedFunction(lambda x: x, StringType())
+
+        self.assertIsNone(
+            SparkContext._active_spark_context,
+            "SparkContext shouldn't be initialized when UserDefinedFunction is created."
+        )
+        self.assertIsNone(
+            SparkSession._instantiatedSession,
+            "SparkSession shouldn't be initialized when UserDefinedFunction is created."
+        )
+
+
+class HiveContextSQLTests(ReusedPySparkTestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        ReusedPySparkTestCase.setUpClass()
+        cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
+        cls.hive_available = True
+        try:
+            cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+        except py4j.protocol.Py4JError:
+            cls.hive_available = False
+        except TypeError:
+            cls.hive_available = False
+        os.unlink(cls.tempdir.name)
+        if cls.hive_available:
+            cls.spark = HiveContext._createForTesting(cls.sc)
+            cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
+            cls.df = cls.sc.parallelize(cls.testData).toDF()
+
+    def setUp(self):
+        if not self.hive_available:
+            self.skipTest("Hive is not available.")
+
+    @classmethod
+    def tearDownClass(cls):
+        ReusedPySparkTestCase.tearDownClass()
+        shutil.rmtree(cls.tempdir.name, ignore_errors=True)
+
+    def test_save_and_load_table(self):
+        df = self.df
+        tmpPath = tempfile.mkdtemp()
+        shutil.rmtree(tmpPath)
+        df.write.saveAsTable("savedJsonTable", "json", "append", path=tmpPath)
+        actual = self.spark.createExternalTable("externalJsonTable", tmpPath, "json")
+        self.assertEqual(sorted(df.collect()),
+                         sorted(self.spark.sql("SELECT * FROM savedJsonTable").collect()))
+        self.assertEqual(sorted(df.collect()),
+                         sorted(self.spark.sql("SELECT * FROM externalJsonTable").collect()))
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+        self.spark.sql("DROP TABLE externalJsonTable")
+
+        df.write.saveAsTable("savedJsonTable", "json", "overwrite", path=tmpPath)
+        schema = StructType([StructField("value", StringType(), True)])
+        actual = self.spark.createExternalTable("externalJsonTable", source="json",
+                                                schema=schema, path=tmpPath,
+                                                noUse="this option will not be used")
+        self.assertEqual(sorted(df.collect()),
+                         sorted(self.spark.sql("SELECT * FROM savedJsonTable").collect()))
+        self.assertEqual(sorted(df.select("value").collect()),
+                         sorted(self.spark.sql("SELECT * FROM externalJsonTable").collect()))
+        self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect()))
+        self.spark.sql("DROP TABLE savedJsonTable")
+        self.spark.sql("DROP TABLE externalJsonTable")
+
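+        # Repeat with the default data source switched to json, so that neither
+        # saveAsTable nor createExternalTable needs an explicit source.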
+        defaultDataSourceName = self.spark.getConf("spark.sql.sources.default",
+                                                   "org.apache.spark.sql.parquet")
+        self.spark.sql("SET spark.sql.sources.default=org.apache.spark.sql.json")
+        df.write.saveAsTable("savedJsonTable", path=tmpPath, mode="overwrite")
+        actual = self.spark.createExternalTable("externalJsonTable", path=tmpPath)
+        self.assertEqual(sorted(df.collect()),
+                         sorted(self.spark.sql("SELECT * FROM savedJsonTable").collect()))
+        self.assertEqual(sorted(df.collect()),
+                         sorted(self.spark.sql("SELECT * FROM externalJsonTable").collect()))
+        self.assertEqual(sorted(df.collect()), sorted(actual.collect()))
+        self.spark.sql("DROP TABLE savedJsonTable")
+        self.spark.sql("DROP TABLE externalJsonTable")
+        self.spark.sql("SET spark.sql.sources.default=" + defaultDataSourceName)
+
+        shutil.rmtree(tmpPath)
+
+    def test_window_functions(self):
+        df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
+        w = Window.partitionBy("value").orderBy("key")
+        from pyspark.sql import functions as F
+        sel = df.select(df.value, df.key,
+                        F.max("key").over(w.rowsBetween(0, 1)),
+                        F.min("key").over(w.rowsBetween(0, 1)),
+                        F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))),
+                        F.row_number().over(w),
+                        F.rank().over(w),
+                        F.dense_rank().over(w),
+                        F.ntile(2).over(w))
+        rs = sorted(sel.collect())
+        expected = [
+            ("1", 1, 1, 1, 1, 1, 1, 1, 1),
+            ("2", 1, 1, 1, 3, 1, 1, 1, 1),
+            ("2", 1, 2, 1, 3, 2, 1, 1, 1),
+            ("2", 2, 2, 2, 3, 3, 3, 2, 2)
+        ]
+        for r, ex in zip(rs, expected):
+            self.assertEqual(tuple(r), ex[:len(r)])
+
+    def test_window_functions_without_partitionBy(self):
+        df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
+        w = Window.orderBy("key", df.value)
+        from pyspark.sql import functions as F
+        sel = df.select(df.value, df.key,
+                        F.max("key").over(w.rowsBetween(0, 1)),
+                        F.min("key").over(w.rowsBetween(0, 1)),
+                        F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))),
+                        F.row_number().over(w),
+                        F.rank().over(w),
+                        F.dense_rank().over(w),
+                        F.ntile(2).over(w))
+        rs = sorted(sel.collect())
+        expected = [
+            ("1", 1, 1, 1, 4, 1, 1, 1, 1),
+            ("2", 1, 1, 1, 4, 2, 2, 2, 1),
+            ("2", 1, 2, 1, 4, 3, 2, 2, 2),
+            ("2", 2, 2, 2, 4, 4, 4, 3, 2)
+        ]
+        for r, ex in zip(rs, expected):
+            self.assertEqual(tuple(r), ex[:len(r)])
+
+    def test_window_functions_cumulative_sum(self):
+        df = self.spark.createDataFrame([("one", 1), ("two", 2)], ["key", "value"])
+        from pyspark.sql import functions as F
+
+        # Test cumulative sum
+        sel = df.select(
+            df.key,
+            F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding, 0)))
+        rs = sorted(sel.collect())
+        expected = [("one", 1), ("two", 3)]
+        for r, ex in zip(rs, expected):
+            self.assertEqual(tuple(r), ex[:len(r)])
+
+        # Test boundary values less than JVM's Long.MinValue and make sure we don't overflow
+        sel = df.select(
+            df.key,
+            F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding - 1, 0)))
+        rs = sorted(sel.collect())
+        expected = [("one", 1), ("two", 3)]
+        for r, ex in zip(rs, expected):
+            self.assertEqual(tuple(r), ex[:len(r)])
+
+        # Test boundary values greater than JVM's Long.MaxValue and make sure we don't overflow
+        frame_end = Window.unboundedFollowing + 1
+        sel = df.select(
+            df.key,
+            F.sum(df.value).over(Window.rowsBetween(Window.currentRow, frame_end)))
+        rs = sorted(sel.collect())
+        expected = [("one", 3), ("two", 2)]
+        for r, ex in zip(rs, expected):
+            self.assertEqual(tuple(r), ex[:len(r)])
+
+    def test_collect_functions(self):
+        df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"])
+        from pyspark.sql import functions
+
+        self.assertEqual(
+            sorted(df.select(functions.collect_set(df.key).alias('r')).collect()[0].r),
+            [1, 2])
+        self.assertEqual(
+            sorted(df.select(functions.collect_list(df.key).alias('r')).collect()[0].r),
+            [1, 1, 1, 2])
+        self.assertEqual(
+            sorted(df.select(functions.collect_set(df.value).alias('r')).collect()[0].r),
+            ["1", "2"])
+        self.assertEqual(
+            sorted(df.select(functions.collect_list(df.value).alias('r')).collect()[0].r),
+            ["1", "2", "2", "2"])
+
+    def test_limit_and_take(self):
+        df = self.spark.range(1, 1000, numPartitions=10)
+
+        def assert_runs_only_one_job_stage_and_task(job_group_name, f):
+            tracker = self.sc.statusTracker()
+            self.sc.setJobGroup(job_group_name, description="")
+            f()
+            jobs = tracker.getJobIdsForGroup(job_group_name)
+            self.assertEqual(1, len(jobs))
+            stages = tracker.getJobInfo(jobs[0]).stageIds
+            self.assertEqual(1, len(stages))
+            self.assertEqual(1, tracker.getStageInfo(stages[0]).numTasks)
+
+        # Regression test for SPARK-10731: take should delegate to Scala implementation
+        assert_runs_only_one_job_stage_and_task("take", lambda: df.take(1))
+        # Regression test for SPARK-17514: limit(n).collect() should perform the same as take(n)
+        assert_runs_only_one_job_stage_and_task("collect_limit", lambda: df.limit(1).collect())
+
+    def test_datetime_functions(self):
+        from pyspark.sql import functions
+        from datetime import date, datetime
+        df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol")
+        parse_result = df.select(functions.to_date(functions.col("dateCol"))).first()
+        self.assertEquals(date(2017, 1, 22), parse_result['to_date(`dateCol`)'])
+
+    @unittest.skipIf(sys.version_info < (3, 3), "unittest.mock requires Python >= 3.3")
+    def test_unbounded_frames(self):
+        from unittest.mock import patch
+        from pyspark.sql import functions as F
+        from pyspark.sql import window
+        import importlib
+
+        df = self.spark.range(0, 3)
+
+        def rows_frame_match():
+            return "ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in df.select(
+                F.count("*").over(window.Window.rowsBetween(-sys.maxsize, sys.maxsize))
+            ).columns[0]
+
+        def range_frame_match():
+            return "RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" in df.select(
+                F.count("*").over(window.Window.rangeBetween(-sys.maxsize, sys.maxsize))
+            ).columns[0]
+
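+        # The window module captures sys.maxsize at import time to decide which frame
+        # boundaries count as unbounded, so patch sys.maxsize and reload the module to
+        # check that the boundaries still map to UNBOUNDED frames for several word sizes.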
+        with patch("sys.maxsize", 2 ** 31 - 1):
+            importlib.reload(window)
+            self.assertTrue(rows_frame_match())
+            self.assertTrue(range_frame_match())
+
+        with patch("sys.maxsize", 2 ** 63 - 1):
+            importlib.reload(window)
+            self.assertTrue(rows_frame_match())
+            self.assertTrue(range_frame_match())
+
+        with patch("sys.maxsize", 2 ** 127 - 1):
+            importlib.reload(window)
+            self.assertTrue(rows_frame_match())
+            self.assertTrue(range_frame_match())
+
+        importlib.reload(window)
+
+
+class DataTypeVerificationTests(unittest.TestCase):
+
+    def test_verify_type_exception_msg(self):
+        self.assertRaisesRegexp(
+            ValueError,
+            "test_name",
+            lambda: _make_type_verifier(StringType(), nullable=False, name="test_name")(None))
+
+        schema = StructType([StructField('a', StructType([StructField('b', IntegerType())]))])
+        self.assertRaisesRegexp(
+            TypeError,
+            "field b in field a",
+            lambda: _make_type_verifier(schema)([["data"]]))
+
+    def test_verify_type_ok_nullable(self):
+        obj = None
+        types = [IntegerType(), FloatType(), StringType(), StructType([])]
+        for data_type in types:
+            try:
+                _make_type_verifier(data_type, nullable=True)(obj)
+            except Exception:
+                self.fail("verify_type(%s, %s, nullable=True)" % (obj, data_type))
+
+    def test_verify_type_not_nullable(self):
+        import array
+        import datetime
+        import decimal
+
+        schema = StructType([
+            StructField('s', StringType(), nullable=False),
+            StructField('i', IntegerType(), nullable=True)])
+
+        class MyObj:
+            def __init__(self, **kwargs):
+                for k, v in kwargs.items():
+                    setattr(self, k, v)
+
+        # obj, data_type
+        success_spec = [
+            # String
+            ("", StringType()),
+            (u"", StringType()),
+            (1, StringType()),
+            (1.0, StringType()),
+            ([], StringType()),
+            ({}, StringType()),
+
+            # UDT
+            (ExamplePoint(1.0, 2.0), ExamplePointUDT()),
+
+            # Boolean
+            (True, BooleanType()),
+
+            # Byte
+            (-(2**7), ByteType()),
+            (2**7 - 1, ByteType()),
+
+            # Short
+            (-(2**15), ShortType()),
+            (2**15 - 1, ShortType()),
+
+            # Integer
+            (-(2**31), IntegerType()),
+            (2**31 - 1, IntegerType()),
+
+            # Long
+            (2**64, LongType()),
+
+            # Float & Double
+            (1.0, FloatType()),
+            (1.0, DoubleType()),
+
+            # Decimal
+            (decimal.Decimal("1.0"), DecimalType()),
+
+            # Binary
+            (bytearray([1, 2]), BinaryType()),
+
+            # Date/Timestamp
+            (datetime.date(2000, 1, 2), DateType()),
+            (datetime.datetime(2000, 1, 2, 3, 4), DateType()),
+            (datetime.datetime(2000, 1, 2, 3, 4), TimestampType()),
+
+            # Array
+            ([], ArrayType(IntegerType())),
+            (["1", None], ArrayType(StringType(), containsNull=True)),
+            ([1, 2], ArrayType(IntegerType())),
+            ((1, 2), ArrayType(IntegerType())),
+            (array.array('h', [1, 2]), ArrayType(IntegerType())),
+
+            # Map
+            ({}, MapType(StringType(), IntegerType())),
+            ({"a": 1}, MapType(StringType(), IntegerType())),
+            ({"a": None}, MapType(StringType(), IntegerType(), valueContainsNull=True)),
+
+            # Struct
+            ({"s": "a", "i": 1}, schema),
+            ({"s": "a", "i": None}, schema),
+            ({"s": "a"}, schema),
+            ({"s": "a", "f": 1.0}, schema),
+            (Row(s="a", i=1), schema),
+            (Row(s="a", i=None), schema),
+            (Row(s="a", i=1, f=1.0), schema),
+            (["a", 1], schema),
+            (["a", None], schema),
+            (("a", 1), schema),
+            (MyObj(s="a", i=1), schema),
+            (MyObj(s="a", i=None), schema),
+            (MyObj(s="a"), schema),
+        ]
+
+        # obj, data_type, exception class
+        failure_spec = [
+            # String (match anything but None)
+            (None, StringType(), ValueError),
+
+            # UDT
+            (ExamplePoint(1.0, 2.0), PythonOnlyUDT(), ValueError),
+
+            # Boolean
+            (1, BooleanType(), TypeError),
+            ("True", BooleanType(), TypeError),
+            ([1], BooleanType(), TypeError),
+
+            # Byte
+            (-(2**7) - 1, ByteType(), ValueError),
+            (2**7, ByteType(), ValueError),
+            ("1", ByteType(), TypeError),
+            (1.0, ByteType(), TypeError),
+
+            # Short
+            (-(2**15) - 1, ShortType(), ValueError),
+            (2**15, ShortType(), ValueError),
+
+            # Integer
+            (-(2**31) - 1, IntegerType(), ValueError),
+            (2**31, IntegerType(), ValueError),
+
+            # Float & Double
+            (1, FloatType(), TypeError),
+            (1, DoubleType(), TypeError),
+
+            # Decimal
+            (1.0, DecimalType(), TypeError),
+            (1, DecimalType(), TypeError),
+            ("1.0", DecimalType(), TypeError),
+
+            # Binary
+            (1, BinaryType(), TypeError),
+
+            # Date/Timestamp
+            ("2000-01-02", DateType(), TypeError),
+            (946811040, TimestampType(), TypeError),
+
+            # Array
+            (["1", None], ArrayType(StringType(), containsNull=False), ValueError),
+            ([1, "2"], ArrayType(IntegerType()), TypeError),
+
+            # Map
+            ({"a": 1}, MapType(IntegerType(), IntegerType()), TypeError),
+            ({"a": "1"}, MapType(StringType(), IntegerType()), TypeError),
+            ({"a": None}, MapType(StringType(), IntegerType(), valueContainsNull=False),
+             ValueError),
+
+            # Struct
+            ({"s": "a", "i": "1"}, schema, TypeError),
+            (Row(s="a"), schema, ValueError),     # Row can't have missing field
+            (Row(s="a", i="1"), schema, TypeError),
+            (["a"], schema, ValueError),
+            (["a", "1"], schema, TypeError),
+            (MyObj(s="a", i="1"), schema, TypeError),
+            (MyObj(s=None, i="1"), schema, ValueError),
+        ]
+
+        # Check success cases
+        for obj, data_type in success_spec:
+            try:
+                _make_type_verifier(data_type, nullable=False)(obj)
+            except Exception:
+                self.fail("verify_type(%s, %s, nullable=False)" % (obj, data_type))
+
+        # Check failure cases
+        for obj, data_type, exp in failure_spec:
+            msg = "verify_type(%s, %s, nullable=False) == %s" % (obj, data_type, exp)
+            with self.assertRaises(exp, msg=msg):
+                _make_type_verifier(data_type, nullable=False)(obj)
+
+
+@unittest.skipIf(
+    not _have_pandas or not _have_pyarrow,
+    _pandas_requirement_message or _pyarrow_requirement_message)
+class ArrowTests(ReusedSQLTestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        from datetime import date, datetime
+        from decimal import Decimal
+        from distutils.version import LooseVersion
+        import pyarrow as pa
+        super(ArrowTests, cls).setUpClass()
+        cls.warnings_lock = threading.Lock()
+
+        # Synchronize default timezone between Python and Java
+        cls.tz_prev = os.environ.get("TZ", None)  # save current tz if set
+        tz = "America/Los_Angeles"
+        os.environ["TZ"] = tz
+        time.tzset()
+
+        cls.spark.conf.set("spark.sql.session.timeZone", tz)
+        cls.spark.conf.set("spark.sql.execution.arrow.enabled", "true")
+        # Disable fallback by default to easily detect the failures.
+        cls.spark.conf.set("spark.sql.execution.arrow.fallback.enabled", "false")
+        cls.schema = StructType([
+            StructField("1_str_t", StringType(), True),
+            StructField("2_int_t", IntegerType(), True),
+            StructField("3_long_t", LongType(), True),
+            StructField("4_float_t", FloatType(), True),
+            StructField("5_double_t", DoubleType(), True),
+            StructField("6_decimal_t", DecimalType(38, 18), True),
+            StructField("7_date_t", DateType(), True),
+            StructField("8_timestamp_t", TimestampType(), True)])
+        cls.data = [(u"a", 1, 10, 0.2, 2.0, Decimal("2.0"),
+                     date(1969, 1, 1), datetime(1969, 1, 1, 1, 1, 1)),
+                    (u"b", 2, 20, 0.4, 4.0, Decimal("4.0"),
+                     date(2012, 2, 2), datetime(2012, 2, 2, 2, 2, 2)),
+                    (u"c", 3, 30, 0.8, 6.0, Decimal("6.0"),
+                     date(2100, 3, 3), datetime(2100, 3, 3, 3, 3, 3))]
+
+        # TODO: remove version check once minimum pyarrow version is 0.10.0
+        if LooseVersion("0.10.0") <= LooseVersion(pa.__version__):
+            cls.schema.add(StructField("9_binary_t", BinaryType(), True))
+            cls.data[0] = cls.data[0] + (bytearray(b"a"),)
+            cls.data[1] = cls.data[1] + (bytearray(b"bb"),)
+            cls.data[2] = cls.data[2] + (bytearray(b"ccc"),)
+
+    @classmethod
+    def tearDownClass(cls):
+        del os.environ["TZ"]
+        if cls.tz_prev is not None:
+            os.environ["TZ"] = cls.tz_prev
+        time.tzset()
+        super(ArrowTests, cls).tearDownClass()
+
+    def create_pandas_data_frame(self):
+        import pandas as pd
+        import numpy as np
+        data_dict = {}
+        for j, name in enumerate(self.schema.names):
+            data_dict[name] = [self.data[i][j] for i in range(len(self.data))]
+        # need to convert these to numpy types first
+        data_dict["2_int_t"] = np.int32(data_dict["2_int_t"])
+        data_dict["4_float_t"] = np.float32(data_dict["4_float_t"])
+        return pd.DataFrame(data=data_dict)
+
+    def test_toPandas_fallback_enabled(self):
+        import pandas as pd
+
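+        # MapType is not supported by the Arrow conversion, so with fallback enabled
+        # toPandas() should emit a warning and take the non-Arrow code path.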
+        with self.sql_conf({"spark.sql.execution.arrow.fallback.enabled": True}):
+            schema = StructType([StructField("map", MapType(StringType(), IntegerType()), True)])
+            df = self.spark.createDataFrame([({u'a': 1},)], schema=schema)
+            with QuietTest(self.sc):
+                with self.warnings_lock:
+                    with warnings.catch_warnings(record=True) as warns:
+                        # we want the warnings to appear even if this test is run from a subclass
+                        warnings.simplefilter("always")
+                        pdf = df.toPandas()
+                        # Catch and check the last UserWarning.
+                        user_warns = [
+                            warn.message for warn in warns if isinstance(warn.message, UserWarning)]
+                        self.assertTrue(len(user_warns) > 0)
+                        self.assertTrue(
+                            "Attempting non-optimization" in _exception_message(user_warns[-1]))
+                        self.assertPandasEqual(pdf, pd.DataFrame({u'map': [{u'a': 1}]}))
+
+    def test_toPandas_fallback_disabled(self):
+        from distutils.version import LooseVersion
+        import pyarrow as pa
+
+        schema = StructType([StructField("map", MapType(StringType(), IntegerType()), True)])
+        df = self.spark.createDataFrame([(None,)], schema=schema)
+        with QuietTest(self.sc):
+            with self.warnings_lock:
+                with self.assertRaisesRegexp(Exception, 'Unsupported type'):
+                    df.toPandas()
+
+        # TODO: remove BinaryType check once minimum pyarrow version is 0.10.0
+        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            schema = StructType([StructField("binary", BinaryType(), True)])
+            df = self.spark.createDataFrame([(None,)], schema=schema)
+            with QuietTest(self.sc):
+                with self.assertRaisesRegexp(Exception, 'Unsupported type.*BinaryType'):
+                    df.toPandas()
+
+    def test_null_conversion(self):
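+        # Prepend an all-null row and check that every column reports exactly one
+        # null after the Arrow-based toPandas() conversion.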
+        df_null = self.spark.createDataFrame([tuple([None for _ in range(len(self.data[0]))])] +
+                                             self.data)
+        pdf = df_null.toPandas()
+        null_counts = pdf.isnull().sum().tolist()
+        self.assertTrue(all([c == 1 for c in null_counts]))
+
+    def _toPandas_arrow_toggle(self, df):
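+        # Run toPandas() once with Arrow disabled and once with Arrow enabled (the
+        # class default) and return both results for comparison.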
+        with self.sql_conf({"spark.sql.execution.arrow.enabled": False}):
+            pdf = df.toPandas()
+
+        pdf_arrow = df.toPandas()
+
+        return pdf, pdf_arrow
+
+    def test_toPandas_arrow_toggle(self):
+        df = self.spark.createDataFrame(self.data, schema=self.schema)
+        pdf, pdf_arrow = self._toPandas_arrow_toggle(df)
+        expected = self.create_pandas_data_frame()
+        self.assertPandasEqual(expected, pdf)
+        self.assertPandasEqual(expected, pdf_arrow)
+
+    def test_toPandas_respect_session_timezone(self):
+        df = self.spark.createDataFrame(self.data, schema=self.schema)
+
+        timezone = "America/New_York"
+        with self.sql_conf({
+                "spark.sql.execution.pandas.respectSessionTimeZone": False,
+                "spark.sql.session.timeZone": timezone}):
+            pdf_la, pdf_arrow_la = self._toPandas_arrow_toggle(df)
+            self.assertPandasEqual(pdf_arrow_la, pdf_la)
+
+        with self.sql_conf({
+                "spark.sql.execution.pandas.respectSessionTimeZone": True,
+                "spark.sql.session.timeZone": timezone}):
+            pdf_ny, pdf_arrow_ny = self._toPandas_arrow_toggle(df)
+            self.assertPandasEqual(pdf_arrow_ny, pdf_ny)
+
+            self.assertFalse(pdf_ny.equals(pdf_la))
+
+            from pyspark.sql.types import _check_series_convert_timestamps_local_tz
+            pdf_la_corrected = pdf_la.copy()
+            for field in self.schema:
+                if isinstance(field.dataType, TimestampType):
+                    pdf_la_corrected[field.name] = _check_series_convert_timestamps_local_tz(
+                        pdf_la_corrected[field.name], timezone)
+            self.assertPandasEqual(pdf_ny, pdf_la_corrected)
+
+    def test_pandas_round_trip(self):
+        pdf = self.create_pandas_data_frame()
+        df = self.spark.createDataFrame(self.data, schema=self.schema)
+        pdf_arrow = df.toPandas()
+        self.assertPandasEqual(pdf_arrow, pdf)
+
+    def test_filtered_frame(self):
+        df = self.spark.range(3).toDF("i")
+        pdf = df.filter("i < 0").toPandas()
+        self.assertEqual(len(pdf.columns), 1)
+        self.assertEqual(pdf.columns[0], "i")
+        self.assertTrue(pdf.empty)
+
+    def _createDataFrame_toggle(self, pdf, schema=None):
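+        # Build the DataFrame once with Arrow disabled and once with Arrow enabled
+        # (the class default) and return both for comparison.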
+        with self.sql_conf({"spark.sql.execution.arrow.enabled": False}):
+            df_no_arrow = self.spark.createDataFrame(pdf, schema=schema)
+
+        df_arrow = self.spark.createDataFrame(pdf, schema=schema)
+
+        return df_no_arrow, df_arrow
+
+    def test_createDataFrame_toggle(self):
+        pdf = self.create_pandas_data_frame()
+        df_no_arrow, df_arrow = self._createDataFrame_toggle(pdf, schema=self.schema)
+        self.assertEquals(df_no_arrow.collect(), df_arrow.collect())
+
+    def test_createDataFrame_respect_session_timezone(self):
+        from datetime import timedelta
+        pdf = self.create_pandas_data_frame()
+        timezone = "America/New_York"
+        with self.sql_conf({
+                "spark.sql.execution.pandas.respectSessionTimeZone": False,
+                "spark.sql.session.timeZone": timezone}):
+            df_no_arrow_la, df_arrow_la = self._createDataFrame_toggle(pdf, schema=self.schema)
+            result_la = df_no_arrow_la.collect()
+            result_arrow_la = df_arrow_la.collect()
+            self.assertEqual(result_la, result_arrow_la)
+
+        with self.sql_conf({
+                "spark.sql.execution.pandas.respectSessionTimeZone": True,
+                "spark.sql.session.timeZone": timezone}):
+            df_no_arrow_ny, df_arrow_ny = self._createDataFrame_toggle(pdf, schema=self.schema)
+            result_ny = df_no_arrow_ny.collect()
+            result_arrow_ny = df_arrow_ny.collect()
+            self.assertEqual(result_ny, result_arrow_ny)
+
+            self.assertNotEqual(result_ny, result_la)
+
+            # Adjust result_la for the 3-hour offset between Los Angeles and New York
+            result_la_corrected = [Row(**{k: v - timedelta(hours=3) if k == '8_timestamp_t' else v
+                                          for k, v in row.asDict().items()})
+                                   for row in result_la]
+            self.assertEqual(result_ny, result_la_corrected)
+
+    def test_createDataFrame_with_schema(self):
+        pdf = self.create_pandas_data_frame()
+        df = self.spark.createDataFrame(pdf, schema=self.schema)
+        self.assertEquals(self.schema, df.schema)
+        pdf_arrow = df.toPandas()
+        self.assertPandasEqual(pdf_arrow, pdf)
+
+    def test_createDataFrame_with_incorrect_schema(self):
+        pdf = self.create_pandas_data_frame()
+        fields = list(self.schema)
+        fields[0], fields[7] = fields[7], fields[0]  # swap str with timestamp
+        wrong_schema = StructType(fields)
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(Exception, ".*No cast.*string.*timestamp.*"):
+                self.spark.createDataFrame(pdf, schema=wrong_schema)
+
+    def test_createDataFrame_with_names(self):
+        pdf = self.create_pandas_data_frame()
+        new_names = list(map(str, range(len(self.schema.fieldNames()))))
+        # Test that schema as a list of column names gets applied
+        df = self.spark.createDataFrame(pdf, schema=list(new_names))
+        self.assertEquals(df.schema.fieldNames(), new_names)
+        # Test that schema as tuple of column names gets applied
+        df = self.spark.createDataFrame(pdf, schema=tuple(new_names))
+        self.assertEquals(df.schema.fieldNames(), new_names)
+
+    def test_createDataFrame_column_name_encoding(self):
+        import pandas as pd
+        pdf = pd.DataFrame({u'a': [1]})
+        columns = self.spark.createDataFrame(pdf).columns
+        self.assertTrue(isinstance(columns[0], str))
+        self.assertEquals(columns[0], 'a')
+        columns = self.spark.createDataFrame(pdf, [u'b']).columns
+        self.assertTrue(isinstance(columns[0], str))
+        self.assertEquals(columns[0], 'b')
+
+    def test_createDataFrame_with_single_data_type(self):
+        import pandas as pd
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(ValueError, ".*IntegerType.*not supported.*"):
+                self.spark.createDataFrame(pd.DataFrame({"a": [1]}), schema="int")
+
+    def test_createDataFrame_does_not_modify_input(self):
+        import pandas as pd
+        # Some series get converted for Spark to consume; make sure the input stays unchanged
+        pdf = self.create_pandas_data_frame()
+        # Use a nanosecond value to make sure it is not truncated
+        pdf.ix[0, '8_timestamp_t'] = pd.Timestamp(1)
+        # Integer columns with nulls get NaN filled with 0 and are cast during conversion
+        pdf.ix[1, '2_int_t'] = None
+        pdf_copy = pdf.copy(deep=True)
+        self.spark.createDataFrame(pdf, schema=self.schema)
+        self.assertTrue(pdf.equals(pdf_copy))
+
+    def test_schema_conversion_roundtrip(self):
+        from pyspark.sql.types import from_arrow_schema, to_arrow_schema
+        arrow_schema = to_arrow_schema(self.schema)
+        schema_rt = from_arrow_schema(arrow_schema)
+        self.assertEquals(self.schema, schema_rt)
+
+    def test_createDataFrame_with_array_type(self):
+        import pandas as pd
+        pdf = pd.DataFrame({"a": [[1, 2], [3, 4]], "b": [[u"x", u"y"], [u"y", u"z"]]})
+        df, df_arrow = self._createDataFrame_toggle(pdf)
+        result = df.collect()
+        result_arrow = df_arrow.collect()
+        expected = [tuple(list(e) for e in rec) for rec in pdf.to_records(index=False)]
+        for r in range(len(expected)):
+            for e in range(len(expected[r])):
+                self.assertTrue(expected[r][e] == result_arrow[r][e] and
+                                result[r][e] == result_arrow[r][e])
+
+    def test_toPandas_with_array_type(self):
+        expected = [([1, 2], [u"x", u"y"]), ([3, 4], [u"y", u"z"])]
+        array_schema = StructType([StructField("a", ArrayType(IntegerType())),
+                                   StructField("b", ArrayType(StringType()))])
+        df = self.spark.createDataFrame(expected, schema=array_schema)
+        pdf, pdf_arrow = self._toPandas_arrow_toggle(df)
+        result = [tuple(list(e) for e in rec) for rec in pdf.to_records(index=False)]
+        result_arrow = [tuple(list(e) for e in rec) for rec in pdf_arrow.to_records(index=False)]
+        for r in range(len(expected)):
+            for e in range(len(expected[r])):
+                self.assertTrue(expected[r][e] == result_arrow[r][e] and
+                                result[r][e] == result_arrow[r][e])
+
+    def test_createDataFrame_with_int_col_names(self):
+        import numpy as np
+        import pandas as pd
+        pdf = pd.DataFrame(np.random.rand(4, 2))
+        df, df_arrow = self._createDataFrame_toggle(pdf)
+        pdf_col_names = [str(c) for c in pdf.columns]
+        self.assertEqual(pdf_col_names, df.columns)
+        self.assertEqual(pdf_col_names, df_arrow.columns)
+
+    def test_createDataFrame_fallback_enabled(self):
+        import pandas as pd
+
+        with QuietTest(self.sc):
+            with self.sql_conf({"spark.sql.execution.arrow.fallback.enabled": True}):
+                with warnings.catch_warnings(record=True) as warns:
+                    # we want the warnings to appear even if this test is run from a subclass
+                    warnings.simplefilter("always")
+                    df = self.spark.createDataFrame(
+                        pd.DataFrame([[{u'a': 1}]]), "a: map<string, int>")
+                    # Catch and check the last UserWarning.
+                    user_warns = [
+                        warn.message for warn in warns if isinstance(warn.message, UserWarning)]
+                    self.assertTrue(len(user_warns) > 0)
+                    self.assertTrue(
+                        "Attempting non-optimization" in _exception_message(user_warns[-1]))
+                    self.assertEqual(df.collect(), [Row(a={u'a': 1})])
+
+    def test_createDataFrame_fallback_disabled(self):
+        from distutils.version import LooseVersion
+        import pandas as pd
+        import pyarrow as pa
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(TypeError, 'Unsupported type'):
+                self.spark.createDataFrame(
+                    pd.DataFrame([[{u'a': 1}]]), "a: map<string, int>")
+
+        # TODO: remove BinaryType check once minimum pyarrow version is 0.10.0
+        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            with QuietTest(self.sc):
+                with self.assertRaisesRegexp(TypeError, 'Unsupported type.*BinaryType'):
+                    self.spark.createDataFrame(
+                        pd.DataFrame([[{'a': b'aaa'}]]), "a: binary")
+
+    # Regression test for SPARK-23314
+    def test_timestamp_dst(self):
+        import pandas as pd
+        # Daylight saving time for Los Angeles for 2015 is Sun, Nov 1 at 2:00 am
+        dt = [datetime.datetime(2015, 11, 1, 0, 30),
+              datetime.datetime(2015, 11, 1, 1, 30),
+              datetime.datetime(2015, 11, 1, 2, 30)]
+        pdf = pd.DataFrame({'time': dt})
+
+        df_from_python = self.spark.createDataFrame(dt, 'timestamp').toDF('time')
+        df_from_pandas = self.spark.createDataFrame(pdf)
+
+        self.assertPandasEqual(pdf, df_from_python.toPandas())
+        self.assertPandasEqual(pdf, df_from_pandas.toPandas())
+
+
+class EncryptionArrowTests(ArrowTests):
+
+    @classmethod
+    def conf(cls):
+        return super(EncryptionArrowTests, cls).conf().set("spark.io.encryption.enabled", "true")
+
+
+@unittest.skipIf(
+    not _have_pandas or not _have_pyarrow,
+    _pandas_requirement_message or _pyarrow_requirement_message)
+class PandasUDFTests(ReusedSQLTestCase):
+
+    def test_pandas_udf_basic(self):
+        from pyspark.rdd import PythonEvalType
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
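+        # Every supported way of specifying the return type and function type should
+        # produce the same returnType and evalType on the resulting UDF.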
+        udf = pandas_udf(lambda x: x, DoubleType())
+        self.assertEqual(udf.returnType, DoubleType())
+        self.assertEqual(udf.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+
+        udf = pandas_udf(lambda x: x, DoubleType(), PandasUDFType.SCALAR)
+        self.assertEqual(udf.returnType, DoubleType())
+        self.assertEqual(udf.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+
+        udf = pandas_udf(lambda x: x, 'double', PandasUDFType.SCALAR)
+        self.assertEqual(udf.returnType, DoubleType())
+        self.assertEqual(udf.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+
+        udf = pandas_udf(lambda x: x, StructType([StructField("v", DoubleType())]),
+                         PandasUDFType.GROUPED_MAP)
+        self.assertEqual(udf.returnType, StructType([StructField("v", DoubleType())]))
+        self.assertEqual(udf.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+        udf = pandas_udf(lambda x: x, 'v double', PandasUDFType.GROUPED_MAP)
+        self.assertEqual(udf.returnType, StructType([StructField("v", DoubleType())]))
+        self.assertEqual(udf.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+        udf = pandas_udf(lambda x: x, 'v double',
+                         functionType=PandasUDFType.GROUPED_MAP)
+        self.assertEqual(udf.returnType, StructType([StructField("v", DoubleType())]))
+        self.assertEqual(udf.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+        udf = pandas_udf(lambda x: x, returnType='v double',
+                         functionType=PandasUDFType.GROUPED_MAP)
+        self.assertEqual(udf.returnType, StructType([StructField("v", DoubleType())]))
+        self.assertEqual(udf.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+    def test_pandas_udf_decorator(self):
+        from pyspark.rdd import PythonEvalType
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+        from pyspark.sql.types import StructType, StructField, DoubleType
+
+        @pandas_udf(DoubleType())
+        def foo(x):
+            return x
+        self.assertEqual(foo.returnType, DoubleType())
+        self.assertEqual(foo.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+
+        @pandas_udf(returnType=DoubleType())
+        def foo(x):
+            return x
+        self.assertEqual(foo.returnType, DoubleType())
+        self.assertEqual(foo.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+
+        schema = StructType([StructField("v", DoubleType())])
+
+        @pandas_udf(schema, PandasUDFType.GROUPED_MAP)
+        def foo(x):
+            return x
+        self.assertEqual(foo.returnType, schema)
+        self.assertEqual(foo.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+        @pandas_udf('v double', PandasUDFType.GROUPED_MAP)
+        def foo(x):
+            return x
+        self.assertEqual(foo.returnType, schema)
+        self.assertEqual(foo.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+        @pandas_udf(schema, functionType=PandasUDFType.GROUPED_MAP)
+        def foo(x):
+            return x
+        self.assertEqual(foo.returnType, schema)
+        self.assertEqual(foo.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+        @pandas_udf(returnType='double', functionType=PandasUDFType.SCALAR)
+        def foo(x):
+            return x
+        self.assertEqual(foo.returnType, DoubleType())
+        self.assertEqual(foo.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+
+        @pandas_udf(returnType=schema, functionType=PandasUDFType.GROUPED_MAP)
+        def foo(x):
+            return x
+        self.assertEqual(foo.returnType, schema)
+        self.assertEqual(foo.evalType, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF)
+
+    def test_udf_wrong_arg(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        with QuietTest(self.sc):
+            with self.assertRaises(ParseException):
+                @pandas_udf('blah')
+                def foo(x):
+                    return x
+            with self.assertRaisesRegexp(ValueError, 'Invalid returnType.*None'):
+                @pandas_udf(functionType=PandasUDFType.SCALAR)
+                def foo(x):
+                    return x
+            with self.assertRaisesRegexp(ValueError, 'Invalid functionType'):
+                @pandas_udf('double', 100)
+                def foo(x):
+                    return x
+
+            with self.assertRaisesRegexp(ValueError, '0-arg pandas_udfs.*not.*supported'):
+                pandas_udf(lambda: 1, LongType(), PandasUDFType.SCALAR)
+            with self.assertRaisesRegexp(ValueError, '0-arg pandas_udfs.*not.*supported'):
+                @pandas_udf(LongType(), PandasUDFType.SCALAR)
+                def zero_with_type():
+                    return 1
+
+            with self.assertRaisesRegexp(TypeError, 'Invalid returnType'):
+                @pandas_udf(returnType=PandasUDFType.GROUPED_MAP)
+                def foo(df):
+                    return df
+            with self.assertRaisesRegexp(TypeError, 'Invalid returnType'):
+                @pandas_udf(returnType='double', functionType=PandasUDFType.GROUPED_MAP)
+                def foo(df):
+                    return df
+            with self.assertRaisesRegexp(ValueError, 'Invalid function'):
+                @pandas_udf(returnType='k int, v double', functionType=PandasUDFType.GROUPED_MAP)
+                def foo(k, v, w):
+                    return k
+
+    def test_stopiteration_in_udf(self):
+        from pyspark.sql.functions import udf, pandas_udf, PandasUDFType
+        from py4j.protocol import Py4JJavaError
+
+        def foo(x):
+            raise StopIteration()
+
+        def foofoo(x, y):
+            raise StopIteration()
+
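+        # A StopIteration raised inside user code must fail the task rather than being
+        # swallowed as a normal end of iteration.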
+        exc_message = "Caught StopIteration thrown from user's code; failing the task"
+        df = self.spark.range(0, 100)
+
+        # plain udf (test for SPARK-23754)
+        self.assertRaisesRegexp(
+            Py4JJavaError,
+            exc_message,
+            df.withColumn('v', udf(foo)('id')).collect
+        )
+
+        # pandas scalar udf
+        self.assertRaisesRegexp(
+            Py4JJavaError,
+            exc_message,
+            df.withColumn(
+                'v', pandas_udf(foo, 'double', PandasUDFType.SCALAR)('id')
+            ).collect
+        )
+
+        # pandas grouped map
+        self.assertRaisesRegexp(
+            Py4JJavaError,
+            exc_message,
+            df.groupBy('id').apply(
+                pandas_udf(foo, df.schema, PandasUDFType.GROUPED_MAP)
+            ).collect
+        )
+
+        self.assertRaisesRegexp(
+            Py4JJavaError,
+            exc_message,
+            df.groupBy('id').apply(
+                pandas_udf(foofoo, df.schema, PandasUDFType.GROUPED_MAP)
+            ).collect
+        )
+
+        # pandas grouped agg
+        self.assertRaisesRegexp(
+            Py4JJavaError,
+            exc_message,
+            df.groupBy('id').agg(
+                pandas_udf(foo, 'double', PandasUDFType.GROUPED_AGG)('id')
+            ).collect
+        )
+
+
+@unittest.skipIf(
+    not _have_pandas or not _have_pyarrow,
+    _pandas_requirement_message or _pyarrow_requirement_message)
+class ScalarPandasUDFTests(ReusedSQLTestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        ReusedSQLTestCase.setUpClass()
+
+        # Synchronize default timezone between Python and Java
+        cls.tz_prev = os.environ.get("TZ", None)  # save current tz if set
+        tz = "America/Los_Angeles"
+        os.environ["TZ"] = tz
+        time.tzset()
+
+        cls.sc.environment["TZ"] = tz
+        cls.spark.conf.set("spark.sql.session.timeZone", tz)
+
+    @classmethod
+    def tearDownClass(cls):
+        del os.environ["TZ"]
+        if cls.tz_prev is not None:
+            os.environ["TZ"] = cls.tz_prev
+        time.tzset()
+        ReusedSQLTestCase.tearDownClass()
+
+    @property
+    def nondeterministic_vectorized_udf(self):
+        from pyspark.sql.functions import pandas_udf
+
+        @pandas_udf('double')
+        def random_udf(v):
+            import pandas as pd
+            import numpy as np
+            return pd.Series(np.random.random(len(v)))
+        random_udf = random_udf.asNondeterministic()
+        return random_udf
+
+    def test_pandas_udf_tokenize(self):
+        from pyspark.sql.functions import pandas_udf
+        tokenize = pandas_udf(lambda s: s.apply(lambda str: str.split(' ')),
+                              ArrayType(StringType()))
+        self.assertEqual(tokenize.returnType, ArrayType(StringType()))
+        df = self.spark.createDataFrame([("hi boo",), ("bye boo",)], ["vals"])
+        result = df.select(tokenize("vals").alias("hi"))
+        self.assertEqual([Row(hi=[u'hi', u'boo']), Row(hi=[u'bye', u'boo'])], result.collect())
+
+    def test_pandas_udf_nested_arrays(self):
+        from pyspark.sql.functions import pandas_udf
+        tokenize = pandas_udf(lambda s: s.apply(lambda str: [str.split(' ')]),
+                              ArrayType(ArrayType(StringType())))
+        self.assertEqual(tokenize.returnType, ArrayType(ArrayType(StringType())))
+        df = self.spark.createDataFrame([("hi boo",), ("bye boo",)], ["vals"])
+        result = df.select(tokenize("vals").alias("hi"))
+        self.assertEqual([Row(hi=[[u'hi', u'boo']]), Row(hi=[[u'bye', u'boo']])], result.collect())
+
+    def test_vectorized_udf_basic(self):
+        from pyspark.sql.functions import pandas_udf, col, array
+        df = self.spark.range(10).select(
+            col('id').cast('string').alias('str'),
+            col('id').cast('int').alias('int'),
+            col('id').alias('long'),
+            col('id').cast('float').alias('float'),
+            col('id').cast('double').alias('double'),
+            col('id').cast('decimal').alias('decimal'),
+            col('id').cast('boolean').alias('bool'),
+            array(col('id')).alias('array_long'))
+        f = lambda x: x
+        str_f = pandas_udf(f, StringType())
+        int_f = pandas_udf(f, IntegerType())
+        long_f = pandas_udf(f, LongType())
+        float_f = pandas_udf(f, FloatType())
+        double_f = pandas_udf(f, DoubleType())
+        decimal_f = pandas_udf(f, DecimalType())
+        bool_f = pandas_udf(f, BooleanType())
+        array_long_f = pandas_udf(f, ArrayType(LongType()))
+        res = df.select(str_f(col('str')), int_f(col('int')),
+                        long_f(col('long')), float_f(col('float')),
+                        double_f(col('double')), decimal_f('decimal'),
+                        bool_f(col('bool')), array_long_f('array_long'))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_register_nondeterministic_vectorized_udf_basic(self):
+        from pyspark.sql.functions import pandas_udf
+        from pyspark.rdd import PythonEvalType
+        import random
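+        # randint(6, 6) always returns 6, so the result is predictable even though the UDF
+        # is explicitly marked nondeterministic.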
+        random_pandas_udf = pandas_udf(
+            lambda x: random.randint(6, 6) + x, IntegerType()).asNondeterministic()
+        self.assertEqual(random_pandas_udf.deterministic, False)
+        self.assertEqual(random_pandas_udf.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+        nondeterministic_pandas_udf = self.spark.catalog.registerFunction(
+            "randomPandasUDF", random_pandas_udf)
+        self.assertEqual(nondeterministic_pandas_udf.deterministic, False)
+        self.assertEqual(nondeterministic_pandas_udf.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+        [row] = self.spark.sql("SELECT randomPandasUDF(1)").collect()
+        self.assertEqual(row[0], 7)
+
+    def test_vectorized_udf_null_boolean(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(True,), (True,), (None,), (False,)]
+        schema = StructType().add("bool", BooleanType())
+        df = self.spark.createDataFrame(data, schema)
+        bool_f = pandas_udf(lambda x: x, BooleanType())
+        res = df.select(bool_f(col('bool')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_byte(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(None,), (2,), (3,), (4,)]
+        schema = StructType().add("byte", ByteType())
+        df = self.spark.createDataFrame(data, schema)
+        byte_f = pandas_udf(lambda x: x, ByteType())
+        res = df.select(byte_f(col('byte')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_short(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(None,), (2,), (3,), (4,)]
+        schema = StructType().add("short", ShortType())
+        df = self.spark.createDataFrame(data, schema)
+        short_f = pandas_udf(lambda x: x, ShortType())
+        res = df.select(short_f(col('short')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_int(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(None,), (2,), (3,), (4,)]
+        schema = StructType().add("int", IntegerType())
+        df = self.spark.createDataFrame(data, schema)
+        int_f = pandas_udf(lambda x: x, IntegerType())
+        res = df.select(int_f(col('int')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_long(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(None,), (2,), (3,), (4,)]
+        schema = StructType().add("long", LongType())
+        df = self.spark.createDataFrame(data, schema)
+        long_f = pandas_udf(lambda x: x, LongType())
+        res = df.select(long_f(col('long')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_float(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(3.0,), (5.0,), (-1.0,), (None,)]
+        schema = StructType().add("float", FloatType())
+        df = self.spark.createDataFrame(data, schema)
+        float_f = pandas_udf(lambda x: x, FloatType())
+        res = df.select(float_f(col('float')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_double(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(3.0,), (5.0,), (-1.0,), (None,)]
+        schema = StructType().add("double", DoubleType())
+        df = self.spark.createDataFrame(data, schema)
+        double_f = pandas_udf(lambda x: x, DoubleType())
+        res = df.select(double_f(col('double')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_decimal(self):
+        from decimal import Decimal
+        from pyspark.sql.functions import pandas_udf, col
+        data = [(Decimal(3.0),), (Decimal(5.0),), (Decimal(-1.0),), (None,)]
+        schema = StructType().add("decimal", DecimalType(38, 18))
+        df = self.spark.createDataFrame(data, schema)
+        decimal_f = pandas_udf(lambda x: x, DecimalType(38, 18))
+        res = df.select(decimal_f(col('decimal')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_string(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [("foo",), (None,), ("bar",), ("bar",)]
+        schema = StructType().add("str", StringType())
+        df = self.spark.createDataFrame(data, schema)
+        str_f = pandas_udf(lambda x: x, StringType())
+        res = df.select(str_f(col('str')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_string_in_udf(self):
+        from pyspark.sql.functions import pandas_udf, col
+        import pandas as pd
+        df = self.spark.range(10)
+        str_f = pandas_udf(lambda x: pd.Series(map(str, x)), StringType())
+        actual = df.select(str_f(col('id')))
+        expected = df.select(col('id').cast('string'))
+        self.assertEquals(expected.collect(), actual.collect())
+
+    def test_vectorized_udf_datatype_string(self):
+        from pyspark.sql.functions import pandas_udf, col
+        df = self.spark.range(10).select(
+            col('id').cast('string').alias('str'),
+            col('id').cast('int').alias('int'),
+            col('id').alias('long'),
+            col('id').cast('float').alias('float'),
+            col('id').cast('double').alias('double'),
+            col('id').cast('decimal').alias('decimal'),
+            col('id').cast('boolean').alias('bool'))
+        f = lambda x: x
+        str_f = pandas_udf(f, 'string')
+        int_f = pandas_udf(f, 'integer')
+        long_f = pandas_udf(f, 'long')
+        float_f = pandas_udf(f, 'float')
+        double_f = pandas_udf(f, 'double')
+        decimal_f = pandas_udf(f, 'decimal(38, 18)')
+        bool_f = pandas_udf(f, 'boolean')
+        res = df.select(str_f(col('str')), int_f(col('int')),
+                        long_f(col('long')), float_f(col('float')),
+                        double_f(col('double')), decimal_f('decimal'),
+                        bool_f(col('bool')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_null_binary(self):
+        from distutils.version import LooseVersion
+        import pyarrow as pa
+        from pyspark.sql.functions import pandas_udf, col
+        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            with QuietTest(self.sc):
+                with self.assertRaisesRegexp(
+                        NotImplementedError,
+                        'Invalid returnType.*scalar Pandas UDF.*BinaryType'):
+                    pandas_udf(lambda x: x, BinaryType())
+        else:
+            data = [(bytearray(b"a"),), (None,), (bytearray(b"bb"),), (bytearray(b"ccc"),)]
+            schema = StructType().add("binary", BinaryType())
+            df = self.spark.createDataFrame(data, schema)
+            str_f = pandas_udf(lambda x: x, BinaryType())
+            res = df.select(str_f(col('binary')))
+            self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_array_type(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [([1, 2],), ([3, 4],)]
+        array_schema = StructType([StructField("array", ArrayType(IntegerType()))])
+        df = self.spark.createDataFrame(data, schema=array_schema)
+        array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()))
+        result = df.select(array_f(col('array')))
+        self.assertEquals(df.collect(), result.collect())
+
+    def test_vectorized_udf_null_array(self):
+        from pyspark.sql.functions import pandas_udf, col
+        data = [([1, 2],), (None,), (None,), ([3, 4],), (None,)]
+        array_schema = StructType([StructField("array", ArrayType(IntegerType()))])
+        df = self.spark.createDataFrame(data, schema=array_schema)
+        array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()))
+        result = df.select(array_f(col('array')))
+        self.assertEquals(df.collect(), result.collect())
+
+    def test_vectorized_udf_complex(self):
+        from pyspark.sql.functions import pandas_udf, col, expr
+        df = self.spark.range(10).select(
+            col('id').cast('int').alias('a'),
+            col('id').cast('int').alias('b'),
+            col('id').cast('double').alias('c'))
+        add = pandas_udf(lambda x, y: x + y, IntegerType())
+        power2 = pandas_udf(lambda x: 2 ** x, IntegerType())
+        mul = pandas_udf(lambda x, y: x * y, DoubleType())
+        res = df.select(add(col('a'), col('b')), power2(col('a')), mul(col('b'), col('c')))
+        expected = df.select(expr('a + b'), expr('power(2, a)'), expr('b * c'))
+        self.assertEquals(expected.collect(), res.collect())
+
+    def test_vectorized_udf_exception(self):
+        from pyspark.sql.functions import pandas_udf, col
+        df = self.spark.range(10)
+        raise_exception = pandas_udf(lambda x: x * (1 / 0), LongType())
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(Exception, 'division( or modulo)? by zero'):
+                df.select(raise_exception(col('id'))).collect()
+
+    def test_vectorized_udf_invalid_length(self):
+        from pyspark.sql.functions import pandas_udf, col
+        import pandas as pd
+        df = self.spark.range(10)
+        raise_exception = pandas_udf(lambda _: pd.Series(1), LongType())
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    Exception,
+                    'Result vector from pandas_udf was not the required length'):
+                df.select(raise_exception(col('id'))).collect()
+
+    def test_vectorized_udf_chained(self):
+        from pyspark.sql.functions import pandas_udf, col
+        df = self.spark.range(10)
+        f = pandas_udf(lambda x: x + 1, LongType())
+        g = pandas_udf(lambda x: x - 1, LongType())
+        res = df.select(g(f(col('id'))))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_wrong_return_type(self):
+        from pyspark.sql.functions import pandas_udf, col
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    NotImplementedError,
+                    'Invalid returnType.*scalar Pandas UDF.*MapType'):
+                pandas_udf(lambda x: x * 1.0, MapType(LongType(), LongType()))
+
+    def test_vectorized_udf_return_scalar(self):
+        from pyspark.sql.functions import pandas_udf, col
+        df = self.spark.range(10)
+        f = pandas_udf(lambda x: 1.0, DoubleType())
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(Exception, 'Return.*type.*Series'):
+                df.select(f(col('id'))).collect()
+
+    def test_vectorized_udf_decorator(self):
+        from pyspark.sql.functions import pandas_udf, col
+        df = self.spark.range(10)
+
+        @pandas_udf(returnType=LongType())
+        def identity(x):
+            return x
+        res = df.select(identity(col('id')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_empty_partition(self):
+        from pyspark.sql.functions import pandas_udf, col
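+        # Two partitions for a single row, so one partition is empty.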
+        df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2))
+        f = pandas_udf(lambda x: x, LongType())
+        res = df.select(f(col('id')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_varargs(self):
+        from pyspark.sql.functions import pandas_udf, col
+        df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2))
+        f = pandas_udf(lambda *v: v[0], LongType())
+        res = df.select(f(col('id')))
+        self.assertEquals(df.collect(), res.collect())
+
+    def test_vectorized_udf_unsupported_types(self):
+        from pyspark.sql.functions import pandas_udf
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    NotImplementedError,
+                    'Invalid returnType.*scalar Pandas UDF.*MapType'):
+                pandas_udf(lambda x: x, MapType(StringType(), IntegerType()))
+
+    def test_vectorized_udf_dates(self):
+        from pyspark.sql.functions import pandas_udf, col
+        from datetime import date
+        schema = StructType().add("idx", LongType()).add("date", DateType())
+        data = [(0, date(1969, 1, 1),),
+                (1, date(2012, 2, 2),),
+                (2, None,),
+                (3, date(2100, 4, 4),)]
+        df = self.spark.createDataFrame(data, schema=schema)
+
+        date_copy = pandas_udf(lambda t: t, returnType=DateType())
+        df = df.withColumn("date_copy", date_copy(col("date")))
+
+        @pandas_udf(returnType=StringType())
+        def check_data(idx, date, date_copy):
+            import pandas as pd
+            msgs = []
+            is_equal = date.isnull()
+            for i in range(len(idx)):
+                if (is_equal[i] and data[idx[i]][1] is None) or \
+                        date[i] == data[idx[i]][1]:
+                    msgs.append(None)
+                else:
+                    msgs.append(
+                        "date values are not equal (date='%s': data[%d][1]='%s')"
+                        % (date[i], idx[i], data[idx[i]][1]))
+            return pd.Series(msgs)
+
+        result = df.withColumn("check_data",
+                               check_data(col("idx"), col("date"), col("date_copy"))).collect()
+
+        self.assertEquals(len(data), len(result))
+        for i in range(len(result)):
+            self.assertEquals(data[i][1], result[i][1])  # "date" col
+            self.assertEquals(data[i][1], result[i][2])  # "date_copy" col
+            self.assertIsNone(result[i][3])  # "check_data" col
+
+    def test_vectorized_udf_timestamps(self):
+        from pyspark.sql.functions import pandas_udf, col
+        from datetime import datetime
+        schema = StructType([
+            StructField("idx", LongType(), True),
+            StructField("timestamp", TimestampType(), True)])
+        data = [(0, datetime(1969, 1, 1, 1, 1, 1)),
+                (1, datetime(2012, 2, 2, 2, 2, 2)),
+                (2, None),
+                (3, datetime(2100, 3, 3, 3, 3, 3))]
+
+        df = self.spark.createDataFrame(data, schema=schema)
+
+        # Check that a timestamp passed through a pandas_udf is not altered by timezone calculations

+        f_timestamp_copy = pandas_udf(lambda t: t, returnType=TimestampType())
+        df = df.withColumn("timestamp_copy", f_timestamp_copy(col("timestamp")))
+
+        @pandas_udf(returnType=StringType())
+        def check_data(idx, timestamp, timestamp_copy):
+            import pandas as pd
+            msgs = []
+            is_equal = timestamp.isnull()  # use this array to check values are equal
+            for i in range(len(idx)):
+                # Check that timestamps are as expected in the UDF
+                if (is_equal[i] and data[idx[i]][1] is None) or \
+                        timestamp[i].to_pydatetime() == data[idx[i]][1]:
+                    msgs.append(None)
+                else:
+                    msgs.append(
+                        "timestamp values are not equal (timestamp='%s': data[%d][1]='%s')"
+                        % (timestamp[i], idx[i], data[idx[i]][1]))
+            return pd.Series(msgs)
+
+        result = df.withColumn("check_data", check_data(col("idx"), col("timestamp"),
+                                                        col("timestamp_copy"))).collect()
+        # Check that collection values are correct
+        self.assertEquals(len(data), len(result))
+        for i in range(len(result)):
+            self.assertEquals(data[i][1], result[i][1])  # "timestamp" col
+            self.assertEquals(data[i][1], result[i][2])  # "timestamp_copy" col
+            self.assertIsNone(result[i][3])  # "check_data" col
+
+    def test_vectorized_udf_return_timestamp_tz(self):
+        from pyspark.sql.functions import pandas_udf, col
+        import pandas as pd
+        df = self.spark.range(10)
+
+        @pandas_udf(returnType=TimestampType())
+        def gen_timestamps(id):
+            ts = [pd.Timestamp(i, unit='D', tz='America/Los_Angeles') for i in id]
+            return pd.Series(ts)
+
+        result = df.withColumn("ts", gen_timestamps(col("id"))).collect()
+        spark_ts_t = TimestampType()
+        for r in result:
+            i, ts = r
+            ts_tz = pd.Timestamp(i, unit='D', tz='America/Los_Angeles').to_pydatetime()
+            expected = spark_ts_t.fromInternal(spark_ts_t.toInternal(ts_tz))
+            self.assertEquals(expected, ts)
+
+    def test_vectorized_udf_check_config(self):
+        from pyspark.sql.functions import pandas_udf, col
+        import pandas as pd
+        with self.sql_conf({"spark.sql.execution.arrow.maxRecordsPerBatch": 3}):
+            df = self.spark.range(10, numPartitions=1)
+
+            @pandas_udf(returnType=LongType())
+            def check_records_per_batch(x):
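+                # Every output value is the current batch size; with maxRecordsPerBatch=3
+                # no batch should hold more than 3 rows.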
+                return pd.Series(x.size).repeat(x.size)
+
+            result = df.select(check_records_per_batch(col("id"))).collect()
+            for (r,) in result:
+                self.assertTrue(r <= 3)
+
+    def test_vectorized_udf_timestamps_respect_session_timezone(self):
+        from pyspark.sql.functions import pandas_udf, col
+        from datetime import datetime
+        import pandas as pd
+        schema = StructType([
+            StructField("idx", LongType(), True),
+            StructField("timestamp", TimestampType(), True)])
+        data = [(1, datetime(1969, 1, 1, 1, 1, 1)),
+                (2, datetime(2012, 2, 2, 2, 2, 2)),
+                (3, None),
+                (4, datetime(2100, 3, 3, 3, 3, 3))]
+        df = self.spark.createDataFrame(data, schema=schema)
+
+        f_timestamp_copy = pandas_udf(lambda ts: ts, TimestampType())
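+        # internal_value exposes each timestamp's epoch value in nanoseconds (pd.Timestamp.value).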
+        internal_value = pandas_udf(
+            lambda ts: ts.apply(lambda ts: ts.value if ts is not pd.NaT else None), LongType())
+
+        timezone = "America/New_York"
+        with self.sql_conf({
+                "spark.sql.execution.pandas.respectSessionTimeZone": False,
+                "spark.sql.session.timeZone": timezone}):
+            df_la = df.withColumn("tscopy", f_timestamp_copy(col("timestamp"))) \
+                .withColumn("internal_value", internal_value(col("timestamp")))
+            result_la = df_la.select(col("idx"), col("internal_value")).collect()
+            # Correct result_la by adjusting for the 3-hour difference between Los Angeles and
+            # New York; the offset is in nanoseconds because pd.Timestamp.value is in ns.
+            diff = 3 * 60 * 60 * 1000 * 1000 * 1000
+            result_la_corrected = \
+                df_la.select(col("idx"), col("tscopy"), col("internal_value") + diff).collect()
+
+        with self.sql_conf({
+                "spark.sql.execution.pandas.respectSessionTimeZone": True,
+                "spark.sql.session.timeZone": timezone}):
+            df_ny = df.withColumn("tscopy", f_timestamp_copy(col("timestamp"))) \
+                .withColumn("internal_value", internal_value(col("timestamp")))
+            result_ny = df_ny.select(col("idx"), col("tscopy"), col("internal_value")).collect()
+
+            self.assertNotEqual(result_ny, result_la)
+            self.assertEqual(result_ny, result_la_corrected)
+
+    def test_nondeterministic_vectorized_udf(self):
+        # Test that nondeterministic UDFs are evaluated only once in chained UDF evaluations
+        from pyspark.sql.functions import udf, pandas_udf, col
+
+        @pandas_udf('double')
+        def plus_ten(v):
+            return v + 10
+        random_udf = self.nondeterministic_vectorized_udf
+
+        df = self.spark.range(10).withColumn('rand', random_udf(col('id')))
+        result1 = df.withColumn('plus_ten(rand)', plus_ten(df['rand'])).toPandas()
+
+        self.assertEqual(random_udf.deterministic, False)
+        self.assertTrue(result1['plus_ten(rand)'].equals(result1['rand'] + 10))
+
+    def test_nondeterministic_vectorized_udf_in_aggregate(self):
+        from pyspark.sql.functions import pandas_udf, sum
+
+        df = self.spark.range(10)
+        random_udf = self.nondeterministic_vectorized_udf
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'):
+                df.groupby(df.id).agg(sum(random_udf(df.id))).collect()
+            with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'):
+                df.agg(sum(random_udf(df.id))).collect()
+
+    def test_register_vectorized_udf_basic(self):
+        from pyspark.rdd import PythonEvalType
+        from pyspark.sql.functions import pandas_udf, col, expr
+        df = self.spark.range(10).select(
+            col('id').cast('int').alias('a'),
+            col('id').cast('int').alias('b'))
+        original_add = pandas_udf(lambda x, y: x + y, IntegerType())
+        self.assertEqual(original_add.deterministic, True)
+        self.assertEqual(original_add.evalType, PythonEvalType.SQL_SCALAR_PANDAS_UDF)
+        new_add = self.spark.catalog.registerFunction("add1", original_add)
+        res1 = df.select(new_add(col('a'), col('b')))
+        res2 = self.spark.sql(
+            "SELECT add1(t.a, t.b) FROM (SELECT id as a, id as b FROM range(10)) t")
+        expected = df.select(expr('a + b'))
+        self.assertEquals(expected.collect(), res1.collect())
+        self.assertEquals(expected.collect(), res2.collect())
+
+    # Regression test for SPARK-23314
+    def test_timestamp_dst(self):
+        from pyspark.sql.functions import pandas_udf
+        # Daylight saving time in Los Angeles ended in 2015 on Sun, Nov 1 at 2:00 am
+        dt = [datetime.datetime(2015, 11, 1, 0, 30),
+              datetime.datetime(2015, 11, 1, 1, 30),
+              datetime.datetime(2015, 11, 1, 2, 30)]
+        df = self.spark.createDataFrame(dt, 'timestamp').toDF('time')
+        foo_udf = pandas_udf(lambda x: x, 'timestamp')
+        result = df.withColumn('time', foo_udf(df.time))
+        self.assertEquals(df.collect(), result.collect())
+
+    @unittest.skipIf(sys.version_info[:2] < (3, 5), "Type hints are supported from Python 3.5.")
+    def test_type_annotation(self):
+        from pyspark.sql.functions import pandas_udf
+        # Regression test to check if type hints can be used. See SPARK-23569.
+        # Note that it throws an error during compilation in lower Python versions if 'exec'
+        # is not used. Also, note that we explicitly use another dictionary to avoid modifications
+        # in the current 'locals()'.
+        #
+        # Hyukjin: I think this is an ugly way to test syntax that is specific to higher
+        # versions of Python, and we shouldn't encourage it. It was the last resort I could
+        # come up with at the time.
+        _locals = {}
+        exec(
+            "import pandas as pd\ndef noop(col: pd.Series) -> pd.Series: return col",
+            _locals)
+        df = self.spark.range(1).select(pandas_udf(f=_locals['noop'], returnType='bigint')('id'))
+        self.assertEqual(df.first()[0], 0)
+
+    def test_mixed_udf(self):
+        import pandas as pd
+        from pyspark.sql.functions import col, udf, pandas_udf
+
+        df = self.spark.range(0, 1).toDF('v')
+
+        # Test mixture of multiple UDFs and Pandas UDFs.
+
+        @udf('int')
+        def f1(x):
+            assert type(x) == int
+            return x + 1
+
+        @pandas_udf('int')
+        def f2(x):
+            assert type(x) == pd.Series
+            return x + 10
+
+        @udf('int')
+        def f3(x):
+            assert type(x) == int
+            return x + 100
+
+        @pandas_udf('int')
+        def f4(x):
+            assert type(x) == pd.Series
+            return x + 1000
+
+        # Test single expression with chained UDFs
+        df_chained_1 = df.withColumn('f2_f1', f2(f1(df['v'])))
+        df_chained_2 = df.withColumn('f3_f2_f1', f3(f2(f1(df['v']))))
+        df_chained_3 = df.withColumn('f4_f3_f2_f1', f4(f3(f2(f1(df['v'])))))
+        df_chained_4 = df.withColumn('f4_f2_f1', f4(f2(f1(df['v']))))
+        df_chained_5 = df.withColumn('f4_f3_f1', f4(f3(f1(df['v']))))
+
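+        # f1..f4 add 1, 10, 100 and 1000 respectively, so each chained column adds the sum
+        # of the applied offsets.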
+        expected_chained_1 = df.withColumn('f2_f1', df['v'] + 11)
+        expected_chained_2 = df.withColumn('f3_f2_f1', df['v'] + 111)
+        expected_chained_3 = df.withColumn('f4_f3_f2_f1', df['v'] + 1111)
+        expected_chained_4 = df.withColumn('f4_f2_f1', df['v'] + 1011)
+        expected_chained_5 = df.withColumn('f4_f3_f1', df['v'] + 1101)
+
+        self.assertEquals(expected_chained_1.collect(), df_chained_1.collect())
+        self.assertEquals(expected_chained_2.collect(), df_chained_2.collect())
+        self.assertEquals(expected_chained_3.collect(), df_chained_3.collect())
+        self.assertEquals(expected_chained_4.collect(), df_chained_4.collect())
+        self.assertEquals(expected_chained_5.collect(), df_chained_5.collect())
+
+        # Test multiple mixed UDF expressions in a single projection
+        df_multi_1 = df \
+            .withColumn('f1', f1(col('v'))) \
+            .withColumn('f2', f2(col('v'))) \
+            .withColumn('f3', f3(col('v'))) \
+            .withColumn('f4', f4(col('v'))) \
+            .withColumn('f2_f1', f2(col('f1'))) \
+            .withColumn('f3_f1', f3(col('f1'))) \
+            .withColumn('f4_f1', f4(col('f1'))) \
+            .withColumn('f3_f2', f3(col('f2'))) \
+            .withColumn('f4_f2', f4(col('f2'))) \
+            .withColumn('f4_f3', f4(col('f3'))) \
+            .withColumn('f3_f2_f1', f3(col('f2_f1'))) \
+            .withColumn('f4_f2_f1', f4(col('f2_f1'))) \
+            .withColumn('f4_f3_f1', f4(col('f3_f1'))) \
+            .withColumn('f4_f3_f2', f4(col('f3_f2'))) \
+            .withColumn('f4_f3_f2_f1', f4(col('f3_f2_f1')))
+
+        # Test mixed udfs in a single expression
+        df_multi_2 = df \
+            .withColumn('f1', f1(col('v'))) \
+            .withColumn('f2', f2(col('v'))) \
+            .withColumn('f3', f3(col('v'))) \
+            .withColumn('f4', f4(col('v'))) \
+            .withColumn('f2_f1', f2(f1(col('v')))) \
+            .withColumn('f3_f1', f3(f1(col('v')))) \
+            .withColumn('f4_f1', f4(f1(col('v')))) \
+            .withColumn('f3_f2', f3(f2(col('v')))) \
+            .withColumn('f4_f2', f4(f2(col('v')))) \
+            .withColumn('f4_f3', f4(f3(col('v')))) \
+            .withColumn('f3_f2_f1', f3(f2(f1(col('v'))))) \
+            .withColumn('f4_f2_f1', f4(f2(f1(col('v'))))) \
+            .withColumn('f4_f3_f1', f4(f3(f1(col('v'))))) \
+            .withColumn('f4_f3_f2', f4(f3(f2(col('v'))))) \
+            .withColumn('f4_f3_f2_f1', f4(f3(f2(f1(col('v'))))))
+
+        expected = df \
+            .withColumn('f1', df['v'] + 1) \
+            .withColumn('f2', df['v'] + 10) \
+            .withColumn('f3', df['v'] + 100) \
+            .withColumn('f4', df['v'] + 1000) \
+            .withColumn('f2_f1', df['v'] + 11) \
+            .withColumn('f3_f1', df['v'] + 101) \
+            .withColumn('f4_f1', df['v'] + 1001) \
+            .withColumn('f3_f2', df['v'] + 110) \
+            .withColumn('f4_f2', df['v'] + 1010) \
+            .withColumn('f4_f3', df['v'] + 1100) \
+            .withColumn('f3_f2_f1', df['v'] + 111) \
+            .withColumn('f4_f2_f1', df['v'] + 1011) \
+            .withColumn('f4_f3_f1', df['v'] + 1101) \
+            .withColumn('f4_f3_f2', df['v'] + 1110) \
+            .withColumn('f4_f3_f2_f1', df['v'] + 1111)
+
+        self.assertEquals(expected.collect(), df_multi_1.collect())
+        self.assertEquals(expected.collect(), df_multi_2.collect())
+
+    def test_mixed_udf_and_sql(self):
+        import pandas as pd
+        from pyspark.sql import Column
+        from pyspark.sql.functions import udf, pandas_udf
+
+        df = self.spark.range(0, 1).toDF('v')
+
+        # Test mixture of UDFs, Pandas UDFs and SQL expression.
+
+        @udf('int')
+        def f1(x):
+            assert type(x) == int
+            return x + 1
+
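+        # f2 is a plain Column expression rather than a UDF; it is mixed with the
+        # row-at-a-time and pandas UDFs below.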
+        def f2(x):
+            assert type(x) == Column
+            return x + 10
+
+        @pandas_udf('int')
+        def f3(x):
+            assert type(x) == pd.Series
+            return x + 100
+
+        df1 = df.withColumn('f1', f1(df['v'])) \
+            .withColumn('f2', f2(df['v'])) \
+            .withColumn('f3', f3(df['v'])) \
+            .withColumn('f1_f2', f1(f2(df['v']))) \
+            .withColumn('f1_f3', f1(f3(df['v']))) \
+            .withColumn('f2_f1', f2(f1(df['v']))) \
+            .withColumn('f2_f3', f2(f3(df['v']))) \
+            .withColumn('f3_f1', f3(f1(df['v']))) \
+            .withColumn('f3_f2', f3(f2(df['v']))) \
+            .withColumn('f1_f2_f3', f1(f2(f3(df['v'])))) \
+            .withColumn('f1_f3_f2', f1(f3(f2(df['v'])))) \
+            .withColumn('f2_f1_f3', f2(f1(f3(df['v'])))) \
+            .withColumn('f2_f3_f1', f2(f3(f1(df['v'])))) \
+            .withColumn('f3_f1_f2', f3(f1(f2(df['v'])))) \
+            .withColumn('f3_f2_f1', f3(f2(f1(df['v']))))
+
+        expected = df.withColumn('f1', df['v'] + 1) \
+            .withColumn('f2', df['v'] + 10) \
+            .withColumn('f3', df['v'] + 100) \
+            .withColumn('f1_f2', df['v'] + 11) \
+            .withColumn('f1_f3', df['v'] + 101) \
+            .withColumn('f2_f1', df['v'] + 11) \
+            .withColumn('f2_f3', df['v'] + 110) \
+            .withColumn('f3_f1', df['v'] + 101) \
+            .withColumn('f3_f2', df['v'] + 110) \
+            .withColumn('f1_f2_f3', df['v'] + 111) \
+            .withColumn('f1_f3_f2', df['v'] + 111) \
+            .withColumn('f2_f1_f3', df['v'] + 111) \
+            .withColumn('f2_f3_f1', df['v'] + 111) \
+            .withColumn('f3_f1_f2', df['v'] + 111) \
+            .withColumn('f3_f2_f1', df['v'] + 111)
+
+        self.assertEquals(expected.collect(), df1.collect())
+
+    # SPARK-24721
+    @unittest.skipIf(not _test_compiled, _test_not_compiled_message)
+    def test_datasource_with_udf(self):
+        # Same as SQLTests.test_datasource_with_udf, but with a Pandas UDF.
+        # This needs to be a separate test because the Arrow dependency is optional.
+        import pandas as pd
+        import numpy as np
+        from pyspark.sql.functions import pandas_udf, lit, col
+
+        path = tempfile.mkdtemp()
+        shutil.rmtree(path)
+
+        try:
+            self.spark.range(1).write.mode("overwrite").format('csv').save(path)
+            filesource_df = self.spark.read.option('inferSchema', True).csv(path).toDF('i')
+            datasource_df = self.spark.read \
+                .format("org.apache.spark.sql.sources.SimpleScanSource") \
+                .option('from', 0).option('to', 1).load().toDF('i')
+            datasource_v2_df = self.spark.read \
+                .format("org.apache.spark.sql.sources.v2.SimpleDataSourceV2") \
+                .load().toDF('i', 'j')
+
+            c1 = pandas_udf(lambda x: x + 1, 'int')(lit(1))
+            c2 = pandas_udf(lambda x: x + 1, 'int')(col('i'))
+
+            f1 = pandas_udf(lambda x: pd.Series(np.repeat(False, len(x))), 'boolean')(lit(1))
+            f2 = pandas_udf(lambda x: pd.Series(np.repeat(False, len(x))), 'boolean')(col('i'))
+
+            for df in [filesource_df, datasource_df, datasource_v2_df]:
+                result = df.withColumn('c', c1)
+                expected = df.withColumn('c', lit(2))
+                self.assertEquals(expected.collect(), result.collect())
+
+            for df in [filesource_df, datasource_df, datasource_v2_df]:
+                result = df.withColumn('c', c2)
+                expected = df.withColumn('c', col('i') + 1)
+                self.assertEquals(expected.collect(), result.collect())
+
+            for df in [filesource_df, datasource_df, datasource_v2_df]:
+                for f in [f1, f2]:
+                    result = df.filter(f)
+                    self.assertEquals(0, result.count())
+        finally:
+            shutil.rmtree(path)
+
+
+@unittest.skipIf(
+    not _have_pandas or not _have_pyarrow,
+    _pandas_requirement_message or _pyarrow_requirement_message)
+class GroupedMapPandasUDFTests(ReusedSQLTestCase):
+
+    @property
+    def data(self):
+        from pyspark.sql.functions import array, explode, col, lit
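+        # 10 ids, each exploded into 10 rows with v ranging over [20, 30).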
+        return self.spark.range(10).toDF('id') \
+            .withColumn("vs", array([lit(i) for i in range(20, 30)])) \
+            .withColumn("v", explode(col('vs'))).drop('vs')
+
+    def test_supported_types(self):
+        from decimal import Decimal
+        from distutils.version import LooseVersion
+        import pyarrow as pa
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        values = [
+            1, 2, 3,
+            4, 5, 1.1,
+            2.2, Decimal(1.123),
+            [1, 2, 2], True, 'hello'
+        ]
+        output_fields = [
+            ('id', IntegerType()), ('byte', ByteType()), ('short', ShortType()),
+            ('int', IntegerType()), ('long', LongType()), ('float', FloatType()),
+            ('double', DoubleType()), ('decim', DecimalType(10, 3)),
+            ('array', ArrayType(IntegerType())), ('bool', BooleanType()), ('str', StringType())
+        ]
+
+        # TODO: Add BinaryType to variables above once minimum pyarrow version is 0.10.0
+        if LooseVersion(pa.__version__) >= LooseVersion("0.10.0"):
+            values.append(bytearray([0x01, 0x02]))
+            output_fields.append(('bin', BinaryType()))
+
+        output_schema = StructType([StructField(*x) for x in output_fields])
+        df = self.spark.createDataFrame([values], schema=output_schema)
+
+        # Different forms of grouped map pandas UDF: udf1 takes only the pdf, udf2 takes
+        # (key, pdf) but ignores the key, and udf3 uses the key. All three produce the same result.
+        udf1 = pandas_udf(
+            lambda pdf: pdf.assign(
+                byte=pdf.byte * 2,
+                short=pdf.short * 2,
+                int=pdf.int * 2,
+                long=pdf.long * 2,
+                float=pdf.float * 2,
+                double=pdf.double * 2,
+                decim=pdf.decim * 2,
+                bool=False if pdf.bool else True,
+                str=pdf.str + 'there',
+                array=pdf.array,
+            ),
+            output_schema,
+            PandasUDFType.GROUPED_MAP
+        )
+
+        udf2 = pandas_udf(
+            lambda _, pdf: pdf.assign(
+                byte=pdf.byte * 2,
+                short=pdf.short * 2,
+                int=pdf.int * 2,
+                long=pdf.long * 2,
+                float=pdf.float * 2,
+                double=pdf.double * 2,
+                decim=pdf.decim * 2,
+                bool=False if pdf.bool else True,
+                str=pdf.str + 'there',
+                array=pdf.array,
+            ),
+            output_schema,
+            PandasUDFType.GROUPED_MAP
+        )
+
+        udf3 = pandas_udf(
+            lambda key, pdf: pdf.assign(
+                id=key[0],
+                byte=pdf.byte * 2,
+                short=pdf.short * 2,
+                int=pdf.int * 2,
+                long=pdf.long * 2,
+                float=pdf.float * 2,
+                double=pdf.double * 2,
+                decim=pdf.decim * 2,
+                bool=False if pdf.bool else True,
+                str=pdf.str + 'there',
+                array=pdf.array,
+            ),
+            output_schema,
+            PandasUDFType.GROUPED_MAP
+        )
+
+        result1 = df.groupby('id').apply(udf1).sort('id').toPandas()
+        expected1 = df.toPandas().groupby('id').apply(udf1.func).reset_index(drop=True)
+
+        result2 = df.groupby('id').apply(udf2).sort('id').toPandas()
+        expected2 = expected1
+
+        result3 = df.groupby('id').apply(udf3).sort('id').toPandas()
+        expected3 = expected1
+
+        self.assertPandasEqual(expected1, result1)
+        self.assertPandasEqual(expected2, result2)
+        self.assertPandasEqual(expected3, result3)
+
+    def test_array_type_correct(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType, array, col
+
+        df = self.data.withColumn("arr", array(col("id"))).repartition(1, "id")
+
+        output_schema = StructType(
+            [StructField('id', LongType()),
+             StructField('v', IntegerType()),
+             StructField('arr', ArrayType(LongType()))])
+
+        udf = pandas_udf(
+            lambda pdf: pdf,
+            output_schema,
+            PandasUDFType.GROUPED_MAP
+        )
+
+        result = df.groupby('id').apply(udf).sort('id').toPandas()
+        expected = df.toPandas().groupby('id').apply(udf.func).reset_index(drop=True)
+        self.assertPandasEqual(expected, result)
+
+    def test_register_grouped_map_udf(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        foo_udf = pandas_udf(lambda x: x, "id long", PandasUDFType.GROUPED_MAP)
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    ValueError,
+                    'f.*SQL_BATCHED_UDF.*SQL_SCALAR_PANDAS_UDF.*SQL_GROUPED_AGG_PANDAS_UDF.*'):
+                self.spark.catalog.registerFunction("foo_udf", foo_udf)
+
+    def test_decorator(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+        df = self.data
+
+        @pandas_udf(
+            'id long, v int, v1 double, v2 long',
+            PandasUDFType.GROUPED_MAP
+        )
+        def foo(pdf):
+            return pdf.assign(v1=pdf.v * pdf.id * 1.0, v2=pdf.v + pdf.id)
+
+        result = df.groupby('id').apply(foo).sort('id').toPandas()
+        expected = df.toPandas().groupby('id').apply(foo.func).reset_index(drop=True)
+        self.assertPandasEqual(expected, result)
+
+    def test_coerce(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+        df = self.data
+
+        foo = pandas_udf(
+            lambda pdf: pdf,
+            'id long, v double',
+            PandasUDFType.GROUPED_MAP
+        )
+
+        result = df.groupby('id').apply(foo).sort('id').toPandas()
+        expected = df.toPandas().groupby('id').apply(foo.func).reset_index(drop=True)
+        expected = expected.assign(v=expected.v.astype('float64'))
+        self.assertPandasEqual(expected, result)
+
+    def test_complex_groupby(self):
+        from pyspark.sql.functions import pandas_udf, col, PandasUDFType
+        df = self.data
+
+        @pandas_udf(
+            'id long, v int, norm double',
+            PandasUDFType.GROUPED_MAP
+        )
+        def normalize(pdf):
+            v = pdf.v
+            return pdf.assign(norm=(v - v.mean()) / v.std())
+
+        result = df.groupby(col('id') % 2 == 0).apply(normalize).sort('id', 'v').toPandas()
+        pdf = df.toPandas()
+        expected = pdf.groupby(pdf['id'] % 2 == 0).apply(normalize.func)
+        expected = expected.sort_values(['id', 'v']).reset_index(drop=True)
+        expected = expected.assign(norm=expected.norm.astype('float64'))
+        self.assertPandasEqual(expected, result)
+
+    def test_empty_groupby(self):
+        from pyspark.sql.functions import pandas_udf, col, PandasUDFType
+        df = self.data
+
+        @pandas_udf(
+            'id long, v int, norm double',
+            PandasUDFType.GROUPED_MAP
+        )
+        def normalize(pdf):
+            v = pdf.v
+            return pdf.assign(norm=(v - v.mean()) / v.std())
+
+        result = df.groupby().apply(normalize).sort('id', 'v').toPandas()
+        pdf = df.toPandas()
+        expected = normalize.func(pdf)
+        expected = expected.sort_values(['id', 'v']).reset_index(drop=True)
+        expected = expected.assign(norm=expected.norm.astype('float64'))
+        self.assertPandasEqual(expected, result)
+
+    def test_datatype_string(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+        df = self.data
+
+        foo_udf = pandas_udf(
+            lambda pdf: pdf.assign(v1=pdf.v * pdf.id * 1.0, v2=pdf.v + pdf.id),
+            'id long, v int, v1 double, v2 long',
+            PandasUDFType.GROUPED_MAP
+        )
+
+        result = df.groupby('id').apply(foo_udf).sort('id').toPandas()
+        expected = df.toPandas().groupby('id').apply(foo_udf.func).reset_index(drop=True)
+        self.assertPandasEqual(expected, result)
+
+    def test_wrong_return_type(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    NotImplementedError,
+                    'Invalid returnType.*grouped map Pandas UDF.*MapType'):
+                pandas_udf(
+                    lambda pdf: pdf,
+                    'id long, v map<int, int>',
+                    PandasUDFType.GROUPED_MAP)
+
+    def test_wrong_args(self):
+        from pyspark.sql.functions import udf, pandas_udf, sum, PandasUDFType
+        df = self.data
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(ValueError, 'Invalid udf'):
+                df.groupby('id').apply(lambda x: x)
+            with self.assertRaisesRegexp(ValueError, 'Invalid udf'):
+                df.groupby('id').apply(udf(lambda x: x, DoubleType()))
+            with self.assertRaisesRegexp(ValueError, 'Invalid udf'):
+                df.groupby('id').apply(sum(df.v))
+            with self.assertRaisesRegexp(ValueError, 'Invalid udf'):
+                df.groupby('id').apply(df.v + 1)
+            with self.assertRaisesRegexp(ValueError, 'Invalid function'):
+                df.groupby('id').apply(
+                    pandas_udf(lambda: 1, StructType([StructField("d", DoubleType())])))
+            with self.assertRaisesRegexp(ValueError, 'Invalid udf'):
+                df.groupby('id').apply(pandas_udf(lambda x, y: x, DoubleType()))
+            with self.assertRaisesRegexp(ValueError, 'Invalid udf.*GROUPED_MAP'):
+                df.groupby('id').apply(
+                    pandas_udf(lambda x, y: x, DoubleType(), PandasUDFType.SCALAR))
+
+    def test_unsupported_types(self):
+        from distutils.version import LooseVersion
+        import pyarrow as pa
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        common_err_msg = 'Invalid returnType.*grouped map Pandas UDF.*'
+        unsupported_types = [
+            StructField('map', MapType(StringType(), IntegerType())),
+            StructField('arr_ts', ArrayType(TimestampType())),
+            StructField('null', NullType()),
+        ]
+
+        # TODO: Remove this if-statement once minimum pyarrow version is 0.10.0
+        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            unsupported_types.append(StructField('bin', BinaryType()))
+
+        for unsupported_type in unsupported_types:
+            schema = StructType([StructField('id', LongType(), True), unsupported_type])
+            with QuietTest(self.sc):
+                with self.assertRaisesRegexp(NotImplementedError, common_err_msg):
+                    pandas_udf(lambda x: x, schema, PandasUDFType.GROUPED_MAP)
+
+    # Regression test for SPARK-23314
+    def test_timestamp_dst(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+        # Daylight saving time in Los Angeles ended in 2015 on Sun, Nov 1 at 2:00 am
+        dt = [datetime.datetime(2015, 11, 1, 0, 30),
+              datetime.datetime(2015, 11, 1, 1, 30),
+              datetime.datetime(2015, 11, 1, 2, 30)]
+        df = self.spark.createDataFrame(dt, 'timestamp').toDF('time')
+        foo_udf = pandas_udf(lambda pdf: pdf, 'time timestamp', PandasUDFType.GROUPED_MAP)
+        result = df.groupby('time').apply(foo_udf).sort('time')
+        self.assertPandasEqual(df.toPandas(), result.toPandas())
+
+    def test_udf_with_key(self):
+        from pyspark.sql.functions import pandas_udf, col, PandasUDFType
+        df = self.data
+        pdf = df.toPandas()
+
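+        # Grouped map UDFs may take either (pdf) or (key, pdf), where key is a tuple of the
+        # grouping values for the current group.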
+        def foo1(key, pdf):
+            import numpy as np
+            assert type(key) == tuple
+            assert type(key[0]) == np.int64
+
+            return pdf.assign(v1=key[0],
+                              v2=pdf.v * key[0],
+                              v3=pdf.v * pdf.id,
+                              v4=pdf.v * pdf.id.mean())
+
+        def foo2(key, pdf):
+            import numpy as np
+            assert type(key) == tuple
+            assert type(key[0]) == np.int64
+            assert type(key[1]) == np.int32
+
+            return pdf.assign(v1=key[0],
+                              v2=key[1],
+                              v3=pdf.v * key[0],
+                              v4=pdf.v + key[1])
+
+        def foo3(key, pdf):
+            assert type(key) == tuple
+            assert len(key) == 0
+            return pdf.assign(v1=pdf.v * pdf.id)
+
+        # v2 is int because numpy.int64 * pd.Series<int32> results in pd.Series<int32>
+        # v3 is long because pd.Series<int64> * pd.Series<int32> results in pd.Series<int64>
+        udf1 = pandas_udf(
+            foo1,
+            'id long, v int, v1 long, v2 int, v3 long, v4 double',
+            PandasUDFType.GROUPED_MAP)
+
+        udf2 = pandas_udf(
+            foo2,
+            'id long, v int, v1 long, v2 int, v3 int, v4 int',
+            PandasUDFType.GROUPED_MAP)
+
+        udf3 = pandas_udf(
+            foo3,
+            'id long, v int, v1 long',
+            PandasUDFType.GROUPED_MAP)
+
+        # Test groupby column
+        result1 = df.groupby('id').apply(udf1).sort('id', 'v').toPandas()
+        expected1 = pdf.groupby('id')\
+            .apply(lambda x: udf1.func((x.id.iloc[0],), x))\
+            .sort_values(['id', 'v']).reset_index(drop=True)
+        self.assertPandasEqual(expected1, result1)
+
+        # Test groupby expression
+        result2 = df.groupby(df.id % 2).apply(udf1).sort('id', 'v').toPandas()
+        expected2 = pdf.groupby(pdf.id % 2)\
+            .apply(lambda x: udf1.func((x.id.iloc[0] % 2,), x))\
+            .sort_values(['id', 'v']).reset_index(drop=True)
+        self.assertPandasEqual(expected2, result2)
+
+        # Test complex groupby
+        result3 = df.groupby(df.id, df.v % 2).apply(udf2).sort('id', 'v').toPandas()
+        expected3 = pdf.groupby([pdf.id, pdf.v % 2])\
+            .apply(lambda x: udf2.func((x.id.iloc[0], (x.v % 2).iloc[0],), x))\
+            .sort_values(['id', 'v']).reset_index(drop=True)
+        self.assertPandasEqual(expected3, result3)
+
+        # Test empty groupby
+        result4 = df.groupby().apply(udf3).sort('id', 'v').toPandas()
+        expected4 = udf3.func((), pdf)
+        self.assertPandasEqual(expected4, result4)
+
+    def test_column_order(self):
+        from collections import OrderedDict
+        import pandas as pd
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        # Helper function to set column names from a list
+        def rename_pdf(pdf, names):
+            pdf.rename(columns={old: new for old, new in
+                                zip(pdf.columns, names)}, inplace=True)
+
+        df = self.data
+        grouped_df = df.groupby('id')
+        grouped_pdf = df.toPandas().groupby('id')
+
+        # Function returns a pdf with the required column names, but in an order that
+        # differs from the schema
+        def change_col_order(pdf):
+            # Constructing a DataFrame from a dict should result in the same order,
+            # but use from_items to ensure the pdf column order is different from the schema
+            return pd.DataFrame.from_items([
+                ('id', pdf.id),
+                ('u', pdf.v * 2),
+                ('v', pdf.v)])
+
+        ordered_udf = pandas_udf(
+            change_col_order,
+            'id long, v int, u int',
+            PandasUDFType.GROUPED_MAP
+        )
+
+        # The UDF result should assign columns by name from the pdf
+        result = grouped_df.apply(ordered_udf).sort('id', 'v')\
+            .select('id', 'u', 'v').toPandas()
+        pd_result = grouped_pdf.apply(change_col_order)
+        expected = pd_result.sort_values(['id', 'v']).reset_index(drop=True)
+        self.assertPandasEqual(expected, result)
+
+        # Function returns a pdf with positional columns, indexed by range
+        def range_col_order(pdf):
+            # Create a DataFrame with positional columns, fix types to long
+            return pd.DataFrame(list(zip(pdf.id, pdf.v * 3, pdf.v)), dtype='int64')
+
+        range_udf = pandas_udf(
+            range_col_order,
+            'id long, u long, v long',
+            PandasUDFType.GROUPED_MAP
+        )
+
+        # The UDF result uses positional columns from the pdf
+        result = grouped_df.apply(range_udf).sort('id', 'v') \
+            .select('id', 'u', 'v').toPandas()
+        pd_result = grouped_pdf.apply(range_col_order)
+        rename_pdf(pd_result, ['id', 'u', 'v'])
+        expected = pd_result.sort_values(['id', 'v']).reset_index(drop=True)
+        self.assertPandasEqual(expected, result)
+
+        # Function returns a pdf with columns indexed with integers
+        def int_index(pdf):
+            return pd.DataFrame(OrderedDict([(0, pdf.id), (1, pdf.v * 4), (2, pdf.v)]))
+
+        int_index_udf = pandas_udf(
+            int_index,
+            'id long, u int, v int',
+            PandasUDFType.GROUPED_MAP
+        )
+
+        # The UDF result should assign columns by position of integer index
+        result = grouped_df.apply(int_index_udf).sort('id', 'v') \
+            .select('id', 'u', 'v').toPandas()
+        pd_result = grouped_pdf.apply(int_index)
+        rename_pdf(pd_result, ['id', 'u', 'v'])
+        expected = pd_result.sort_values(['id', 'v']).reset_index(drop=True)
+        self.assertPandasEqual(expected, result)
+
+        @pandas_udf('id long, v int', PandasUDFType.GROUPED_MAP)
+        def column_name_typo(pdf):
+            return pd.DataFrame({'iid': pdf.id, 'v': pdf.v})
+
+        @pandas_udf('id long, v int', PandasUDFType.GROUPED_MAP)
+        def invalid_positional_types(pdf):
+            return pd.DataFrame([(u'a', 1.2)])
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(Exception, "KeyError: 'id'"):
+                grouped_df.apply(column_name_typo).collect()
+            with self.assertRaisesRegexp(Exception, "No cast implemented"):
+                grouped_df.apply(invalid_positional_types).collect()
+
+    def test_positional_assignment_conf(self):
+        import pandas as pd
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        with self.sql_conf({
+                "spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName": False}):
+
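+            # With column assignment by name disabled, result columns map to the schema by
+            # position, so 'x' and 'y' fill 'a' and 'b'.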
+            @pandas_udf("a string, b float", PandasUDFType.GROUPED_MAP)
+            def foo(_):
+                return pd.DataFrame([('hi', 1)], columns=['x', 'y'])
+
+            df = self.data
+            result = df.groupBy('id').apply(foo).select('a', 'b').collect()
+            for r in result:
+                self.assertEqual(r.a, 'hi')
+                self.assertEqual(r.b, 1)
+
+    def test_self_join_with_pandas(self):
+        import pyspark.sql.functions as F
+
+        @F.pandas_udf('key long, col string', F.PandasUDFType.GROUPED_MAP)
+        def dummy_pandas_udf(df):
+            return df[['key', 'col']]
+
+        df = self.spark.createDataFrame([Row(key=1, col='A'), Row(key=1, col='B'),
+                                         Row(key=2, col='C')])
+        df_with_pandas = df.groupBy('key').apply(dummy_pandas_udf)
+
+        # this was throwing an AnalysisException before SPARK-24208
+        res = df_with_pandas.alias('temp0').join(df_with_pandas.alias('temp1'),
+                                                 F.col('temp0.key') == F.col('temp1.key'))
+        self.assertEquals(res.count(), 5)
+
+    def test_mixed_scalar_udfs_followed_by_grouby_apply(self):
+        import pandas as pd
+        from pyspark.sql.functions import udf, pandas_udf, PandasUDFType
+
+        df = self.spark.range(0, 10).toDF('v1')
+        df = df.withColumn('v2', udf(lambda x: x + 1, 'int')(df['v1'])) \
+            .withColumn('v3', pandas_udf(lambda x: x + 2, 'int')(df['v1']))
+
+        result = df.groupby() \
+            .apply(pandas_udf(lambda x: pd.DataFrame([x.sum().sum()]),
+                              'sum int',
+                              PandasUDFType.GROUPED_MAP))
+
+        self.assertEquals(result.collect()[0]['sum'], 165)
+
+
+@unittest.skipIf(
+    not _have_pandas or not _have_pyarrow,
+    _pandas_requirement_message or _pyarrow_requirement_message)
+class GroupedAggPandasUDFTests(ReusedSQLTestCase):
+
+    @property
+    def data(self):
+        from pyspark.sql.functions import array, explode, col, lit
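+        # For each of the 10 ids this yields 10 rows: v spans id + 20.0 .. id + 29.0 and w is 1.0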
+        return self.spark.range(10).toDF('id') \
+            .withColumn("vs", array([lit(i * 1.0) + col('id') for i in range(20, 30)])) \
+            .withColumn("v", explode(col('vs'))) \
+            .drop('vs') \
+            .withColumn('w', lit(1.0))
+
+    @property
+    def python_plus_one(self):
+        from pyspark.sql.functions import udf
+
+        @udf('double')
+        def plus_one(v):
+            assert isinstance(v, (int, float))
+            return v + 1
+        return plus_one
+
+    @property
+    def pandas_scalar_plus_two(self):
+        import pandas as pd
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        @pandas_udf('double', PandasUDFType.SCALAR)
+        def plus_two(v):
+            assert isinstance(v, pd.Series)
+            return v + 2
+        return plus_two
+
+    @property
+    def pandas_agg_mean_udf(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        @pandas_udf('double', PandasUDFType.GROUPED_AGG)
+        def avg(v):
+            return v.mean()
+        return avg
+
+    @property
+    def pandas_agg_sum_udf(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        @pandas_udf('double', PandasUDFType.GROUPED_AGG)
+        def sum(v):
+            return v.sum()
+        return sum
+
+    @property
+    def pandas_agg_weighted_mean_udf(self):
+        import numpy as np
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        @pandas_udf('double', PandasUDFType.GROUPED_AGG)
+        def weighted_mean(v, w):
+            return np.average(v, weights=w)
+        return weighted_mean
+
+    def test_manual(self):
+        from pyspark.sql.functions import pandas_udf, array
+
+        df = self.data
+        sum_udf = self.pandas_agg_sum_udf
+        mean_udf = self.pandas_agg_mean_udf
+        mean_arr_udf = pandas_udf(
+            self.pandas_agg_mean_udf.func,
+            ArrayType(self.pandas_agg_mean_udf.returnType),
+            self.pandas_agg_mean_udf.evalType)
+
+        result1 = df.groupby('id').agg(
+            sum_udf(df.v),
+            mean_udf(df.v),
+            mean_arr_udf(array(df.v))).sort('id')
+        expected1 = self.spark.createDataFrame(
+            [[0, 245.0, 24.5, [24.5]],
+             [1, 255.0, 25.5, [25.5]],
+             [2, 265.0, 26.5, [26.5]],
+             [3, 275.0, 27.5, [27.5]],
+             [4, 285.0, 28.5, [28.5]],
+             [5, 295.0, 29.5, [29.5]],
+             [6, 305.0, 30.5, [30.5]],
+             [7, 315.0, 31.5, [31.5]],
+             [8, 325.0, 32.5, [32.5]],
+             [9, 335.0, 33.5, [33.5]]],
+            ['id', 'sum(v)', 'avg(v)', 'avg(array(v))'])
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+
+    def test_basic(self):
+        from pyspark.sql.functions import col, lit, sum, mean
+
+        df = self.data
+        weighted_mean_udf = self.pandas_agg_weighted_mean_udf
+
+        # Groupby one column and aggregate one UDF with literal
+        result1 = df.groupby('id').agg(weighted_mean_udf(df.v, lit(1.0))).sort('id')
+        expected1 = df.groupby('id').agg(mean(df.v).alias('weighted_mean(v, 1.0)')).sort('id')
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+
+        # Groupby one expression and aggregate one UDF with literal
+        result2 = df.groupby((col('id') + 1)).agg(weighted_mean_udf(df.v, lit(1.0)))\
+            .sort(df.id + 1)
+        expected2 = df.groupby((col('id') + 1))\
+            .agg(mean(df.v).alias('weighted_mean(v, 1.0)')).sort(df.id + 1)
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+
+        # Groupby one column and aggregate one UDF without literal
+        result3 = df.groupby('id').agg(weighted_mean_udf(df.v, df.w)).sort('id')
+        expected3 = df.groupby('id').agg(mean(df.v).alias('weighted_mean(v, w)')).sort('id')
+        self.assertPandasEqual(expected3.toPandas(), result3.toPandas())
+
+        # Groupby one expression and aggregate one UDF without literal
+        result4 = df.groupby((col('id') + 1).alias('id'))\
+            .agg(weighted_mean_udf(df.v, df.w))\
+            .sort('id')
+        expected4 = df.groupby((col('id') + 1).alias('id'))\
+            .agg(mean(df.v).alias('weighted_mean(v, w)'))\
+            .sort('id')
+        self.assertPandasEqual(expected4.toPandas(), result4.toPandas())
+
+    def test_unsupported_types(self):
+        from pyspark.sql.types import DoubleType, MapType
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(NotImplementedError, 'not supported'):
+                pandas_udf(
+                    lambda x: x,
+                    ArrayType(ArrayType(TimestampType())),
+                    PandasUDFType.GROUPED_AGG)
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(NotImplementedError, 'not supported'):
+                @pandas_udf('mean double, std double', PandasUDFType.GROUPED_AGG)
+                def mean_and_std_udf(v):
+                    return v.mean(), v.std()
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(NotImplementedError, 'not supported'):
+                @pandas_udf(MapType(DoubleType(), DoubleType()), PandasUDFType.GROUPED_AGG)
+                def mean_and_std_udf(v):
+                    return {v.mean(): v.std()}
+
+    def test_alias(self):
+        from pyspark.sql.functions import mean
+
+        df = self.data
+        mean_udf = self.pandas_agg_mean_udf
+
+        result1 = df.groupby('id').agg(mean_udf(df.v).alias('mean_alias'))
+        expected1 = df.groupby('id').agg(mean(df.v).alias('mean_alias'))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+
+    def test_mixed_sql(self):
+        """
+        Test mixing group aggregate pandas UDF with sql expression.
+        """
+        from pyspark.sql.functions import sum, mean
+
+        df = self.data
+        sum_udf = self.pandas_agg_sum_udf
+
+        # Mix group aggregate pandas UDF with sql expression
+        result1 = (df.groupby('id')
+                   .agg(sum_udf(df.v) + 1)
+                   .sort('id'))
+        expected1 = (df.groupby('id')
+                     .agg(sum(df.v) + 1)
+                     .sort('id'))
+
+        # Mix group aggregate pandas UDF with sql expression (order swapped)
+        result2 = (df.groupby('id')
+                     .agg(sum_udf(df.v + 1))
+                     .sort('id'))
+
+        expected2 = (df.groupby('id')
+                       .agg(sum(df.v + 1))
+                       .sort('id'))
+
+        # Wrap group aggregate pandas UDF with two sql expressions
+        result3 = (df.groupby('id')
+                   .agg(sum_udf(df.v + 1) + 2)
+                   .sort('id'))
+        expected3 = (df.groupby('id')
+                     .agg(sum(df.v + 1) + 2)
+                     .sort('id'))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+        self.assertPandasEqual(expected3.toPandas(), result3.toPandas())
+
+    def test_mixed_udfs(self):
+        """
+        Test mixing group aggregate pandas UDF with python UDF and scalar pandas UDF.
+        """
+        from pyspark.sql.functions import sum, mean
+
+        df = self.data
+        plus_one = self.python_plus_one
+        plus_two = self.pandas_scalar_plus_two
+        sum_udf = self.pandas_agg_sum_udf
+
+        # Mix group aggregate pandas UDF and python UDF
+        result1 = (df.groupby('id')
+                   .agg(plus_one(sum_udf(df.v)))
+                   .sort('id'))
+        expected1 = (df.groupby('id')
+                     .agg(plus_one(sum(df.v)))
+                     .sort('id'))
+
+        # Mix group aggregate pandas UDF and python UDF (order swapped)
+        result2 = (df.groupby('id')
+                   .agg(sum_udf(plus_one(df.v)))
+                   .sort('id'))
+        expected2 = (df.groupby('id')
+                     .agg(sum(plus_one(df.v)))
+                     .sort('id'))
+
+        # Mix group aggregate pandas UDF and scalar pandas UDF
+        result3 = (df.groupby('id')
+                   .agg(sum_udf(plus_two(df.v)))
+                   .sort('id'))
+        expected3 = (df.groupby('id')
+                     .agg(sum(plus_two(df.v)))
+                     .sort('id'))
+
+        # Mix group aggregate pandas UDF and scalar pandas UDF (order swapped)
+        result4 = (df.groupby('id')
+                   .agg(plus_two(sum_udf(df.v)))
+                   .sort('id'))
+        expected4 = (df.groupby('id')
+                     .agg(plus_two(sum(df.v)))
+                     .sort('id'))
+
+        # Wrap group aggregate pandas UDF with two python UDFs and use python UDF in groupby
+        result5 = (df.groupby(plus_one(df.id))
+                   .agg(plus_one(sum_udf(plus_one(df.v))))
+                   .sort('plus_one(id)'))
+        expected5 = (df.groupby(plus_one(df.id))
+                     .agg(plus_one(sum(plus_one(df.v))))
+                     .sort('plus_one(id)'))
+
+        # Wrap group aggregate pandas UDF with two scalar pandas UDFs and use scalar pandas UDF in
+        # groupby
+        result6 = (df.groupby(plus_two(df.id))
+                   .agg(plus_two(sum_udf(plus_two(df.v))))
+                   .sort('plus_two(id)'))
+        expected6 = (df.groupby(plus_two(df.id))
+                     .agg(plus_two(sum(plus_two(df.v))))
+                     .sort('plus_two(id)'))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+        self.assertPandasEqual(expected3.toPandas(), result3.toPandas())
+        self.assertPandasEqual(expected4.toPandas(), result4.toPandas())
+        self.assertPandasEqual(expected5.toPandas(), result5.toPandas())
+        self.assertPandasEqual(expected6.toPandas(), result6.toPandas())
+
+    def test_multiple_udfs(self):
+        """
+        Test multiple group aggregate pandas UDFs in one agg function.
+        """
+        from pyspark.sql.functions import col, lit, sum, mean
+
+        df = self.data
+        mean_udf = self.pandas_agg_mean_udf
+        sum_udf = self.pandas_agg_sum_udf
+        weighted_mean_udf = self.pandas_agg_weighted_mean_udf
+
+        result1 = (df.groupBy('id')
+                   .agg(mean_udf(df.v),
+                        sum_udf(df.v),
+                        weighted_mean_udf(df.v, df.w))
+                   .sort('id')
+                   .toPandas())
+        expected1 = (df.groupBy('id')
+                     .agg(mean(df.v),
+                          sum(df.v),
+                          mean(df.v).alias('weighted_mean(v, w)'))
+                     .sort('id')
+                     .toPandas())
+
+        self.assertPandasEqual(expected1, result1)
+
+    def test_complex_groupby(self):
+        from pyspark.sql.functions import lit, sum
+
+        df = self.data
+        sum_udf = self.pandas_agg_sum_udf
+        plus_one = self.python_plus_one
+        plus_two = self.pandas_scalar_plus_two
+
+        # groupby one expression
+        result1 = df.groupby(df.v % 2).agg(sum_udf(df.v))
+        expected1 = df.groupby(df.v % 2).agg(sum(df.v))
+
+        # empty groupby
+        result2 = df.groupby().agg(sum_udf(df.v))
+        expected2 = df.groupby().agg(sum(df.v))
+
+        # groupby one column and one sql expression
+        result3 = df.groupby(df.id, df.v % 2).agg(sum_udf(df.v)).orderBy(df.id, df.v % 2)
+        expected3 = df.groupby(df.id, df.v % 2).agg(sum(df.v)).orderBy(df.id, df.v % 2)
+
+        # groupby one python UDF
+        result4 = df.groupby(plus_one(df.id)).agg(sum_udf(df.v))
+        expected4 = df.groupby(plus_one(df.id)).agg(sum(df.v))
+
+        # groupby one scalar pandas UDF
+        result5 = df.groupby(plus_two(df.id)).agg(sum_udf(df.v))
+        expected5 = df.groupby(plus_two(df.id)).agg(sum(df.v))
+
+        # groupby one expression and one python UDF
+        result6 = df.groupby(df.v % 2, plus_one(df.id)).agg(sum_udf(df.v))
+        expected6 = df.groupby(df.v % 2, plus_one(df.id)).agg(sum(df.v))
+
+        # groupby one expression and one scalar pandas UDF
+        result7 = df.groupby(df.v % 2, plus_two(df.id)).agg(sum_udf(df.v)).sort('sum(v)')
+        expected7 = df.groupby(df.v % 2, plus_two(df.id)).agg(sum(df.v)).sort('sum(v)')
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+        self.assertPandasEqual(expected3.toPandas(), result3.toPandas())
+        self.assertPandasEqual(expected4.toPandas(), result4.toPandas())
+        self.assertPandasEqual(expected5.toPandas(), result5.toPandas())
+        self.assertPandasEqual(expected6.toPandas(), result6.toPandas())
+        self.assertPandasEqual(expected7.toPandas(), result7.toPandas())
+
+    def test_complex_expressions(self):
+        from pyspark.sql.functions import col, sum
+
+        df = self.data
+        plus_one = self.python_plus_one
+        plus_two = self.pandas_scalar_plus_two
+        sum_udf = self.pandas_agg_sum_udf
+
+        # Test complex expressions with sql expression, python UDF and
+        # group aggregate pandas UDF
+        result1 = (df.withColumn('v1', plus_one(df.v))
+                   .withColumn('v2', df.v + 2)
+                   .groupby(df.id, df.v % 2)
+                   .agg(sum_udf(col('v')),
+                        sum_udf(col('v1') + 3),
+                        sum_udf(col('v2')) + 5,
+                        plus_one(sum_udf(col('v1'))),
+                        sum_udf(plus_one(col('v2'))))
+                   .sort('id')
+                   .toPandas())
+
+        expected1 = (df.withColumn('v1', df.v + 1)
+                     .withColumn('v2', df.v + 2)
+                     .groupby(df.id, df.v % 2)
+                     .agg(sum(col('v')),
+                          sum(col('v1') + 3),
+                          sum(col('v2')) + 5,
+                          plus_one(sum(col('v1'))),
+                          sum(plus_one(col('v2'))))
+                     .sort('id')
+                     .toPandas())
+
+        # Test complex expressions with sql expression, scalar pandas UDF and
+        # group aggregate pandas UDF
+        result2 = (df.withColumn('v1', plus_one(df.v))
+                   .withColumn('v2', df.v + 2)
+                   .groupby(df.id, df.v % 2)
+                   .agg(sum_udf(col('v')),
+                        sum_udf(col('v1') + 3),
+                        sum_udf(col('v2')) + 5,
+                        plus_two(sum_udf(col('v1'))),
+                        sum_udf(plus_two(col('v2'))))
+                   .sort('id')
+                   .toPandas())
+
+        expected2 = (df.withColumn('v1', df.v + 1)
+                     .withColumn('v2', df.v + 2)
+                     .groupby(df.id, df.v % 2)
+                     .agg(sum(col('v')),
+                          sum(col('v1') + 3),
+                          sum(col('v2')) + 5,
+                          plus_two(sum(col('v1'))),
+                          sum(plus_two(col('v2'))))
+                     .sort('id')
+                     .toPandas())
+
+        # Test sequential groupby aggregate
+        result3 = (df.groupby('id')
+                   .agg(sum_udf(df.v).alias('v'))
+                   .groupby('id')
+                   .agg(sum_udf(col('v')))
+                   .sort('id')
+                   .toPandas())
+
+        expected3 = (df.groupby('id')
+                     .agg(sum(df.v).alias('v'))
+                     .groupby('id')
+                     .agg(sum(col('v')))
+                     .sort('id')
+                     .toPandas())
+
+        self.assertPandasEqual(expected1, result1)
+        self.assertPandasEqual(expected2, result2)
+        self.assertPandasEqual(expected3, result3)
+
+    def test_retain_group_columns(self):
+        from pyspark.sql.functions import sum, lit, col
+        with self.sql_conf({"spark.sql.retainGroupColumns": False}):
+            df = self.data
+            sum_udf = self.pandas_agg_sum_udf
+
+            result1 = df.groupby(df.id).agg(sum_udf(df.v))
+            expected1 = df.groupby(df.id).agg(sum(df.v))
+            self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+
+    def test_array_type(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        df = self.data
+
+        array_udf = pandas_udf(lambda x: [1.0, 2.0], 'array<double>', PandasUDFType.GROUPED_AGG)
+        result1 = df.groupby('id').agg(array_udf(df['v']).alias('v2'))
+        self.assertEquals(result1.first()['v2'], [1.0, 2.0])
+
+    def test_invalid_args(self):
+        from pyspark.sql.functions import mean
+
+        df = self.data
+        plus_one = self.python_plus_one
+        mean_udf = self.pandas_agg_mean_udf
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    AnalysisException,
+                    'nor.*aggregate function'):
+                df.groupby(df.id).agg(plus_one(df.v)).collect()
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    AnalysisException,
+                    'aggregate function.*argument.*aggregate function'):
+                df.groupby(df.id).agg(mean_udf(mean_udf(df.v))).collect()
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    AnalysisException,
+                    'mixture.*aggregate function.*group aggregate pandas UDF'):
+                df.groupby(df.id).agg(mean_udf(df.v), mean(df.v)).collect()
+
+    def test_register_vectorized_udf_basic(self):
+        from pyspark.sql.functions import pandas_udf
+        from pyspark.rdd import PythonEvalType
+
+        sum_pandas_udf = pandas_udf(
+            lambda v: v.sum(), "integer", PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF)
+
+        self.assertEqual(sum_pandas_udf.evalType, PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF)
+        group_agg_pandas_udf = self.spark.udf.register("sum_pandas_udf", sum_pandas_udf)
+        self.assertEqual(group_agg_pandas_udf.evalType, PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF)
+        q = "SELECT sum_pandas_udf(v1) FROM VALUES (3, 0), (2, 0), (1, 1) tbl(v1, v2) GROUP BY v2"
+        actual = sorted(map(lambda r: r[0], self.spark.sql(q).collect()))
+        expected = [1, 5]
+        self.assertEqual(actual, expected)
+
+
+@unittest.skipIf(
+    not _have_pandas or not _have_pyarrow,
+    _pandas_requirement_message or _pyarrow_requirement_message)
+class WindowPandasUDFTests(ReusedSQLTestCase):
+    @property
+    def data(self):
+        from pyspark.sql.functions import array, explode, col, lit
+        return self.spark.range(10).toDF('id') \
+            .withColumn("vs", array([lit(i * 1.0) + col('id') for i in range(20, 30)])) \
+            .withColumn("v", explode(col('vs'))) \
+            .drop('vs') \
+            .withColumn('w', lit(1.0))
+
+    @property
+    def python_plus_one(self):
+        from pyspark.sql.functions import udf
+        return udf(lambda v: v + 1, 'double')
+
+    @property
+    def pandas_scalar_time_two(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+        return pandas_udf(lambda v: v * 2, 'double')
+
+    @property
+    def pandas_agg_mean_udf(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        @pandas_udf('double', PandasUDFType.GROUPED_AGG)
+        def avg(v):
+            return v.mean()
+        return avg
+
+    @property
+    def pandas_agg_max_udf(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        @pandas_udf('double', PandasUDFType.GROUPED_AGG)
+        def max(v):
+            return v.max()
+        return max
+
+    @property
+    def pandas_agg_min_udf(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        @pandas_udf('double', PandasUDFType.GROUPED_AGG)
+        def min(v):
+            return v.min()
+        return min
+
+    @property
+    def unbounded_window(self):
+        return Window.partitionBy('id') \
+            .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)
+
+    @property
+    def ordered_window(self):
+        return Window.partitionBy('id').orderBy('v')
+
+    @property
+    def unpartitioned_window(self):
+        return Window.partitionBy()
+
+    def test_simple(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType, percent_rank, mean, max
+
+        df = self.data
+        w = self.unbounded_window
+
+        mean_udf = self.pandas_agg_mean_udf
+
+        result1 = df.withColumn('mean_v', mean_udf(df['v']).over(w))
+        expected1 = df.withColumn('mean_v', mean(df['v']).over(w))
+
+        result2 = df.select(mean_udf(df['v']).over(w))
+        expected2 = df.select(mean(df['v']).over(w))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+
+    def test_multiple_udfs(self):
+        from pyspark.sql.functions import max, min, mean
+
+        df = self.data
+        w = self.unbounded_window
+
+        result1 = df.withColumn('mean_v', self.pandas_agg_mean_udf(df['v']).over(w)) \
+                    .withColumn('max_v', self.pandas_agg_max_udf(df['v']).over(w)) \
+                    .withColumn('min_w', self.pandas_agg_min_udf(df['w']).over(w))
+
+        expected1 = df.withColumn('mean_v', mean(df['v']).over(w)) \
+                      .withColumn('max_v', max(df['v']).over(w)) \
+                      .withColumn('min_w', min(df['w']).over(w))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+
+    def test_replace_existing(self):
+        from pyspark.sql.functions import mean
+
+        df = self.data
+        w = self.unbounded_window
+
+        result1 = df.withColumn('v', self.pandas_agg_mean_udf(df['v']).over(w))
+        expected1 = df.withColumn('v', mean(df['v']).over(w))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+
+    def test_mixed_sql(self):
+        from pyspark.sql.functions import mean
+
+        df = self.data
+        w = self.unbounded_window
+        mean_udf = self.pandas_agg_mean_udf
+
+        result1 = df.withColumn('v', mean_udf(df['v'] * 2).over(w) + 1)
+        expected1 = df.withColumn('v', mean(df['v'] * 2).over(w) + 1)
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+
+    def test_mixed_udf(self):
+        from pyspark.sql.functions import mean
+
+        df = self.data
+        w = self.unbounded_window
+
+        plus_one = self.python_plus_one
+        time_two = self.pandas_scalar_time_two
+        mean_udf = self.pandas_agg_mean_udf
+
+        result1 = df.withColumn(
+            'v2',
+            plus_one(mean_udf(plus_one(df['v'])).over(w)))
+        expected1 = df.withColumn(
+            'v2',
+            plus_one(mean(plus_one(df['v'])).over(w)))
+
+        result2 = df.withColumn(
+            'v2',
+            time_two(mean_udf(time_two(df['v'])).over(w)))
+        expected2 = df.withColumn(
+            'v2',
+            time_two(mean(time_two(df['v'])).over(w)))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+
+    def test_without_partitionBy(self):
+        from pyspark.sql.functions import mean
+
+        df = self.data
+        w = self.unpartitioned_window
+        mean_udf = self.pandas_agg_mean_udf
+
+        result1 = df.withColumn('v2', mean_udf(df['v']).over(w))
+        expected1 = df.withColumn('v2', mean(df['v']).over(w))
+
+        result2 = df.select(mean_udf(df['v']).over(w))
+        expected2 = df.select(mean(df['v']).over(w))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+
+    def test_mixed_sql_and_udf(self):
+        from pyspark.sql.functions import max, min, rank, col
+
+        df = self.data
+        w = self.unbounded_window
+        ow = self.ordered_window
+        max_udf = self.pandas_agg_max_udf
+        min_udf = self.pandas_agg_min_udf
+
+        result1 = df.withColumn('v_diff', max_udf(df['v']).over(w) - min_udf(df['v']).over(w))
+        expected1 = df.withColumn('v_diff', max(df['v']).over(w) - min(df['v']).over(w))
+
+        # Test mixing sql window function and window udf in the same expression
+        result2 = df.withColumn('v_diff', max_udf(df['v']).over(w) - min(df['v']).over(w))
+        expected2 = expected1
+
+        # Test chaining sql aggregate function and udf
+        result3 = df.withColumn('max_v', max_udf(df['v']).over(w)) \
+                    .withColumn('min_v', min(df['v']).over(w)) \
+                    .withColumn('v_diff', col('max_v') - col('min_v')) \
+                    .drop('max_v', 'min_v')
+        expected3 = expected1
+
+        # Test mixing sql window function and udf
+        result4 = df.withColumn('max_v', max_udf(df['v']).over(w)) \
+                    .withColumn('rank', rank().over(ow))
+        expected4 = df.withColumn('max_v', max(df['v']).over(w)) \
+                      .withColumn('rank', rank().over(ow))
+
+        self.assertPandasEqual(expected1.toPandas(), result1.toPandas())
+        self.assertPandasEqual(expected2.toPandas(), result2.toPandas())
+        self.assertPandasEqual(expected3.toPandas(), result3.toPandas())
+        self.assertPandasEqual(expected4.toPandas(), result4.toPandas())
+
+    def test_array_type(self):
+        from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+        df = self.data
+        w = self.unbounded_window
+
+        array_udf = pandas_udf(lambda x: [1.0, 2.0], 'array<double>', PandasUDFType.GROUPED_AGG)
+        result1 = df.withColumn('v2', array_udf(df['v']).over(w))
+        self.assertEquals(result1.first()['v2'], [1.0, 2.0])
+
+    def test_invalid_args(self):
+        from pyspark.sql.functions import mean, pandas_udf, PandasUDFType
+
+        df = self.data
+        w = self.unbounded_window
+        ow = self.ordered_window
+        mean_udf = self.pandas_agg_mean_udf
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    AnalysisException,
+                    '.*not supported within a window function'):
+                foo_udf = pandas_udf(lambda x: x, 'v double', PandasUDFType.GROUPED_MAP)
+                df.withColumn('v2', foo_udf(df['v']).over(w))
+
+        with QuietTest(self.sc):
+            with self.assertRaisesRegexp(
+                    AnalysisException,
+                    '.*Only unbounded window frame is supported.*'):
+                df.withColumn('mean_v', mean_udf(df['v']).over(ow))
+
+
+if __name__ == "__main__":
+    from pyspark.sql.tests import *
+    if xmlrunner:
+        unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    else:
+        unittest.main(verbosity=2)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/types.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/types.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d24c40e5858eb6f027eeb16fdb81fd7d1da55ca
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/types.py
@@ -0,0 +1,1883 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import decimal
+import time
+import datetime
+import calendar
+import json
+import re
+import base64
+from array import array
+import ctypes
+
+if sys.version >= "3":
+    long = int
+    basestring = unicode = str
+
+from py4j.protocol import register_input_converter
+from py4j.java_gateway import JavaClass
+
+from pyspark import SparkContext
+from pyspark.serializers import CloudPickleSerializer
+
+__all__ = [
+    "DataType", "NullType", "StringType", "BinaryType", "BooleanType", "DateType",
+    "TimestampType", "DecimalType", "DoubleType", "FloatType", "ByteType", "IntegerType",
+    "LongType", "ShortType", "ArrayType", "MapType", "StructField", "StructType"]
+
+
+class DataType(object):
+    """Base class for data types."""
+
+    def __repr__(self):
+        return self.__class__.__name__
+
+    def __hash__(self):
+        return hash(str(self))
+
+    def __eq__(self, other):
+        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    @classmethod
+    def typeName(cls):
+        return cls.__name__[:-4].lower()
+
+    def simpleString(self):
+        return self.typeName()
+
+    def jsonValue(self):
+        return self.typeName()
+
+    def json(self):
+        return json.dumps(self.jsonValue(),
+                          separators=(',', ':'),
+                          sort_keys=True)
+
+    def needConversion(self):
+        """
+        Does this type need conversion between Python object and internal SQL object?
+
+        This is used to avoid the unnecessary conversion for ArrayType/MapType/StructType.
+        """
+        return False
+
+    def toInternal(self, obj):
+        """
+        Converts a Python object into an internal SQL object.
+        """
+        return obj
+
+    def fromInternal(self, obj):
+        """
+        Converts an internal SQL object into a native Python object.
+        """
+        return obj
+
+
+# This singleton pattern does not work with pickle; you will get
+# another object after pickling and unpickling.
+class DataTypeSingleton(type):
+    """Metaclass for DataType"""
+
+    _instances = {}
+
+    def __call__(cls):
+        if cls not in cls._instances:
+            cls._instances[cls] = super(DataTypeSingleton, cls).__call__()
+        return cls._instances[cls]
+
+
+class NullType(DataType):
+    """Null type.
+
+    The data type representing None, used for the types that cannot be inferred.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+
+class AtomicType(DataType):
+    """An internal type used to represent everything that is not
+    null, a UDT, an array, a struct, or a map."""
+
+
+class NumericType(AtomicType):
+    """Numeric data types.
+    """
+
+
+class IntegralType(NumericType):
+    """Integral data types.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+
+class FractionalType(NumericType):
+    """Fractional data types.
+    """
+
+
+class StringType(AtomicType):
+    """String data type.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+
+class BinaryType(AtomicType):
+    """Binary (byte array) data type.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+
+class BooleanType(AtomicType):
+    """Boolean data type.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+
+class DateType(AtomicType):
+    """Date (datetime.date) data type.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+    EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal()
+
+    def needConversion(self):
+        return True
+
+    def toInternal(self, d):
+        if d is not None:
+            return d.toordinal() - self.EPOCH_ORDINAL
+
+    def fromInternal(self, v):
+        if v is not None:
+            return datetime.date.fromordinal(v + self.EPOCH_ORDINAL)
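+
+    # For example, toInternal(datetime.date(1970, 1, 2)) returns 1 and
+    # fromInternal(1) returns datetime.date(1970, 1, 2).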
+
+
+class TimestampType(AtomicType):
+    """Timestamp (datetime.datetime) data type.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+    def needConversion(self):
+        return True
+
+    def toInternal(self, dt):
+        if dt is not None:
+            seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo
+                       else time.mktime(dt.timetuple()))
+            return int(seconds) * 1000000 + dt.microsecond
+
+    def fromInternal(self, ts):
+        if ts is not None:
+            # using int to avoid precision loss in float
+            return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
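+
+    # Note: internal values are microseconds since the epoch; naive datetimes are
+    # interpreted in the local timezone (time.mktime), tz-aware ones via UTC
+    # (calendar.timegm of utctimetuple).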
+
+
+class DecimalType(FractionalType):
+    """Decimal (decimal.Decimal) data type.
+
+    The DecimalType must have fixed precision (the maximum total number of digits)
+    and scale (the number of digits to the right of the decimal point). For example,
+    (5, 2) can support values in the range [-999.99, 999.99].
+
+    The precision can be up to 38; the scale must be less than or equal to the precision.
+
+    When creating a DecimalType, the default precision and scale is (10, 0). When inferring
+    a schema from decimal.Decimal objects, it will be DecimalType(38, 18).
+
+    :param precision: the maximum total number of digits (default: 10)
+    :param scale: the number of digits to the right of the decimal point (default: 0)
+    """
+
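+    # For example, DecimalType(5, 2).simpleString() is 'decimal(5,2)'.
+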
+    def __init__(self, precision=10, scale=0):
+        self.precision = precision
+        self.scale = scale
+        self.hasPrecisionInfo = True  # this is public API
+
+    def simpleString(self):
+        return "decimal(%d,%d)" % (self.precision, self.scale)
+
+    def jsonValue(self):
+        return "decimal(%d,%d)" % (self.precision, self.scale)
+
+    def __repr__(self):
+        return "DecimalType(%d,%d)" % (self.precision, self.scale)
+
+
+class DoubleType(FractionalType):
+    """Double data type, representing double precision floats.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+
+class FloatType(FractionalType):
+    """Float data type, representing single precision floats.
+    """
+
+    __metaclass__ = DataTypeSingleton
+
+
+class ByteType(IntegralType):
+    """Byte data type, i.e. a signed integer in a single byte.
+    """
+    def simpleString(self):
+        return 'tinyint'
+
+
+class IntegerType(IntegralType):
+    """Int data type, i.e. a signed 32-bit integer.
+    """
+    def simpleString(self):
+        return 'int'
+
+
+class LongType(IntegralType):
+    """Long data type, i.e. a signed 64-bit integer.
+
+    If the values are beyond the range of [-9223372036854775808, 9223372036854775807],
+    please use :class:`DecimalType`.
+    """
+    def simpleString(self):
+        return 'bigint'
+
+
+class ShortType(IntegralType):
+    """Short data type, i.e. a signed 16-bit integer.
+    """
+    def simpleString(self):
+        return 'smallint'
+
+
+class ArrayType(DataType):
+    """Array data type.
+
+    :param elementType: :class:`DataType` of each element in the array.
+    :param containsNull: boolean, whether the array can contain null (None) values.
+    """
+
+    def __init__(self, elementType, containsNull=True):
+        """
+        >>> ArrayType(StringType()) == ArrayType(StringType(), True)
+        True
+        >>> ArrayType(StringType(), False) == ArrayType(StringType())
+        False
+        """
+        assert isinstance(elementType, DataType),\
+            "elementType %s should be an instance of %s" % (elementType, DataType)
+        self.elementType = elementType
+        self.containsNull = containsNull
+
+    def simpleString(self):
+        return 'array<%s>' % self.elementType.simpleString()
+
+    def __repr__(self):
+        return "ArrayType(%s,%s)" % (self.elementType,
+                                     str(self.containsNull).lower())
+
+    def jsonValue(self):
+        return {"type": self.typeName(),
+                "elementType": self.elementType.jsonValue(),
+                "containsNull": self.containsNull}
+
+    @classmethod
+    def fromJson(cls, json):
+        return ArrayType(_parse_datatype_json_value(json["elementType"]),
+                         json["containsNull"])
+
+    def needConversion(self):
+        return self.elementType.needConversion()
+
+    def toInternal(self, obj):
+        if not self.needConversion():
+            return obj
+        return obj and [self.elementType.toInternal(v) for v in obj]
+
+    def fromInternal(self, obj):
+        if not self.needConversion():
+            return obj
+        return obj and [self.elementType.fromInternal(v) for v in obj]
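+
+    # Note: `obj and [...]` short-circuits, so None (and other falsy inputs such as
+    # an empty list) pass through unchanged.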
+
+
+class MapType(DataType):
+    """Map data type.
+
+    :param keyType: :class:`DataType` of the keys in the map.
+    :param valueType: :class:`DataType` of the values in the map.
+    :param valueContainsNull: indicates whether values can contain null (None) values.
+
+    Keys in a map data type are not allowed to be null (None).
+    """
+
+    def __init__(self, keyType, valueType, valueContainsNull=True):
+        """
+        >>> (MapType(StringType(), IntegerType())
+        ...        == MapType(StringType(), IntegerType(), True))
+        True
+        >>> (MapType(StringType(), IntegerType(), False)
+        ...        == MapType(StringType(), FloatType()))
+        False
+        """
+        assert isinstance(keyType, DataType),\
+            "keyType %s should be an instance of %s" % (keyType, DataType)
+        assert isinstance(valueType, DataType),\
+            "valueType %s should be an instance of %s" % (valueType, DataType)
+        self.keyType = keyType
+        self.valueType = valueType
+        self.valueContainsNull = valueContainsNull
+
+    def simpleString(self):
+        return 'map<%s,%s>' % (self.keyType.simpleString(), self.valueType.simpleString())
+
+    def __repr__(self):
+        return "MapType(%s,%s,%s)" % (self.keyType, self.valueType,
+                                      str(self.valueContainsNull).lower())
+
+    def jsonValue(self):
+        return {"type": self.typeName(),
+                "keyType": self.keyType.jsonValue(),
+                "valueType": self.valueType.jsonValue(),
+                "valueContainsNull": self.valueContainsNull}
+
+    @classmethod
+    def fromJson(cls, json):
+        return MapType(_parse_datatype_json_value(json["keyType"]),
+                       _parse_datatype_json_value(json["valueType"]),
+                       json["valueContainsNull"])
+
+    def needConversion(self):
+        return self.keyType.needConversion() or self.valueType.needConversion()
+
+    def toInternal(self, obj):
+        if not self.needConversion():
+            return obj
+        return obj and dict((self.keyType.toInternal(k), self.valueType.toInternal(v))
+                            for k, v in obj.items())
+
+    def fromInternal(self, obj):
+        if not self.needConversion():
+            return obj
+        return obj and dict((self.keyType.fromInternal(k), self.valueType.fromInternal(v))
+                            for k, v in obj.items())
+
+
+class StructField(DataType):
+    """A field in :class:`StructType`.
+
+    :param name: string, name of the field.
+    :param dataType: :class:`DataType` of the field.
+    :param nullable: boolean, whether the field can be null (None) or not.
+    :param metadata: a dict from string to simple type that can be converted to JSON automatically
+    """
+
+    def __init__(self, name, dataType, nullable=True, metadata=None):
+        """
+        >>> (StructField("f1", StringType(), True)
+        ...      == StructField("f1", StringType(), True))
+        True
+        >>> (StructField("f1", StringType(), True)
+        ...      == StructField("f2", StringType(), True))
+        False
+        """
+        assert isinstance(dataType, DataType),\
+            "dataType %s should be an instance of %s" % (dataType, DataType)
+        assert isinstance(name, basestring), "field name %s should be string" % (name)
+        if not isinstance(name, str):
+            name = name.encode('utf-8')
+        self.name = name
+        self.dataType = dataType
+        self.nullable = nullable
+        self.metadata = metadata or {}
+
+    def simpleString(self):
+        return '%s:%s' % (self.name, self.dataType.simpleString())
+
+    def __repr__(self):
+        return "StructField(%s,%s,%s)" % (self.name, self.dataType,
+                                          str(self.nullable).lower())
+
+    def jsonValue(self):
+        return {"name": self.name,
+                "type": self.dataType.jsonValue(),
+                "nullable": self.nullable,
+                "metadata": self.metadata}
+
+    @classmethod
+    def fromJson(cls, json):
+        return StructField(json["name"],
+                           _parse_datatype_json_value(json["type"]),
+                           json["nullable"],
+                           json["metadata"])
+
+    def needConversion(self):
+        return self.dataType.needConversion()
+
+    def toInternal(self, obj):
+        return self.dataType.toInternal(obj)
+
+    def fromInternal(self, obj):
+        return self.dataType.fromInternal(obj)
+
+    def typeName(self):
+        raise TypeError(
+            "StructField does not have typeName. "
+            "Use typeName on its type explicitly instead.")
+
+
+class StructType(DataType):
+    """Struct type, consisting of a list of :class:`StructField`.
+
+    This is the data type representing a :class:`Row`.
+
+    Iterating a :class:`StructType` will iterate its :class:`StructField`\\s.
+    A contained :class:`StructField` can be accessed by name or position.
+
+    >>> struct1 = StructType([StructField("f1", StringType(), True)])
+    >>> struct1["f1"]
+    StructField(f1,StringType,true)
+    >>> struct1[0]
+    StructField(f1,StringType,true)
+    """
+    def __init__(self, fields=None):
+        """
+        >>> struct1 = StructType([StructField("f1", StringType(), True)])
+        >>> struct2 = StructType([StructField("f1", StringType(), True)])
+        >>> struct1 == struct2
+        True
+        >>> struct1 = StructType([StructField("f1", StringType(), True)])
+        >>> struct2 = StructType([StructField("f1", StringType(), True),
+        ...     StructField("f2", IntegerType(), False)])
+        >>> struct1 == struct2
+        False
+        """
+        if not fields:
+            self.fields = []
+            self.names = []
+        else:
+            self.fields = fields
+            self.names = [f.name for f in fields]
+            assert all(isinstance(f, StructField) for f in fields),\
+                "fields should be a list of StructField"
+        # Precalculated list of fields that need conversion with fromInternal/toInternal functions
+        self._needConversion = [f.needConversion() for f in self]
+        self._needSerializeAnyField = any(self._needConversion)
+
+    def add(self, field, data_type=None, nullable=True, metadata=None):
+        """
+        Construct a StructType by adding new elements to it to define the schema. The method accepts
+        either:
+
+            a) A single parameter which is a StructField object.
+            b) Between 2 and 4 parameters as (name, data_type, nullable (optional),
+               metadata (optional)). The data_type parameter may be either a String or a
+               DataType object.
+
+        >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None)
+        >>> struct2 = StructType([StructField("f1", StringType(), True), \\
+        ...     StructField("f2", StringType(), True, None)])
+        >>> struct1 == struct2
+        True
+        >>> struct1 = StructType().add(StructField("f1", StringType(), True))
+        >>> struct2 = StructType([StructField("f1", StringType(), True)])
+        >>> struct1 == struct2
+        True
+        >>> struct1 = StructType().add("f1", "string", True)
+        >>> struct2 = StructType([StructField("f1", StringType(), True)])
+        >>> struct1 == struct2
+        True
+
+        :param field: Either the name of the field or a StructField object
+        :param data_type: If present, the DataType of the StructField to create
+        :param nullable: Whether the field to add should be nullable (default True)
+        :param metadata: Any additional metadata (default None)
+        :return: a new updated StructType
+        """
+        if isinstance(field, StructField):
+            self.fields.append(field)
+            self.names.append(field.name)
+        else:
+            if isinstance(field, str) and data_type is None:
+                raise ValueError("Must specify DataType if passing name of struct_field to create.")
+
+            if isinstance(data_type, str):
+                data_type_f = _parse_datatype_json_value(data_type)
+            else:
+                data_type_f = data_type
+            self.fields.append(StructField(field, data_type_f, nullable, metadata))
+            self.names.append(field)
+        # Precalculated list of fields that need conversion with fromInternal/toInternal functions
+        self._needConversion = [f.needConversion() for f in self]
+        self._needSerializeAnyField = any(self._needConversion)
+        return self
+
+    def __iter__(self):
+        """Iterate the fields"""
+        return iter(self.fields)
+
+    def __len__(self):
+        """Return the number of fields."""
+        return len(self.fields)
+
+    def __getitem__(self, key):
+        """Access fields by name or slice."""
+        if isinstance(key, str):
+            for field in self:
+                if field.name == key:
+                    return field
+            raise KeyError('No StructField named {0}'.format(key))
+        elif isinstance(key, int):
+            try:
+                return self.fields[key]
+            except IndexError:
+                raise IndexError('StructType index out of range')
+        elif isinstance(key, slice):
+            return StructType(self.fields[key])
+        else:
+            raise TypeError('StructType keys should be strings, integers or slices')
+
+    def simpleString(self):
+        return 'struct<%s>' % (','.join(f.simpleString() for f in self))
+
+    def __repr__(self):
+        return ("StructType(List(%s))" %
+                ",".join(str(field) for field in self))
+
+    def jsonValue(self):
+        return {"type": self.typeName(),
+                "fields": [f.jsonValue() for f in self]}
+
+    @classmethod
+    def fromJson(cls, json):
+        return StructType([StructField.fromJson(f) for f in json["fields"]])
+
+    def fieldNames(self):
+        """
+        Returns all field names in a list.
+
+        >>> struct = StructType([StructField("f1", StringType(), True)])
+        >>> struct.fieldNames()
+        ['f1']
+        """
+        return list(self.names)
+
+    def needConversion(self):
+        # We need to convert Row()/namedtuple into tuple()
+        return True
+
+    def toInternal(self, obj):
+        if obj is None:
+            return
+
+        if self._needSerializeAnyField:
+            # Only calling toInternal function for fields that need conversion
+            if isinstance(obj, dict):
+                return tuple(f.toInternal(obj.get(n)) if c else obj.get(n)
+                             for n, f, c in zip(self.names, self.fields, self._needConversion))
+            elif isinstance(obj, (tuple, list)):
+                return tuple(f.toInternal(v) if c else v
+                             for f, v, c in zip(self.fields, obj, self._needConversion))
+            elif hasattr(obj, "__dict__"):
+                d = obj.__dict__
+                return tuple(f.toInternal(d.get(n)) if c else d.get(n)
+                             for n, f, c in zip(self.names, self.fields, self._needConversion))
+            else:
+                raise ValueError("Unexpected tuple %r with StructType" % obj)
+        else:
+            if isinstance(obj, dict):
+                return tuple(obj.get(n) for n in self.names)
+            elif isinstance(obj, Row) and getattr(obj, "__from_dict__", False):
+                return tuple(obj[n] for n in self.names)
+            elif isinstance(obj, (list, tuple)):
+                return tuple(obj)
+            elif hasattr(obj, "__dict__"):
+                d = obj.__dict__
+                return tuple(d.get(n) for n in self.names)
+            else:
+                raise ValueError("Unexpected tuple %r with StructType" % obj)
+
+    def fromInternal(self, obj):
+        if obj is None:
+            return
+        if isinstance(obj, Row):
+            # it's already converted by pickler
+            return obj
+        if self._needSerializeAnyField:
+            # Only calling fromInternal function for fields that need conversion
+            values = [f.fromInternal(v) if c else v
+                      for f, v, c in zip(self.fields, obj, self._needConversion)]
+        else:
+            values = obj
+        return _create_row(self.names, values)
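+
+    # Illustrative round trip for a schema that needs conversion:
+    # StructType([StructField("d", DateType())]).toInternal({"d": datetime.date(1970, 1, 2)})
+    # returns (1,), and fromInternal((1,)) returns Row(d=datetime.date(1970, 1, 2)).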
+
+
+class UserDefinedType(DataType):
+    """User-defined type (UDT).
+
+    .. note:: WARN: Spark Internal Use Only
+    """
+
+    @classmethod
+    def typeName(cls):
+        return cls.__name__.lower()
+
+    @classmethod
+    def sqlType(cls):
+        """
+        Underlying SQL storage type for this UDT.
+        """
+        raise NotImplementedError("UDT must implement sqlType().")
+
+    @classmethod
+    def module(cls):
+        """
+        The Python module of the UDT.
+        """
+        raise NotImplementedError("UDT must implement module().")
+
+    @classmethod
+    def scalaUDT(cls):
+        """
+        The class name of the paired Scala UDT (could be '', if there
+        is no corresponding one).
+        """
+        return ''
+
+    def needConversion(self):
+        return True
+
+    @classmethod
+    def _cachedSqlType(cls):
+        """
+        Cache the sqlType() on the class, because it is heavily used in `toInternal`.
+        """
+        if not hasattr(cls, "_cached_sql_type"):
+            cls._cached_sql_type = cls.sqlType()
+        return cls._cached_sql_type
+
+    def toInternal(self, obj):
+        if obj is not None:
+            return self._cachedSqlType().toInternal(self.serialize(obj))
+
+    def fromInternal(self, obj):
+        v = self._cachedSqlType().fromInternal(obj)
+        if v is not None:
+            return self.deserialize(v)
+
+    def serialize(self, obj):
+        """
+        Converts a user-type object into a SQL datum.
+        """
+        raise NotImplementedError("UDT must implement toInternal().")
+
+    def deserialize(self, datum):
+        """
+        Converts a SQL datum into a user-type object.
+        """
+        raise NotImplementedError("UDT must implement fromInternal().")
+
+    def simpleString(self):
+        return 'udt'
+
+    def json(self):
+        return json.dumps(self.jsonValue(), separators=(',', ':'), sort_keys=True)
+
+    def jsonValue(self):
+        if self.scalaUDT():
+            assert self.module() != '__main__', 'UDT in __main__ cannot work with ScalaUDT'
+            schema = {
+                "type": "udt",
+                "class": self.scalaUDT(),
+                "pyClass": "%s.%s" % (self.module(), type(self).__name__),
+                "sqlType": self.sqlType().jsonValue()
+            }
+        else:
+            ser = CloudPickleSerializer()
+            b = ser.dumps(type(self))
+            schema = {
+                "type": "udt",
+                "pyClass": "%s.%s" % (self.module(), type(self).__name__),
+                "serializedClass": base64.b64encode(b).decode('utf8'),
+                "sqlType": self.sqlType().jsonValue()
+            }
+        return schema
+
+    @classmethod
+    def fromJson(cls, json):
+        pyUDT = str(json["pyClass"])  # convert unicode to str
+        split = pyUDT.rfind(".")
+        pyModule = pyUDT[:split]
+        pyClass = pyUDT[split+1:]
+        m = __import__(pyModule, globals(), locals(), [pyClass])
+        if not hasattr(m, pyClass):
+            s = base64.b64decode(json['serializedClass'].encode('utf-8'))
+            UDT = CloudPickleSerializer().loads(s)
+        else:
+            UDT = getattr(m, pyClass)
+        return UDT()
+
+    def __eq__(self, other):
+        return type(self) == type(other)
+
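+# Minimal sketch of a UserDefinedType subclass (illustrative only; ``Point`` here is a
+# hypothetical user class, not something defined in this module):
+#
+#     class PointUDT(UserDefinedType):
+#         @classmethod
+#         def sqlType(cls):
+#             return ArrayType(DoubleType(), False)
+#
+#         @classmethod
+#         def module(cls):
+#             return "my_package.point"
+#
+#         def serialize(self, obj):
+#             return [obj.x, obj.y]
+#
+#         def deserialize(self, datum):
+#             return Point(datum[0], datum[1])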
+
+_atomic_types = [StringType, BinaryType, BooleanType, DecimalType, FloatType, DoubleType,
+                 ByteType, ShortType, IntegerType, LongType, DateType, TimestampType, NullType]
+_all_atomic_types = dict((t.typeName(), t) for t in _atomic_types)
+_all_complex_types = dict((v.typeName(), v)
+                          for v in [ArrayType, MapType, StructType])
+
+
+_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)")
+
+
+def _parse_datatype_string(s):
+    """
+    Parses the given data type string to a :class:`DataType`. The data type string format equals
+    :class:`DataType.simpleString`, except that the top-level struct type can omit
+    the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use ``byte`` instead
+    of ``tinyint`` for :class:`ByteType`. We can also use ``int`` as a short name
+    for :class:`IntegerType`. Since Spark 2.3, this also supports a schema in a DDL-formatted
+    string and case-insensitive strings.
+
+    >>> _parse_datatype_string("int ")
+    IntegerType
+    >>> _parse_datatype_string("INT ")
+    IntegerType
+    >>> _parse_datatype_string("a: byte, b: decimal(  16 , 8   ) ")
+    StructType(List(StructField(a,ByteType,true),StructField(b,DecimalType(16,8),true)))
+    >>> _parse_datatype_string("a DOUBLE, b STRING")
+    StructType(List(StructField(a,DoubleType,true),StructField(b,StringType,true)))
+    >>> _parse_datatype_string("a: array< short>")
+    StructType(List(StructField(a,ArrayType(ShortType,true),true)))
+    >>> _parse_datatype_string(" map<string , string > ")
+    MapType(StringType,StringType,true)
+
+    >>> # Error cases
+    >>> _parse_datatype_string("blabla") # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ParseException:...
+    >>> _parse_datatype_string("a: int,") # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ParseException:...
+    >>> _parse_datatype_string("array<int") # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ParseException:...
+    >>> _parse_datatype_string("map<int, boolean>>") # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ParseException:...
+    """
+    sc = SparkContext._active_spark_context
+
+    def from_ddl_schema(type_str):
+        return _parse_datatype_json_string(
+            sc._jvm.org.apache.spark.sql.types.StructType.fromDDL(type_str).json())
+
+    def from_ddl_datatype(type_str):
+        return _parse_datatype_json_string(
+            sc._jvm.org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str).json())
+
+    try:
+        # DDL format, "fieldname datatype, fieldname datatype".
+        return from_ddl_schema(s)
+    except Exception as e:
+        try:
+            # For backwards compatibility, e.g. "integer", "struct<fieldname: datatype>", etc.
+            return from_ddl_datatype(s)
+        except:
+            try:
+                # For backwards compatibility, "fieldname: datatype, fieldname: datatype" case.
+                return from_ddl_datatype("struct<%s>" % s.strip())
+            except:
+                raise e
+
+
+def _parse_datatype_json_string(json_string):
+    """Parses the given data type JSON string.
+    >>> import pickle
+    >>> def check_datatype(datatype):
+    ...     pickled = pickle.loads(pickle.dumps(datatype))
+    ...     assert datatype == pickled
+    ...     scala_datatype = spark._jsparkSession.parseDataType(datatype.json())
+    ...     python_datatype = _parse_datatype_json_string(scala_datatype.json())
+    ...     assert datatype == python_datatype
+    >>> for cls in _all_atomic_types.values():
+    ...     check_datatype(cls())
+
+    >>> # Simple ArrayType.
+    >>> simple_arraytype = ArrayType(StringType(), True)
+    >>> check_datatype(simple_arraytype)
+
+    >>> # Simple MapType.
+    >>> simple_maptype = MapType(StringType(), LongType())
+    >>> check_datatype(simple_maptype)
+
+    >>> # Simple StructType.
+    >>> simple_structtype = StructType([
+    ...     StructField("a", DecimalType(), False),
+    ...     StructField("b", BooleanType(), True),
+    ...     StructField("c", LongType(), True),
+    ...     StructField("d", BinaryType(), False)])
+    >>> check_datatype(simple_structtype)
+
+    >>> # Complex StructType.
+    >>> complex_structtype = StructType([
+    ...     StructField("simpleArray", simple_arraytype, True),
+    ...     StructField("simpleMap", simple_maptype, True),
+    ...     StructField("simpleStruct", simple_structtype, True),
+    ...     StructField("boolean", BooleanType(), False),
+    ...     StructField("withMeta", DoubleType(), False, {"name": "age"})])
+    >>> check_datatype(complex_structtype)
+
+    >>> # Complex ArrayType.
+    >>> complex_arraytype = ArrayType(complex_structtype, True)
+    >>> check_datatype(complex_arraytype)
+
+    >>> # Complex MapType.
+    >>> complex_maptype = MapType(complex_structtype,
+    ...                           complex_arraytype, False)
+    >>> check_datatype(complex_maptype)
+    """
+    return _parse_datatype_json_value(json.loads(json_string))
+
+
+def _parse_datatype_json_value(json_value):
+    if not isinstance(json_value, dict):
+        if json_value in _all_atomic_types.keys():
+            return _all_atomic_types[json_value]()
+        elif json_value == 'decimal':
+            return DecimalType()
+        elif _FIXED_DECIMAL.match(json_value):
+            m = _FIXED_DECIMAL.match(json_value)
+            return DecimalType(int(m.group(1)), int(m.group(2)))
+        else:
+            raise ValueError("Could not parse datatype: %s" % json_value)
+    else:
+        tpe = json_value["type"]
+        if tpe in _all_complex_types:
+            return _all_complex_types[tpe].fromJson(json_value)
+        elif tpe == 'udt':
+            return UserDefinedType.fromJson(json_value)
+        else:
+            raise ValueError("not supported type: %s" % tpe)
+
+
+# Mapping Python types to Spark SQL DataType
+_type_mappings = {
+    type(None): NullType,
+    bool: BooleanType,
+    int: LongType,
+    float: DoubleType,
+    str: StringType,
+    bytearray: BinaryType,
+    decimal.Decimal: DecimalType,
+    datetime.date: DateType,
+    datetime.datetime: TimestampType,
+    datetime.time: TimestampType,
+}
+
+if sys.version < "3":
+    _type_mappings.update({
+        unicode: StringType,
+        long: LongType,
+    })
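+
+# Illustrative lookups against the mapping above (a sketch; the outputs are shown
+# as comments and assume the default mappings):
+# >>> _type_mappings[int]().simpleString()              # 'bigint'  (plain ints map to LongType)
+# >>> _type_mappings[datetime.date]().simpleString()    # 'date'
+# >>> _type_mappings[decimal.Decimal]().simpleString()  # 'decimal(10,0)'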
+
+# Mapping Python array types to Spark SQL DataType
+# We should be careful here. The size of these types in Python depends on the C
+# implementation. We need to make sure that this conversion does not lose any
+# precision. Also, the JVM only supports signed types; when converting unsigned
+# types, keep in mind that they require 1 more bit when stored as signed types.
+#
+# Reference for C integer size, see:
+# ISO/IEC 9899:201x specification, chapter 5.2.4.2.1 Sizes of integer types <limits.h>.
+# Reference for python array typecode, see:
+# https://docs.python.org/2/library/array.html
+# https://docs.python.org/3.6/library/array.html
+# Reference for JVM's supported integral types:
+# http://docs.oracle.com/javase/specs/jvms/se8/html/jvms-2.html#jvms-2.3.1
+
+_array_signed_int_typecode_ctype_mappings = {
+    'b': ctypes.c_byte,
+    'h': ctypes.c_short,
+    'i': ctypes.c_int,
+    'l': ctypes.c_long,
+}
+
+_array_unsigned_int_typecode_ctype_mappings = {
+    'B': ctypes.c_ubyte,
+    'H': ctypes.c_ushort,
+    'I': ctypes.c_uint,
+    'L': ctypes.c_ulong
+}
+
+
+def _int_size_to_type(size):
+    """
+    Return the Catalyst datatype from the size of integers.
+    """
+    if size <= 8:
+        return ByteType
+    if size <= 16:
+        return ShortType
+    if size <= 32:
+        return IntegerType
+    if size <= 64:
+        return LongType
+
+# The mapping from all supported array typecodes to Spark SQL data types is built here
+_array_type_mappings = {
+    # Warning: the actual properties of float and double are not specified by the C
+    # standard. On almost every system supported by both Python and the JVM, they are
+    # IEEE 754 single-precision and double-precision binary floating-point formats,
+    # and we assume the same thing here for now.
+    'f': FloatType,
+    'd': DoubleType
+}
+
+# compute array typecode mappings for signed integer types
+for _typecode in _array_signed_int_typecode_ctype_mappings.keys():
+    size = ctypes.sizeof(_array_signed_int_typecode_ctype_mappings[_typecode]) * 8
+    dt = _int_size_to_type(size)
+    if dt is not None:
+        _array_type_mappings[_typecode] = dt
+
+# compute array typecode mappings for unsigned integer types
+for _typecode in _array_unsigned_int_typecode_ctype_mappings.keys():
+    # The JVM does not have unsigned types, so use signed types that are at least
+    # 1 bit larger to store them
+    size = ctypes.sizeof(_array_unsigned_int_typecode_ctype_mappings[_typecode]) * 8 + 1
+    dt = _int_size_to_type(size)
+    if dt is not None:
+        _array_type_mappings[_typecode] = dt
+
+# Type code 'u' in Python's array is deprecated since version 3.3, and will be
+# removed in version 4.0. See: https://docs.python.org/3/library/array.html
+if sys.version_info[0] < 4:
+    _array_type_mappings['u'] = StringType
+
+# Type code 'c' is only available in Python 2
+if sys.version_info[0] < 3:
+    _array_type_mappings['c'] = StringType
+
+# SPARK-21465:
+# In Python 2, arrays of 'L' happened to be mistakenly, partially supported. To
+# avoid breaking users' code, we should keep this partial support. Below is a
+# dirty hack to keep this partial support and make the unit tests pass.
+import platform
+if sys.version_info[0] < 3 and platform.python_implementation() != 'PyPy':
+    if 'L' not in _array_type_mappings.keys():
+        _array_type_mappings['L'] = LongType
+        _array_unsigned_int_typecode_ctype_mappings['L'] = ctypes.c_uint
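+
+# Illustrative sketch of the resulting mapping (assumes a typical LP64 platform;
+# the exact C sizes, and hence the chosen Spark types, depend on the local C
+# implementation):
+# >>> _array_type_mappings['h']   # ctypes.c_short is 16 bits -> ShortType
+# >>> _array_type_mappings['I']   # a 32-bit unsigned int needs 33 bits -> LongType
+# >>> _array_type_mappings['f']   # assumed IEEE 754 single precision -> FloatType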
+
+
+def _infer_type(obj):
+    """Infer the DataType from obj
+    """
+    if obj is None:
+        return NullType()
+
+    if hasattr(obj, '__UDT__'):
+        return obj.__UDT__
+
+    dataType = _type_mappings.get(type(obj))
+    if dataType is DecimalType:
+        # the precision and scale of `obj` may be different from row to row.
+        return DecimalType(38, 18)
+    elif dataType is not None:
+        return dataType()
+
+    if isinstance(obj, dict):
+        for key, value in obj.items():
+            if key is not None and value is not None:
+                return MapType(_infer_type(key), _infer_type(value), True)
+        else:
+            return MapType(NullType(), NullType(), True)
+    elif isinstance(obj, list):
+        for v in obj:
+            if v is not None:
+                return ArrayType(_infer_type(v), True)
+        else:
+            return ArrayType(NullType(), True)
+    elif isinstance(obj, array):
+        if obj.typecode in _array_type_mappings:
+            return ArrayType(_array_type_mappings[obj.typecode](), False)
+        else:
+            raise TypeError("not supported type: array(%s)" % obj.typecode)
+    else:
+        try:
+            return _infer_schema(obj)
+        except TypeError:
+            raise TypeError("not supported type: %s" % type(obj))
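+
+# A short sketch of inference on plain Python values (results shown as comments;
+# they assume the default mappings above):
+# >>> _infer_type(1)           # LongType
+# >>> _infer_type(1.0)         # DoubleType
+# >>> _infer_type([1, 2, 3])   # ArrayType(LongType,true)
+# >>> _infer_type({"a": 1})    # MapType(StringType,LongType,true)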
+
+
+def _infer_schema(row, names=None):
+    """Infer the schema from dict/namedtuple/object"""
+    if isinstance(row, dict):
+        items = sorted(row.items())
+
+    elif isinstance(row, (tuple, list)):
+        if hasattr(row, "__fields__"):  # Row
+            items = zip(row.__fields__, tuple(row))
+        elif hasattr(row, "_fields"):  # namedtuple
+            items = zip(row._fields, tuple(row))
+        else:
+            if names is None:
+                names = ['_%d' % i for i in range(1, len(row) + 1)]
+            elif len(names) < len(row):
+                names.extend('_%d' % i for i in range(len(names) + 1, len(row) + 1))
+            items = zip(names, row)
+
+    elif hasattr(row, "__dict__"):  # object
+        items = sorted(row.__dict__.items())
+
+    else:
+        raise TypeError("Can not infer schema for type: %s" % type(row))
+
+    fields = [StructField(k, _infer_type(v), True) for k, v in items]
+    return StructType(fields)
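+
+# An illustrative sketch: inferring from a dict sorts the keys, so
+# {"b": 1, "a": "x"} becomes struct<a:string,b:bigint>, while a Row or namedtuple
+# keeps its declared field order.
+# >>> _infer_schema({"b": 1, "a": "x"}).simpleString()   # 'struct<a:string,b:bigint>'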
+
+
+def _has_nulltype(dt):
+    """ Return whether there is NullType in `dt` or not """
+    if isinstance(dt, StructType):
+        return any(_has_nulltype(f.dataType) for f in dt.fields)
+    elif isinstance(dt, ArrayType):
+        return _has_nulltype(dt.elementType)
+    elif isinstance(dt, MapType):
+        return _has_nulltype(dt.keyType) or _has_nulltype(dt.valueType)
+    else:
+        return isinstance(dt, NullType)
+
+
+def _merge_type(a, b, name=None):
+    if name is None:
+        new_msg = lambda msg: msg
+        new_name = lambda n: "field %s" % n
+    else:
+        new_msg = lambda msg: "%s: %s" % (name, msg)
+        new_name = lambda n: "field %s in %s" % (n, name)
+
+    if isinstance(a, NullType):
+        return b
+    elif isinstance(b, NullType):
+        return a
+    elif type(a) is not type(b):
+        # TODO: type cast (such as int -> long)
+        raise TypeError(new_msg("Can not merge type %s and %s" % (type(a), type(b))))
+
+    # same type
+    if isinstance(a, StructType):
+        nfs = dict((f.name, f.dataType) for f in b.fields)
+        fields = [StructField(f.name, _merge_type(f.dataType, nfs.get(f.name, NullType()),
+                                                  name=new_name(f.name)))
+                  for f in a.fields]
+        names = set([f.name for f in fields])
+        for n in nfs:
+            if n not in names:
+                fields.append(StructField(n, nfs[n]))
+        return StructType(fields)
+
+    elif isinstance(a, ArrayType):
+        return ArrayType(_merge_type(a.elementType, b.elementType,
+                                     name='element in array %s' % name), True)
+
+    elif isinstance(a, MapType):
+        return MapType(_merge_type(a.keyType, b.keyType, name='key of map %s' % name),
+                       _merge_type(a.valueType, b.valueType, name='value of map %s' % name),
+                       True)
+    else:
+        return a
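+
+# _merge_type treats NullType as "unknown" and fills it in from the other side;
+# a small sketch using the helpers above (result shown as a comment):
+# >>> _merge_type(_infer_schema({"a": 1}),
+# ...             _infer_schema({"a": None, "b": "x"})).simpleString()
+# 'struct<a:bigint,b:string>'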
+
+
+def _need_converter(dataType):
+    if isinstance(dataType, StructType):
+        return True
+    elif isinstance(dataType, ArrayType):
+        return _need_converter(dataType.elementType)
+    elif isinstance(dataType, MapType):
+        return _need_converter(dataType.keyType) or _need_converter(dataType.valueType)
+    elif isinstance(dataType, NullType):
+        return True
+    else:
+        return False
+
+
+def _create_converter(dataType):
+    """Create a converter to drop the names of fields in obj """
+    if not _need_converter(dataType):
+        return lambda x: x
+
+    if isinstance(dataType, ArrayType):
+        conv = _create_converter(dataType.elementType)
+        return lambda row: [conv(v) for v in row]
+
+    elif isinstance(dataType, MapType):
+        kconv = _create_converter(dataType.keyType)
+        vconv = _create_converter(dataType.valueType)
+        return lambda row: dict((kconv(k), vconv(v)) for k, v in row.items())
+
+    elif isinstance(dataType, NullType):
+        return lambda x: None
+
+    elif not isinstance(dataType, StructType):
+        return lambda x: x
+
+    # dataType must be StructType
+    names = [f.name for f in dataType.fields]
+    converters = [_create_converter(f.dataType) for f in dataType.fields]
+    convert_fields = any(_need_converter(f.dataType) for f in dataType.fields)
+
+    def convert_struct(obj):
+        if obj is None:
+            return
+
+        if isinstance(obj, (tuple, list)):
+            if convert_fields:
+                return tuple(conv(v) for v, conv in zip(obj, converters))
+            else:
+                return tuple(obj)
+
+        if isinstance(obj, dict):
+            d = obj
+        elif hasattr(obj, "__dict__"):  # object
+            d = obj.__dict__
+        else:
+            raise TypeError("Unexpected obj type: %s" % type(obj))
+
+        if convert_fields:
+            return tuple([conv(d.get(name)) for name, conv in zip(names, converters)])
+        else:
+            return tuple([d.get(name) for name in names])
+
+    return convert_struct
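+
+# Illustrative sketch: a converter built for a struct drops the field names and
+# returns a plain tuple ordered by the schema:
+# >>> conv = _create_converter(StructType([StructField("a", LongType()),
+# ...                                      StructField("b", StringType())]))
+# >>> conv({"b": "x", "a": 1})
+# (1, 'x')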
+
+
+_acceptable_types = {
+    BooleanType: (bool,),
+    ByteType: (int, long),
+    ShortType: (int, long),
+    IntegerType: (int, long),
+    LongType: (int, long),
+    FloatType: (float,),
+    DoubleType: (float,),
+    DecimalType: (decimal.Decimal,),
+    StringType: (str, unicode),
+    BinaryType: (bytearray,),
+    DateType: (datetime.date, datetime.datetime),
+    TimestampType: (datetime.datetime,),
+    ArrayType: (list, tuple, array),
+    MapType: (dict,),
+    StructType: (tuple, list, dict),
+}
+
+
+def _make_type_verifier(dataType, nullable=True, name=None):
+    """
+    Make a verifier that checks the type of obj against dataType and raises a TypeError if they do
+    not match.
+
+    This verifier also checks the value of obj against datatype and raises a ValueError if it's not
+    within the allowed range, e.g. using 128 as ByteType will overflow. Note that Python float is
+    not checked, so it will become infinity when cast to Java float if it overflows.
+
+    >>> _make_type_verifier(StructType([]))(None)
+    >>> _make_type_verifier(StringType())("")
+    >>> _make_type_verifier(LongType())(0)
+    >>> _make_type_verifier(ArrayType(ShortType()))(list(range(3)))
+    >>> _make_type_verifier(ArrayType(StringType()))(set()) # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    TypeError:...
+    >>> _make_type_verifier(MapType(StringType(), IntegerType()))({})
+    >>> _make_type_verifier(StructType([]))(())
+    >>> _make_type_verifier(StructType([]))([])
+    >>> _make_type_verifier(StructType([]))([1]) # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ValueError:...
+    >>> # Check if numeric values are within the allowed range.
+    >>> _make_type_verifier(ByteType())(12)
+    >>> _make_type_verifier(ByteType())(1234) # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ValueError:...
+    >>> _make_type_verifier(ByteType(), False)(None) # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ValueError:...
+    >>> _make_type_verifier(
+    ...     ArrayType(ShortType(), False))([1, None]) # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ValueError:...
+    >>> _make_type_verifier(MapType(StringType(), IntegerType()))({None: 1})
+    Traceback (most recent call last):
+        ...
+    ValueError:...
+    >>> schema = StructType().add("a", IntegerType()).add("b", StringType(), False)
+    >>> _make_type_verifier(schema)((1, None)) # doctest: +IGNORE_EXCEPTION_DETAIL
+    Traceback (most recent call last):
+        ...
+    ValueError:...
+    """
+
+    if name is None:
+        new_msg = lambda msg: msg
+        new_name = lambda n: "field %s" % n
+    else:
+        new_msg = lambda msg: "%s: %s" % (name, msg)
+        new_name = lambda n: "field %s in %s" % (n, name)
+
+    def verify_nullability(obj):
+        if obj is None:
+            if nullable:
+                return True
+            else:
+                raise ValueError(new_msg("This field is not nullable, but got None"))
+        else:
+            return False
+
+    _type = type(dataType)
+
+    def assert_acceptable_types(obj):
+        assert _type in _acceptable_types, \
+            new_msg("unknown datatype: %s for object %r" % (dataType, obj))
+
+    def verify_acceptable_types(obj):
+        # subclasses of these types can not be handled by fromInternal in the JVM
+        if type(obj) not in _acceptable_types[_type]:
+            raise TypeError(new_msg("%s can not accept object %r in type %s"
+                                    % (dataType, obj, type(obj))))
+
+    if isinstance(dataType, StringType):
+        # StringType can work with any type
+        verify_value = lambda _: _
+
+    elif isinstance(dataType, UserDefinedType):
+        verifier = _make_type_verifier(dataType.sqlType(), name=name)
+
+        def verify_udf(obj):
+            if not (hasattr(obj, '__UDT__') and obj.__UDT__ == dataType):
+                raise ValueError(new_msg("%r is not an instance of type %r" % (obj, dataType)))
+            verifier(dataType.toInternal(obj))
+
+        verify_value = verify_udf
+
+    elif isinstance(dataType, ByteType):
+        def verify_byte(obj):
+            assert_acceptable_types(obj)
+            verify_acceptable_types(obj)
+            if obj < -128 or obj > 127:
+                raise ValueError(new_msg("object of ByteType out of range, got: %s" % obj))
+
+        verify_value = verify_byte
+
+    elif isinstance(dataType, ShortType):
+        def verify_short(obj):
+            assert_acceptable_types(obj)
+            verify_acceptable_types(obj)
+            if obj < -32768 or obj > 32767:
+                raise ValueError(new_msg("object of ShortType out of range, got: %s" % obj))
+
+        verify_value = verify_short
+
+    elif isinstance(dataType, IntegerType):
+        def verify_integer(obj):
+            assert_acceptable_types(obj)
+            verify_acceptable_types(obj)
+            if obj < -2147483648 or obj > 2147483647:
+                raise ValueError(
+                    new_msg("object of IntegerType out of range, got: %s" % obj))
+
+        verify_value = verify_integer
+
+    elif isinstance(dataType, ArrayType):
+        element_verifier = _make_type_verifier(
+            dataType.elementType, dataType.containsNull, name="element in array %s" % name)
+
+        def verify_array(obj):
+            assert_acceptable_types(obj)
+            verify_acceptable_types(obj)
+            for i in obj:
+                element_verifier(i)
+
+        verify_value = verify_array
+
+    elif isinstance(dataType, MapType):
+        key_verifier = _make_type_verifier(dataType.keyType, False, name="key of map %s" % name)
+        value_verifier = _make_type_verifier(
+            dataType.valueType, dataType.valueContainsNull, name="value of map %s" % name)
+
+        def verify_map(obj):
+            assert_acceptable_types(obj)
+            verify_acceptable_types(obj)
+            for k, v in obj.items():
+                key_verifier(k)
+                value_verifier(v)
+
+        verify_value = verify_map
+
+    elif isinstance(dataType, StructType):
+        verifiers = []
+        for f in dataType.fields:
+            verifier = _make_type_verifier(f.dataType, f.nullable, name=new_name(f.name))
+            verifiers.append((f.name, verifier))
+
+        def verify_struct(obj):
+            assert_acceptable_types(obj)
+
+            if isinstance(obj, dict):
+                for f, verifier in verifiers:
+                    verifier(obj.get(f))
+            elif isinstance(obj, Row) and getattr(obj, "__from_dict__", False):
+                # the order in obj could be different than dataType.fields
+                for f, verifier in verifiers:
+                    verifier(obj[f])
+            elif isinstance(obj, (tuple, list)):
+                if len(obj) != len(verifiers):
+                    raise ValueError(
+                        new_msg("Length of object (%d) does not match with "
+                                "length of fields (%d)" % (len(obj), len(verifiers))))
+                for v, (_, verifier) in zip(obj, verifiers):
+                    verifier(v)
+            elif hasattr(obj, "__dict__"):
+                d = obj.__dict__
+                for f, verifier in verifiers:
+                    verifier(d.get(f))
+            else:
+                raise TypeError(new_msg("StructType can not accept object %r in type %s"
+                                        % (obj, type(obj))))
+        verify_value = verify_struct
+
+    else:
+        def verify_default(obj):
+            assert_acceptable_types(obj)
+            verify_acceptable_types(obj)
+
+        verify_value = verify_default
+
+    def verify(obj):
+        if not verify_nullability(obj):
+            verify_value(obj)
+
+    return verify
+
+
+# This is used to unpickle a Row from JVM
+def _create_row_inbound_converter(dataType):
+    return lambda *a: dataType.fromInternal(a)
+
+
+def _create_row(fields, values):
+    row = Row(*values)
+    row.__fields__ = fields
+    return row
+
+
+class Row(tuple):
+
+    """
+    A row in :class:`DataFrame`.
+    The fields in it can be accessed:
+
+    * like attributes (``row.key``)
+    * like dictionary values (``row[key]``)
+
+    ``key in row`` will search through row keys.
+
+    Row can be used to create a row object by using named arguments;
+    the fields will be sorted by names. It is not allowed to omit
+    a named argument to represent that the value is None or missing;
+    such a value should be explicitly set to None.
+
+    >>> row = Row(name="Alice", age=11)
+    >>> row
+    Row(age=11, name='Alice')
+    >>> row['name'], row['age']
+    ('Alice', 11)
+    >>> row.name, row.age
+    ('Alice', 11)
+    >>> 'name' in row
+    True
+    >>> 'wrong_key' in row
+    False
+
+    Row can also be used to create another Row-like class, which can
+    then be used to create Row objects, such as
+
+    >>> Person = Row("name", "age")
+    >>> Person
+    <Row(name, age)>
+    >>> 'name' in Person
+    True
+    >>> 'wrong_key' in Person
+    False
+    >>> Person("Alice", 11)
+    Row(name='Alice', age=11)
+    """
+
+    def __new__(self, *args, **kwargs):
+        if args and kwargs:
+            raise ValueError("Can not use both args "
+                             "and kwargs to create Row")
+        if kwargs:
+            # create row objects
+            names = sorted(kwargs.keys())
+            row = tuple.__new__(self, [kwargs[n] for n in names])
+            row.__fields__ = names
+            row.__from_dict__ = True
+            return row
+
+        else:
+            # create row class or objects
+            return tuple.__new__(self, args)
+
+    def asDict(self, recursive=False):
+        """
+        Return as a dict
+
+        :param recursive: turns the nested Rows into dicts (default: False).
+
+        >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11}
+        True
+        >>> row = Row(key=1, value=Row(name='a', age=2))
+        >>> row.asDict() == {'key': 1, 'value': Row(age=2, name='a')}
+        True
+        >>> row.asDict(True) == {'key': 1, 'value': {'name': 'a', 'age': 2}}
+        True
+        """
+        if not hasattr(self, "__fields__"):
+            raise TypeError("Cannot convert a Row class into dict")
+
+        if recursive:
+            def conv(obj):
+                if isinstance(obj, Row):
+                    return obj.asDict(True)
+                elif isinstance(obj, list):
+                    return [conv(o) for o in obj]
+                elif isinstance(obj, dict):
+                    return dict((k, conv(v)) for k, v in obj.items())
+                else:
+                    return obj
+            return dict(zip(self.__fields__, (conv(o) for o in self)))
+        else:
+            return dict(zip(self.__fields__, self))
+
+    def __contains__(self, item):
+        if hasattr(self, "__fields__"):
+            return item in self.__fields__
+        else:
+            return super(Row, self).__contains__(item)
+
+    # let the object act like a class
+    def __call__(self, *args):
+        """create new Row object"""
+        if len(args) > len(self):
+            raise ValueError("Can not create Row with fields %s, expected %d values "
+                             "but got %s" % (self, len(self), args))
+        return _create_row(self, args)
+
+    def __getitem__(self, item):
+        if isinstance(item, (int, slice)):
+            return super(Row, self).__getitem__(item)
+        try:
+            # it will be slow when it has many fields,
+            # but this will not be used in normal cases
+            idx = self.__fields__.index(item)
+            return super(Row, self).__getitem__(idx)
+        except IndexError:
+            raise KeyError(item)
+        except ValueError:
+            raise ValueError(item)
+
+    def __getattr__(self, item):
+        if item.startswith("__"):
+            raise AttributeError(item)
+        try:
+            # it will be slow when it has many fields,
+            # but this will not be used in normal cases
+            idx = self.__fields__.index(item)
+            return self[idx]
+        except IndexError:
+            raise AttributeError(item)
+        except ValueError:
+            raise AttributeError(item)
+
+    def __setattr__(self, key, value):
+        if key != '__fields__' and key != "__from_dict__":
+            raise Exception("Row is read-only")
+        self.__dict__[key] = value
+
+    def __reduce__(self):
+        """Returns a tuple so Python knows how to pickle Row."""
+        if hasattr(self, "__fields__"):
+            return (_create_row, (self.__fields__, tuple(self)))
+        else:
+            return tuple.__reduce__(self)
+
+    def __repr__(self):
+        """Printable representation of Row used in Python REPL."""
+        if hasattr(self, "__fields__"):
+            return "Row(%s)" % ", ".join("%s=%r" % (k, v)
+                                         for k, v in zip(self.__fields__, tuple(self)))
+        else:
+            return "<Row(%s)>" % ", ".join(self)
+
+
+class DateConverter(object):
+    def can_convert(self, obj):
+        return isinstance(obj, datetime.date)
+
+    def convert(self, obj, gateway_client):
+        Date = JavaClass("java.sql.Date", gateway_client)
+        return Date.valueOf(obj.strftime("%Y-%m-%d"))
+
+
+class DatetimeConverter(object):
+    def can_convert(self, obj):
+        return isinstance(obj, datetime.datetime)
+
+    def convert(self, obj, gateway_client):
+        Timestamp = JavaClass("java.sql.Timestamp", gateway_client)
+        seconds = (calendar.timegm(obj.utctimetuple()) if obj.tzinfo
+                   else time.mktime(obj.timetuple()))
+        t = Timestamp(int(seconds) * 1000)
+        t.setNanos(obj.microsecond * 1000)
+        return t
+
+# datetime is a subclass of date, so we should register DatetimeConverter first
+register_input_converter(DatetimeConverter())
+register_input_converter(DateConverter())
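+
+# Illustrative: because datetime is a subclass of date, DatetimeConverter must be
+# checked first for datetime values, while plain dates fall through to DateConverter:
+# >>> DatetimeConverter().can_convert(datetime.datetime(2018, 1, 1, 12, 0))   # True
+# >>> DatetimeConverter().can_convert(datetime.date(2018, 1, 1))              # False
+# >>> DateConverter().can_convert(datetime.date(2018, 1, 1))                  # True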
+
+
+def to_arrow_type(dt):
+    """ Convert Spark data type to pyarrow type
+    """
+    from distutils.version import LooseVersion
+    import pyarrow as pa
+    if type(dt) == BooleanType:
+        arrow_type = pa.bool_()
+    elif type(dt) == ByteType:
+        arrow_type = pa.int8()
+    elif type(dt) == ShortType:
+        arrow_type = pa.int16()
+    elif type(dt) == IntegerType:
+        arrow_type = pa.int32()
+    elif type(dt) == LongType:
+        arrow_type = pa.int64()
+    elif type(dt) == FloatType:
+        arrow_type = pa.float32()
+    elif type(dt) == DoubleType:
+        arrow_type = pa.float64()
+    elif type(dt) == DecimalType:
+        arrow_type = pa.decimal128(dt.precision, dt.scale)
+    elif type(dt) == StringType:
+        arrow_type = pa.string()
+    elif type(dt) == BinaryType:
+        # TODO: remove version check once minimum pyarrow version is 0.10.0
+        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt) +
+                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
+        arrow_type = pa.binary()
+    elif type(dt) == DateType:
+        arrow_type = pa.date32()
+    elif type(dt) == TimestampType:
+        # Timestamps should be in UTC; JVM Arrow timestamps require a timezone to be read
+        arrow_type = pa.timestamp('us', tz='UTC')
+    elif type(dt) == ArrayType:
+        if type(dt.elementType) == TimestampType:
+            raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
+        arrow_type = pa.list_(to_arrow_type(dt.elementType))
+    else:
+        raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
+    return arrow_type
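+
+# A few illustrative mappings (a sketch; assumes pyarrow is installed):
+# >>> to_arrow_type(LongType())               # pa.int64()
+# >>> to_arrow_type(TimestampType())          # pa.timestamp('us', tz='UTC')
+# >>> to_arrow_type(ArrayType(StringType()))  # pa.list_(pa.string())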
+
+
+def to_arrow_schema(schema):
+    """ Convert a schema from Spark to Arrow
+    """
+    import pyarrow as pa
+    fields = [pa.field(field.name, to_arrow_type(field.dataType), nullable=field.nullable)
+              for field in schema]
+    return pa.schema(fields)
+
+
+def from_arrow_type(at):
+    """ Convert pyarrow type to Spark data type.
+    """
+    from distutils.version import LooseVersion
+    import pyarrow as pa
+    import pyarrow.types as types
+    if types.is_boolean(at):
+        spark_type = BooleanType()
+    elif types.is_int8(at):
+        spark_type = ByteType()
+    elif types.is_int16(at):
+        spark_type = ShortType()
+    elif types.is_int32(at):
+        spark_type = IntegerType()
+    elif types.is_int64(at):
+        spark_type = LongType()
+    elif types.is_float32(at):
+        spark_type = FloatType()
+    elif types.is_float64(at):
+        spark_type = DoubleType()
+    elif types.is_decimal(at):
+        spark_type = DecimalType(precision=at.precision, scale=at.scale)
+    elif types.is_string(at):
+        spark_type = StringType()
+    elif types.is_binary(at):
+        # TODO: remove version check once minimum pyarrow version is 0.10.0
+        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            raise TypeError("Unsupported type in conversion from Arrow: " + str(at) +
+                            "\nPlease install pyarrow >= 0.10.0 for BinaryType support.")
+        spark_type = BinaryType()
+    elif types.is_date32(at):
+        spark_type = DateType()
+    elif types.is_timestamp(at):
+        spark_type = TimestampType()
+    elif types.is_list(at):
+        if types.is_timestamp(at.value_type):
+            raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
+        spark_type = ArrayType(from_arrow_type(at.value_type))
+    else:
+        raise TypeError("Unsupported type in conversion from Arrow: " + str(at))
+    return spark_type
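+
+# Converting back is the inverse for the supported types; Arrow types not listed
+# above raise TypeError. An illustrative round trip (assumes pyarrow is installed):
+# >>> from_arrow_type(to_arrow_type(ShortType()))          # ShortType
+# >>> from_arrow_type(to_arrow_type(DecimalType(16, 8)))   # DecimalType(16,8)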
+
+
+def from_arrow_schema(arrow_schema):
+    """ Convert schema from Arrow to Spark.
+    """
+    return StructType(
+        [StructField(field.name, from_arrow_type(field.type), nullable=field.nullable)
+         for field in arrow_schema])
+
+
+def _check_series_convert_date(series, data_type):
+    """
+    Cast the series to datetime.date if it's a date type, otherwise returns the original series.
+
+    :param series: pandas.Series
+    :param data_type: a Spark data type for the series
+    """
+    if type(data_type) == DateType:
+        return series.dt.date
+    else:
+        return series
+
+
+def _check_dataframe_convert_date(pdf, schema):
+    """ Correct date type value to use datetime.date.
+
+    A Pandas DataFrame created from PyArrow uses datetime64[ns] for date type values, but we should
+    use datetime.date to match the behavior when Arrow optimization is disabled.
+
+    :param pdf: pandas.DataFrame
+    :param schema: a Spark schema of the pandas.DataFrame
+    """
+    for field in schema:
+        pdf[field.name] = _check_series_convert_date(pdf[field.name], field.dataType)
+    return pdf
+
+
+def _get_local_timezone():
+    """ Get local timezone using pytz with environment variable, or dateutil.
+
+    If there is a 'TZ' environment variable, pass it to pandas to use pytz and use it as timezone
+    string, otherwise use the special word 'dateutil/:' which means that pandas uses dateutil and
+    it reads system configuration to know the system local timezone.
+
+    See also:
+    - https://github.com/pandas-dev/pandas/blob/0.19.x/pandas/tslib.pyx#L1753
+    - https://github.com/dateutil/dateutil/blob/2.6.1/dateutil/tz/tz.py#L1338
+    """
+    import os
+    return os.environ.get('TZ', 'dateutil/:')
+
+
+def _check_series_localize_timestamps(s, timezone):
+    """
+    Convert timezone aware timestamps to timezone-naive in the specified timezone or local timezone.
+
+    If the input series is not a timestamp series, then the same series is returned. If the input
+    series is a timestamp series, then a converted series is returned.
+
+    :param s: pandas.Series
+    :param timezone: the timezone to convert. if None then use local timezone
+    :return pandas.Series that have been converted to tz-naive
+    """
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+
+    from pandas.api.types import is_datetime64tz_dtype
+    tz = timezone or _get_local_timezone()
+    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+    if is_datetime64tz_dtype(s.dtype):
+        return s.dt.tz_convert(tz).dt.tz_localize(None)
+    else:
+        return s
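+
+# Illustrative sketch (assumes pandas is installed and imported as pd):
+# >>> s = pd.Series(pd.date_range('2018-01-01', periods=2, tz='UTC'))
+# >>> _check_series_localize_timestamps(s, 'America/Los_Angeles').dt.tz   # None (tz-naive)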
+
+
+def _check_dataframe_localize_timestamps(pdf, timezone):
+    """
+    Convert timezone aware timestamps to timezone-naive in the specified timezone or local timezone
+
+    :param pdf: pandas.DataFrame
+    :param timezone: the timezone to convert. if None then use local timezone
+    :return pandas.DataFrame where any timezone aware columns have been converted to tz-naive
+    """
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+
+    for column, series in pdf.iteritems():
+        pdf[column] = _check_series_localize_timestamps(series, timezone)
+    return pdf
+
+
+def _check_series_convert_timestamps_internal(s, timezone):
+    """
+    Convert a tz-naive timestamp in the specified timezone or local timezone to UTC normalized for
+    Spark internal storage
+
+    :param s: a pandas.Series
+    :param timezone: the timezone to convert. if None then use local timezone
+    :return pandas.Series where if it is a timestamp, has been UTC normalized without a time zone
+    """
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+
+    from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype
+    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+    if is_datetime64_dtype(s.dtype):
+        # When tz_localize a tz-naive timestamp, the result is ambiguous if the tz-naive
+        # timestamp is during the hour when the clock is adjusted backward due to
+        # daylight saving time (dst).
+        # E.g., for America/New_York, the clock is adjusted backward on 2015-11-01 2:00 to
+        # 2015-11-01 1:00 from dst-time to standard time, and therefore, when tz_localize
+        # a tz-naive timestamp 2015-11-01 1:30 with America/New_York timezone, it can be either
+        # dst time (2015-11-01 1:30-0400) or standard time (2015-11-01 1:30-0500).
+        #
+        # Here we explicitly choose to use standard time. This matches the default behavior of
+        # pytz.
+        #
+        # Here is some code to help understand this behavior:
+        # >>> import datetime
+        # >>> import pandas as pd
+        # >>> import pytz
+        # >>>
+        # >>> t = datetime.datetime(2015, 11, 1, 1, 30)
+        # >>> ts = pd.Series([t])
+        # >>> tz = pytz.timezone('America/New_York')
+        # >>>
+        # >>> ts.dt.tz_localize(tz, ambiguous=True)
+        # 0   2015-11-01 01:30:00-04:00
+        # dtype: datetime64[ns, America/New_York]
+        # >>>
+        # >>> ts.dt.tz_localize(tz, ambiguous=False)
+        # 0   2015-11-01 01:30:00-05:00
+        # dtype: datetime64[ns, America/New_York]
+        # >>>
+        # >>> str(tz.localize(t))
+        # '2015-11-01 01:30:00-05:00'
+        tz = timezone or _get_local_timezone()
+        return s.dt.tz_localize(tz, ambiguous=False).dt.tz_convert('UTC')
+    elif is_datetime64tz_dtype(s.dtype):
+        return s.dt.tz_convert('UTC')
+    else:
+        return s
+
+
+def _check_series_convert_timestamps_localize(s, from_timezone, to_timezone):
+    """
+    Convert timestamp to timezone-naive in the specified timezone or local timezone
+
+    :param s: a pandas.Series
+    :param from_timezone: the timezone to convert from. if None then use local timezone
+    :param to_timezone: the timezone to convert to. if None then use local timezone
+    :return pandas.Series where if it is a timestamp, has been converted to tz-naive
+    """
+    from pyspark.sql.utils import require_minimum_pandas_version
+    require_minimum_pandas_version()
+
+    import pandas as pd
+    from pandas.api.types import is_datetime64tz_dtype, is_datetime64_dtype
+    from_tz = from_timezone or _get_local_timezone()
+    to_tz = to_timezone or _get_local_timezone()
+    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+    if is_datetime64tz_dtype(s.dtype):
+        return s.dt.tz_convert(to_tz).dt.tz_localize(None)
+    elif is_datetime64_dtype(s.dtype) and from_tz != to_tz:
+        # `s.dt.tz_localize('tzlocal()')` doesn't work properly when including NaT.
+        return s.apply(
+            lambda ts: ts.tz_localize(from_tz, ambiguous=False).tz_convert(to_tz).tz_localize(None)
+            if ts is not pd.NaT else pd.NaT)
+    else:
+        return s
+
+
+def _check_series_convert_timestamps_local_tz(s, timezone):
+    """
+    Convert timestamp to timezone-naive in the specified timezone or local timezone
+
+    :param s: a pandas.Series
+    :param timezone: the timezone to convert to. if None then use local timezone
+    :return pandas.Series where if it is a timestamp, has been converted to tz-naive
+    """
+    return _check_series_convert_timestamps_localize(s, None, timezone)
+
+
+def _check_series_convert_timestamps_tz_local(s, timezone):
+    """
+    Convert timestamp to timezone-naive in the specified timezone or local timezone
+
+    :param s: a pandas.Series
+    :param timezone: the timezone to convert from. if None then use local timezone
+    :return pandas.Series where if it is a timestamp, has been converted to tz-naive
+    """
+    return _check_series_convert_timestamps_localize(s, timezone, None)
+
+
+def _test():
+    import doctest
+    from pyspark.context import SparkContext
+    from pyspark.sql import SparkSession
+    globs = globals()
+    sc = SparkContext('local[4]', 'PythonTest')
+    globs['sc'] = sc
+    globs['spark'] = SparkSession.builder.getOrCreate()
+    (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
+    globs['sc'].stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/udf.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/udf.py
new file mode 100644
index 0000000000000000000000000000000000000000..58f4e0dff5ee5c7ed5edb0bf27b22dab72af0a63
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/udf.py
@@ -0,0 +1,414 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+User-defined function related classes and functions
+"""
+import functools
+import sys
+
+from pyspark import SparkContext, since
+from pyspark.rdd import _prepare_for_python_RDD, PythonEvalType, ignore_unicode_prefix
+from pyspark.sql.column import Column, _to_java_column, _to_seq
+from pyspark.sql.types import StringType, DataType, StructType, _parse_datatype_string,\
+    to_arrow_type, to_arrow_schema
+from pyspark.util import _get_argspec
+
+__all__ = ["UDFRegistration"]
+
+
+def _wrap_function(sc, func, returnType):
+    command = (func, returnType)
+    pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
+    return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
+                                  sc.pythonVer, broadcast_vars, sc._javaAccumulator)
+
+
+def _create_udf(f, returnType, evalType):
+
+    if evalType in (PythonEvalType.SQL_SCALAR_PANDAS_UDF,
+                    PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF,
+                    PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF):
+
+        from pyspark.sql.utils import require_minimum_pyarrow_version
+        require_minimum_pyarrow_version()
+
+        argspec = _get_argspec(f)
+
+        if evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF and len(argspec.args) == 0 and \
+                argspec.varargs is None:
+            raise ValueError(
+                "Invalid function: 0-arg pandas_udfs are not supported. "
+                "Instead, create a 1-arg pandas_udf and ignore the arg in your function."
+            )
+
+        if evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF \
+                and len(argspec.args) not in (1, 2):
+            raise ValueError(
+                "Invalid function: pandas_udfs with function type GROUPED_MAP "
+                "must take either one argument (data) or two arguments (key, data).")
+
+    # Set the name of the UserDefinedFunction object to be the name of function f
+    udf_obj = UserDefinedFunction(
+        f, returnType=returnType, name=None, evalType=evalType, deterministic=True)
+    return udf_obj._wrapped()
+
+
+class UserDefinedFunction(object):
+    """
+    User defined function in Python
+
+    .. versionadded:: 1.3
+    """
+    def __init__(self, func,
+                 returnType=StringType(),
+                 name=None,
+                 evalType=PythonEvalType.SQL_BATCHED_UDF,
+                 deterministic=True):
+        if not callable(func):
+            raise TypeError(
+                "Invalid function: not a function or callable (__call__ is not defined): "
+                "{0}".format(type(func)))
+
+        if not isinstance(returnType, (DataType, str)):
+            raise TypeError(
+                "Invalid returnType: returnType should be DataType or str "
+                "but is {}".format(returnType))
+
+        if not isinstance(evalType, int):
+            raise TypeError(
+                "Invalid evalType: evalType should be an int but is {}".format(evalType))
+
+        self.func = func
+        self._returnType = returnType
+        self._returnType_placeholder = None
+        # Stores the UserDefinedPythonFunction jobj, once initialized
+        self._judf_placeholder = None
+        self._name = name or (
+            func.__name__ if hasattr(func, '__name__')
+            else func.__class__.__name__)
+        self.evalType = evalType
+        self.deterministic = deterministic
+
+    @property
+    def returnType(self):
+        # This makes sure this is called after SparkContext is initialized.
+        # ``_parse_datatype_string`` accesses the JVM to parse a DDL-formatted string.
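+        # An illustrative sketch (assumes an active SparkSession and pyarrow, so the
+        # DDL string can be parsed and the Arrow check can run):
+        # >>> UserDefinedFunction(lambda pdf: pdf, "id long, v double",
+        # ...                     evalType=PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF).returnType
+        # StructType(List(StructField(id,LongType,true),StructField(v,DoubleType,true)))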
+        if self._returnType_placeholder is None:
+            if isinstance(self._returnType, DataType):
+                self._returnType_placeholder = self._returnType
+            else:
+                self._returnType_placeholder = _parse_datatype_string(self._returnType)
+
+        if self.evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF:
+            try:
+                to_arrow_type(self._returnType_placeholder)
+            except TypeError:
+                raise NotImplementedError(
+                    "Invalid returnType with scalar Pandas UDFs: %s is "
+                    "not supported" % str(self._returnType_placeholder))
+        elif self.evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
+            if isinstance(self._returnType_placeholder, StructType):
+                try:
+                    to_arrow_schema(self._returnType_placeholder)
+                except TypeError:
+                    raise NotImplementedError(
+                        "Invalid returnType with grouped map Pandas UDFs: "
+                        "%s is not supported" % str(self._returnType_placeholder))
+            else:
+                raise TypeError("Invalid returnType for grouped map Pandas "
+                                "UDFs: returnType must be a StructType.")
+        elif self.evalType == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF:
+            try:
+                to_arrow_type(self._returnType_placeholder)
+            except TypeError:
+                raise NotImplementedError(
+                    "Invalid returnType with grouped aggregate Pandas UDFs: "
+                    "%s is not supported" % str(self._returnType_placeholder))
+
+        return self._returnType_placeholder
+
+    @property
+    def _judf(self):
+        # It is possible that concurrent access to a newly created UDF
+        # will initialize multiple UserDefinedPythonFunctions.
+        # This is unlikely, doesn't affect correctness,
+        # and should have a minimal performance impact.
+        if self._judf_placeholder is None:
+            self._judf_placeholder = self._create_judf()
+        return self._judf_placeholder
+
+    def _create_judf(self):
+        from pyspark.sql import SparkSession
+
+        spark = SparkSession.builder.getOrCreate()
+        sc = spark.sparkContext
+
+        wrapped_func = _wrap_function(sc, self.func, self.returnType)
+        jdt = spark._jsparkSession.parseDataType(self.returnType.json())
+        judf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonFunction(
+            self._name, wrapped_func, jdt, self.evalType, self.deterministic)
+        return judf
+
+    def __call__(self, *cols):
+        judf = self._judf
+        sc = SparkContext._active_spark_context
+        return Column(judf.apply(_to_seq(sc, cols, _to_java_column)))
+
+    # This function is for improving the online help system in the interactive interpreter.
+    # For example, the built-in help / pydoc.help. It wraps the UDF with the docstring and
+    # argument annotation. (See: SPARK-19161)
+    def _wrapped(self):
+        """
+        Wrap this udf with a function and attach the docstring from func
+        """
+
+        # It is possible for a callable instance without a __name__ attribute and/or a
+        # __module__ attribute to be wrapped here. For example, functools.partial. In this case,
+        # we should avoid copying these attributes from the wrapped function to the wrapper
+        # function. So, we take these attribute names out of the default names to assign and
+        # then manually set them after wrapping.
+        assignments = tuple(
+            a for a in functools.WRAPPER_ASSIGNMENTS if a != '__name__' and a != '__module__')
+
+        @functools.wraps(self.func, assigned=assignments)
+        def wrapper(*args):
+            return self(*args)
+
+        wrapper.__name__ = self._name
+        wrapper.__module__ = (self.func.__module__ if hasattr(self.func, '__module__')
+                              else self.func.__class__.__module__)
+
+        wrapper.func = self.func
+        wrapper.returnType = self.returnType
+        wrapper.evalType = self.evalType
+        wrapper.deterministic = self.deterministic
+        wrapper.asNondeterministic = functools.wraps(
+            self.asNondeterministic)(lambda: self.asNondeterministic()._wrapped())
+        return wrapper
+
+    def asNondeterministic(self):
+        """
+        Updates UserDefinedFunction to nondeterministic.
+
+        .. versionadded:: 2.3
+        """
+        # Here, we explicitly clean the cache to create a JVM UDF instance
+        # with 'deterministic' updated. See SPARK-23233.
+        self._judf_placeholder = None
+        self.deterministic = False
+        return self
+
+
+class UDFRegistration(object):
+    """
+    Wrapper for user-defined function registration. This instance can be accessed by
+    :attr:`spark.udf` or :attr:`sqlContext.udf`.
+
+    .. versionadded:: 1.3.1
+    """
+
+    def __init__(self, sparkSession):
+        self.sparkSession = sparkSession
+
+    @ignore_unicode_prefix
+    @since("1.3.1")
+    def register(self, name, f, returnType=None):
+        """Register a Python function (including lambda function) or a user-defined function
+        as a SQL function.
+
+        :param name: name of the user-defined function in SQL statements.
+        :param f: a Python function, or a user-defined function. The user-defined function can
+            be either row-at-a-time or vectorized. See :meth:`pyspark.sql.functions.udf` and
+            :meth:`pyspark.sql.functions.pandas_udf`.
+        :param returnType: the return type of the registered user-defined function. The value can
+            be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string.
+        :return: a user-defined function.
+
+        To register a nondeterministic Python function, users need to first build
+        a nondeterministic user-defined function for the Python function and then register it
+        as a SQL function.
+
+        `returnType` can be optionally specified when `f` is a Python function but not
+        when `f` is a user-defined function. Please see below.
+
+        1. When `f` is a Python function:
+
+            `returnType` defaults to string type and can be optionally specified. The produced
+            object must match the specified type. In this case, this API works as if
+            `register(name, f, returnType=StringType())`.
+
+            >>> strlen = spark.udf.register("stringLengthString", lambda x: len(x))
+            >>> spark.sql("SELECT stringLengthString('test')").collect()
+            [Row(stringLengthString(test)=u'4')]
+
+            >>> spark.sql("SELECT 'foo' AS text").select(strlen("text")).collect()
+            [Row(stringLengthString(text)=u'3')]
+
+            >>> from pyspark.sql.types import IntegerType
+            >>> _ = spark.udf.register("stringLengthInt", lambda x: len(x), IntegerType())
+            >>> spark.sql("SELECT stringLengthInt('test')").collect()
+            [Row(stringLengthInt(test)=4)]
+
+        2. When `f` is a user-defined function:
+
+            Spark uses the return type of the given user-defined function as the return type of
+            the registered user-defined function. `returnType` should not be specified.
+            In this case, this API works as if `register(name, f)`.
+
+            >>> from pyspark.sql.types import IntegerType
+            >>> from pyspark.sql.functions import udf
+            >>> slen = udf(lambda s: len(s), IntegerType())
+            >>> _ = spark.udf.register("slen", slen)
+            >>> spark.sql("SELECT slen('test')").collect()
+            [Row(slen(test)=4)]
+
+            >>> import random
+            >>> from pyspark.sql.functions import udf
+            >>> from pyspark.sql.types import IntegerType
+            >>> random_udf = udf(lambda: random.randint(0, 100), IntegerType()).asNondeterministic()
+            >>> new_random_udf = spark.udf.register("random_udf", random_udf)
+            >>> spark.sql("SELECT random_udf()").collect()  # doctest: +SKIP
+            [Row(random_udf()=82)]
+
+            >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+            >>> @pandas_udf("integer", PandasUDFType.SCALAR)  # doctest: +SKIP
+            ... def add_one(x):
+            ...     return x + 1
+            ...
+            >>> _ = spark.udf.register("add_one", add_one)  # doctest: +SKIP
+            >>> spark.sql("SELECT add_one(id) FROM range(3)").collect()  # doctest: +SKIP
+            [Row(add_one(id)=1), Row(add_one(id)=2), Row(add_one(id)=3)]
+
+            >>> @pandas_udf("integer", PandasUDFType.GROUPED_AGG)  # doctest: +SKIP
+            ... def sum_udf(v):
+            ...     return v.sum()
+            ...
+            >>> _ = spark.udf.register("sum_udf", sum_udf)  # doctest: +SKIP
+            >>> q = "SELECT sum_udf(v1) FROM VALUES (3, 0), (2, 0), (1, 1) tbl(v1, v2) GROUP BY v2"
+            >>> spark.sql(q).collect()  # doctest: +SKIP
+            [Row(sum_udf(v1)=1), Row(sum_udf(v1)=5)]
+
+            .. note:: Registration for a user-defined function (case 2.) was added in
+                Spark 2.3.0.
+        """
+
+        # This is to check whether the input function is from a user-defined function or
+        # Python function.
+        if hasattr(f, 'asNondeterministic'):
+            if returnType is not None:
+                raise TypeError(
+                    "Invalid returnType: data type can not be specified when f is"
+                    "a user-defined function, but got %s." % returnType)
+            if f.evalType not in [PythonEvalType.SQL_BATCHED_UDF,
+                                  PythonEvalType.SQL_SCALAR_PANDAS_UDF,
+                                  PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF]:
+                raise ValueError(
+                    "Invalid f: f must be SQL_BATCHED_UDF, SQL_SCALAR_PANDAS_UDF or "
+                    "SQL_GROUPED_AGG_PANDAS_UDF")
+            register_udf = UserDefinedFunction(f.func, returnType=f.returnType, name=name,
+                                               evalType=f.evalType,
+                                               deterministic=f.deterministic)
+            return_udf = f
+        else:
+            if returnType is None:
+                returnType = StringType()
+            register_udf = UserDefinedFunction(f, returnType=returnType, name=name,
+                                               evalType=PythonEvalType.SQL_BATCHED_UDF)
+            return_udf = register_udf._wrapped()
+        self.sparkSession._jsparkSession.udf().registerPython(name, register_udf._judf)
+        return return_udf
+
+    @ignore_unicode_prefix
+    @since(2.3)
+    def registerJavaFunction(self, name, javaClassName, returnType=None):
+        """Register a Java user-defined function as a SQL function.
+
+        In addition to a name and the function itself, the return type can be optionally specified.
+        When the return type is not specified, it is inferred via reflection.
+
+        :param name: name of the user-defined function
+        :param javaClassName: fully qualified name of java class
+        :param returnType: the return type of the registered Java function. The value can be either
+            a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string.
+
+        >>> from pyspark.sql.types import IntegerType
+        >>> spark.udf.registerJavaFunction(
+        ...     "javaStringLength", "test.org.apache.spark.sql.JavaStringLength", IntegerType())
+        >>> spark.sql("SELECT javaStringLength('test')").collect()
+        [Row(UDF:javaStringLength(test)=4)]
+
+        >>> spark.udf.registerJavaFunction(
+        ...     "javaStringLength2", "test.org.apache.spark.sql.JavaStringLength")
+        >>> spark.sql("SELECT javaStringLength2('test')").collect()
+        [Row(UDF:javaStringLength2(test)=4)]
+
+        >>> spark.udf.registerJavaFunction(
+        ...     "javaStringLength3", "test.org.apache.spark.sql.JavaStringLength", "integer")
+        >>> spark.sql("SELECT javaStringLength3('test')").collect()
+        [Row(UDF:javaStringLength3(test)=4)]
+        """
+
+        jdt = None
+        if returnType is not None:
+            if not isinstance(returnType, DataType):
+                returnType = _parse_datatype_string(returnType)
+            jdt = self.sparkSession._jsparkSession.parseDataType(returnType.json())
+        self.sparkSession._jsparkSession.udf().registerJava(name, javaClassName, jdt)
+
+    @ignore_unicode_prefix
+    @since(2.3)
+    def registerJavaUDAF(self, name, javaClassName):
+        """Register a Java user-defined aggregate function as a SQL function.
+
+        :param name: name of the user-defined aggregate function
+        :param javaClassName: fully qualified name of the Java class
+
+        >>> spark.udf.registerJavaUDAF("javaUDAF", "test.org.apache.spark.sql.MyDoubleAvg")
+        >>> df = spark.createDataFrame([(1, "a"),(2, "b"), (3, "a")],["id", "name"])
+        >>> df.createOrReplaceTempView("df")
+        >>> spark.sql("SELECT name, javaUDAF(id) as avg from df group by name").collect()
+        [Row(name=u'b', avg=102.0), Row(name=u'a', avg=102.0)]
+        """
+
+        self.sparkSession._jsparkSession.udf().registerJavaUDAF(name, javaClassName)
+
+
+def _test():
+    import doctest
+    from pyspark.sql import SparkSession
+    import pyspark.sql.udf
+    globs = pyspark.sql.udf.__dict__.copy()
+    spark = SparkSession.builder\
+        .master("local[4]")\
+        .appName("sql.udf tests")\
+        .getOrCreate()
+    globs['spark'] = spark
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.sql.udf, globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/utils.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdb3a1467f1d8e4ce3f026e877660ee13a0b5ccd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/utils.py
@@ -0,0 +1,194 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import py4j
+
+
+class CapturedException(Exception):
+    def __init__(self, desc, stackTrace):
+        self.desc = desc
+        self.stackTrace = stackTrace
+
+    def __str__(self):
+        return repr(self.desc)
+
+
+class AnalysisException(CapturedException):
+    """
+    Failed to analyze a SQL query plan.
+    """
+
+
+class ParseException(CapturedException):
+    """
+    Failed to parse a SQL command.
+    """
+
+
+class IllegalArgumentException(CapturedException):
+    """
+    Passed an illegal or inappropriate argument.
+    """
+
+
+class StreamingQueryException(CapturedException):
+    """
+    Exception that stopped a :class:`StreamingQuery`.
+    """
+
+
+class QueryExecutionException(CapturedException):
+    """
+    Failed to execute a query.
+    """
+
+
+def capture_sql_exception(f):
+    def deco(*a, **kw):
+        try:
+            return f(*a, **kw)
+        except py4j.protocol.Py4JJavaError as e:
+            s = e.java_exception.toString()
+            stackTrace = '\n\t at '.join(map(lambda x: x.toString(),
+                                             e.java_exception.getStackTrace()))
+            if s.startswith('org.apache.spark.sql.AnalysisException: '):
+                raise AnalysisException(s.split(': ', 1)[1], stackTrace)
+            if s.startswith('org.apache.spark.sql.catalyst.analysis'):
+                raise AnalysisException(s.split(': ', 1)[1], stackTrace)
+            if s.startswith('org.apache.spark.sql.catalyst.parser.ParseException: '):
+                raise ParseException(s.split(': ', 1)[1], stackTrace)
+            if s.startswith('org.apache.spark.sql.streaming.StreamingQueryException: '):
+                raise StreamingQueryException(s.split(': ', 1)[1], stackTrace)
+            if s.startswith('org.apache.spark.sql.execution.QueryExecutionException: '):
+                raise QueryExecutionException(s.split(': ', 1)[1], stackTrace)
+            if s.startswith('java.lang.IllegalArgumentException: '):
+                raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
+            raise
+    return deco
+
+
+def install_exception_handler():
+    """
+    Hook an exception handler into Py4j, which captures some SQL exceptions raised in Java.
+
+    When calling a Java API, Py4j uses `get_return_value` to parse the returned object.
+    If an exception happened in the JVM, the result is a Java exception object and a
+    py4j.protocol.Py4JJavaError is raised. We replace the original `get_return_value`
+    with one that captures the Java exception and raises a Python one (with the same
+    error message).
+
+    It is idempotent and can be called multiple times.
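+
+    A minimal sketch (not executed; assumes an active SparkSession named ``spark``):
+    after installation, an invalid query surfaces as a Python
+    :class:`AnalysisException` instead of a raw ``Py4JJavaError``.
+
+    >>> install_exception_handler()  # doctest: +SKIP
+    >>> spark.sql("SELECT missing_column FROM range(1)")  # doctest: +SKIP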
+    """
+    original = py4j.protocol.get_return_value
+    # py4j.protocol.get_return_value itself stays unpatched, so repeated calls
+    # always wrap the pristine original and never stack decorators.
+    patched = capture_sql_exception(original)
+    # Only patch the copy used in py4j.java_gateway (the Java API call path).
+    py4j.java_gateway.get_return_value = patched
+
+
+def toJArray(gateway, jtype, arr):
+    """
+    Convert a Python list to a Java typed array.
+
+    :param gateway: Py4J gateway
+    :param jtype: Java type of the elements in the array
+    :param arr: Python list to convert
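+
+    A minimal sketch (not executed); ``sc`` is assumed to be an active
+    :class:`SparkContext`, whose Py4J gateway is used here:
+
+    >>> gw = sc._gateway  # doctest: +SKIP
+    >>> jarr = toJArray(gw, gw.jvm.int, [1, 2, 3])  # doctest: +SKIP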
+    """
+    jarr = gateway.new_array(jtype, len(arr))
+    for i in range(0, len(arr)):
+        jarr[i] = arr[i]
+    return jarr
+
+
+def require_minimum_pandas_version():
+    """ Raise ImportError if minimum version of Pandas is not installed
+    """
+    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
+    minimum_pandas_version = "0.19.2"
+
+    from distutils.version import LooseVersion
+    try:
+        import pandas
+        have_pandas = True
+    except ImportError:
+        have_pandas = False
+    if not have_pandas:
+        raise ImportError("Pandas >= %s must be installed; however, "
+                          "it was not found." % minimum_pandas_version)
+    if LooseVersion(pandas.__version__) < LooseVersion(minimum_pandas_version):
+        raise ImportError("Pandas >= %s must be installed; however, "
+                          "your version was %s." % (minimum_pandas_version, pandas.__version__))
+
+
+def require_minimum_pyarrow_version():
+    """ Raise ImportError if minimum version of pyarrow is not installed
+    """
+    # TODO(HyukjinKwon): Relocate and deduplicate the version specification.
+    minimum_pyarrow_version = "0.8.0"
+
+    from distutils.version import LooseVersion
+    try:
+        import pyarrow
+        have_arrow = True
+    except ImportError:
+        have_arrow = False
+    if not have_arrow:
+        raise ImportError("PyArrow >= %s must be installed; however, "
+                          "it was not found." % minimum_pyarrow_version)
+    if LooseVersion(pyarrow.__version__) < LooseVersion(minimum_pyarrow_version):
+        raise ImportError("PyArrow >= %s must be installed; however, "
+                          "your version was %s." % (minimum_pyarrow_version, pyarrow.__version__))
+
+
+def require_test_compiled():
+    """ Raise Exception if test classes are not compiled
+    """
+    import os
+    import glob
+    try:
+        spark_home = os.environ['SPARK_HOME']
+    except KeyError:
+        raise RuntimeError('SPARK_HOME is not defined in environment')
+
+    test_class_path = os.path.join(
+        spark_home, 'sql', 'core', 'target', '*', 'test-classes')
+    paths = glob.glob(test_class_path)
+
+    if len(paths) == 0:
+        raise RuntimeError(
+            "%s doesn't exist. Spark sql test classes are not compiled." % test_class_path)
+
+
+class ForeachBatchFunction(object):
+    """
+    This is the Python implementation of Java interface 'ForeachBatchFunction'. This wraps
+    the user-defined 'foreachBatch' function such that it can be called from the JVM when
+    the query is active.
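+
+    A hedged usage sketch (not executed); user code normally reaches this wrapper
+    indirectly through ``DataStreamWriter.foreachBatch``, where ``stream_df`` is an
+    illustrative streaming DataFrame:
+
+    >>> def handle_batch(batch_df, batch_id):  # doctest: +SKIP
+    ...     batch_df.show()
+    >>> query = stream_df.writeStream.foreachBatch(handle_batch).start()  # doctest: +SKIP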
+    """
+
+    def __init__(self, sql_ctx, func):
+        self.sql_ctx = sql_ctx
+        self.func = func
+
+    def call(self, jdf, batch_id):
+        from pyspark.sql.dataframe import DataFrame
+        try:
+            self.func(DataFrame(jdf, self.sql_ctx), batch_id)
+        except Exception as e:
+            self.error = e
+            raise e
+
+    class Java:
+        implements = ['org.apache.spark.sql.execution.streaming.sources.PythonForeachBatchFunction']
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/window.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/window.py
new file mode 100644
index 0000000000000000000000000000000000000000..e76563dfaa9c8de2f380ead88966cdc7d97dd9bf
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/sql/window.py
@@ -0,0 +1,241 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+
+from pyspark import since, SparkContext
+from pyspark.sql.column import _to_seq, _to_java_column
+
+__all__ = ["Window", "WindowSpec"]
+
+
+def _to_java_cols(cols):
+    sc = SparkContext._active_spark_context
+    if len(cols) == 1 and isinstance(cols[0], list):
+        cols = cols[0]
+    return _to_seq(sc, cols, _to_java_column)
+
+
+class Window(object):
+    """
+    Utility functions for defining window in DataFrames.
+
+    For example:
+
+    >>> # ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+    >>> window = Window.orderBy("date").rowsBetween(Window.unboundedPreceding, Window.currentRow)
+
+    >>> # PARTITION BY country ORDER BY date RANGE BETWEEN 3 PRECEDING AND 3 FOLLOWING
+    >>> window = Window.orderBy("date").partitionBy("country").rangeBetween(-3, 3)
+
+    .. note:: When ordering is not defined, an unbounded window frame (rowFrame,
+         unboundedPreceding, unboundedFollowing) is used by default. When ordering is defined,
+         a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default.
+
+    .. note:: Experimental
+
+    .. versionadded:: 1.4
+    """
+
+    _JAVA_MIN_LONG = -(1 << 63)  # -9223372036854775808
+    _JAVA_MAX_LONG = (1 << 63) - 1  # 9223372036854775807
+    _PRECEDING_THRESHOLD = max(-sys.maxsize, _JAVA_MIN_LONG)
+    _FOLLOWING_THRESHOLD = min(sys.maxsize, _JAVA_MAX_LONG)
+
+    unboundedPreceding = _JAVA_MIN_LONG
+
+    unboundedFollowing = _JAVA_MAX_LONG
+
+    currentRow = 0
+
+    @staticmethod
+    @since(1.4)
+    def partitionBy(*cols):
+        """
+        Creates a :class:`WindowSpec` with the partitioning defined.
+        """
+        sc = SparkContext._active_spark_context
+        jspec = sc._jvm.org.apache.spark.sql.expressions.Window.partitionBy(_to_java_cols(cols))
+        return WindowSpec(jspec)
+
+    @staticmethod
+    @since(1.4)
+    def orderBy(*cols):
+        """
+        Creates a :class:`WindowSpec` with the ordering defined.
+        """
+        sc = SparkContext._active_spark_context
+        jspec = sc._jvm.org.apache.spark.sql.expressions.Window.orderBy(_to_java_cols(cols))
+        return WindowSpec(jspec)
+
+    @staticmethod
+    @since(2.1)
+    def rowsBetween(start, end):
+        """
+        Creates a :class:`WindowSpec` with the frame boundaries defined,
+        from `start` (inclusive) to `end` (inclusive).
+
+        Both `start` and `end` are relative positions from the current row.
+        For example, "0" means "current row", while "-1" means the row before
+        the current row, and "5" means the fifth row after the current row.
+
+        We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
+        and ``Window.currentRow`` to specify special boundary values, rather than using integral
+        values directly.
+
+        :param start: boundary start, inclusive.
+                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      any value less than or equal to -9223372036854775808.
+        :param end: boundary end, inclusive.
+                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    any value greater than or equal to 9223372036854775807.
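+
+        A hedged sketch (not executed); ``df`` is an illustrative DataFrame with
+        columns ``category``, ``id`` and ``value``:
+
+        >>> from pyspark.sql import functions as F  # doctest: +SKIP
+        >>> w = Window.partitionBy("category").orderBy("id").rowsBetween(
+        ...     Window.unboundedPreceding, Window.currentRow)  # doctest: +SKIP
+        >>> df.withColumn("running_sum", F.sum("value").over(w))  # doctest: +SKIP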
+        """
+        if start <= Window._PRECEDING_THRESHOLD:
+            start = Window.unboundedPreceding
+        if end >= Window._FOLLOWING_THRESHOLD:
+            end = Window.unboundedFollowing
+        sc = SparkContext._active_spark_context
+        jspec = sc._jvm.org.apache.spark.sql.expressions.Window.rowsBetween(start, end)
+        return WindowSpec(jspec)
+
+    @staticmethod
+    @since(2.1)
+    def rangeBetween(start, end):
+        """
+        Creates a :class:`WindowSpec` with the frame boundaries defined,
+        from `start` (inclusive) to `end` (inclusive).
+
+        Both `start` and `end` are offsets relative to the value of the current row
+        (based on the ordering expression). For example, "0" means "the current row's
+        value", "-1" means one below the current row's value, and "5" means five
+        above the current row's value.
+
+        We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
+        and ``Window.currentRow`` to specify special boundary values, rather than using integral
+        values directly.
+
+        :param start: boundary start, inclusive.
+                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      any value less than or equal to max(-sys.maxsize, -9223372036854775808).
+        :param end: boundary end, inclusive.
+                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    any value greater than or equal to min(sys.maxsize, 9223372036854775807).
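+
+        A hedged sketch (not executed); ``df`` is an illustrative DataFrame ordered
+        by a numeric column ``id``, so the frame covers rows whose ``id`` is within
+        one unit of the current row's ``id``:
+
+        >>> from pyspark.sql import functions as F  # doctest: +SKIP
+        >>> w = Window.orderBy("id").rangeBetween(-1, 1)  # doctest: +SKIP
+        >>> df.withColumn("neighborhood_sum", F.sum("value").over(w))  # doctest: +SKIP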
+        """
+        if start <= Window._PRECEDING_THRESHOLD:
+            start = Window.unboundedPreceding
+        if end >= Window._FOLLOWING_THRESHOLD:
+            end = Window.unboundedFollowing
+        sc = SparkContext._active_spark_context
+        jspec = sc._jvm.org.apache.spark.sql.expressions.Window.rangeBetween(start, end)
+        return WindowSpec(jspec)
+
+
+class WindowSpec(object):
+    """
+    A window specification that defines the partitioning, ordering,
+    and frame boundaries.
+
+    Use the static methods in :class:`Window` to create a :class:`WindowSpec`.
+
+    .. note:: Experimental
+
+    .. versionadded:: 1.4
+    """
+
+    def __init__(self, jspec):
+        self._jspec = jspec
+
+    @since(1.4)
+    def partitionBy(self, *cols):
+        """
+        Defines the partitioning columns in a :class:`WindowSpec`.
+
+        :param cols: names of columns or expressions
+        """
+        return WindowSpec(self._jspec.partitionBy(_to_java_cols(cols)))
+
+    @since(1.4)
+    def orderBy(self, *cols):
+        """
+        Defines the ordering columns in a :class:`WindowSpec`.
+
+        :param cols: names of columns or expressions
+        """
+        return WindowSpec(self._jspec.orderBy(_to_java_cols(cols)))
+
+    @since(1.4)
+    def rowsBetween(self, start, end):
+        """
+        Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+
+        Both `start` and `end` are relative positions from the current row.
+        For example, "0" means "current row", while "-1" means the row before
+        the current row, and "5" means the fifth row after the current row.
+
+        We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
+        and ``Window.currentRow`` to specify special boundary values, rather than using integral
+        values directly.
+
+        :param start: boundary start, inclusive.
+                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      any value less than or equal to max(-sys.maxsize, -9223372036854775808).
+        :param end: boundary end, inclusive.
+                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    any value greater than or equal to min(sys.maxsize, 9223372036854775807).
+        """
+        if start <= Window._PRECEDING_THRESHOLD:
+            start = Window.unboundedPreceding
+        if end >= Window._FOLLOWING_THRESHOLD:
+            end = Window.unboundedFollowing
+        return WindowSpec(self._jspec.rowsBetween(start, end))
+
+    @since(1.4)
+    def rangeBetween(self, start, end):
+        """
+        Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+
+        Both `start` and `end` are offsets relative to the value of the current row
+        (based on the ordering expression). For example, "0" means "the current row's
+        value", "-1" means one below the current row's value, and "5" means five
+        above the current row's value.
+
+        We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
+        and ``Window.currentRow`` to specify special boundary values, rather than using integral
+        values directly.
+
+        :param start: boundary start, inclusive.
+                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      any value less than or equal to max(-sys.maxsize, -9223372036854775808).
+        :param end: boundary end, inclusive.
+                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    any value greater than or equal to min(sys.maxsize, 9223372036854775807).
+        """
+        if start <= Window._PRECEDING_THRESHOLD:
+            start = Window.unboundedPreceding
+        if end >= Window._FOLLOWING_THRESHOLD:
+            end = Window.unboundedFollowing
+        return WindowSpec(self._jspec.rangeBetween(start, end))
+
+
+def _test():
+    import doctest
+    SparkContext('local[4]', 'PythonTest')
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/statcounter.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/statcounter.py
new file mode 100644
index 0000000000000000000000000000000000000000..03ea0b6d33c9d0d5816fef9cc33392b50039847a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/statcounter.py
@@ -0,0 +1,158 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is ported from spark/util/StatCounter.scala
+
+import copy
+import math
+
+try:
+    from numpy import maximum, minimum, sqrt
+except ImportError:
+    maximum = max
+    minimum = min
+    sqrt = math.sqrt
+
+
+class StatCounter(object):
+
+    def __init__(self, values=None):
+        if values is None:
+            values = list()
+        self.n = 0    # Running count of our values
+        self.mu = 0.0  # Running mean of our values
+        self.m2 = 0.0  # Running variance numerator (sum of (x - mean)^2)
+        self.maxValue = float("-inf")
+        self.minValue = float("inf")
+
+        for v in values:
+            self.merge(v)
+
+    # Add a value into this StatCounter, updating the internal statistics.
+    def merge(self, value):
+        delta = value - self.mu
+        self.n += 1
+        self.mu += delta / self.n
+        self.m2 += delta * (value - self.mu)
+        self.maxValue = maximum(self.maxValue, value)
+        self.minValue = minimum(self.minValue, value)
+
+        return self
+
+    # Merge another StatCounter into this one, adding up the internal statistics.
+    def mergeStats(self, other):
+        if not isinstance(other, StatCounter):
+            raise Exception("Can only merge Statcounters!")
+
+        if other is self:  # reference equality holds
+            self.merge(copy.deepcopy(other))  # Avoid overwriting fields in a weird order
+        else:
+            if self.n == 0:
+                self.mu = other.mu
+                self.m2 = other.m2
+                self.n = other.n
+                self.maxValue = other.maxValue
+                self.minValue = other.minValue
+
+            elif other.n != 0:
+                delta = other.mu - self.mu
+                if other.n * 10 < self.n:
+                    self.mu = self.mu + (delta * other.n) / (self.n + other.n)
+                elif self.n * 10 < other.n:
+                    self.mu = other.mu - (delta * self.n) / (self.n + other.n)
+                else:
+                    self.mu = (self.mu * self.n + other.mu * other.n) / (self.n + other.n)
+
+                self.maxValue = maximum(self.maxValue, other.maxValue)
+                self.minValue = minimum(self.minValue, other.minValue)
+
+                self.m2 += other.m2 + (delta * delta * self.n * other.n) / (self.n + other.n)
+                self.n += other.n
+        return self
+
+    # Clone this StatCounter
+    def copy(self):
+        return copy.deepcopy(self)
+
+    def count(self):
+        return int(self.n)
+
+    def mean(self):
+        return self.mu
+
+    def sum(self):
+        return self.n * self.mu
+
+    def min(self):
+        return self.minValue
+
+    def max(self):
+        return self.maxValue
+
+    # Return the variance of the values.
+    def variance(self):
+        if self.n == 0:
+            return float('nan')
+        else:
+            return self.m2 / self.n
+
+    #
+    # Return the sample variance, which corrects for bias in estimating the variance by dividing
+    # by N-1 instead of N.
+    #
+    def sampleVariance(self):
+        if self.n <= 1:
+            return float('nan')
+        else:
+            return self.m2 / (self.n - 1)
+
+    # Return the standard deviation of the values.
+    def stdev(self):
+        return sqrt(self.variance())
+
+    #
+    # Return the sample standard deviation of the values, which corrects for bias in estimating the
+    # variance by dividing by N-1 instead of N.
+    #
+    def sampleStdev(self):
+        return sqrt(self.sampleVariance())
+
+    def asDict(self, sample=False):
+        """Returns the :class:`StatCounter` members as a ``dict``.
+
+        >>> sc.parallelize([1., 2., 3., 4.]).stats().asDict()  # doctest: +SKIP
+        {'count': 4L,
+         'max': 4.0,
+         'mean': 2.5,
+         'min': 1.0,
+         'stdev': 1.2909944487358056,
+         'sum': 10.0,
+         'variance': 1.6666666666666667}
+        """
+        return {
+            'count': self.count(),
+            'mean': self.mean(),
+            'sum': self.sum(),
+            'min': self.min(),
+            'max': self.max(),
+            'stdev': self.stdev() if sample else self.sampleStdev(),
+            'variance': self.variance() if sample else self.sampleVariance()
+        }
+
+    def __repr__(self):
+        return ("(count: %s, mean: %s, stdev: %s, max: %s, min: %s)" %
+                (self.count(), self.mean(), self.stdev(), self.max(), self.min()))
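+
+
+# A minimal usage sketch (illustrative only): StatCounter also works standalone,
+# outside of an RDD, for accumulating running statistics.
+#
+#     s = StatCounter([1.0, 2.0, 3.0])
+#     s.mergeStats(StatCounter([4.0]))
+#     s.count(), s.mean()   # -> (4, 2.5)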
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/status.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/status.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6fa7dd3144d4f2dbd70588f07b556e1af12efb7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/status.py
@@ -0,0 +1,96 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from collections import namedtuple
+
+__all__ = ["SparkJobInfo", "SparkStageInfo", "StatusTracker"]
+
+
+class SparkJobInfo(namedtuple("SparkJobInfo", "jobId stageIds status")):
+    """
+    Exposes information about Spark Jobs.
+    """
+
+
+class SparkStageInfo(namedtuple("SparkStageInfo",
+                                "stageId currentAttemptId name numTasks numActiveTasks "
+                                "numCompletedTasks numFailedTasks")):
+    """
+    Exposes information about Spark Stages.
+    """
+
+
+class StatusTracker(object):
+    """
+    Low-level status reporting APIs for monitoring job and stage progress.
+
+    These APIs intentionally provide very weak consistency semantics;
+    consumers of these APIs should be prepared to handle empty / missing
+    information. For example, a job's stage ids may be known but the status
+    API may not have any information about the details of those stages, so
+    `getStageInfo` could potentially return `None` for a valid stage id.
+
+    To limit memory usage, these APIs only provide information on recent
+    jobs / stages.  These APIs will provide information for the last
+    `spark.ui.retainedStages` stages and `spark.ui.retainedJobs` jobs.
+    """
+    def __init__(self, jtracker):
+        self._jtracker = jtracker
+
+    def getJobIdsForGroup(self, jobGroup=None):
+        """
+        Return a list of all known jobs in a particular job group.  If
+        `jobGroup` is None, then returns all known jobs that are not
+        associated with a job group.
+
+        The returned list may contain running, failed, and completed jobs,
+        and may vary across invocations of this method. This method does
+        not guarantee the order of the elements in its result.
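+
+        A minimal sketch (not executed; assumes an active :class:`SparkContext` ``sc``):
+
+        >>> tracker = sc.statusTracker()  # doctest: +SKIP
+        >>> tracker.getJobIdsForGroup()  # doctest: +SKIP
+        [0, 1, 2]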
+        """
+        return list(self._jtracker.getJobIdsForGroup(jobGroup))
+
+    def getActiveStageIds(self):
+        """
+        Returns a list containing the ids of all active stages.
+        """
+        return sorted(list(self._jtracker.getActiveStageIds()))
+
+    def getActiveJobsIds(self):
+        """
+        Returns a list containing the ids of all active jobs.
+        """
+        return sorted(list(self._jtracker.getActiveJobIds()))
+
+    def getJobInfo(self, jobId):
+        """
+        Returns a :class:`SparkJobInfo` object, or None if the job info
+        could not be found or was garbage collected.
+        """
+        job = self._jtracker.getJobInfo(jobId)
+        if job is not None:
+            return SparkJobInfo(jobId, job.stageIds(), str(job.status()))
+
+    def getStageInfo(self, stageId):
+        """
+        Returns a :class:`SparkStageInfo` object, or None if the stage
+        info could not be found or was garbage collected.
+        """
+        stage = self._jtracker.getStageInfo(stageId)
+        if stage is not None:
+            # TODO: fetch them in batch for better performance
+            attrs = [getattr(stage, f)() for f in SparkStageInfo._fields[1:]]
+            return SparkStageInfo(stageId, *attrs)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/storagelevel.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/storagelevel.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f29646c07432e6104dd3bdc46aa3af946e236f7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/storagelevel.py
@@ -0,0 +1,71 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+__all__ = ["StorageLevel"]
+
+
+class StorageLevel(object):
+
+    """
+    Flags for controlling the storage of an RDD. Each StorageLevel records whether to use memory,
+    whether to drop the RDD to disk if it falls out of memory, whether to keep the data in memory
+    in a Java-specific serialized format, and whether to replicate the RDD partitions on multiple
+    nodes. It also contains static constants for some commonly used storage levels, such as
+    MEMORY_ONLY. Since the data is always serialized on the Python side, all the constants use
+    the serialized formats.
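+
+    A minimal usage sketch (not executed; ``sc`` is an active :class:`SparkContext`):
+
+    >>> rdd = sc.parallelize(range(100))  # doctest: +SKIP
+    >>> rdd.persist(StorageLevel.MEMORY_AND_DISK_2)  # doctest: +SKIP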
+    """
+
+    def __init__(self, useDisk, useMemory, useOffHeap, deserialized, replication=1):
+        self.useDisk = useDisk
+        self.useMemory = useMemory
+        self.useOffHeap = useOffHeap
+        self.deserialized = deserialized
+        self.replication = replication
+
+    def __repr__(self):
+        return "StorageLevel(%s, %s, %s, %s, %s)" % (
+            self.useDisk, self.useMemory, self.useOffHeap, self.deserialized, self.replication)
+
+    def __str__(self):
+        result = ""
+        result += "Disk " if self.useDisk else ""
+        result += "Memory " if self.useMemory else ""
+        result += "OffHeap " if self.useOffHeap else ""
+        result += "Deserialized " if self.deserialized else "Serialized "
+        result += "%sx Replicated" % self.replication
+        return result
+
+StorageLevel.DISK_ONLY = StorageLevel(True, False, False, False)
+StorageLevel.DISK_ONLY_2 = StorageLevel(True, False, False, False, 2)
+StorageLevel.MEMORY_ONLY = StorageLevel(False, True, False, False)
+StorageLevel.MEMORY_ONLY_2 = StorageLevel(False, True, False, False, 2)
+StorageLevel.MEMORY_AND_DISK = StorageLevel(True, True, False, False)
+StorageLevel.MEMORY_AND_DISK_2 = StorageLevel(True, True, False, False, 2)
+StorageLevel.OFF_HEAP = StorageLevel(True, True, True, False, 1)
+
+"""
+.. note:: The following four storage level constants are deprecated in 2.0, since the records
+    will always be serialized in Python.
+"""
+StorageLevel.MEMORY_ONLY_SER = StorageLevel.MEMORY_ONLY
+""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_ONLY`` instead."""
+StorageLevel.MEMORY_ONLY_SER_2 = StorageLevel.MEMORY_ONLY_2
+""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_ONLY_2`` instead."""
+StorageLevel.MEMORY_AND_DISK_SER = StorageLevel.MEMORY_AND_DISK
+""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_AND_DISK`` instead."""
+StorageLevel.MEMORY_AND_DISK_SER_2 = StorageLevel.MEMORY_AND_DISK_2
+""".. note:: Deprecated in 2.0, use ``StorageLevel.MEMORY_AND_DISK_2`` instead."""
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/__init__.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..66e8f8ef001e32137a3473d82dfd27a565959066
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/__init__.py
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pyspark.streaming.context import StreamingContext
+from pyspark.streaming.dstream import DStream
+from pyspark.streaming.listener import StreamingListener
+
+__all__ = ['StreamingContext', 'DStream', 'StreamingListener']
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/context.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/context.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fa57ca85b37b2c5a3f5178290820d7e029879df
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/context.py
@@ -0,0 +1,356 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import os
+import sys
+
+from py4j.java_gateway import java_import, JavaObject
+
+from pyspark import RDD, SparkConf
+from pyspark.serializers import NoOpSerializer, UTF8Deserializer, CloudPickleSerializer
+from pyspark.context import SparkContext
+from pyspark.storagelevel import StorageLevel
+from pyspark.streaming.dstream import DStream
+from pyspark.streaming.util import TransformFunction, TransformFunctionSerializer
+
+__all__ = ["StreamingContext"]
+
+
+class StreamingContext(object):
+    """
+    Main entry point for Spark Streaming functionality. A StreamingContext
+    represents the connection to a Spark cluster, and can be used to create
+    L{DStream}s from various input sources. It can be created from an existing
+    L{SparkContext}.
+    After creating and transforming DStreams, the streaming computation can
+    be started and stopped using `context.start()` and `context.stop()`,
+    respectively. `context.awaitTermination()` allows the current thread
+    to wait for the termination of the context by `stop()` or by an exception.
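+
+    A minimal usage sketch (not executed; ``sc`` is an active :class:`SparkContext`):
+
+    >>> ssc = StreamingContext(sc, batchDuration=1)  # doctest: +SKIP
+    >>> lines = ssc.socketTextStream("localhost", 9999)  # doctest: +SKIP
+    >>> lines.pprint()  # doctest: +SKIP
+    >>> ssc.start()  # doctest: +SKIP
+    >>> ssc.awaitTermination()  # doctest: +SKIP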
+    """
+    _transformerSerializer = None
+
+    # Reference to a currently active StreamingContext
+    _activeContext = None
+
+    def __init__(self, sparkContext, batchDuration=None, jssc=None):
+        """
+        Create a new StreamingContext.
+
+        @param sparkContext: L{SparkContext} object.
+        @param batchDuration: the time interval (in seconds) at which streaming
+                              data will be divided into batches
+        """
+
+        self._sc = sparkContext
+        self._jvm = self._sc._jvm
+        self._jssc = jssc or self._initialize_context(self._sc, batchDuration)
+
+    def _initialize_context(self, sc, duration):
+        self._ensure_initialized()
+        return self._jvm.JavaStreamingContext(sc._jsc, self._jduration(duration))
+
+    def _jduration(self, seconds):
+        """
+        Create Duration object given number of seconds
+        """
+        return self._jvm.Duration(int(seconds * 1000))
+
+    @classmethod
+    def _ensure_initialized(cls):
+        SparkContext._ensure_initialized()
+        gw = SparkContext._gateway
+
+        java_import(gw.jvm, "org.apache.spark.streaming.*")
+        java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
+        java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")
+
+        from pyspark.java_gateway import ensure_callback_server_started
+        ensure_callback_server_started(gw)
+
+        # register serializer for TransformFunction
+        # it happens before creating SparkContext when loading from checkpointing
+        cls._transformerSerializer = TransformFunctionSerializer(
+            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
+
+    @classmethod
+    def getOrCreate(cls, checkpointPath, setupFunc):
+        """
+        Either recreate a StreamingContext from checkpoint data or create a new StreamingContext.
+        If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be
+        recreated from the checkpoint data. If the data does not exist, then the provided setupFunc
+        will be used to create a new context.
+
+        @param checkpointPath: Checkpoint directory used in an earlier streaming program
+        @param setupFunc:      Function to create a new context and setup DStreams
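+
+        A hedged sketch (not executed); ``create_context`` is an illustrative
+        user-supplied function that builds and returns a new StreamingContext:
+
+        >>> ssc = StreamingContext.getOrCreate("/tmp/checkpoint", create_context)  # doctest: +SKIP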
+        """
+        cls._ensure_initialized()
+        gw = SparkContext._gateway
+
+        # Check whether valid checkpoint information exists in the given path
+        ssc_option = gw.jvm.StreamingContextPythonHelper().tryRecoverFromCheckpoint(checkpointPath)
+        if ssc_option.isEmpty():
+            ssc = setupFunc()
+            ssc.checkpoint(checkpointPath)
+            return ssc
+
+        jssc = gw.jvm.JavaStreamingContext(ssc_option.get())
+
+        # If there is already an active instance of Python SparkContext use it, or create a new one
+        if not SparkContext._active_spark_context:
+            jsc = jssc.sparkContext()
+            conf = SparkConf(_jconf=jsc.getConf())
+            SparkContext(conf=conf, gateway=gw, jsc=jsc)
+
+        sc = SparkContext._active_spark_context
+
+        # update ctx in serializer
+        cls._transformerSerializer.ctx = sc
+        return StreamingContext(sc, None, jssc)
+
+    @classmethod
+    def getActive(cls):
+        """
+        Return either the currently active StreamingContext (i.e., if there is a context started
+        but not stopped) or None.
+        """
+        activePythonContext = cls._activeContext
+        if activePythonContext is not None:
+            # Verify that the current running Java StreamingContext is active and is the same one
+            # backing the supposedly active Python context
+            activePythonContextJavaId = activePythonContext._jssc.ssc().hashCode()
+            activeJvmContextOption = activePythonContext._jvm.StreamingContext.getActive()
+
+            if activeJvmContextOption.isEmpty():
+                cls._activeContext = None
+            elif activeJvmContextOption.get().hashCode() != activePythonContextJavaId:
+                cls._activeContext = None
+                raise Exception("JVM's active JavaStreamingContext is not the JavaStreamingContext "
+                                "backing the action Python StreamingContext. This is unexpected.")
+        return cls._activeContext
+
+    @classmethod
+    def getActiveOrCreate(cls, checkpointPath, setupFunc):
+        """
+        Either return the active StreamingContext (i.e. currently started but not stopped),
+        or recreate a StreamingContext from checkpoint data or create a new StreamingContext
+        using the provided setupFunc. If the checkpointPath is None or does not contain
+        valid checkpoint data, then setupFunc will be called to create a new context and setup
+        DStreams.
+
+        @param checkpointPath: Checkpoint directory used in an earlier streaming program. Can be
+                               None if the intention is to always create a new context when there
+                               is no active context.
+        @param setupFunc:      Function to create a new JavaStreamingContext and setup DStreams
+        """
+
+        if setupFunc is None:
+            raise Exception("setupFunc cannot be None")
+        activeContext = cls.getActive()
+        if activeContext is not None:
+            return activeContext
+        elif checkpointPath is not None:
+            return cls.getOrCreate(checkpointPath, setupFunc)
+        else:
+            return setupFunc()
+
+    @property
+    def sparkContext(self):
+        """
+        Return SparkContext which is associated with this StreamingContext.
+        """
+        return self._sc
+
+    def start(self):
+        """
+        Start the execution of the streams.
+        """
+        self._jssc.start()
+        StreamingContext._activeContext = self
+
+    def awaitTermination(self, timeout=None):
+        """
+        Wait for the execution to stop.
+
+        @param timeout: time to wait in seconds
+        """
+        if timeout is None:
+            self._jssc.awaitTermination()
+        else:
+            self._jssc.awaitTerminationOrTimeout(int(timeout * 1000))
+
+    def awaitTerminationOrTimeout(self, timeout):
+        """
+        Wait for the execution to stop. Return `True` if it is stopped, raise the
+        reported error if one occurred during the execution, or return `False` if
+        the waiting time elapsed before the execution stopped.
+
+        @param timeout: time to wait in seconds
+        """
+        return self._jssc.awaitTerminationOrTimeout(int(timeout * 1000))
+
+    def stop(self, stopSparkContext=True, stopGraceFully=False):
+        """
+        Stop the execution of the streams, with option of ensuring all
+        received data has been processed.
+
+        @param stopSparkContext: Stop the associated SparkContext or not
+        @param stopGraceFully: Stop gracefully by waiting for the processing
+                               of all received data to be completed
+        """
+        self._jssc.stop(stopSparkContext, stopGraceFully)
+        StreamingContext._activeContext = None
+        if stopSparkContext:
+            self._sc.stop()
+
+    def remember(self, duration):
+        """
+        Set each DStreams in this context to remember RDDs it generated
+        in the last given duration. DStreams remember RDDs only for a
+        limited duration of time and releases them for garbage collection.
+        This method allows the developer to specify how long to remember
+        the RDDs (if the developer wishes to query old data outside the
+        DStream computation).
+
+        @param duration: Minimum duration (in seconds) that each DStream
+                        should remember its RDDs
+        """
+        self._jssc.remember(self._jduration(duration))
+
+    def checkpoint(self, directory):
+        """
+        Sets the context to periodically checkpoint the DStream operations for master
+        fault-tolerance. The graph will be checkpointed every batch interval.
+
+        @param directory: HDFS-compatible directory where the checkpoint data
+                         will be reliably stored
+        """
+        self._jssc.checkpoint(directory)
+
+    def socketTextStream(self, hostname, port, storageLevel=StorageLevel.MEMORY_AND_DISK_2):
+        """
+        Create an input stream from a TCP source hostname:port. Data is received
+        using a TCP socket and the received bytes are interpreted as UTF8-encoded,
+        ``\\n``-delimited lines.
+
+        @param hostname:      Hostname to connect to for receiving data
+        @param port:          Port to connect to for receiving data
+        @param storageLevel:  Storage level to use for storing the received objects
+        """
+        jlevel = self._sc._getJavaStorageLevel(storageLevel)
+        return DStream(self._jssc.socketTextStream(hostname, port, jlevel), self,
+                       UTF8Deserializer())
+
+    def textFileStream(self, directory):
+        """
+        Create an input stream that monitors a Hadoop-compatible file system
+        for new files and reads them as text files. Files must be written to the
+        monitored directory by "moving" them from another location within the same
+        file system. File names starting with . are ignored.
+        """
+        return DStream(self._jssc.textFileStream(directory), self, UTF8Deserializer())
+
+    def binaryRecordsStream(self, directory, recordLength):
+        """
+        Create an input stream that monitors a Hadoop-compatible file system
+        for new files and reads them as flat binary files with records of
+        fixed length. Files must be written to the monitored directory by "moving"
+        them from another location within the same file system.
+        File names starting with . are ignored.
+
+        @param directory:       Directory to load data from
+        @param recordLength:    Length of each record in bytes
+        """
+        return DStream(self._jssc.binaryRecordsStream(directory, recordLength), self,
+                       NoOpSerializer())
+
+    def _check_serializers(self, rdds):
+        # make sure they have same serializer
+        if len(set(rdd._jrdd_deserializer for rdd in rdds)) > 1:
+            for i in range(len(rdds)):
+                # reset them to sc.serializer
+                rdds[i] = rdds[i]._reserialize()
+
+    def queueStream(self, rdds, oneAtATime=True, default=None):
+        """
+        Create an input stream from a queue of RDDs or lists. In each batch,
+        it will process either one or all of the RDDs returned by the queue.
+
+        .. note:: Changes to the queue after the stream is created will not be recognized.
+
+        @param rdds:       Queue of RDDs
+        @param oneAtATime: whether to process only one RDD per batch, or all of
+                           them at once
+        @param default:    The default RDD to process if the queue is empty
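+
+        A minimal sketch (not executed):
+
+        >>> rdd_queue = [ssc.sparkContext.parallelize([i]) for i in range(5)]  # doctest: +SKIP
+        >>> stream = ssc.queueStream(rdd_queue)  # doctest: +SKIP
+        >>> stream.pprint()  # doctest: +SKIP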
+        """
+        if default and not isinstance(default, RDD):
+            default = self._sc.parallelize(default)
+
+        if not rdds and default:
+            rdds = [rdds]
+
+        if rdds and not isinstance(rdds[0], RDD):
+            rdds = [self._sc.parallelize(input) for input in rdds]
+        self._check_serializers(rdds)
+
+        queue = self._jvm.PythonDStream.toRDDQueue([r._jrdd for r in rdds])
+        if default:
+            default = default._reserialize(rdds[0]._jrdd_deserializer)
+            jdstream = self._jssc.queueStream(queue, oneAtATime, default._jrdd)
+        else:
+            jdstream = self._jssc.queueStream(queue, oneAtATime)
+        return DStream(jdstream, self, rdds[0]._jrdd_deserializer)
+
+    def transform(self, dstreams, transformFunc):
+        """
+        Create a new DStream in which each RDD is generated by applying
+        a function on RDDs of the DStreams. The order of the JavaRDDs in
+        the transform function parameter will be the same as the order
+        of corresponding DStreams in the list.
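+
+        A hedged sketch (not executed); ``stream1`` and ``stream2`` are illustrative
+        DStreams created on the same StreamingContext:
+
+        >>> unioned = ssc.transform(
+        ...     [stream1, stream2],
+        ...     lambda rdds: rdds[0].union(rdds[1]))  # doctest: +SKIP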
+        """
+        jdstreams = [d._jdstream for d in dstreams]
+        # change the final serializer to sc.serializer
+        func = TransformFunction(self._sc,
+                                 lambda t, *rdds: transformFunc(rdds),
+                                 *[d._jrdd_deserializer for d in dstreams])
+        jfunc = self._jvm.TransformFunction(func)
+        jdstream = self._jssc.transform(jdstreams, jfunc)
+        return DStream(jdstream, self, self._sc.serializer)
+
+    def union(self, *dstreams):
+        """
+        Create a unified DStream from multiple DStreams of the same
+        type and same slide duration.
+        """
+        if not dstreams:
+            raise ValueError("should have at least one DStream to union")
+        if len(dstreams) == 1:
+            return dstreams[0]
+        if len(set(s._jrdd_deserializer for s in dstreams)) > 1:
+            raise ValueError("All DStreams should have same serializer")
+        if len(set(s._slideDuration for s in dstreams)) > 1:
+            raise ValueError("All DStreams should have same slide duration")
+        first = dstreams[0]
+        jrest = [d._jdstream for d in dstreams[1:]]
+        return DStream(self._jssc.union(first._jdstream, jrest), self, first._jrdd_deserializer)
+
+    def addStreamingListener(self, streamingListener):
+        """
+        Add a ``org.apache.spark.streaming.scheduler.StreamingListener`` object for
+        receiving system events related to streaming.
+        """
+        self._jssc.addStreamingListener(self._jvm.JavaStreamingListenerWrapper(
+            self._jvm.PythonStreamingListenerWrapper(streamingListener)))
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/dstream.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/dstream.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce42a857d0c067b5a9984e32360dcf6a10524a27
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/dstream.py
@@ -0,0 +1,647 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+import operator
+import time
+from itertools import chain
+from datetime import datetime
+
+if sys.version < "3":
+    from itertools import imap as map, ifilter as filter
+else:
+    long = int
+
+from py4j.protocol import Py4JJavaError
+
+from pyspark import RDD
+from pyspark.storagelevel import StorageLevel
+from pyspark.streaming.util import rddToFileName, TransformFunction
+from pyspark.rdd import portable_hash
+from pyspark.resultiterable import ResultIterable
+
+__all__ = ["DStream"]
+
+
+class DStream(object):
+    """
+    A Discretized Stream (DStream), the basic abstraction in Spark Streaming,
+    is a continuous sequence of RDDs (of the same type) representing a
+    continuous stream of data (see L{RDD} in the Spark core documentation
+    for more details on RDDs).
+
+    DStreams can either be created from live data (such as data from TCP
+    sockets, Kafka, Flume, etc.) using a L{StreamingContext}, or they can be
+    generated by transforming existing DStreams using operations such as
+    `map`, `window` and `reduceByKeyAndWindow`. While a Spark Streaming
+    program is running, each DStream periodically generates an RDD, either
+    from live data or by transforming the RDD generated by a parent DStream.
+
+    Internally, a DStream is characterized by a few basic properties:
+     - A list of other DStreams that the DStream depends on
+     - A time interval at which the DStream generates an RDD
+     - A function that is used to generate an RDD after each time interval
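+
+    A minimal word-count sketch (not executed); ``lines`` is an illustrative
+    DStream of text lines:
+
+    >>> counts = (lines.flatMap(lambda line: line.split(" "))
+    ...                .map(lambda word: (word, 1))
+    ...                .reduceByKey(lambda a, b: a + b))  # doctest: +SKIP
+    >>> counts.pprint()  # doctest: +SKIP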
+    """
+    def __init__(self, jdstream, ssc, jrdd_deserializer):
+        self._jdstream = jdstream
+        self._ssc = ssc
+        self._sc = ssc._sc
+        self._jrdd_deserializer = jrdd_deserializer
+        self.is_cached = False
+        self.is_checkpointed = False
+
+    def context(self):
+        """
+        Return the StreamingContext associated with this DStream
+        """
+        return self._ssc
+
+    def count(self):
+        """
+        Return a new DStream in which each RDD has a single element
+        generated by counting each RDD of this DStream.
+        """
+        return self.mapPartitions(lambda i: [sum(1 for _ in i)]).reduce(operator.add)
+
+    def filter(self, f):
+        """
+        Return a new DStream containing only the elements that satisfy predicate.
+        """
+        def func(iterator):
+            return filter(f, iterator)
+        return self.mapPartitions(func, True)
+
+    def flatMap(self, f, preservesPartitioning=False):
+        """
+        Return a new DStream by applying a function to all elements of
+        this DStream, and then flattening the results
+        """
+        def func(s, iterator):
+            return chain.from_iterable(map(f, iterator))
+        return self.mapPartitionsWithIndex(func, preservesPartitioning)
+
+    def map(self, f, preservesPartitioning=False):
+        """
+        Return a new DStream by applying a function to each element of DStream.
+        """
+        def func(iterator):
+            return map(f, iterator)
+        return self.mapPartitions(func, preservesPartitioning)
+
+    def mapPartitions(self, f, preservesPartitioning=False):
+        """
+        Return a new DStream in which each RDD is generated by applying
+        mapPartitions() to each RDD of this DStream.
+        """
+        def func(s, iterator):
+            return f(iterator)
+        return self.mapPartitionsWithIndex(func, preservesPartitioning)
+
+    def mapPartitionsWithIndex(self, f, preservesPartitioning=False):
+        """
+        Return a new DStream in which each RDD is generated by applying
+        mapPartitionsWithIndex() to each RDD of this DStream.
+        """
+        return self.transform(lambda rdd: rdd.mapPartitionsWithIndex(f, preservesPartitioning))
+
+    def reduce(self, func):
+        """
+        Return a new DStream in which each RDD has a single element
+        generated by reducing each RDD of this DStream.
+        """
+        return self.map(lambda x: (None, x)).reduceByKey(func, 1).map(lambda x: x[1])
+
+    def reduceByKey(self, func, numPartitions=None):
+        """
+        Return a new DStream by applying reduceByKey to each RDD.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+        return self.combineByKey(lambda x: x, func, func, numPartitions)
+
+    def combineByKey(self, createCombiner, mergeValue, mergeCombiners,
+                     numPartitions=None):
+        """
+        Return a new DStream by applying combineByKey to each RDD.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+
+        def func(rdd):
+            return rdd.combineByKey(createCombiner, mergeValue, mergeCombiners, numPartitions)
+        return self.transform(func)
+
+    def partitionBy(self, numPartitions, partitionFunc=portable_hash):
+        """
+        Return a copy of the DStream in which each RDD is partitioned
+        using the specified partitioner.
+        """
+        return self.transform(lambda rdd: rdd.partitionBy(numPartitions, partitionFunc))
+
+    def foreachRDD(self, func):
+        """
+        Apply a function to each RDD in this DStream.
+        """
+        if func.__code__.co_argcount == 1:
+            old_func = func
+            func = lambda t, rdd: old_func(rdd)
+        jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer)
+        api = self._ssc._jvm.PythonDStream
+        api.callForeachRDD(self._jdstream, jfunc)
+
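+    # Illustrative sketch (not part of the original source): `foreachRDD` is the generic
+    # output operator. Assuming a DStream `counts` of (word, count) pairs, each batch could
+    # be logged on the driver like this; both the one- and two-argument forms are accepted,
+    # as handled by the wrapper above.
+    #
+    #   def dump_batch(time, rdd):
+    #       print("batch at %s contained %d records" % (time, rdd.count()))
+    #
+    #   counts.foreachRDD(dump_batch)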
+    def pprint(self, num=10):
+        """
+        Print the first num elements of each RDD generated in this DStream.
+
+        @param num: the number of elements to print from the beginning of each RDD.
+        """
+        def takeAndPrint(time, rdd):
+            taken = rdd.take(num + 1)
+            print("-------------------------------------------")
+            print("Time: %s" % time)
+            print("-------------------------------------------")
+            for record in taken[:num]:
+                print(record)
+            if len(taken) > num:
+                print("...")
+            print("")
+
+        self.foreachRDD(takeAndPrint)
+
+    def mapValues(self, f):
+        """
+        Return a new DStream by applying a map function to the value of
+        each key-value pair in this DStream without changing the key.
+        """
+        map_values_fn = lambda kv: (kv[0], f(kv[1]))
+        return self.map(map_values_fn, preservesPartitioning=True)
+
+    def flatMapValues(self, f):
+        """
+        Return a new DStream by applying a flatmap function to the value
+        of each key-value pair in this DStream without changing the key.
+        """
+        flat_map_fn = lambda kv: ((kv[0], x) for x in f(kv[1]))
+        return self.flatMap(flat_map_fn, preservesPartitioning=True)
+
+    def glom(self):
+        """
+        Return a new DStream in which each RDD is generated by applying glom()
+        to each RDD of this DStream.
+        """
+        def func(iterator):
+            yield list(iterator)
+        return self.mapPartitions(func)
+
+    def cache(self):
+        """
+        Persist the RDDs of this DStream with the default storage level
+        (C{MEMORY_ONLY}).
+        """
+        self.is_cached = True
+        self.persist(StorageLevel.MEMORY_ONLY)
+        return self
+
+    def persist(self, storageLevel):
+        """
+        Persist the RDDs of this DStream with the given storage level
+        """
+        self.is_cached = True
+        javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel)
+        self._jdstream.persist(javaStorageLevel)
+        return self
+
+    def checkpoint(self, interval):
+        """
+        Enable periodic checkpointing of RDDs of this DStream
+
+        @param interval: time in seconds; after each such period, the generated
+                         RDDs will be checkpointed
+        """
+        self.is_checkpointed = True
+        self._jdstream.checkpoint(self._ssc._jduration(interval))
+        return self
+
+    def groupByKey(self, numPartitions=None):
+        """
+        Return a new DStream by applying groupByKey on each RDD.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+        return self.transform(lambda rdd: rdd.groupByKey(numPartitions))
+
+    def countByValue(self):
+        """
+        Return a new DStream in which each RDD contains the counts of each
+        distinct value in each RDD of this DStream.
+        """
+        return self.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x+y)
+
+    def saveAsTextFiles(self, prefix, suffix=None):
+        """
+        Save each RDD in this DStream as a text file, using the string
+        representation of elements.
+        """
+        def saveAsTextFile(t, rdd):
+            path = rddToFileName(prefix, suffix, t)
+            try:
+                rdd.saveAsTextFile(path)
+            except Py4JJavaError as e:
+                # after recovered from checkpointing, the foreachRDD may
+                # be called twice
+                if 'FileAlreadyExistsException' not in str(e):
+                    raise
+        return self.foreachRDD(saveAsTextFile)
+
+    # TODO: uncomment this once we have ssc.pickleFileStream()
+    # def saveAsPickleFiles(self, prefix, suffix=None):
+    #     """
+    #     Save each RDD in this DStream as a binary file; the elements are
+    #     serialized by pickle.
+    #     """
+    #     def saveAsPickleFile(t, rdd):
+    #         path = rddToFileName(prefix, suffix, t)
+    #         try:
+    #             rdd.saveAsPickleFile(path)
+    #         except Py4JJavaError as e:
+    #             # after recovered from checkpointing, the foreachRDD may
+    #             # be called twice
+    #             if 'FileAlreadyExistsException' not in str(e):
+    #                 raise
+    #     return self.foreachRDD(saveAsPickleFile)
+
+    def transform(self, func):
+        """
+        Return a new DStream in which each RDD is generated by applying a function
+        on each RDD of this DStream.
+
+        `func` can have one argument of `rdd`, or have two arguments of
+        (`time`, `rdd`)
+        """
+        if func.__code__.co_argcount == 1:
+            oldfunc = func
+            func = lambda t, rdd: oldfunc(rdd)
+        assert func.__code__.co_argcount == 2, "func should take one or two arguments"
+        return TransformedDStream(self, func)
+
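+    # Illustrative sketch (not part of the original source): `transform` exposes arbitrary
+    # RDD-to-RDD operations. Assuming a DStream `counts` of (word, count) pairs, each batch
+    # could be sorted by descending count using the plain RDD API:
+    #
+    #   sorted_counts = counts.transform(
+    #       lambda rdd: rdd.sortBy(lambda kv: kv[1], ascending=False))
+    #   sorted_counts.pprint()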
+    def transformWith(self, func, other, keepSerializer=False):
+        """
+        Return a new DStream in which each RDD is generated by applying a function
+        on each RDD of this DStream and 'other' DStream.
+
+        `func` can have two arguments of (`rdd_a`, `rdd_b`) or have three
+        arguments of (`time`, `rdd_a`, `rdd_b`)
+        """
+        if func.__code__.co_argcount == 2:
+            oldfunc = func
+            func = lambda t, a, b: oldfunc(a, b)
+        assert func.__code__.co_argcount == 3, "func should take two or three arguments"
+        jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer, other._jrdd_deserializer)
+        dstream = self._sc._jvm.PythonTransformed2DStream(self._jdstream.dstream(),
+                                                          other._jdstream.dstream(), jfunc)
+        jrdd_serializer = self._jrdd_deserializer if keepSerializer else self._sc.serializer
+        return DStream(dstream.asJavaDStream(), self._ssc, jrdd_serializer)
+
+    def repartition(self, numPartitions):
+        """
+        Return a new DStream with an increased or decreased level of parallelism.
+        """
+        return self.transform(lambda rdd: rdd.repartition(numPartitions))
+
+    @property
+    def _slideDuration(self):
+        """
+        Return the slideDuration in seconds of this DStream
+        """
+        return self._jdstream.dstream().slideDuration().milliseconds() / 1000.0
+
+    def union(self, other):
+        """
+        Return a new DStream by unifying data of another DStream with this DStream.
+
+        @param other: Another DStream having the same interval (i.e., slideDuration)
+                     as this DStream.
+        """
+        if self._slideDuration != other._slideDuration:
+            raise ValueError("the two DStream should have same slide duration")
+        return self.transformWith(lambda a, b: a.union(b), other, True)
+
+    def cogroup(self, other, numPartitions=None):
+        """
+        Return a new DStream by applying 'cogroup' between RDDs of this
+        DStream and `other` DStream.
+
+        Hash partitioning is used to generate the RDDs with `numPartitions` partitions.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+        return self.transformWith(lambda a, b: a.cogroup(b, numPartitions), other)
+
+    def join(self, other, numPartitions=None):
+        """
+        Return a new DStream by applying 'join' between RDDs of this DStream and
+        `other` DStream.
+
+        Hash partitioning is used to generate the RDDs with `numPartitions`
+        partitions.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+        return self.transformWith(lambda a, b: a.join(b, numPartitions), other)
+
+    def leftOuterJoin(self, other, numPartitions=None):
+        """
+        Return a new DStream by applying 'left outer join' between RDDs of this DStream and
+        `other` DStream.
+
+        Hash partitioning is used to generate the RDDs with `numPartitions`
+        partitions.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+        return self.transformWith(lambda a, b: a.leftOuterJoin(b, numPartitions), other)
+
+    def rightOuterJoin(self, other, numPartitions=None):
+        """
+        Return a new DStream by applying 'right outer join' between RDDs of this DStream and
+        `other` DStream.
+
+        Hash partitioning is used to generate the RDDs with `numPartitions`
+        partitions.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+        return self.transformWith(lambda a, b: a.rightOuterJoin(b, numPartitions), other)
+
+    def fullOuterJoin(self, other, numPartitions=None):
+        """
+        Return a new DStream by applying 'full outer join' between RDDs of this DStream and
+        `other` DStream.
+
+        Hash partitioning is used to generate the RDDs with `numPartitions`
+        partitions.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+        return self.transformWith(lambda a, b: a.fullOuterJoin(b, numPartitions), other)
+
+    def _jtime(self, timestamp):
+        """ Convert datetime or unix_timestamp into Time
+        """
+        if isinstance(timestamp, datetime):
+            timestamp = time.mktime(timestamp.timetuple())
+        return self._sc._jvm.Time(long(timestamp * 1000))
+
+    def slice(self, begin, end):
+        """
+        Return all the RDDs between 'begin' and 'end' (both inclusive)
+
+        `begin`, `end` could be datetime.datetime() or unix_timestamp
+        """
+        jrdds = self._jdstream.slice(self._jtime(begin), self._jtime(end))
+        return [RDD(jrdd, self._sc, self._jrdd_deserializer) for jrdd in jrdds]
+
+    def _validate_window_param(self, window, slide):
+        duration = self._jdstream.dstream().slideDuration().milliseconds()
+        if int(window * 1000) % duration != 0:
+            raise ValueError("windowDuration must be multiple of the slide duration (%d ms)"
+                             % duration)
+        if slide and int(slide * 1000) % duration != 0:
+            raise ValueError("slideDuration must be multiple of the slide duration (%d ms)"
+                             % duration)
+
+    def window(self, windowDuration, slideDuration=None):
+        """
+        Return a new DStream in which each RDD contains all the elements seen in a
+        sliding window of time over this DStream.
+
+        @param windowDuration: width of the window; must be a multiple of this DStream's
+                              batching interval
+        @param slideDuration:  sliding interval of the window (i.e., the interval after which
+                              the new DStream will generate RDDs); must be a multiple of this
+                              DStream's batching interval
+        """
+        self._validate_window_param(windowDuration, slideDuration)
+        d = self._ssc._jduration(windowDuration)
+        if slideDuration is None:
+            return DStream(self._jdstream.window(d), self._ssc, self._jrdd_deserializer)
+        s = self._ssc._jduration(slideDuration)
+        return DStream(self._jdstream.window(d, s), self._ssc, self._jrdd_deserializer)
+
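+    # Illustrative sketch (not part of the original source): assuming a StreamingContext
+    # created with a 10-second batch interval and a DStream `lines`, the following keeps
+    # the last 30 seconds of data, evaluated every 10 seconds (both durations must be
+    # multiples of the batch interval, as validated above):
+    #
+    #   windowed_lines = lines.window(30, 10)
+    #   windowed_lines.count().pprint()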
+    def reduceByWindow(self, reduceFunc, invReduceFunc, windowDuration, slideDuration):
+        """
+        Return a new DStream in which each RDD has a single element generated by reducing all
+        elements in a sliding window over this DStream.
+
+        If `invReduceFunc` is not None, the reduction is done incrementally
+        using the old window's reduced value:
+
+        1. reduce the new values that entered the window (e.g., adding new counts)
+
+        2. "inverse reduce" the old values that left the window (e.g., subtracting old counts)
+
+        This is more efficient than recomputing over the whole window when `invReduceFunc` is None.
+
+        @param reduceFunc:     associative and commutative reduce function
+        @param invReduceFunc:  inverse reduce function of `reduceFunc`; such that for all y
+                               and invertible x:
+                               `invReduceFunc(reduceFunc(x, y), x) = y`
+        @param windowDuration: width of the window; must be a multiple of this DStream's
+                               batching interval
+        @param slideDuration:  sliding interval of the window (i.e., the interval after which
+                               the new DStream will generate RDDs); must be a multiple of this
+                               DStream's batching interval
+        """
+        keyed = self.map(lambda x: (1, x))
+        reduced = keyed.reduceByKeyAndWindow(reduceFunc, invReduceFunc,
+                                             windowDuration, slideDuration, 1)
+        return reduced.map(lambda kv: kv[1])
+
+    def countByWindow(self, windowDuration, slideDuration):
+        """
+        Return a new DStream in which each RDD has a single element generated
+        by counting the number of elements in a window over this DStream.
+        windowDuration and slideDuration are as defined in the window() operation.
+
+        This is equivalent to window(windowDuration, slideDuration).count(),
+        but will be more efficient if the window is large.
+        """
+        return self.map(lambda x: 1).reduceByWindow(operator.add, operator.sub,
+                                                    windowDuration, slideDuration)
+
+    def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=None):
+        """
+        Return a new DStream in which each RDD contains the count of distinct elements in
+        RDDs in a sliding window over this DStream.
+
+        @param windowDuration: width of the window; must be a multiple of this DStream's
+                              batching interval
+        @param slideDuration:  sliding interval of the window (i.e., the interval after which
+                              the new DStream will generate RDDs); must be a multiple of this
+                              DStream's batching interval
+        @param numPartitions:  number of partitions of each RDD in the new DStream.
+        """
+        keyed = self.map(lambda x: (x, 1))
+        counted = keyed.reduceByKeyAndWindow(operator.add, operator.sub,
+                                             windowDuration, slideDuration, numPartitions)
+        return counted.filter(lambda kv: kv[1] > 0)
+
+    def groupByKeyAndWindow(self, windowDuration, slideDuration, numPartitions=None):
+        """
+        Return a new DStream by applying `groupByKey` over a sliding window of this DStream.
+        Similar to `DStream.groupByKey()`, but applied over a sliding window.
+
+        @param windowDuration: width of the window; must be a multiple of this DStream's
+                              batching interval
+        @param slideDuration:  sliding interval of the window (i.e., the interval after which
+                              the new DStream will generate RDDs); must be a multiple of this
+                              DStream's batching interval
+        @param numPartitions:  Number of partitions of each RDD in the new DStream.
+        """
+        ls = self.mapValues(lambda x: [x])
+        grouped = ls.reduceByKeyAndWindow(lambda a, b: a.extend(b) or a, lambda a, b: a[len(b):],
+                                          windowDuration, slideDuration, numPartitions)
+        return grouped.mapValues(ResultIterable)
+
+    def reduceByKeyAndWindow(self, func, invFunc, windowDuration, slideDuration=None,
+                             numPartitions=None, filterFunc=None):
+        """
+        Return a new DStream by applying incremental `reduceByKey` over a sliding window.
+
+        The reduced value over a new window is calculated using the old window's reduced value:
+         1. reduce the new values that entered the window (e.g., adding new counts)
+         2. "inverse reduce" the old values that left the window (e.g., subtracting old counts)
+
+        `invFunc` can be None; in that case all the RDDs in the window are reduced from
+        scratch, which can be slower than providing `invFunc`.
+
+        @param func:           associative and commutative reduce function
+        @param invFunc:        inverse function of `reduceFunc`
+        @param windowDuration: width of the window; must be a multiple of this DStream's
+                              batching interval
+        @param slideDuration:  sliding interval of the window (i.e., the interval after which
+                              the new DStream will generate RDDs); must be a multiple of this
+                              DStream's batching interval
+        @param numPartitions:  number of partitions of each RDD in the new DStream.
+        @param filterFunc:     function to filter expired key-value pairs;
+                              only pairs that satisfy the function are retained;
+                              set this to None if you do not want to filter
+        """
+        self._validate_window_param(windowDuration, slideDuration)
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+
+        reduced = self.reduceByKey(func, numPartitions)
+
+        if invFunc:
+            def reduceFunc(t, a, b):
+                b = b.reduceByKey(func, numPartitions)
+                r = a.union(b).reduceByKey(func, numPartitions) if a else b
+                if filterFunc:
+                    r = r.filter(filterFunc)
+                return r
+
+            def invReduceFunc(t, a, b):
+                b = b.reduceByKey(func, numPartitions)
+                joined = a.leftOuterJoin(b, numPartitions)
+                return joined.mapValues(lambda kv: invFunc(kv[0], kv[1])
+                                        if kv[1] is not None else kv[0])
+
+            jreduceFunc = TransformFunction(self._sc, reduceFunc, reduced._jrdd_deserializer)
+            jinvReduceFunc = TransformFunction(self._sc, invReduceFunc, reduced._jrdd_deserializer)
+            if slideDuration is None:
+                slideDuration = self._slideDuration
+            dstream = self._sc._jvm.PythonReducedWindowedDStream(
+                reduced._jdstream.dstream(),
+                jreduceFunc, jinvReduceFunc,
+                self._ssc._jduration(windowDuration),
+                self._ssc._jduration(slideDuration))
+            return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer)
+        else:
+            return reduced.window(windowDuration, slideDuration).reduceByKey(func, numPartitions)
+
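+    # Illustrative sketch (not part of the original source): a windowed word count over a
+    # DStream `words`, assuming a 10-second batch interval and that checkpointing has been
+    # enabled on the StreamingContext (needed for the incremental, invertible form):
+    #
+    #   pairs = words.map(lambda w: (w, 1))
+    #   windowed_counts = pairs.reduceByKeyAndWindow(
+    #       lambda a, b: a + b,    # reduce new values entering the window
+    #       lambda a, b: a - b,    # "inverse reduce" old values leaving the window
+    #       30, 10)
+    #   windowed_counts.pprint()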
+    def updateStateByKey(self, updateFunc, numPartitions=None, initialRDD=None):
+        """
+        Return a new "state" DStream where the state for each key is updated by applying
+        the given function on the previous state of the key and the new values of the key.
+
+        @param updateFunc: State update function. If this function returns None, then
+                           the corresponding state key-value pair will be eliminated.
+        """
+        if numPartitions is None:
+            numPartitions = self._sc.defaultParallelism
+
+        if initialRDD and not isinstance(initialRDD, RDD):
+            initialRDD = self._sc.parallelize(initialRDD)
+
+        def reduceFunc(t, a, b):
+            if a is None:
+                g = b.groupByKey(numPartitions).mapValues(lambda vs: (list(vs), None))
+            else:
+                g = a.cogroup(b.partitionBy(numPartitions), numPartitions)
+                g = g.mapValues(lambda ab: (list(ab[1]), list(ab[0])[0] if len(ab[0]) else None))
+            state = g.mapValues(lambda vs_s: updateFunc(vs_s[0], vs_s[1]))
+            return state.filter(lambda k_v: k_v[1] is not None)
+
+        jreduceFunc = TransformFunction(self._sc, reduceFunc,
+                                        self._sc.serializer, self._jrdd_deserializer)
+        if initialRDD:
+            initialRDD = initialRDD._reserialize(self._jrdd_deserializer)
+            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc,
+                                                       initialRDD._jrdd)
+        else:
+            dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc)
+
+        return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer)
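+    # Illustrative sketch (not part of the original source): a running word count kept as
+    # per-key state, assuming a DStream `words` and that `ssc.checkpoint(...)` has been
+    # called (stateful operations require a checkpoint directory):
+    #
+    #   def update(new_values, last_sum):
+    #       return sum(new_values) + (last_sum or 0)
+    #
+    #   running_counts = words.map(lambda w: (w, 1)).updateStateByKey(update)
+    #   running_counts.pprint()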
+
+
+class TransformedDStream(DStream):
+    """
+    TransformedDStream is a DStream generated by a Python function that
+    transforms each RDD of a parent DStream into another RDD.
+
+    Multiple consecutive transformations of a DStream can be combined into
+    one transformation.
+    """
+    def __init__(self, prev, func):
+        self._ssc = prev._ssc
+        self._sc = self._ssc._sc
+        self._jrdd_deserializer = self._sc.serializer
+        self.is_cached = False
+        self.is_checkpointed = False
+        self._jdstream_val = None
+
+        # Use type() so that functions are folded and DStreams compacted only when the
+        # previous DStream is strictly a TransformedDStream (not a subclass).
+        # This avoids a bug in KafkaTransformedDStream when calling offsetRanges().
+        if (type(prev) is TransformedDStream and
+                not prev.is_cached and not prev.is_checkpointed):
+            prev_func = prev.func
+            self.func = lambda t, rdd: func(t, prev_func(t, rdd))
+            self.prev = prev.prev
+        else:
+            self.prev = prev
+            self.func = func
+
+    @property
+    def _jdstream(self):
+        if self._jdstream_val is not None:
+            return self._jdstream_val
+
+        jfunc = TransformFunction(self._sc, self.func, self.prev._jrdd_deserializer)
+        dstream = self._sc._jvm.PythonTransformedDStream(self.prev._jdstream.dstream(), jfunc)
+        self._jdstream_val = dstream.asJavaDStream()
+        return self._jdstream_val
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/flume.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/flume.py
new file mode 100644
index 0000000000000000000000000000000000000000..5de448114ece81437f0beca4ce43e8ecc337fbbe
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/flume.py
@@ -0,0 +1,156 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import sys
+if sys.version >= "3":
+    from io import BytesIO
+else:
+    from StringIO import StringIO
+import warnings
+
+from py4j.protocol import Py4JJavaError
+
+from pyspark.storagelevel import StorageLevel
+from pyspark.serializers import PairDeserializer, NoOpSerializer, UTF8Deserializer, read_int
+from pyspark.streaming import DStream
+
+__all__ = ['FlumeUtils', 'utf8_decoder']
+
+
+def utf8_decoder(s):
+    """ Decode the unicode as UTF-8 """
+    if s is None:
+        return None
+    return s.decode('utf-8')
+
+
+class FlumeUtils(object):
+
+    @staticmethod
+    def createStream(ssc, hostname, port,
+                     storageLevel=StorageLevel.MEMORY_AND_DISK_2,
+                     enableDecompression=False,
+                     bodyDecoder=utf8_decoder):
+        """
+        Create an input stream that pulls events from Flume.
+
+        :param ssc:  StreamingContext object
+        :param hostname:  Hostname of the slave machine to which the flume data will be sent
+        :param port:  Port of the slave machine to which the flume data will be sent
+        :param storageLevel:  Storage level to use for storing the received objects
+        :param enableDecompression:  Whether the Netty server should decompress the input stream
+        :param bodyDecoder:  A function used to decode body (default is utf8_decoder)
+        :return: A DStream object
+
+        .. note:: Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0.
+            See SPARK-22142.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0. "
+            "See SPARK-22142.",
+            DeprecationWarning)
+        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
+        helper = FlumeUtils._get_helper(ssc._sc)
+        jstream = helper.createStream(ssc._jssc, hostname, port, jlevel, enableDecompression)
+        return FlumeUtils._toPythonDStream(ssc, jstream, bodyDecoder)
+
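+    # Illustrative sketch (not part of the original source): assuming a StreamingContext
+    # `ssc` and a Flume agent configured to push events to port 4141 on this host, the
+    # stream yields (headers, body) pairs as produced by _toPythonDStream below:
+    #
+    #   flume_stream = FlumeUtils.createStream(ssc, "localhost", 4141)
+    #   flume_stream.map(lambda headers_body: headers_body[1]).pprint()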
+    @staticmethod
+    def createPollingStream(ssc, addresses,
+                            storageLevel=StorageLevel.MEMORY_AND_DISK_2,
+                            maxBatchSize=1000,
+                            parallelism=5,
+                            bodyDecoder=utf8_decoder):
+        """
+        Creates an input stream that is to be used with the Spark Sink deployed on a Flume agent.
+        This stream will poll the sink for data and will pull events as they are available.
+
+        :param ssc:  StreamingContext object
+        :param addresses:  List of (host, port)s on which the Spark Sink is running.
+        :param storageLevel:  Storage level to use for storing the received objects
+        :param maxBatchSize:  The maximum number of events to be pulled from the Spark sink
+                              in a single RPC call
+        :param parallelism:  Number of concurrent requests this stream should send to the sink.
+                             Note that having a higher number of requests concurrently being pulled
+                             will result in this stream using more threads
+        :param bodyDecoder:  A function used to decode body (default is utf8_decoder)
+        :return: A DStream object
+
+        .. note:: Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0.
+            See SPARK-22142.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Flume support is deprecated as of Spark 2.3.0. "
+            "See SPARK-22142.",
+            DeprecationWarning)
+        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
+        hosts = []
+        ports = []
+        for (host, port) in addresses:
+            hosts.append(host)
+            ports.append(port)
+        helper = FlumeUtils._get_helper(ssc._sc)
+        jstream = helper.createPollingStream(
+            ssc._jssc, hosts, ports, jlevel, maxBatchSize, parallelism)
+        return FlumeUtils._toPythonDStream(ssc, jstream, bodyDecoder)
+
+    @staticmethod
+    def _toPythonDStream(ssc, jstream, bodyDecoder):
+        ser = PairDeserializer(NoOpSerializer(), NoOpSerializer())
+        stream = DStream(jstream, ssc, ser)
+
+        def func(event):
+            headersBytes = BytesIO(event[0]) if sys.version >= "3" else StringIO(event[0])
+            headers = {}
+            strSer = UTF8Deserializer()
+            for i in range(0, read_int(headersBytes)):
+                key = strSer.loads(headersBytes)
+                value = strSer.loads(headersBytes)
+                headers[key] = value
+            body = bodyDecoder(event[1])
+            return (headers, body)
+        return stream.map(func)
+
+    @staticmethod
+    def _get_helper(sc):
+        try:
+            return sc._jvm.org.apache.spark.streaming.flume.FlumeUtilsPythonHelper()
+        except TypeError as e:
+            if str(e) == "'JavaPackage' object is not callable":
+                FlumeUtils._printErrorMsg(sc)
+            raise
+
+    @staticmethod
+    def _printErrorMsg(sc):
+        print("""
+________________________________________________________________________________________________
+
+  Spark Streaming's Flume libraries not found in class path. Try one of the following.
+
+  1. Include the Flume library and its dependencies in the
+     spark-submit command as
+
+     $ bin/spark-submit --packages org.apache.spark:spark-streaming-flume:%s ...
+
+  2. Download the JAR of the artifact from Maven Central http://search.maven.org/,
+     Group Id = org.apache.spark, Artifact Id = spark-streaming-flume-assembly, Version = %s.
+     Then, include the jar in the spark-submit command as
+
+     $ bin/spark-submit --jars <spark-streaming-flume-assembly.jar> ...
+
+________________________________________________________________________________________________
+
+""" % (sc.version, sc.version))
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/kafka.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/kafka.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed2e0e7d10fa22d5248226363767a2a152d8b5ea
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/kafka.py
@@ -0,0 +1,506 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import warnings
+
+from py4j.protocol import Py4JJavaError
+
+from pyspark.rdd import RDD
+from pyspark.storagelevel import StorageLevel
+from pyspark.serializers import AutoBatchedSerializer, PickleSerializer, PairDeserializer, \
+    NoOpSerializer
+from pyspark.streaming import DStream
+from pyspark.streaming.dstream import TransformedDStream
+from pyspark.streaming.util import TransformFunction
+
+__all__ = ['Broker', 'KafkaMessageAndMetadata', 'KafkaUtils', 'OffsetRange',
+           'TopicAndPartition', 'utf8_decoder']
+
+
+def utf8_decoder(s):
+    """ Decode the unicode as UTF-8 """
+    if s is None:
+        return None
+    return s.decode('utf-8')
+
+
+class KafkaUtils(object):
+
+    @staticmethod
+    def createStream(ssc, zkQuorum, groupId, topics, kafkaParams=None,
+                     storageLevel=StorageLevel.MEMORY_AND_DISK_2,
+                     keyDecoder=utf8_decoder, valueDecoder=utf8_decoder):
+        """
+        Create an input stream that pulls messages from a Kafka Broker.
+
+        :param ssc:  StreamingContext object
+        :param zkQuorum:  Zookeeper quorum (hostname:port,hostname:port,..).
+        :param groupId:  The group id for this consumer.
+        :param topics:  Dict of (topic_name -> numPartitions) to consume.
+                        Each partition is consumed in its own thread.
+        :param kafkaParams: Additional params for Kafka
+        :param storageLevel:  RDD storage level.
+        :param keyDecoder:  A function used to decode key (default is utf8_decoder)
+        :param valueDecoder:  A function used to decode value (default is utf8_decoder)
+        :return: A DStream object
+
+        .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+            See SPARK-21893.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        if kafkaParams is None:
+            kafkaParams = dict()
+        kafkaParams.update({
+            "zookeeper.connect": zkQuorum,
+            "group.id": groupId,
+            "zookeeper.connection.timeout.ms": "10000",
+        })
+        if not isinstance(topics, dict):
+            raise TypeError("topics should be dict")
+        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
+        helper = KafkaUtils._get_helper(ssc._sc)
+        jstream = helper.createStream(ssc._jssc, kafkaParams, topics, jlevel)
+        ser = PairDeserializer(NoOpSerializer(), NoOpSerializer())
+        stream = DStream(jstream, ssc, ser)
+        return stream.map(lambda k_v: (keyDecoder(k_v[0]), valueDecoder(k_v[1])))
+
+    @staticmethod
+    def createDirectStream(ssc, topics, kafkaParams, fromOffsets=None,
+                           keyDecoder=utf8_decoder, valueDecoder=utf8_decoder,
+                           messageHandler=None):
+        """
+        Create an input stream that directly pulls messages from Kafka brokers at specific offsets.
+
+        This is not a receiver-based Kafka input stream; it directly pulls messages from Kafka
+        in each batch duration and processes them without storing them.
+
+        This does not use Zookeeper to store offsets. The consumed offsets are tracked
+        by the stream itself. For interoperability with Kafka monitoring tools that depend on
+        Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application.
+        You can access the offsets used in each batch from the generated RDDs
+        (see KafkaRDD.offsetRanges()).
+
+        To recover from driver failures, you have to enable checkpointing in the StreamingContext.
+        The information on consumed offset can be recovered from the checkpoint.
+        See the programming guide for details (constraints, etc.).
+
+        :param ssc:  StreamingContext object.
+        :param topics:  list of topic_name to consume.
+        :param kafkaParams: Additional params for Kafka.
+        :param fromOffsets: Per-topic/partition Kafka offsets defining the (inclusive) starting
+                            point of the stream (a dictionary mapping `TopicAndPartition` to
+                            integers).
+        :param keyDecoder:  A function used to decode key (default is utf8_decoder).
+        :param valueDecoder:  A function used to decode value (default is utf8_decoder).
+        :param messageHandler: A function used to convert KafkaMessageAndMetadata. You can access
+                               the message metadata through messageHandler (default is None).
+        :return: A DStream object
+
+        .. note:: Experimental
+        .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+            See SPARK-21893.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        if fromOffsets is None:
+            fromOffsets = dict()
+        if not isinstance(topics, list):
+            raise TypeError("topics should be list")
+        if not isinstance(kafkaParams, dict):
+            raise TypeError("kafkaParams should be dict")
+
+        def funcWithoutMessageHandler(k_v):
+            return (keyDecoder(k_v[0]), valueDecoder(k_v[1]))
+
+        def funcWithMessageHandler(m):
+            m._set_key_decoder(keyDecoder)
+            m._set_value_decoder(valueDecoder)
+            return messageHandler(m)
+
+        helper = KafkaUtils._get_helper(ssc._sc)
+
+        jfromOffsets = dict([(k._jTopicAndPartition(helper),
+                              v) for (k, v) in fromOffsets.items()])
+        if messageHandler is None:
+            ser = PairDeserializer(NoOpSerializer(), NoOpSerializer())
+            func = funcWithoutMessageHandler
+            jstream = helper.createDirectStreamWithoutMessageHandler(
+                ssc._jssc, kafkaParams, set(topics), jfromOffsets)
+        else:
+            ser = AutoBatchedSerializer(PickleSerializer())
+            func = funcWithMessageHandler
+            jstream = helper.createDirectStreamWithMessageHandler(
+                ssc._jssc, kafkaParams, set(topics), jfromOffsets)
+
+        stream = DStream(jstream, ssc, ser).map(func)
+        return KafkaDStream(stream._jdstream, ssc, stream._jrdd_deserializer)
+
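+    # Illustrative sketch (not part of the original source): assuming a StreamingContext
+    # `ssc`, a placeholder topic name and broker list, the stream yields (key, value)
+    # pairs when no messageHandler is given:
+    #
+    #   direct_stream = KafkaUtils.createDirectStream(
+    #       ssc, ["my-topic"], {"metadata.broker.list": "broker1:9092,broker2:9092"})
+    #   lines = direct_stream.map(lambda kv: kv[1])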
+    @staticmethod
+    def createRDD(sc, kafkaParams, offsetRanges, leaders=None,
+                  keyDecoder=utf8_decoder, valueDecoder=utf8_decoder,
+                  messageHandler=None):
+        """
+        Create an RDD from Kafka using offset ranges for each topic and partition.
+
+        :param sc:  SparkContext object
+        :param kafkaParams: Additional params for Kafka
+        :param offsetRanges:  list of OffsetRange to specify topic:partition:[start, end) to consume
+        :param leaders: Kafka brokers for each TopicAndPartition in offsetRanges.  May be an empty
+            map, in which case leaders will be looked up on the driver.
+        :param keyDecoder:  A function used to decode key (default is utf8_decoder)
+        :param valueDecoder:  A function used to decode value (default is utf8_decoder)
+        :param messageHandler: A function used to convert KafkaMessageAndMetadata. You can access
+                               the message metadata through messageHandler (default is None).
+        :return: An RDD object
+
+        .. note:: Experimental
+        .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+            See SPARK-21893.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        if leaders is None:
+            leaders = dict()
+        if not isinstance(kafkaParams, dict):
+            raise TypeError("kafkaParams should be dict")
+        if not isinstance(offsetRanges, list):
+            raise TypeError("offsetRanges should be list")
+
+        def funcWithoutMessageHandler(k_v):
+            return (keyDecoder(k_v[0]), valueDecoder(k_v[1]))
+
+        def funcWithMessageHandler(m):
+            m._set_key_decoder(keyDecoder)
+            m._set_value_decoder(valueDecoder)
+            return messageHandler(m)
+
+        helper = KafkaUtils._get_helper(sc)
+
+        joffsetRanges = [o._jOffsetRange(helper) for o in offsetRanges]
+        jleaders = dict([(k._jTopicAndPartition(helper),
+                          v._jBroker(helper)) for (k, v) in leaders.items()])
+        if messageHandler is None:
+            jrdd = helper.createRDDWithoutMessageHandler(
+                sc._jsc, kafkaParams, joffsetRanges, jleaders)
+            ser = PairDeserializer(NoOpSerializer(), NoOpSerializer())
+            rdd = RDD(jrdd, sc, ser).map(funcWithoutMessageHandler)
+        else:
+            jrdd = helper.createRDDWithMessageHandler(
+                sc._jsc, kafkaParams, joffsetRanges, jleaders)
+            rdd = RDD(jrdd, sc).map(funcWithMessageHandler)
+
+        return KafkaRDD(rdd._jrdd, sc, rdd._jrdd_deserializer)
+
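+    # Illustrative sketch (not part of the original source): assuming a SparkContext `sc`
+    # and placeholder topic/broker names, a bounded batch read of offsets [0, 100) from
+    # partition 0 might look like:
+    #
+    #   ranges = [OffsetRange("my-topic", 0, 0, 100)]
+    #   rdd = KafkaUtils.createRDD(sc, {"metadata.broker.list": "broker1:9092"}, ranges)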
+    @staticmethod
+    def _get_helper(sc):
+        try:
+            return sc._jvm.org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper()
+        except TypeError as e:
+            if str(e) == "'JavaPackage' object is not callable":
+                KafkaUtils._printErrorMsg(sc)
+            raise
+
+    @staticmethod
+    def _printErrorMsg(sc):
+        print("""
+________________________________________________________________________________________________
+
+  Spark Streaming's Kafka libraries not found in class path. Try one of the following.
+
+  1. Include the Kafka library and its dependencies in the
+     spark-submit command as
+
+     $ bin/spark-submit --packages org.apache.spark:spark-streaming-kafka-0-8:%s ...
+
+  2. Download the JAR of the artifact from Maven Central http://search.maven.org/,
+     Group Id = org.apache.spark, Artifact Id = spark-streaming-kafka-0-8-assembly, Version = %s.
+     Then, include the jar in the spark-submit command as
+
+     $ bin/spark-submit --jars <spark-streaming-kafka-0-8-assembly.jar> ...
+
+________________________________________________________________________________________________
+
+""" % (sc.version, sc.version))
+
+
+class OffsetRange(object):
+    """
+    Represents a range of offsets from a single Kafka TopicAndPartition.
+
+    .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+        See SPARK-21893.
+    """
+
+    def __init__(self, topic, partition, fromOffset, untilOffset):
+        """
+        Create an OffsetRange to represent a range of offsets
+        :param topic: Kafka topic name.
+        :param partition: Kafka partition id.
+        :param fromOffset: Inclusive starting offset.
+        :param untilOffset: Exclusive ending offset.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        self.topic = topic
+        self.partition = partition
+        self.fromOffset = fromOffset
+        self.untilOffset = untilOffset
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (self.topic == other.topic
+                    and self.partition == other.partition
+                    and self.fromOffset == other.fromOffset
+                    and self.untilOffset == other.untilOffset)
+        else:
+            return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __str__(self):
+        return "OffsetRange(topic: %s, partition: %d, range: [%d -> %d]" \
+               % (self.topic, self.partition, self.fromOffset, self.untilOffset)
+
+    def _jOffsetRange(self, helper):
+        return helper.createOffsetRange(self.topic, self.partition, self.fromOffset,
+                                        self.untilOffset)
+
+
+class TopicAndPartition(object):
+    """
+    Represents a specific topic and partition for Kafka.
+
+    .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+        See SPARK-21893.
+    """
+
+    def __init__(self, topic, partition):
+        """
+        Create a Python TopicAndPartition to map to the corresponding Java object
+        :param topic: Kafka topic name.
+        :param partition: Kafka partition id.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        self._topic = topic
+        self._partition = partition
+
+    def _jTopicAndPartition(self, helper):
+        return helper.createTopicAndPartition(self._topic, self._partition)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (self._topic == other._topic
+                    and self._partition == other._partition)
+        else:
+            return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        return (self._topic, self._partition).__hash__()
+
+
+class Broker(object):
+    """
+    Represent the host and port info for a Kafka broker.
+
+    .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+        See SPARK-21893.
+    """
+
+    def __init__(self, host, port):
+        """
+        Create a Python Broker to map to the corresponding Java object.
+        :param host: Broker's hostname.
+        :param port: Broker's port.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        self._host = host
+        self._port = port
+
+    def _jBroker(self, helper):
+        return helper.createBroker(self._host, self._port)
+
+
+class KafkaRDD(RDD):
+    """
+    A Python wrapper of KafkaRDD, providing Kafka offset information on top of a normal RDD.
+
+    .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+        See SPARK-21893.
+    """
+
+    def __init__(self, jrdd, ctx, jrdd_deserializer):
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        RDD.__init__(self, jrdd, ctx, jrdd_deserializer)
+
+    def offsetRanges(self):
+        """
+        Get the OffsetRanges of this KafkaRDD.
+        :return: A list of OffsetRange
+        """
+        helper = KafkaUtils._get_helper(self.ctx)
+        joffsetRanges = helper.offsetRangesOfKafkaRDD(self._jrdd.rdd())
+        ranges = [OffsetRange(o.topic(), o.partition(), o.fromOffset(), o.untilOffset())
+                  for o in joffsetRanges]
+        return ranges
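+    # Illustrative sketch (not part of the original source): assuming `direct_stream` was
+    # created by KafkaUtils.createDirectStream, the offsets of each batch can be inspected,
+    # because KafkaDStream.foreachRDD wraps each batch RDD as a KafkaRDD (see rdd_wrapper
+    # below):
+    #
+    #   def log_offsets(rdd):
+    #       for o in rdd.offsetRanges():
+    #           print("%s %d %d %d" % (o.topic, o.partition, o.fromOffset, o.untilOffset))
+    #
+    #   direct_stream.foreachRDD(log_offsets)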
+
+
+class KafkaDStream(DStream):
+    """
+    A Python wrapper of KafkaDStream
+
+    .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+        See SPARK-21893.
+    """
+
+    def __init__(self, jdstream, ssc, jrdd_deserializer):
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        DStream.__init__(self, jdstream, ssc, jrdd_deserializer)
+
+    def foreachRDD(self, func):
+        """
+        Apply a function to each RDD in this DStream.
+        """
+        if func.__code__.co_argcount == 1:
+            old_func = func
+            func = lambda r, rdd: old_func(rdd)
+        jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer) \
+            .rdd_wrapper(lambda jrdd, ctx, ser: KafkaRDD(jrdd, ctx, ser))
+        api = self._ssc._jvm.PythonDStream
+        api.callForeachRDD(self._jdstream, jfunc)
+
+    def transform(self, func):
+        """
+        Return a new DStream in which each RDD is generated by applying a function
+        on each RDD of this DStream.
+
+        `func` can have one argument of `rdd`, or have two arguments of
+        (`time`, `rdd`)
+        """
+        if func.__code__.co_argcount == 1:
+            oldfunc = func
+            func = lambda t, rdd: oldfunc(rdd)
+        assert func.__code__.co_argcount == 2, "func should take one or two arguments"
+
+        return KafkaTransformedDStream(self, func)
+
+
+class KafkaTransformedDStream(TransformedDStream):
+    """
+    Kafka specific wrapper of TransformedDStream to transform on Kafka RDD.
+
+    .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+        See SPARK-21893.
+    """
+
+    def __init__(self, prev, func):
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        TransformedDStream.__init__(self, prev, func)
+
+    @property
+    def _jdstream(self):
+        if self._jdstream_val is not None:
+            return self._jdstream_val
+
+        jfunc = TransformFunction(self._sc, self.func, self.prev._jrdd_deserializer) \
+            .rdd_wrapper(lambda jrdd, ctx, ser: KafkaRDD(jrdd, ctx, ser))
+        dstream = self._sc._jvm.PythonTransformedDStream(self.prev._jdstream.dstream(), jfunc)
+        self._jdstream_val = dstream.asJavaDStream()
+        return self._jdstream_val
+
+
+class KafkaMessageAndMetadata(object):
+    """
+    Kafka message and metadata information, including topic, partition, offset, and message.
+
+    .. note:: Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0.
+        See SPARK-21893.
+    """
+
+    def __init__(self, topic, partition, offset, key, message):
+        """
+        Python wrapper of Kafka MessageAndMetadata
+        :param topic: topic name of this Kafka message
+        :param partition: partition id of this Kafka message
+        :param offset: Offset of this Kafka message in the specific partition
+        :param key: key payload of this Kafka message; can be None if this Kafka message has no
+                    key specified. The returned data is an undecoded bytearray.
+        :param message: actual message payload of this Kafka message; the returned data is an
+                        undecoded bytearray.
+        """
+        warnings.warn(
+            "Deprecated in 2.3.0. Kafka 0.8 support is deprecated as of Spark 2.3.0. "
+            "See SPARK-21893.",
+            DeprecationWarning)
+        self.topic = topic
+        self.partition = partition
+        self.offset = offset
+        self._rawKey = key
+        self._rawMessage = message
+        self._keyDecoder = utf8_decoder
+        self._valueDecoder = utf8_decoder
+
+    def __str__(self):
+        return "KafkaMessageAndMetadata(topic: %s, partition: %d, offset: %d, key and message...)" \
+               % (self.topic, self.partition, self.offset)
+
+    def __repr__(self):
+        return self.__str__()
+
+    def __reduce__(self):
+        return (KafkaMessageAndMetadata,
+                (self.topic, self.partition, self.offset, self._rawKey, self._rawMessage))
+
+    def _set_key_decoder(self, decoder):
+        self._keyDecoder = decoder
+
+    def _set_value_decoder(self, decoder):
+        self._valueDecoder = decoder
+
+    @property
+    def key(self):
+        return self._keyDecoder(self._rawKey)
+
+    @property
+    def message(self):
+        return self._valueDecoder(self._rawMessage)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/kinesis.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/kinesis.py
new file mode 100644
index 0000000000000000000000000000000000000000..b839859c45252ae7ee472a1ab8fde27aa06233c0
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/kinesis.py
@@ -0,0 +1,120 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from py4j.protocol import Py4JJavaError
+
+from pyspark.serializers import PairDeserializer, NoOpSerializer
+from pyspark.storagelevel import StorageLevel
+from pyspark.streaming import DStream
+
+__all__ = ['KinesisUtils', 'InitialPositionInStream', 'utf8_decoder']
+
+
+def utf8_decoder(s):
+    """ Decode the unicode as UTF-8 """
+    if s is None:
+        return None
+    return s.decode('utf-8')
+
+
+class KinesisUtils(object):
+
+    @staticmethod
+    def createStream(ssc, kinesisAppName, streamName, endpointUrl, regionName,
+                     initialPositionInStream, checkpointInterval,
+                     storageLevel=StorageLevel.MEMORY_AND_DISK_2,
+                     awsAccessKeyId=None, awsSecretKey=None, decoder=utf8_decoder,
+                     stsAssumeRoleArn=None, stsSessionName=None, stsExternalId=None):
+        """
+        Create an input stream that pulls messages from a Kinesis stream. This uses the
+        Kinesis Client Library (KCL) to pull messages from Kinesis.
+
+        .. note:: The given AWS credentials will get saved in DStream checkpoints if checkpointing
+            is enabled. Make sure that your checkpoint directory is secure.
+
+        :param ssc:  StreamingContext object
+        :param kinesisAppName:  Kinesis application name used by the Kinesis Client Library (KCL) to
+                                update DynamoDB
+        :param streamName:  Kinesis stream name
+        :param endpointUrl:  URL of the Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com)
+        :param regionName:  Name of region used by the Kinesis Client Library (KCL) to update
+                            DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics)
+        :param initialPositionInStream:  In the absence of Kinesis checkpoint info, this is the
+                                         worker's initial starting position in the stream. The
+                                         values are either the beginning of the stream per Kinesis'
+                                         limit of 24 hours (InitialPositionInStream.TRIM_HORIZON) or
+                                         the tip of the stream (InitialPositionInStream.LATEST).
+        :param checkpointInterval:  Checkpoint interval for Kinesis checkpointing. See the Kinesis
+                                    Spark Streaming documentation for more details on the different
+                                    types of checkpoints.
+        :param storageLevel:  Storage level to use for storing the received objects (default is
+                              StorageLevel.MEMORY_AND_DISK_2)
+        :param awsAccessKeyId:  AWS AccessKeyId (default is None. If None, will use
+                                DefaultAWSCredentialsProviderChain)
+        :param awsSecretKey:  AWS SecretKey (default is None. If None, will use
+                              DefaultAWSCredentialsProviderChain)
+        :param decoder:  A function used to decode value (default is utf8_decoder)
+        :param stsAssumeRoleArn: ARN of IAM role to assume when using STS sessions to read from
+                                 the Kinesis stream (default is None).
+        :param stsSessionName: Name to uniquely identify STS sessions used to read from Kinesis
+                               stream, if STS is being used (default is None).
+        :param stsExternalId: External ID that can be used to validate against the assumed IAM
+                              role's trust policy, if STS is being used (default is None).
+        :return: A DStream object
+        """
+        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)
+        jduration = ssc._jduration(checkpointInterval)
+
+        try:
+            # Use KinesisUtilsPythonHelper to access Scala's KinesisUtils
+            helper = ssc._jvm.org.apache.spark.streaming.kinesis.KinesisUtilsPythonHelper()
+        except TypeError as e:
+            if str(e) == "'JavaPackage' object is not callable":
+                KinesisUtils._printErrorMsg(ssc.sparkContext)
+            raise
+        jstream = helper.createStream(ssc._jssc, kinesisAppName, streamName, endpointUrl,
+                                      regionName, initialPositionInStream, jduration, jlevel,
+                                      awsAccessKeyId, awsSecretKey, stsAssumeRoleArn,
+                                      stsSessionName, stsExternalId)
+        stream = DStream(jstream, ssc, NoOpSerializer())
+        return stream.map(lambda v: decoder(v))
+
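+    # Illustrative sketch (not part of the original source): assuming a StreamingContext
+    # `ssc` and placeholder application/stream/region names, with credentials resolved by
+    # the DefaultAWSCredentialsProviderChain and a 2-second Kinesis checkpoint interval:
+    #
+    #   records = KinesisUtils.createStream(
+    #       ssc, "myKinesisApp", "myStreamName",
+    #       "https://kinesis.us-east-1.amazonaws.com", "us-east-1",
+    #       InitialPositionInStream.LATEST, 2)
+    #   records.pprint()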
+    @staticmethod
+    def _printErrorMsg(sc):
+        print("""
+________________________________________________________________________________________________
+
+  Spark Streaming's Kinesis libraries not found in class path. Try one of the following.
+
+  1. Include the Kinesis library and its dependencies in the
+     spark-submit command as
+
+     $ bin/spark-submit --packages org.apache.spark:spark-streaming-kinesis-asl:%s ...
+
+  2. Download the JAR of the artifact from Maven Central http://search.maven.org/,
+     Group Id = org.apache.spark, Artifact Id = spark-streaming-kinesis-asl-assembly, Version = %s.
+     Then, include the jar in the spark-submit command as
+
+     $ bin/spark-submit --jars <spark-streaming-kinesis-asl-assembly.jar> ...
+
+________________________________________________________________________________________________
+
+""" % (sc.version, sc.version))
+
+
+class InitialPositionInStream(object):
+    LATEST, TRIM_HORIZON = (0, 1)
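+
+
+# A minimal usage sketch (illustrative only; the application name, stream name,
+# endpoint, and region below are placeholders, and a running StreamingContext
+# ``ssc`` is assumed to exist):
+#
+#     from pyspark.streaming.kinesis import KinesisUtils, InitialPositionInStream
+#
+#     lines = KinesisUtils.createStream(
+#         ssc, "myKinesisApp", "myKinesisStream",
+#         "https://kinesis.us-east-1.amazonaws.com", "us-east-1",
+#         InitialPositionInStream.LATEST, checkpointInterval=2)
+#     lines.pprint()
+#     ssc.start()
+#     ssc.awaitTermination()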
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/listener.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/listener.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4ecc215aea992f279567e00a8220af186c5af0e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/listener.py
@@ -0,0 +1,81 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+__all__ = ["StreamingListener"]
+
+
+class StreamingListener(object):
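+    """
+    Base class for listeners that receive callbacks about the progress of a
+    streaming job. Subclass it and override the ``on*`` methods of interest;
+    the callbacks are invoked from the JVM through Py4J, so the event objects
+    passed to them expose Java-style accessor methods (e.g. ``batchInfo()``).
+    """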
+
+    def __init__(self):
+        pass
+
+    def onStreamingStarted(self, streamingStarted):
+        """
+        Called when the streaming context has been started.
+        """
+        pass
+
+    def onReceiverStarted(self, receiverStarted):
+        """
+        Called when a receiver has been started.
+        """
+        pass
+
+    def onReceiverError(self, receiverError):
+        """
+        Called when a receiver has reported an error.
+        """
+        pass
+
+    def onReceiverStopped(self, receiverStopped):
+        """
+        Called when a receiver has been stopped.
+        """
+        pass
+
+    def onBatchSubmitted(self, batchSubmitted):
+        """
+        Called when a batch of jobs has been submitted for processing.
+        """
+        pass
+
+    def onBatchStarted(self, batchStarted):
+        """
+        Called when processing of a batch of jobs has started.
+        """
+        pass
+
+    def onBatchCompleted(self, batchCompleted):
+        """
+        Called when processing of a batch of jobs has completed.
+        """
+        pass
+
+    def onOutputOperationStarted(self, outputOperationStarted):
+        """
+        Called when processing of a job of a batch has started.
+        """
+        pass
+
+    def onOutputOperationCompleted(self, outputOperationCompleted):
+        """
+        Called when processing of a job of a batch has completed.
+        """
+        pass
+
+    class Java:
+        implements = ["org.apache.spark.streaming.api.java.PythonStreamingListener"]
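+
+
+# A minimal usage sketch (illustrative; ``ssc`` is an existing StreamingContext and
+# ``PrintingListener`` is a hypothetical subclass, not part of this module):
+#
+#     class PrintingListener(StreamingListener):
+#         def onBatchCompleted(self, batchCompleted):
+#             print("Batch completed:", batchCompleted.batchInfo().batchTime())
+#
+#     ssc.addStreamingListener(PrintingListener())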
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/tests.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cef621a28e6e8a4cb3b35883fa8ac0774275ac3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/tests.py
@@ -0,0 +1,1640 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import glob
+import os
+import sys
+from itertools import chain
+import time
+import operator
+import tempfile
+import random
+import struct
+import shutil
+from functools import reduce
+
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
+
+if sys.version >= "3":
+    long = int
+
+from pyspark.context import SparkConf, SparkContext, RDD
+from pyspark.storagelevel import StorageLevel
+from pyspark.streaming.context import StreamingContext
+from pyspark.streaming.kafka import Broker, KafkaUtils, OffsetRange, TopicAndPartition
+from pyspark.streaming.flume import FlumeUtils
+from pyspark.streaming.kinesis import KinesisUtils, InitialPositionInStream
+from pyspark.streaming.listener import StreamingListener
+
+
+class PySparkStreamingTestCase(unittest.TestCase):
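+    """
+    Base class for the streaming tests. One SparkContext is created per test
+    class (see setUpClass), a fresh StreamingContext per test, and the
+    ``_take``/``_collect``/``_test_func`` helpers feed queue streams and wait
+    up to ``timeout`` seconds for the expected output to arrive.
+    """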
+
+    timeout = 30  # seconds
+    duration = .5
+
+    @classmethod
+    def setUpClass(cls):
+        class_name = cls.__name__
+        conf = SparkConf().set("spark.default.parallelism", 1)
+        cls.sc = SparkContext(appName=class_name, conf=conf)
+        cls.sc.setCheckpointDir(tempfile.mkdtemp())
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.sc.stop()
+        # Clean up in the JVM just in case there have been any issues in the Python API
+        try:
+            jSparkContextOption = SparkContext._jvm.SparkContext.get()
+            if jSparkContextOption.nonEmpty():
+                jSparkContextOption.get().stop()
+        except:
+            pass
+
+    def setUp(self):
+        self.ssc = StreamingContext(self.sc, self.duration)
+
+    def tearDown(self):
+        if self.ssc is not None:
+            self.ssc.stop(False)
+        # Clean up in the JVM just in case there have been any issues in the Python API
+        try:
+            jStreamingContextOption = StreamingContext._jvm.SparkContext.getActive()
+            if jStreamingContextOption.nonEmpty():
+                jStreamingContextOption.get().stop(False)
+        except:
+            pass
+
+    def wait_for(self, result, n):
+        start_time = time.time()
+        while len(result) < n and time.time() - start_time < self.timeout:
+            time.sleep(0.01)
+        if len(result) < n:
+            print("timeout after", self.timeout)
+
+    def _take(self, dstream, n):
+        """
+        Return the first `n` elements in the stream (will start and stop).
+        """
+        results = []
+
+        def take(_, rdd):
+            if rdd and len(results) < n:
+                results.extend(rdd.take(n - len(results)))
+
+        dstream.foreachRDD(take)
+
+        self.ssc.start()
+        self.wait_for(results, n)
+        return results
+
+    def _collect(self, dstream, n, block=True):
+        """
+        Collect each RDD into the returned list.
+
+        :return: list that will hold the collected items.
+        """
+        result = []
+
+        def get_output(_, rdd):
+            if rdd and len(result) < n:
+                r = rdd.collect()
+                if r:
+                    result.append(r)
+
+        dstream.foreachRDD(get_output)
+
+        if not block:
+            return result
+
+        self.ssc.start()
+        self.wait_for(result, n)
+        return result
+
+    def _test_func(self, input, func, expected, sort=False, input2=None):
+        """
+        @param input: dataset for the test. This should be a list of lists.
+        @param func: wrapped function. This function should return a PythonDStream object.
+        @param expected: expected output for this test case.
+        @param sort: if True, sort both the result and the expected output by key before comparing.
+        @param input2: optional second dataset; when given, ``func`` is called with two input streams.
+        """
+        if not isinstance(input[0], RDD):
+            input = [self.sc.parallelize(d, 1) for d in input]
+        input_stream = self.ssc.queueStream(input)
+        if input2 and not isinstance(input2[0], RDD):
+            input2 = [self.sc.parallelize(d, 1) for d in input2]
+        input_stream2 = self.ssc.queueStream(input2) if input2 is not None else None
+
+        # Apply test function to stream.
+        if input2:
+            stream = func(input_stream, input_stream2)
+        else:
+            stream = func(input_stream)
+
+        result = self._collect(stream, len(expected))
+        if sort:
+            self._sort_result_based_on_key(result)
+            self._sort_result_based_on_key(expected)
+        self.assertEqual(expected, result)
+
+    def _sort_result_based_on_key(self, outputs):
+        """Sort each output list by the first element (the key)."""
+        for output in outputs:
+            output.sort(key=lambda x: x[0])
+
+
+class BasicOperationTests(PySparkStreamingTestCase):
+
+    def test_map(self):
+        """Basic operation test for DStream.map."""
+        input = [range(1, 5), range(5, 9), range(9, 13)]
+
+        def func(dstream):
+            return dstream.map(str)
+        expected = [list(map(str, x)) for x in input]
+        self._test_func(input, func, expected)
+
+    def test_flatMap(self):
+        """Basic operation test for DStream.flatMap."""
+        input = [range(1, 5), range(5, 9), range(9, 13)]
+
+        def func(dstream):
+            return dstream.flatMap(lambda x: (x, x * 2))
+        expected = [list(chain.from_iterable((map(lambda y: [y, y * 2], x))))
+                    for x in input]
+        self._test_func(input, func, expected)
+
+    def test_filter(self):
+        """Basic operation test for DStream.filter."""
+        input = [range(1, 5), range(5, 9), range(9, 13)]
+
+        def func(dstream):
+            return dstream.filter(lambda x: x % 2 == 0)
+        expected = [[y for y in x if y % 2 == 0] for x in input]
+        self._test_func(input, func, expected)
+
+    def test_count(self):
+        """Basic operation test for DStream.count."""
+        input = [range(5), range(10), range(20)]
+
+        def func(dstream):
+            return dstream.count()
+        expected = [[len(x)] for x in input]
+        self._test_func(input, func, expected)
+
+    def test_slice(self):
+        """Basic operation test for DStream.slice."""
+        import datetime as dt
+        self.ssc = StreamingContext(self.sc, 1.0)
+        self.ssc.remember(4.0)
+        input = [[1], [2], [3], [4]]
+        stream = self.ssc.queueStream([self.sc.parallelize(d, 1) for d in input])
+
+        time_vals = []
+
+        def get_times(t, rdd):
+            if rdd and len(time_vals) < len(input):
+                time_vals.append(t)
+
+        stream.foreachRDD(get_times)
+
+        self.ssc.start()
+        self.wait_for(time_vals, 4)
+        begin_time = time_vals[0]
+
+        def get_sliced(begin_delta, end_delta):
+            begin = begin_time + dt.timedelta(seconds=begin_delta)
+            end = begin_time + dt.timedelta(seconds=end_delta)
+            rdds = stream.slice(begin, end)
+            result_list = [rdd.collect() for rdd in rdds]
+            return [r for result in result_list for r in result]
+
+        self.assertEqual(set([1]), set(get_sliced(0, 0)))
+        self.assertEqual(set([2, 3]), set(get_sliced(1, 2)))
+        self.assertEqual(set([2, 3, 4]), set(get_sliced(1, 4)))
+        self.assertEqual(set([1, 2, 3, 4]), set(get_sliced(0, 4)))
+
+    def test_reduce(self):
+        """Basic operation test for DStream.reduce."""
+        input = [range(1, 5), range(5, 9), range(9, 13)]
+
+        def func(dstream):
+            return dstream.reduce(operator.add)
+        expected = [[reduce(operator.add, x)] for x in input]
+        self._test_func(input, func, expected)
+
+    def test_reduceByKey(self):
+        """Basic operation test for DStream.reduceByKey."""
+        input = [[("a", 1), ("a", 1), ("b", 1), ("b", 1)],
+                 [("", 1), ("", 1), ("", 1), ("", 1)],
+                 [(1, 1), (1, 1), (2, 1), (2, 1), (3, 1)]]
+
+        def func(dstream):
+            return dstream.reduceByKey(operator.add)
+        expected = [[("a", 2), ("b", 2)], [("", 4)], [(1, 2), (2, 2), (3, 1)]]
+        self._test_func(input, func, expected, sort=True)
+
+    def test_mapValues(self):
+        """Basic operation test for DStream.mapValues."""
+        input = [[("a", 2), ("b", 2), ("c", 1), ("d", 1)],
+                 [(0, 4), (1, 1), (2, 2), (3, 3)],
+                 [(1, 1), (2, 1), (3, 1), (4, 1)]]
+
+        def func(dstream):
+            return dstream.mapValues(lambda x: x + 10)
+        expected = [[("a", 12), ("b", 12), ("c", 11), ("d", 11)],
+                    [(0, 14), (1, 11), (2, 12), (3, 13)],
+                    [(1, 11), (2, 11), (3, 11), (4, 11)]]
+        self._test_func(input, func, expected, sort=True)
+
+    def test_flatMapValues(self):
+        """Basic operation test for DStream.flatMapValues."""
+        input = [[("a", 2), ("b", 2), ("c", 1), ("d", 1)],
+                 [(0, 4), (1, 1), (2, 1), (3, 1)],
+                 [(1, 1), (2, 1), (3, 1), (4, 1)]]
+
+        def func(dstream):
+            return dstream.flatMapValues(lambda x: (x, x + 10))
+        expected = [[("a", 2), ("a", 12), ("b", 2), ("b", 12),
+                     ("c", 1), ("c", 11), ("d", 1), ("d", 11)],
+                    [(0, 4), (0, 14), (1, 1), (1, 11), (2, 1), (2, 11), (3, 1), (3, 11)],
+                    [(1, 1), (1, 11), (2, 1), (2, 11), (3, 1), (3, 11), (4, 1), (4, 11)]]
+        self._test_func(input, func, expected)
+
+    def test_glom(self):
+        """Basic operation test for DStream.glom."""
+        input = [range(1, 5), range(5, 9), range(9, 13)]
+        rdds = [self.sc.parallelize(r, 2) for r in input]
+
+        def func(dstream):
+            return dstream.glom()
+        expected = [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]]
+        self._test_func(rdds, func, expected)
+
+    def test_mapPartitions(self):
+        """Basic operation test for DStream.mapPartitions."""
+        input = [range(1, 5), range(5, 9), range(9, 13)]
+        rdds = [self.sc.parallelize(r, 2) for r in input]
+
+        def func(dstream):
+            def f(iterator):
+                yield sum(iterator)
+            return dstream.mapPartitions(f)
+        expected = [[3, 7], [11, 15], [19, 23]]
+        self._test_func(rdds, func, expected)
+
+    def test_countByValue(self):
+        """Basic operation test for DStream.countByValue."""
+        input = [list(range(1, 5)) * 2, list(range(5, 7)) + list(range(5, 9)), ["a", "a", "b", ""]]
+
+        def func(dstream):
+            return dstream.countByValue()
+        expected = [[(1, 2), (2, 2), (3, 2), (4, 2)],
+                    [(5, 2), (6, 2), (7, 1), (8, 1)],
+                    [("a", 2), ("b", 1), ("", 1)]]
+        self._test_func(input, func, expected, sort=True)
+
+    def test_groupByKey(self):
+        """Basic operation test for DStream.groupByKey."""
+        input = [[(1, 1), (2, 1), (3, 1), (4, 1)],
+                 [(1, 1), (1, 1), (1, 1), (2, 1), (2, 1), (3, 1)],
+                 [("a", 1), ("a", 1), ("b", 1), ("", 1), ("", 1), ("", 1)]]
+
+        def func(dstream):
+            return dstream.groupByKey().mapValues(list)
+
+        expected = [[(1, [1]), (2, [1]), (3, [1]), (4, [1])],
+                    [(1, [1, 1, 1]), (2, [1, 1]), (3, [1])],
+                    [("a", [1, 1]), ("b", [1]), ("", [1, 1, 1])]]
+        self._test_func(input, func, expected, sort=True)
+
+    def test_combineByKey(self):
+        """Basic operation test for DStream.combineByKey."""
+        input = [[(1, 1), (2, 1), (3, 1), (4, 1)],
+                 [(1, 1), (1, 1), (1, 1), (2, 1), (2, 1), (3, 1)],
+                 [("a", 1), ("a", 1), ("b", 1), ("", 1), ("", 1), ("", 1)]]
+
+        def func(dstream):
+            def add(a, b):
+                return a + str(b)
+            return dstream.combineByKey(str, add, add)
+        expected = [[(1, "1"), (2, "1"), (3, "1"), (4, "1")],
+                    [(1, "111"), (2, "11"), (3, "1")],
+                    [("a", "11"), ("b", "1"), ("", "111")]]
+        self._test_func(input, func, expected, sort=True)
+
+    def test_repartition(self):
+        input = [range(1, 5), range(5, 9)]
+        rdds = [self.sc.parallelize(r, 2) for r in input]
+
+        def func(dstream):
+            return dstream.repartition(1).glom()
+        expected = [[[1, 2, 3, 4]], [[5, 6, 7, 8]]]
+        self._test_func(rdds, func, expected)
+
+    def test_union(self):
+        input1 = [range(3), range(5), range(6)]
+        input2 = [range(3, 6), range(5, 6)]
+
+        def func(d1, d2):
+            return d1.union(d2)
+
+        expected = [list(range(6)), list(range(6)), list(range(6))]
+        self._test_func(input1, func, expected, input2=input2)
+
+    def test_cogroup(self):
+        input = [[(1, 1), (2, 1), (3, 1)],
+                 [(1, 1), (1, 1), (1, 1), (2, 1)],
+                 [("a", 1), ("a", 1), ("b", 1), ("", 1), ("", 1)]]
+        input2 = [[(1, 2)],
+                  [(4, 1)],
+                  [("a", 1), ("a", 1), ("b", 1), ("", 1), ("", 2)]]
+
+        def func(d1, d2):
+            return d1.cogroup(d2).mapValues(lambda vs: tuple(map(list, vs)))
+
+        expected = [[(1, ([1], [2])), (2, ([1], [])), (3, ([1], []))],
+                    [(1, ([1, 1, 1], [])), (2, ([1], [])), (4, ([], [1]))],
+                    [("a", ([1, 1], [1, 1])), ("b", ([1], [1])), ("", ([1, 1], [1, 2]))]]
+        self._test_func(input, func, expected, sort=True, input2=input2)
+
+    def test_join(self):
+        input = [[('a', 1), ('b', 2)]]
+        input2 = [[('b', 3), ('c', 4)]]
+
+        def func(a, b):
+            return a.join(b)
+
+        expected = [[('b', (2, 3))]]
+        self._test_func(input, func, expected, True, input2)
+
+    def test_left_outer_join(self):
+        input = [[('a', 1), ('b', 2)]]
+        input2 = [[('b', 3), ('c', 4)]]
+
+        def func(a, b):
+            return a.leftOuterJoin(b)
+
+        expected = [[('a', (1, None)), ('b', (2, 3))]]
+        self._test_func(input, func, expected, True, input2)
+
+    def test_right_outer_join(self):
+        input = [[('a', 1), ('b', 2)]]
+        input2 = [[('b', 3), ('c', 4)]]
+
+        def func(a, b):
+            return a.rightOuterJoin(b)
+
+        expected = [[('b', (2, 3)), ('c', (None, 4))]]
+        self._test_func(input, func, expected, True, input2)
+
+    def test_full_outer_join(self):
+        input = [[('a', 1), ('b', 2)]]
+        input2 = [[('b', 3), ('c', 4)]]
+
+        def func(a, b):
+            return a.fullOuterJoin(b)
+
+        expected = [[('a', (1, None)), ('b', (2, 3)), ('c', (None, 4))]]
+        self._test_func(input, func, expected, True, input2)
+
+    def test_update_state_by_key(self):
+
+        def updater(vs, s):
+            if not s:
+                s = []
+            s.extend(vs)
+            return s
+
+        input = [[('k', i)] for i in range(5)]
+
+        def func(dstream):
+            return dstream.updateStateByKey(updater)
+
+        expected = [[0], [0, 1], [0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4]]
+        expected = [[('k', v)] for v in expected]
+        self._test_func(input, func, expected)
+
+    def test_update_state_by_key_initial_rdd(self):
+
+        def updater(vs, s):
+            if not s:
+                s = []
+            s.extend(vs)
+            return s
+
+        initial = [('k', [0, 1])]
+        initial = self.sc.parallelize(initial, 1)
+
+        input = [[('k', i)] for i in range(2, 5)]
+
+        def func(dstream):
+            return dstream.updateStateByKey(updater, initialRDD=initial)
+
+        expected = [[0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4]]
+        expected = [[('k', v)] for v in expected]
+        self._test_func(input, func, expected)
+
+    def test_failed_func(self):
+        # Test failure in
+        # TransformFunction.apply(rdd: Option[RDD[_]], time: Time)
+        input = [self.sc.parallelize([d], 1) for d in range(4)]
+        input_stream = self.ssc.queueStream(input)
+
+        def failed_func(i):
+            raise ValueError("This is a special error")
+
+        input_stream.map(failed_func).pprint()
+        self.ssc.start()
+        try:
+            self.ssc.awaitTerminationOrTimeout(10)
+        except:
+            import traceback
+            failure = traceback.format_exc()
+            self.assertTrue("This is a special error" in failure)
+            return
+
+        self.fail("a failed func should throw an error")
+
+    def test_failed_func2(self):
+        # Test failure in
+        # TransformFunction.apply(rdd: Option[RDD[_]], rdd2: Option[RDD[_]], time: Time)
+        input = [self.sc.parallelize([d], 1) for d in range(4)]
+        input_stream1 = self.ssc.queueStream(input)
+        input_stream2 = self.ssc.queueStream(input)
+
+        def failed_func(rdd1, rdd2):
+            raise ValueError("This is a special error")
+
+        input_stream1.transformWith(failed_func, input_stream2, True).pprint()
+        self.ssc.start()
+        try:
+            self.ssc.awaitTerminationOrTimeout(10)
+        except:
+            import traceback
+            failure = traceback.format_exc()
+            self.assertTrue("This is a special error" in failure)
+            return
+
+        self.fail("a failed func should throw an error")
+
+    def test_failed_func_with_resetting_failure(self):
+        input = [self.sc.parallelize([d], 1) for d in range(4)]
+        input_stream = self.ssc.queueStream(input)
+
+        def failed_func(i):
+            if i == 1:
+                # Make it fail in the second batch
+                raise ValueError("This is a special error")
+            else:
+                return i
+
+        # We should still be able to see the results of the third and fourth batches even if the
+        # second batch fails
+        expected = [[0], [2], [3]]
+        self.assertEqual(expected, self._collect(input_stream.map(failed_func), 3))
+        try:
+            self.ssc.awaitTerminationOrTimeout(10)
+        except:
+            import traceback
+            failure = traceback.format_exc()
+            self.assertTrue("This is a special error" in failure)
+            return
+
+        self.fail("a failed func should throw an error")
+
+
+class StreamingListenerTests(PySparkStreamingTestCase):
+
+    duration = .5
+
+    class BatchInfoCollector(StreamingListener):
+
+        def __init__(self):
+            super(StreamingListenerTests.BatchInfoCollector, self).__init__()
+            self.batchInfosCompleted = []
+            self.batchInfosStarted = []
+            self.batchInfosSubmitted = []
+            self.streamingStartedTime = []
+
+        def onStreamingStarted(self, streamingStarted):
+            self.streamingStartedTime.append(streamingStarted.time)
+
+        def onBatchSubmitted(self, batchSubmitted):
+            self.batchInfosSubmitted.append(batchSubmitted.batchInfo())
+
+        def onBatchStarted(self, batchStarted):
+            self.batchInfosStarted.append(batchStarted.batchInfo())
+
+        def onBatchCompleted(self, batchCompleted):
+            self.batchInfosCompleted.append(batchCompleted.batchInfo())
+
+    def test_batch_info_reports(self):
+        batch_collector = self.BatchInfoCollector()
+        self.ssc.addStreamingListener(batch_collector)
+        input = [[1], [2], [3], [4]]
+
+        def func(dstream):
+            return dstream.map(int)
+        expected = [[1], [2], [3], [4]]
+        self._test_func(input, func, expected)
+
+        batchInfosSubmitted = batch_collector.batchInfosSubmitted
+        batchInfosStarted = batch_collector.batchInfosStarted
+        batchInfosCompleted = batch_collector.batchInfosCompleted
+        streamingStartedTime = batch_collector.streamingStartedTime
+
+        self.wait_for(batchInfosCompleted, 4)
+
+        self.assertEqual(len(streamingStartedTime), 1)
+
+        self.assertGreaterEqual(len(batchInfosSubmitted), 4)
+        for info in batchInfosSubmitted:
+            self.assertGreaterEqual(info.batchTime().milliseconds(), 0)
+            self.assertGreaterEqual(info.submissionTime(), 0)
+
+            for streamId in info.streamIdToInputInfo():
+                streamInputInfo = info.streamIdToInputInfo()[streamId]
+                self.assertGreaterEqual(streamInputInfo.inputStreamId(), 0)
+                self.assertGreaterEqual(streamInputInfo.numRecords, 0)
+                for key in streamInputInfo.metadata():
+                    self.assertIsNotNone(streamInputInfo.metadata()[key])
+                self.assertIsNotNone(streamInputInfo.metadataDescription())
+
+            for outputOpId in info.outputOperationInfos():
+                outputInfo = info.outputOperationInfos()[outputOpId]
+                self.assertGreaterEqual(outputInfo.batchTime().milliseconds(), 0)
+                self.assertGreaterEqual(outputInfo.id(), 0)
+                self.assertIsNotNone(outputInfo.name())
+                self.assertIsNotNone(outputInfo.description())
+                self.assertGreaterEqual(outputInfo.startTime(), -1)
+                self.assertGreaterEqual(outputInfo.endTime(), -1)
+                self.assertIsNone(outputInfo.failureReason())
+
+            self.assertEqual(info.schedulingDelay(), -1)
+            self.assertEqual(info.processingDelay(), -1)
+            self.assertEqual(info.totalDelay(), -1)
+            self.assertEqual(info.numRecords(), 0)
+
+        self.assertGreaterEqual(len(batchInfosStarted), 4)
+        for info in batchInfosStarted:
+            self.assertGreaterEqual(info.batchTime().milliseconds(), 0)
+            self.assertGreaterEqual(info.submissionTime(), 0)
+
+            for streamId in info.streamIdToInputInfo():
+                streamInputInfo = info.streamIdToInputInfo()[streamId]
+                self.assertGreaterEqual(streamInputInfo.inputStreamId(), 0)
+                self.assertGreaterEqual(streamInputInfo.numRecords, 0)
+                for key in streamInputInfo.metadata():
+                    self.assertIsNotNone(streamInputInfo.metadata()[key])
+                self.assertIsNotNone(streamInputInfo.metadataDescription())
+
+            for outputOpId in info.outputOperationInfos():
+                outputInfo = info.outputOperationInfos()[outputOpId]
+                self.assertGreaterEqual(outputInfo.batchTime().milliseconds(), 0)
+                self.assertGreaterEqual(outputInfo.id(), 0)
+                self.assertIsNotNone(outputInfo.name())
+                self.assertIsNotNone(outputInfo.description())
+                self.assertGreaterEqual(outputInfo.startTime(), -1)
+                self.assertGreaterEqual(outputInfo.endTime(), -1)
+                self.assertIsNone(outputInfo.failureReason())
+
+            self.assertGreaterEqual(info.schedulingDelay(), 0)
+            self.assertEqual(info.processingDelay(), -1)
+            self.assertEqual(info.totalDelay(), -1)
+            self.assertEqual(info.numRecords(), 0)
+
+        self.assertGreaterEqual(len(batchInfosCompleted), 4)
+        for info in batchInfosCompleted:
+            self.assertGreaterEqual(info.batchTime().milliseconds(), 0)
+            self.assertGreaterEqual(info.submissionTime(), 0)
+
+            for streamId in info.streamIdToInputInfo():
+                streamInputInfo = info.streamIdToInputInfo()[streamId]
+                self.assertGreaterEqual(streamInputInfo.inputStreamId(), 0)
+                self.assertGreaterEqual(streamInputInfo.numRecords, 0)
+                for key in streamInputInfo.metadata():
+                    self.assertIsNotNone(streamInputInfo.metadata()[key])
+                self.assertIsNotNone(streamInputInfo.metadataDescription())
+
+            for outputOpId in info.outputOperationInfos():
+                outputInfo = info.outputOperationInfos()[outputOpId]
+                self.assertGreaterEqual(outputInfo.batchTime().milliseconds(), 0)
+                self.assertGreaterEqual(outputInfo.id(), 0)
+                self.assertIsNotNone(outputInfo.name())
+                self.assertIsNotNone(outputInfo.description())
+                self.assertGreaterEqual(outputInfo.startTime(), 0)
+                self.assertGreaterEqual(outputInfo.endTime(), 0)
+                self.assertIsNone(outputInfo.failureReason())
+
+            self.assertGreaterEqual(info.schedulingDelay(), 0)
+            self.assertGreaterEqual(info.processingDelay(), 0)
+            self.assertGreaterEqual(info.totalDelay(), 0)
+            self.assertEqual(info.numRecords(), 0)
+
+
+class WindowFunctionTests(PySparkStreamingTestCase):
+
+    timeout = 15
+
+    def test_window(self):
+        input = [range(1), range(2), range(3), range(4), range(5)]
+
+        def func(dstream):
+            return dstream.window(1.5, .5).count()
+
+        expected = [[1], [3], [6], [9], [12], [9], [5]]
+        self._test_func(input, func, expected)
+
+    def test_count_by_window(self):
+        input = [range(1), range(2), range(3), range(4), range(5)]
+
+        def func(dstream):
+            return dstream.countByWindow(1.5, .5)
+
+        expected = [[1], [3], [6], [9], [12], [9], [5]]
+        self._test_func(input, func, expected)
+
+    def test_count_by_window_large(self):
+        input = [range(1), range(2), range(3), range(4), range(5), range(6)]
+
+        def func(dstream):
+            return dstream.countByWindow(2.5, .5)
+
+        expected = [[1], [3], [6], [10], [15], [20], [18], [15], [11], [6]]
+        self._test_func(input, func, expected)
+
+    def test_count_by_value_and_window(self):
+        input = [range(1), range(2), range(3), range(4), range(5), range(6)]
+
+        def func(dstream):
+            return dstream.countByValueAndWindow(2.5, .5)
+
+        expected = [[(0, 1)],
+                    [(0, 2), (1, 1)],
+                    [(0, 3), (1, 2), (2, 1)],
+                    [(0, 4), (1, 3), (2, 2), (3, 1)],
+                    [(0, 5), (1, 4), (2, 3), (3, 2), (4, 1)],
+                    [(0, 5), (1, 5), (2, 4), (3, 3), (4, 2), (5, 1)],
+                    [(0, 4), (1, 4), (2, 4), (3, 3), (4, 2), (5, 1)],
+                    [(0, 3), (1, 3), (2, 3), (3, 3), (4, 2), (5, 1)],
+                    [(0, 2), (1, 2), (2, 2), (3, 2), (4, 2), (5, 1)],
+                    [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1)]]
+        self._test_func(input, func, expected)
+
+    def test_group_by_key_and_window(self):
+        input = [[('a', i)] for i in range(5)]
+
+        def func(dstream):
+            return dstream.groupByKeyAndWindow(1.5, .5).mapValues(list)
+
+        expected = [[('a', [0])], [('a', [0, 1])], [('a', [0, 1, 2])], [('a', [1, 2, 3])],
+                    [('a', [2, 3, 4])], [('a', [3, 4])], [('a', [4])]]
+        self._test_func(input, func, expected)
+
+    def test_reduce_by_invalid_window(self):
+        input1 = [range(3), range(5), range(1), range(6)]
+        d1 = self.ssc.queueStream(input1)
+        self.assertRaises(ValueError, lambda: d1.reduceByKeyAndWindow(None, None, 0.1, 0.1))
+        self.assertRaises(ValueError, lambda: d1.reduceByKeyAndWindow(None, None, 1, 0.1))
+
+    def test_reduce_by_key_and_window_with_none_invFunc(self):
+        input = [range(1), range(2), range(3), range(4), range(5), range(6)]
+
+        def func(dstream):
+            return dstream.map(lambda x: (x, 1))\
+                .reduceByKeyAndWindow(operator.add, None, 5, 1)\
+                .filter(lambda kv: kv[1] > 0).count()
+
+        expected = [[2], [4], [6], [6], [6], [6]]
+        self._test_func(input, func, expected)
+
+
+class StreamingContextTests(PySparkStreamingTestCase):
+
+    duration = 0.1
+    setupCalled = False
+
+    def _add_input_stream(self):
+        inputs = [range(1, x) for x in range(101)]
+        stream = self.ssc.queueStream(inputs)
+        self._collect(stream, 1, block=False)
+
+    def test_stop_only_streaming_context(self):
+        self._add_input_stream()
+        self.ssc.start()
+        self.ssc.stop(False)
+        self.assertEqual(len(self.sc.parallelize(range(5), 5).glom().collect()), 5)
+
+    def test_stop_multiple_times(self):
+        self._add_input_stream()
+        self.ssc.start()
+        self.ssc.stop(False)
+        self.ssc.stop(False)
+
+    def test_queue_stream(self):
+        input = [list(range(i + 1)) for i in range(3)]
+        dstream = self.ssc.queueStream(input)
+        result = self._collect(dstream, 3)
+        self.assertEqual(input, result)
+
+    def test_text_file_stream(self):
+        d = tempfile.mkdtemp()
+        self.ssc = StreamingContext(self.sc, self.duration)
+        dstream2 = self.ssc.textFileStream(d).map(int)
+        result = self._collect(dstream2, 2, block=False)
+        self.ssc.start()
+        for name in ('a', 'b'):
+            time.sleep(1)
+            with open(os.path.join(d, name), "w") as f:
+                f.writelines(["%d\n" % i for i in range(10)])
+        self.wait_for(result, 2)
+        self.assertEqual([list(range(10)), list(range(10))], result)
+
+    def test_binary_records_stream(self):
+        d = tempfile.mkdtemp()
+        self.ssc = StreamingContext(self.sc, self.duration)
+        dstream = self.ssc.binaryRecordsStream(d, 10).map(
+            lambda v: struct.unpack("10b", bytes(v)))
+        result = self._collect(dstream, 2, block=False)
+        self.ssc.start()
+        for name in ('a', 'b'):
+            time.sleep(1)
+            with open(os.path.join(d, name), "wb") as f:
+                f.write(bytearray(range(10)))
+        self.wait_for(result, 2)
+        self.assertEqual([list(range(10)), list(range(10))], [list(v[0]) for v in result])
+
+    def test_union(self):
+        input = [list(range(i + 1)) for i in range(3)]
+        dstream = self.ssc.queueStream(input)
+        dstream2 = self.ssc.queueStream(input)
+        dstream3 = self.ssc.union(dstream, dstream2)
+        result = self._collect(dstream3, 3)
+        expected = [i * 2 for i in input]
+        self.assertEqual(expected, result)
+
+    def test_transform(self):
+        dstream1 = self.ssc.queueStream([[1]])
+        dstream2 = self.ssc.queueStream([[2]])
+        dstream3 = self.ssc.queueStream([[3]])
+
+        def func(rdds):
+            rdd1, rdd2, rdd3 = rdds
+            return rdd2.union(rdd3).union(rdd1)
+
+        dstream = self.ssc.transform([dstream1, dstream2, dstream3], func)
+
+        self.assertEqual([2, 3, 1], self._take(dstream, 3))
+
+    def test_transform_pairrdd(self):
+        # This regression test case is for SPARK-17756.
+        dstream = self.ssc.queueStream(
+            [[1], [2], [3]]).transform(lambda rdd: rdd.cartesian(rdd))
+        self.assertEqual([(1, 1), (2, 2), (3, 3)], self._take(dstream, 3))
+
+    def test_get_active(self):
+        self.assertEqual(StreamingContext.getActive(), None)
+
+        # Verify that getActive() returns the active context
+        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
+        self.ssc.start()
+        self.assertEqual(StreamingContext.getActive(), self.ssc)
+
+        # Verify that getActive() returns None
+        self.ssc.stop(False)
+        self.assertEqual(StreamingContext.getActive(), None)
+
+        # Verify that if the Java context is stopped, then getActive() returns None
+        self.ssc = StreamingContext(self.sc, self.duration)
+        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
+        self.ssc.start()
+        self.assertEqual(StreamingContext.getActive(), self.ssc)
+        self.ssc._jssc.stop(False)
+        self.assertEqual(StreamingContext.getActive(), None)
+
+    def test_get_active_or_create(self):
+        # Test StreamingContext.getActiveOrCreate() without checkpoint data
+        # See CheckpointTests for tests with checkpoint data
+        self.ssc = None
+        self.assertEqual(StreamingContext.getActive(), None)
+
+        def setupFunc():
+            ssc = StreamingContext(self.sc, self.duration)
+            ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
+            self.setupCalled = True
+            return ssc
+
+        # Verify that getActiveOrCreate() (w/o checkpoint) calls setupFunc when no context is active
+        self.setupCalled = False
+        self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
+        self.assertTrue(self.setupCalled)
+
+        # Verify that getActiveOrCreate() returns active context and does not call the setupFunc
+        self.ssc.start()
+        self.setupCalled = False
+        self.assertEqual(StreamingContext.getActiveOrCreate(None, setupFunc), self.ssc)
+        self.assertFalse(self.setupCalled)
+
+        # Verify that getActiveOrCreate() calls setupFunc after active context is stopped
+        self.ssc.stop(False)
+        self.setupCalled = False
+        self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
+        self.assertTrue(self.setupCalled)
+
+        # Verify that if the Java context is stopped, then getActive() returns None
+        self.ssc = StreamingContext(self.sc, self.duration)
+        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
+        self.ssc.start()
+        self.assertEqual(StreamingContext.getActive(), self.ssc)
+        self.ssc._jssc.stop(False)
+        self.setupCalled = False
+        self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
+        self.assertTrue(self.setupCalled)
+
+    def test_await_termination_or_timeout(self):
+        self._add_input_stream()
+        self.ssc.start()
+        self.assertFalse(self.ssc.awaitTerminationOrTimeout(0.001))
+        self.ssc.stop(False)
+        self.assertTrue(self.ssc.awaitTerminationOrTimeout(0.001))
+
+
+class CheckpointTests(unittest.TestCase):
+
+    setupCalled = False
+
+    @staticmethod
+    def tearDownClass():
+        # Clean up in the JVM just in case there have been any issues in the Python API
+        if SparkContext._jvm is not None:
+            jStreamingContextOption = \
+                SparkContext._jvm.org.apache.spark.streaming.StreamingContext.getActive()
+            if jStreamingContextOption.nonEmpty():
+                jStreamingContextOption.get().stop()
+
+    def setUp(self):
+        self.ssc = None
+        self.sc = None
+        self.cpd = None
+
+    def tearDown(self):
+        if self.ssc is not None:
+            self.ssc.stop(True)
+        if self.sc is not None:
+            self.sc.stop()
+        if self.cpd is not None:
+            shutil.rmtree(self.cpd)
+
+    def test_transform_function_serializer_failure(self):
+        inputd = tempfile.mkdtemp()
+        self.cpd = tempfile.mkdtemp("test_transform_function_serializer_failure")
+
+        def setup():
+            conf = SparkConf().set("spark.default.parallelism", 1)
+            sc = SparkContext(conf=conf)
+            ssc = StreamingContext(sc, 0.5)
+
+            # A function that cannot be serialized
+            def process(time, rdd):
+                sc.parallelize(range(1, 10))
+
+            ssc.textFileStream(inputd).foreachRDD(process)
+            return ssc
+
+        self.ssc = StreamingContext.getOrCreate(self.cpd, setup)
+        try:
+            self.ssc.start()
+        except:
+            import traceback
+            failure = traceback.format_exc()
+            self.assertTrue(
+                "It appears that you are attempting to reference SparkContext" in failure)
+            return
+
+        self.fail("using SparkContext in process should fail because it's not Serializable")
+
+    def test_get_or_create_and_get_active_or_create(self):
+        inputd = tempfile.mkdtemp()
+        outputd = tempfile.mkdtemp() + "/"
+
+        def updater(vs, s):
+            return sum(vs, s or 0)
+
+        def setup():
+            conf = SparkConf().set("spark.default.parallelism", 1)
+            sc = SparkContext(conf=conf)
+            ssc = StreamingContext(sc, 2)
+            dstream = ssc.textFileStream(inputd).map(lambda x: (x, 1))
+            wc = dstream.updateStateByKey(updater)
+            wc.map(lambda x: "%s,%d" % x).saveAsTextFiles(outputd + "test")
+            wc.checkpoint(2)
+            self.setupCalled = True
+            return ssc
+
+        # Verify that getOrCreate() calls setup() in absence of checkpoint files
+        self.cpd = tempfile.mkdtemp("test_streaming_cps")
+        self.setupCalled = False
+        self.ssc = StreamingContext.getOrCreate(self.cpd, setup)
+        self.assertTrue(self.setupCalled)
+
+        self.ssc.start()
+
+        def check_output(n):
+            while not os.listdir(outputd):
+                if self.ssc.awaitTerminationOrTimeout(0.5):
+                    raise Exception("ssc stopped")
+            time.sleep(1)  # make sure mtime is larger than the previous one
+            with open(os.path.join(inputd, str(n)), 'w') as f:
+                f.writelines(["%d\n" % i for i in range(10)])
+
+            while True:
+                if self.ssc.awaitTerminationOrTimeout(0.5):
+                    raise Exception("ssc stopped")
+                p = os.path.join(outputd, max(os.listdir(outputd)))
+                if '_SUCCESS' not in os.listdir(p):
+                    # not finished
+                    continue
+                ordd = self.ssc.sparkContext.textFile(p).map(lambda line: line.split(","))
+                d = ordd.values().map(int).collect()
+                if not d:
+                    continue
+                self.assertEqual(10, len(d))
+                s = set(d)
+                self.assertEqual(1, len(s))
+                m = s.pop()
+                if n > m:
+                    continue
+                self.assertEqual(n, m)
+                break
+
+        check_output(1)
+        check_output(2)
+
+        # Verify the getOrCreate() recovers from checkpoint files
+        self.ssc.stop(True, True)
+        time.sleep(1)
+        self.setupCalled = False
+        self.ssc = StreamingContext.getOrCreate(self.cpd, setup)
+        self.assertFalse(self.setupCalled)
+        self.ssc.start()
+        check_output(3)
+
+        # Verify that getOrCreate() uses existing SparkContext
+        self.ssc.stop(True, True)
+        time.sleep(1)
+        self.sc = SparkContext(conf=SparkConf())
+        self.setupCalled = False
+        self.ssc = StreamingContext.getOrCreate(self.cpd, setup)
+        self.assertFalse(self.setupCalled)
+        self.assertTrue(self.ssc.sparkContext == self.sc)
+
+        # Verify the getActiveOrCreate() recovers from checkpoint files
+        self.ssc.stop(True, True)
+        time.sleep(1)
+        self.setupCalled = False
+        self.ssc = StreamingContext.getActiveOrCreate(self.cpd, setup)
+        self.assertFalse(self.setupCalled)
+        self.ssc.start()
+        check_output(4)
+
+        # Verify that getActiveOrCreate() returns active context
+        self.setupCalled = False
+        self.assertEqual(StreamingContext.getActiveOrCreate(self.cpd, setup), self.ssc)
+        self.assertFalse(self.setupCalled)
+
+        # Verify that getActiveOrCreate() uses existing SparkContext
+        self.ssc.stop(True, True)
+        time.sleep(1)
+        self.sc = SparkContext(conf=SparkConf())
+        self.setupCalled = False
+        self.ssc = StreamingContext.getActiveOrCreate(self.cpd, setup)
+        self.assertFalse(self.setupCalled)
+        self.assertTrue(self.ssc.sparkContext == self.sc)
+
+        # Verify that getActiveOrCreate() calls setup() in absence of checkpoint files
+        self.ssc.stop(True, True)
+        shutil.rmtree(self.cpd)  # delete checkpoint directory
+        time.sleep(1)
+        self.setupCalled = False
+        self.ssc = StreamingContext.getActiveOrCreate(self.cpd, setup)
+        self.assertTrue(self.setupCalled)
+
+        # Stop everything
+        self.ssc.stop(True, True)
+
+
+class KafkaStreamTests(PySparkStreamingTestCase):
+    timeout = 20  # seconds
+    duration = 1
+
+    def setUp(self):
+        super(KafkaStreamTests, self).setUp()
+        self._kafkaTestUtils = self.ssc._jvm.org.apache.spark.streaming.kafka.KafkaTestUtils()
+        self._kafkaTestUtils.setup()
+
+    def tearDown(self):
+        super(KafkaStreamTests, self).tearDown()
+
+        if self._kafkaTestUtils is not None:
+            self._kafkaTestUtils.teardown()
+            self._kafkaTestUtils = None
+
+    def _randomTopic(self):
+        return "topic-%d" % random.randint(0, 10000)
+
+    def _validateStreamResult(self, sendData, stream):
+        result = {}
+        for i in chain.from_iterable(self._collect(stream.map(lambda x: x[1]),
+                                                   sum(sendData.values()))):
+            result[i] = result.get(i, 0) + 1
+
+        self.assertEqual(sendData, result)
+
+    def _validateRddResult(self, sendData, rdd):
+        result = {}
+        for i in rdd.map(lambda x: x[1]).collect():
+            result[i] = result.get(i, 0) + 1
+        self.assertEqual(sendData, result)
+
+    def test_kafka_stream(self):
+        """Test the Python Kafka stream API."""
+        topic = self._randomTopic()
+        sendData = {"a": 3, "b": 5, "c": 10}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+
+        stream = KafkaUtils.createStream(self.ssc, self._kafkaTestUtils.zkAddress(),
+                                         "test-streaming-consumer", {topic: 1},
+                                         {"auto.offset.reset": "smallest"})
+        self._validateStreamResult(sendData, stream)
+
+    def test_kafka_direct_stream(self):
+        """Test the Python direct Kafka stream API."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2, "c": 3}
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
+                       "auto.offset.reset": "smallest"}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+
+        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
+        self._validateStreamResult(sendData, stream)
+
+    def test_kafka_direct_stream_from_offset(self):
+        """Test the Python direct Kafka stream API with start offset specified."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2, "c": 3}
+        fromOffsets = {TopicAndPartition(topic, 0): long(0)}
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+
+        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams, fromOffsets)
+        self._validateStreamResult(sendData, stream)
+
+    def test_kafka_rdd(self):
+        """Test the Python direct Kafka RDD API."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2}
+        offsetRanges = [OffsetRange(topic, 0, long(0), long(sum(sendData.values())))]
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+        rdd = KafkaUtils.createRDD(self.sc, kafkaParams, offsetRanges)
+        self._validateRddResult(sendData, rdd)
+
+    def test_kafka_rdd_with_leaders(self):
+        """Test the Python direct Kafka RDD API with leaders."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2, "c": 3}
+        offsetRanges = [OffsetRange(topic, 0, long(0), long(sum(sendData.values())))]
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}
+        address = self._kafkaTestUtils.brokerAddress().split(":")
+        leaders = {TopicAndPartition(topic, 0): Broker(address[0], int(address[1]))}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+        rdd = KafkaUtils.createRDD(self.sc, kafkaParams, offsetRanges, leaders)
+        self._validateRddResult(sendData, rdd)
+
+    def test_kafka_rdd_get_offsetRanges(self):
+        """Test Python direct Kafka RDD get OffsetRanges."""
+        topic = self._randomTopic()
+        sendData = {"a": 3, "b": 4, "c": 5}
+        offsetRanges = [OffsetRange(topic, 0, long(0), long(sum(sendData.values())))]
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+        rdd = KafkaUtils.createRDD(self.sc, kafkaParams, offsetRanges)
+        self.assertEqual(offsetRanges, rdd.offsetRanges())
+
+    def test_kafka_direct_stream_foreach_get_offsetRanges(self):
+        """Test the Python direct Kafka stream foreachRDD get offsetRanges."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2, "c": 3}
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
+                       "auto.offset.reset": "smallest"}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+
+        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
+
+        offsetRanges = []
+
+        def getOffsetRanges(_, rdd):
+            for o in rdd.offsetRanges():
+                offsetRanges.append(o)
+
+        stream.foreachRDD(getOffsetRanges)
+        self.ssc.start()
+        self.wait_for(offsetRanges, 1)
+
+        self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
+
+    def test_kafka_direct_stream_transform_get_offsetRanges(self):
+        """Test the Python direct Kafka stream transform get offsetRanges."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2, "c": 3}
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
+                       "auto.offset.reset": "smallest"}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+
+        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
+
+        offsetRanges = []
+
+        def transformWithOffsetRanges(rdd):
+            for o in rdd.offsetRanges():
+                offsetRanges.append(o)
+            return rdd
+
+        # Test that it is OK to mix KafkaTransformedDStream and TransformedDStream together;
+        # only the TransformedDStreams can be folded together.
+        stream.transform(transformWithOffsetRanges).map(lambda kv: kv[1]).count().pprint()
+        self.ssc.start()
+        self.wait_for(offsetRanges, 1)
+
+        self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
+
+    def test_topic_and_partition_equality(self):
+        topic_and_partition_a = TopicAndPartition("foo", 0)
+        topic_and_partition_b = TopicAndPartition("foo", 0)
+        topic_and_partition_c = TopicAndPartition("bar", 0)
+        topic_and_partition_d = TopicAndPartition("foo", 1)
+
+        self.assertEqual(topic_and_partition_a, topic_and_partition_b)
+        self.assertNotEqual(topic_and_partition_a, topic_and_partition_c)
+        self.assertNotEqual(topic_and_partition_a, topic_and_partition_d)
+
+    def test_kafka_direct_stream_transform_with_checkpoint(self):
+        """Test that the Python direct Kafka stream transform is correctly recovered from a checkpoint."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2, "c": 3}
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
+                       "auto.offset.reset": "smallest"}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+
+        offsetRanges = []
+
+        def transformWithOffsetRanges(rdd):
+            for o in rdd.offsetRanges():
+                offsetRanges.append(o)
+            return rdd
+
+        self.ssc.stop(False)
+        self.ssc = None
+        tmpdir = "checkpoint-test-%d" % random.randint(0, 10000)
+
+        def setup():
+            ssc = StreamingContext(self.sc, 0.5)
+            ssc.checkpoint(tmpdir)
+            stream = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams)
+            stream.transform(transformWithOffsetRanges).count().pprint()
+            return ssc
+
+        try:
+            ssc1 = StreamingContext.getOrCreate(tmpdir, setup)
+            ssc1.start()
+            self.wait_for(offsetRanges, 1)
+            self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
+
+            # To make sure some checkpoint is written
+            time.sleep(3)
+            ssc1.stop(False)
+            ssc1 = None
+
+            # Restart again to make sure the checkpoint is recovered correctly
+            ssc2 = StreamingContext.getOrCreate(tmpdir, setup)
+            ssc2.start()
+            ssc2.awaitTermination(3)
+            ssc2.stop(stopSparkContext=False, stopGraceFully=True)
+            ssc2 = None
+        finally:
+            shutil.rmtree(tmpdir)
+
+    def test_kafka_rdd_message_handler(self):
+        """Test Python direct Kafka RDD MessageHandler."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 1, "c": 2}
+        offsetRanges = [OffsetRange(topic, 0, long(0), long(sum(sendData.values())))]
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}
+
+        def getKeyAndDoubleMessage(m):
+            return m and (m.key, m.message * 2)
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+        rdd = KafkaUtils.createRDD(self.sc, kafkaParams, offsetRanges,
+                                   messageHandler=getKeyAndDoubleMessage)
+        self._validateRddResult({"aa": 1, "bb": 1, "cc": 2}, rdd)
+
+    def test_kafka_direct_stream_message_handler(self):
+        """Test the Python direct Kafka stream MessageHandler."""
+        topic = self._randomTopic()
+        sendData = {"a": 1, "b": 2, "c": 3}
+        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
+                       "auto.offset.reset": "smallest"}
+
+        self._kafkaTestUtils.createTopic(topic)
+        self._kafkaTestUtils.sendMessages(topic, sendData)
+
+        def getKeyAndDoubleMessage(m):
+            return m and (m.key, m.message * 2)
+
+        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams,
+                                               messageHandler=getKeyAndDoubleMessage)
+        self._validateStreamResult({"aa": 1, "bb": 2, "cc": 3}, stream)
+
+
+class FlumeStreamTests(PySparkStreamingTestCase):
+    timeout = 20  # seconds
+    duration = 1
+
+    def setUp(self):
+        super(FlumeStreamTests, self).setUp()
+        self._utils = self.ssc._jvm.org.apache.spark.streaming.flume.FlumeTestUtils()
+
+    def tearDown(self):
+        if self._utils is not None:
+            self._utils.close()
+            self._utils = None
+
+        super(FlumeStreamTests, self).tearDown()
+
+    def _startContext(self, n, compressed):
+        # Start the StreamingContext and also collect the result
+        dstream = FlumeUtils.createStream(self.ssc, "localhost", self._utils.getTestPort(),
+                                          enableDecompression=compressed)
+        result = []
+
+        def get_output(_, rdd):
+            for event in rdd.collect():
+                if len(result) < n:
+                    result.append(event)
+        dstream.foreachRDD(get_output)
+        self.ssc.start()
+        return result
+
+    def _validateResult(self, input, result):
+        # Validate both the header and the body
+        header = {"test": "header"}
+        self.assertEqual(len(input), len(result))
+        for i in range(0, len(input)):
+            self.assertEqual(header, result[i][0])
+            self.assertEqual(input[i], result[i][1])
+
+    def _writeInput(self, input, compressed):
+        # Try to write input to the receiver until success or timeout
+        start_time = time.time()
+        while True:
+            try:
+                self._utils.writeInput(input, compressed)
+                break
+            except:
+                if time.time() - start_time < self.timeout:
+                    time.sleep(0.01)
+                else:
+                    raise
+
+    def test_flume_stream(self):
+        input = [str(i) for i in range(1, 101)]
+        result = self._startContext(len(input), False)
+        self._writeInput(input, False)
+        self.wait_for(result, len(input))
+        self._validateResult(input, result)
+
+    def test_compressed_flume_stream(self):
+        input = [str(i) for i in range(1, 101)]
+        result = self._startContext(len(input), True)
+        self._writeInput(input, True)
+        self.wait_for(result, len(input))
+        self._validateResult(input, result)
+
+
+class FlumePollingStreamTests(PySparkStreamingTestCase):
+    timeout = 20  # seconds
+    duration = 1
+    maxAttempts = 5
+
+    def setUp(self):
+        self._utils = self.sc._jvm.org.apache.spark.streaming.flume.PollingFlumeTestUtils()
+
+    def tearDown(self):
+        if self._utils is not None:
+            self._utils.close()
+            self._utils = None
+
+    def _writeAndVerify(self, ports):
+        # Set up the streaming context and input streams
+        ssc = StreamingContext(self.sc, self.duration)
+        try:
+            addresses = [("localhost", port) for port in ports]
+            dstream = FlumeUtils.createPollingStream(
+                ssc,
+                addresses,
+                maxBatchSize=self._utils.eventsPerBatch(),
+                parallelism=5)
+            outputBuffer = []
+
+            def get_output(_, rdd):
+                for e in rdd.collect():
+                    outputBuffer.append(e)
+
+            dstream.foreachRDD(get_output)
+            ssc.start()
+            self._utils.sendDataAndEnsureAllDataHasBeenReceived()
+
+            self.wait_for(outputBuffer, self._utils.getTotalEvents())
+            outputHeaders = [event[0] for event in outputBuffer]
+            outputBodies = [event[1] for event in outputBuffer]
+            self._utils.assertOutput(outputHeaders, outputBodies)
+        finally:
+            ssc.stop(False)
+
+    def _testMultipleTimes(self, f):
+        attempt = 0
+        while True:
+            try:
+                f()
+                break
+            except:
+                attempt += 1
+                if attempt >= self.maxAttempts:
+                    raise
+                else:
+                    import traceback
+                    traceback.print_exc()
+
+    def _testFlumePolling(self):
+        try:
+            port = self._utils.startSingleSink()
+            self._writeAndVerify([port])
+            self._utils.assertChannelsAreEmpty()
+        finally:
+            self._utils.close()
+
+    def _testFlumePollingMultipleHosts(self):
+        try:
+            port = self._utils.startSingleSink()
+            self._writeAndVerify([port])
+            self._utils.assertChannelsAreEmpty()
+        finally:
+            self._utils.close()
+
+    def test_flume_polling(self):
+        self._testMultipleTimes(self._testFlumePolling)
+
+    def test_flume_polling_multiple_hosts(self):
+        self._testMultipleTimes(self._testFlumePollingMultipleHosts)
+
+
+class KinesisStreamTests(PySparkStreamingTestCase):
+
+    def test_kinesis_stream_api(self):
+        # Don't start the StreamingContext because we cannot test it in Jenkins
+        kinesisStream1 = KinesisUtils.createStream(
+            self.ssc, "myAppNam", "mySparkStream",
+            "https://kinesis.us-west-2.amazonaws.com", "us-west-2",
+            InitialPositionInStream.LATEST, 2, StorageLevel.MEMORY_AND_DISK_2)
+        kinesisStream2 = KinesisUtils.createStream(
+            self.ssc, "myAppNam", "mySparkStream",
+            "https://kinesis.us-west-2.amazonaws.com", "us-west-2",
+            InitialPositionInStream.LATEST, 2, StorageLevel.MEMORY_AND_DISK_2,
+            "awsAccessKey", "awsSecretKey")
+
+    def test_kinesis_stream(self):
+        if not are_kinesis_tests_enabled:
+            sys.stderr.write(
+                "Skipped test_kinesis_stream (enable by setting environment variable %s=1"
+                % kinesis_test_environ_var)
+            return
+
+        import random
+        kinesisAppName = ("KinesisStreamTests-%d" % abs(random.randint(0, 10000000)))
+        kinesisTestUtils = self.ssc._jvm.org.apache.spark.streaming.kinesis.KinesisTestUtils(2)
+        try:
+            kinesisTestUtils.createStream()
+            aWSCredentials = kinesisTestUtils.getAWSCredentials()
+            stream = KinesisUtils.createStream(
+                self.ssc, kinesisAppName, kinesisTestUtils.streamName(),
+                kinesisTestUtils.endpointUrl(), kinesisTestUtils.regionName(),
+                InitialPositionInStream.LATEST, 10, StorageLevel.MEMORY_ONLY,
+                aWSCredentials.getAWSAccessKeyId(), aWSCredentials.getAWSSecretKey())
+
+            outputBuffer = []
+
+            def get_output(_, rdd):
+                for e in rdd.collect():
+                    outputBuffer.append(e)
+
+            stream.foreachRDD(get_output)
+            self.ssc.start()
+
+            testData = [i for i in range(1, 11)]
+            expectedOutput = set([str(i) for i in testData])
+            start_time = time.time()
+            while time.time() - start_time < 120:
+                kinesisTestUtils.pushData(testData)
+                if expectedOutput == set(outputBuffer):
+                    break
+                time.sleep(10)
+            self.assertEqual(expectedOutput, set(outputBuffer))
+        except:
+            import traceback
+            traceback.print_exc()
+            raise
+        finally:
+            self.ssc.stop(False)
+            kinesisTestUtils.deleteStream()
+            kinesisTestUtils.deleteDynamoDBTable(kinesisAppName)
+
+
+# Search for a jar in the project directory using the given name_prefix. Both the sbt and
+# maven build output locations are checked because the artifact jars end up in different
+# directories.
+def search_jar(dir, name_prefix):
+    # We should ignore the following jars
+    ignored_jar_suffixes = ("javadoc.jar", "sources.jar", "test-sources.jar", "tests.jar")
+    jars = (glob.glob(os.path.join(dir, "target/scala-*/" + name_prefix + "-*.jar")) +  # sbt build
+            glob.glob(os.path.join(dir, "target/" + name_prefix + "_*.jar")))  # maven build
+    return [jar for jar in jars if not jar.endswith(ignored_jar_suffixes)]
+
+
+def search_kafka_assembly_jar():
+    SPARK_HOME = os.environ["SPARK_HOME"]
+    kafka_assembly_dir = os.path.join(SPARK_HOME, "external/kafka-0-8-assembly")
+    jars = search_jar(kafka_assembly_dir, "spark-streaming-kafka-0-8-assembly")
+    if not jars:
+        raise Exception(
+            ("Failed to find Spark Streaming kafka assembly jar in %s. " % kafka_assembly_dir) +
+            "You need to build Spark with "
+            "'build/sbt -Pkafka-0-8 assembly/package streaming-kafka-0-8-assembly/assembly' or "
+            "'build/mvn -DskipTests -Pkafka-0-8 package' before running this test.")
+    elif len(jars) > 1:
+        raise Exception(("Found multiple Spark Streaming Kafka assembly JARs: %s; please "
+                         "remove all but one") % (", ".join(jars)))
+    else:
+        return jars[0]
+
+
+def search_flume_assembly_jar():
+    SPARK_HOME = os.environ["SPARK_HOME"]
+    flume_assembly_dir = os.path.join(SPARK_HOME, "external/flume-assembly")
+    jars = search_jar(flume_assembly_dir, "spark-streaming-flume-assembly")
+    if not jars:
+        raise Exception(
+            ("Failed to find Spark Streaming Flume assembly jar in %s. " % flume_assembly_dir) +
+            "You need to build Spark with "
+            "'build/sbt -Pflume assembly/package streaming-flume-assembly/assembly' or "
+            "'build/mvn -DskipTests -Pflume package' before running this test.")
+    elif len(jars) > 1:
+        raise Exception(("Found multiple Spark Streaming Flume assembly JARs: %s; please "
+                        "remove all but one") % (", ".join(jars)))
+    else:
+        return jars[0]
+
+
+def _kinesis_asl_assembly_dir():
+    SPARK_HOME = os.environ["SPARK_HOME"]
+    return os.path.join(SPARK_HOME, "external/kinesis-asl-assembly")
+
+
+def search_kinesis_asl_assembly_jar():
+    jars = search_jar(_kinesis_asl_assembly_dir(), "spark-streaming-kinesis-asl-assembly")
+    if not jars:
+        return None
+    elif len(jars) > 1:
+        raise Exception(("Found multiple Spark Streaming Kinesis ASL assembly JARs: %s; please "
+                         "remove all but one") % (", ".join(jars)))
+    else:
+        return jars[0]
+
+
+# Must be same as the variable and condition defined in modules.py
+flume_test_environ_var = "ENABLE_FLUME_TESTS"
+are_flume_tests_enabled = os.environ.get(flume_test_environ_var) == '1'
+# Must be same as the variable and condition defined in modules.py
+kafka_test_environ_var = "ENABLE_KAFKA_0_8_TESTS"
+are_kafka_tests_enabled = os.environ.get(kafka_test_environ_var) == '1'
+# Must be same as the variable and condition defined in KinesisTestUtils.scala and modules.py
+kinesis_test_environ_var = "ENABLE_KINESIS_TESTS"
+are_kinesis_tests_enabled = os.environ.get(kinesis_test_environ_var) == '1'
+
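+# Illustrative note (not part of the harness itself): assuming SPARK_HOME points at a built
+# distribution and pyspark is importable, the optional suites above are enabled through the
+# environment variables defined here, e.g.
+#   ENABLE_KAFKA_0_8_TESTS=1 ENABLE_FLUME_TESTS=1 python python/pyspark/streaming/tests.py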
+if __name__ == "__main__":
+    from pyspark.streaming.tests import *
+    kafka_assembly_jar = search_kafka_assembly_jar()
+    flume_assembly_jar = search_flume_assembly_jar()
+    kinesis_asl_assembly_jar = search_kinesis_asl_assembly_jar()
+
+    if kinesis_asl_assembly_jar is None:
+        kinesis_jar_present = False
+        jars = "%s,%s" % (kafka_assembly_jar, flume_assembly_jar)
+    else:
+        kinesis_jar_present = True
+        jars = "%s,%s,%s" % (kafka_assembly_jar, flume_assembly_jar, kinesis_asl_assembly_jar)
+
+    existing_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
+    jars_args = "--jars %s" % jars
+    os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join([jars_args, existing_args])
+    testcases = [BasicOperationTests, WindowFunctionTests, StreamingContextTests, CheckpointTests,
+                 StreamingListenerTests]
+
+    if are_flume_tests_enabled:
+        testcases.append(FlumeStreamTests)
+        testcases.append(FlumePollingStreamTests)
+    else:
+        sys.stderr.write(
+            "Skipped test_flume_stream (enable by setting environment variable %s=1"
+            % flume_test_environ_var)
+
+    if are_kafka_tests_enabled:
+        testcases.append(KafkaStreamTests)
+    else:
+        sys.stderr.write(
+            "Skipped test_kafka_stream (enable by setting environment variable %s=1"
+            % kafka_test_environ_var)
+
+    if kinesis_jar_present:
+        testcases.append(KinesisStreamTests)
+    elif not are_kinesis_tests_enabled:
+        sys.stderr.write("Skipping all Kinesis Python tests as the optional Kinesis project was "
+                         "not compiled into a JAR. To run these tests, "
+                         "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/package "
+                         "streaming-kinesis-asl-assembly/assembly' or "
+                         "'build/mvn -Pkinesis-asl package' before running this test.")
+    else:
+        raise Exception(
+            ("Failed to find Spark Streaming Kinesis assembly jar in %s. "
+             % _kinesis_asl_assembly_dir()) +
+            "You need to build Spark with 'build/sbt -Pkinesis-asl "
+            "assembly/package streaming-kinesis-asl-assembly/assembly'"
+            "or 'build/mvn -Pkinesis-asl package' before running this test.")
+
+    sys.stderr.write("Running tests: %s \n" % (str(testcases)))
+    failed = False
+    for testcase in testcases:
+        sys.stderr.write("[Running %s]\n" % (testcase))
+        tests = unittest.TestLoader().loadTestsFromTestCase(testcase)
+        if xmlrunner:
+            result = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2).run(tests)
+            if not result.wasSuccessful():
+                failed = True
+        else:
+            result = unittest.TextTestRunner(verbosity=2).run(tests)
+            if not result.wasSuccessful():
+                failed = True
+    sys.exit(failed)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/util.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4b9f97feb7cac29f16733a515599889252a6f6f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/streaming/util.py
@@ -0,0 +1,160 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import time
+from datetime import datetime
+import traceback
+import sys
+
+from py4j.java_gateway import is_instance_of
+
+from pyspark import SparkContext, RDD
+
+
+class TransformFunction(object):
+    """
+    This class wraps a function RDD[X] -> RDD[Y] that was passed to
+    DStream.transform(), allowing it to be called from Java via Py4J's
+    callback server.
+
+    Java calls this function with a sequence of JavaRDDs and this function
+    returns a single JavaRDD pointer back to Java.
+    """
+    _emptyRDD = None
+
+    def __init__(self, ctx, func, *deserializers):
+        self.ctx = ctx
+        self.func = func
+        self.deserializers = deserializers
+        self.rdd_wrap_func = lambda jrdd, ctx, ser: RDD(jrdd, ctx, ser)
+        self.failure = None
+
+    def rdd_wrapper(self, func):
+        self.rdd_wrap_func = func
+        return self
+
+    def call(self, milliseconds, jrdds):
+        # Clear the failure
+        self.failure = None
+        try:
+            if self.ctx is None:
+                self.ctx = SparkContext._active_spark_context
+            if not self.ctx or not self.ctx._jsc:
+                # stopped
+                return
+
+            # extend deserializers with the first one
+            sers = self.deserializers
+            if len(sers) < len(jrdds):
+                sers += (sers[0],) * (len(jrdds) - len(sers))
+
+            rdds = [self.rdd_wrap_func(jrdd, self.ctx, ser) if jrdd else None
+                    for jrdd, ser in zip(jrdds, sers)]
+            t = datetime.fromtimestamp(milliseconds / 1000.0)
+            r = self.func(t, *rdds)
+            if r:
+                # Work around by wrapping the result with `map` so that `_jrdd` is a `JavaRDD`.
+                # org.apache.spark.streaming.api.python.PythonTransformFunction requires a
+                # `JavaRDD` return value, but some APIs (for example, `zip`) produce a
+                # `JavaPairRDD`. See SPARK-17756.
+                if is_instance_of(self.ctx._gateway, r._jrdd, "org.apache.spark.api.java.JavaRDD"):
+                    return r._jrdd
+                else:
+                    return r.map(lambda x: x)._jrdd
+        except:
+            self.failure = traceback.format_exc()
+
+    def getLastFailure(self):
+        return self.failure
+
+    def __repr__(self):
+        return "TransformFunction(%s)" % self.func
+
+    class Java:
+        implements = ['org.apache.spark.streaming.api.python.PythonTransformFunction']
+
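+# Illustrative sketch of how a TransformFunction is typically wired up (this mirrors what
+# DStream.transform() does; `sc`, `user_func` and `deserializer` are assumed to exist and
+# are not defined in this module):
+#
+#     wrapped = TransformFunction(sc, user_func, deserializer)
+#     # The JVM later invokes wrapped.call(batch_time_ms, [jrdd]) through the Py4J
+#     # callback server and receives a JavaRDD pointer back.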
+
+class TransformFunctionSerializer(object):
+    """
+    This class implements a serializer for PythonTransformFunction Java
+    objects.
+
+    This is necessary because the Java PythonTransformFunction objects are
+    actually Py4J references to Python objects and thus are not directly
+    serializable. When Java needs to serialize a PythonTransformFunction,
+    it uses this class to invoke Python, which returns the serialized function
+    as a byte array.
+    """
+    def __init__(self, ctx, serializer, gateway=None):
+        self.ctx = ctx
+        self.serializer = serializer
+        self.gateway = gateway or self.ctx._gateway
+        self.gateway.jvm.PythonDStream.registerSerializer(self)
+        self.failure = None
+
+    def dumps(self, id):
+        # Clear the failure
+        self.failure = None
+        try:
+            func = self.gateway.gateway_property.pool[id]
+            return bytearray(self.serializer.dumps((
+                func.func, func.rdd_wrap_func, func.deserializers)))
+        except:
+            self.failure = traceback.format_exc()
+
+    def loads(self, data):
+        # Clear the failure
+        self.failure = None
+        try:
+            f, wrap_func, deserializers = self.serializer.loads(bytes(data))
+            return TransformFunction(self.ctx, f, *deserializers).rdd_wrapper(wrap_func)
+        except:
+            self.failure = traceback.format_exc()
+
+    def getLastFailure(self):
+        return self.failure
+
+    def __repr__(self):
+        return "TransformFunctionSerializer(%s)" % self.serializer
+
+    class Java:
+        implements = ['org.apache.spark.streaming.api.python.PythonTransformFunctionSerializer']
+
+
+def rddToFileName(prefix, suffix, timestamp):
+    """
+    Return a file name string of the form prefix-timestamp(.suffix)
+
+    >>> rddToFileName("spark", None, 12345678910)
+    'spark-12345678910'
+    >>> rddToFileName("spark", "tmp", 12345678910)
+    'spark-12345678910.tmp'
+    """
+    if isinstance(timestamp, datetime):
+        seconds = time.mktime(timestamp.timetuple())
+        timestamp = int(seconds * 1000) + timestamp.microsecond // 1000
+    if suffix is None:
+        return prefix + "-" + str(timestamp)
+    else:
+        return prefix + "-" + str(timestamp) + "." + suffix
+
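+# Note (illustrative): a datetime timestamp is first converted to epoch milliseconds using
+# the local timezone (time.mktime), so rddToFileName("spark", "tmp", some_datetime) yields
+# 'spark-<epoch milliseconds>.tmp'; the integer timestamps in the doctest above are the
+# common case.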
+
+if __name__ == "__main__":
+    import doctest
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/taskcontext.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/taskcontext.py
new file mode 100644
index 0000000000000000000000000000000000000000..b61643eb0a16efdec8992ed654e321e7d1b8cc23
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/taskcontext.py
@@ -0,0 +1,225 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import socket
+
+from pyspark.java_gateway import local_connect_and_auth
+from pyspark.serializers import write_int, UTF8Deserializer
+
+
+class TaskContext(object):
+
+    """
+    .. note:: Experimental
+
+    Contextual information about a task which can be read or mutated during
+    execution. To access the TaskContext for a running task, use:
+    L{TaskContext.get()}.
+    """
+
+    _taskContext = None
+
+    _attemptNumber = None
+    _partitionId = None
+    _stageId = None
+    _taskAttemptId = None
+    _localProperties = None
+
+    def __new__(cls):
+        """Even if users construct TaskContext instead of using get, give them the singleton."""
+        taskContext = cls._taskContext
+        if taskContext is not None:
+            return taskContext
+        cls._taskContext = taskContext = object.__new__(cls)
+        return taskContext
+
+    def __init__(self):
+        """Construct a TaskContext, use get instead"""
+        pass
+
+    @classmethod
+    def _getOrCreate(cls):
+        """Internal function to get or create global TaskContext."""
+        if cls._taskContext is None:
+            cls._taskContext = TaskContext()
+        return cls._taskContext
+
+    @classmethod
+    def get(cls):
+        """
+        Return the currently active TaskContext. This can be called inside of
+        user functions to access contextual information about running tasks.
+
+        .. note:: Must be called on the worker, not the driver. Returns None if not initialized.
+        """
+        return cls._taskContext
+
+    def stageId(self):
+        """The ID of the stage that this task belong to."""
+        return self._stageId
+
+    def partitionId(self):
+        """
+        The ID of the RDD partition that is computed by this task.
+        """
+        return self._partitionId
+
+    def attemptNumber(self):
+        """"
+        How many times this task has been attempted.  The first task attempt will be assigned
+        attemptNumber = 0, and subsequent attempts will have increasing attempt numbers.
+        """
+        return self._attemptNumber
+
+    def taskAttemptId(self):
+        """
+        An ID that is unique to this task attempt (within the same SparkContext, no two task
+        attempts will share the same attempt ID).  This is roughly equivalent to Hadoop's
+        TaskAttemptID.
+        """
+        return self._taskAttemptId
+
+    def getLocalProperty(self, key):
+        """
+        Get a local property set upstream in the driver, or None if it is missing.
+        """
+        return self._localProperties.get(key, None)
+
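+# A minimal worker-side usage sketch (illustrative only; `sc` is assumed to be an active
+# SparkContext and is not defined in this module):
+#
+#     def tag_with_partition(x):
+#         ctx = TaskContext.get()   # None on the driver, populated inside a running task
+#         return (ctx.partitionId(), ctx.attemptNumber(), x)
+#
+#     sc.parallelize(range(4), 2).map(tag_with_partition).collect()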
+
+BARRIER_FUNCTION = 1
+
+
+def _load_from_socket(port, auth_secret):
+    """
+    Load data from a given socket. This is a blocking call that only returns once the
+    socket connection has been closed.
+    """
+    (sockfile, sock) = local_connect_and_auth(port, auth_secret)
+    # The barrier() call may block forever, so no timeout
+    sock.settimeout(None)
+    # Make a barrier() function call.
+    write_int(BARRIER_FUNCTION, sockfile)
+    sockfile.flush()
+
+    # Collect result.
+    res = UTF8Deserializer().loads(sockfile)
+
+    # Release resources.
+    sockfile.close()
+    sock.close()
+
+    return res
+
+
+class BarrierTaskContext(TaskContext):
+
+    """
+    .. note:: Experimental
+
+    A :class:`TaskContext` with extra contextual info and tooling for tasks in a barrier stage.
+    Use :func:`BarrierTaskContext.get` to obtain the barrier context for a running barrier task.
+
+    .. versionadded:: 2.4.0
+    """
+
+    _port = None
+    _secret = None
+
+    def __init__(self):
+        """Construct a BarrierTaskContext, use get instead"""
+        pass
+
+    @classmethod
+    def _getOrCreate(cls):
+        """Internal function to get or create global BarrierTaskContext."""
+        if cls._taskContext is None:
+            cls._taskContext = BarrierTaskContext()
+        return cls._taskContext
+
+    @classmethod
+    def get(cls):
+        """
+        .. note:: Experimental
+
+        Return the currently active :class:`BarrierTaskContext`.
+        This can be called inside of user functions to access contextual information about
+        running tasks.
+
+        .. note:: Must be called on the worker, not the driver. Returns None if not initialized.
+        """
+        return cls._taskContext
+
+    @classmethod
+    def _initialize(cls, port, secret):
+        """
+        Initialize the BarrierTaskContext. Other methods within BarrierTaskContext can only
+        be called after it has been initialized.
+        """
+        cls._port = port
+        cls._secret = secret
+
+    def barrier(self):
+        """
+        .. note:: Experimental
+
+        Sets a global barrier and waits until all tasks in this stage hit this barrier.
+        Similar to the `MPI_Barrier` function in MPI, this function blocks until all tasks
+        in the same stage have reached this routine.
+
+        .. warning:: In a barrier stage, each task must make the same number of `barrier()`
+            calls, in all possible code branches.
+            Otherwise, the job may hang or raise a SparkException after a timeout.
+
+        .. versionadded:: 2.4.0
+        """
+        if self._port is None or self._secret is None:
+            raise Exception("Not supported to call barrier() before initialize " +
+                            "BarrierTaskContext.")
+        else:
+            _load_from_socket(self._port, self._secret)
+
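+    # Illustrative sketch of the barrier pattern (it mirrors the usage exercised in the
+    # PySpark test suite; `sc` is assumed to be an active SparkContext and is not defined
+    # here):
+    #
+    #     def sync_then_stamp(iterator):
+    #         BarrierTaskContext.get().barrier()   # wait for every task in the stage
+    #         yield time.time()
+    #
+    #     sc.parallelize(range(8), 4).barrier().mapPartitions(sync_then_stamp).collect()
+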
+    def getTaskInfos(self):
+        """
+        .. note:: Experimental
+
+        Returns :class:`BarrierTaskInfo` for all tasks in this barrier stage,
+        ordered by partition ID.
+
+        .. versionadded:: 2.4.0
+        """
+        if self._port is None or self._secret is None:
+            raise Exception("Not supported to call getTaskInfos() before initialize " +
+                            "BarrierTaskContext.")
+        else:
+            addresses = self._localProperties.get("addresses", "")
+            return [BarrierTaskInfo(h.strip()) for h in addresses.split(",")]
+
+
+class BarrierTaskInfo(object):
+    """
+    .. note:: Experimental
+
+    Carries all task infos of a barrier task.
+
+    :var address: The IPv4 address (host:port) of the executor that the barrier task is running on
+
+    .. versionadded:: 2.4.0
+    """
+
+    def __init__(self, address):
+        self.address = address
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/test_broadcast.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/test_broadcast.py
new file mode 100644
index 0000000000000000000000000000000000000000..a00329c18ad8fe5558ed3cf2c7fd386a07621c71
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/test_broadcast.py
@@ -0,0 +1,126 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import random
+import tempfile
+import unittest
+
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+from pyspark.broadcast import Broadcast
+from pyspark.conf import SparkConf
+from pyspark.context import SparkContext
+from pyspark.java_gateway import launch_gateway
+from pyspark.serializers import ChunkedStream
+
+
+class BroadcastTest(unittest.TestCase):
+
+    def tearDown(self):
+        if getattr(self, "sc", None) is not None:
+            self.sc.stop()
+            self.sc = None
+
+    def _test_encryption_helper(self, vs):
+        """
+        Creates a broadcast variable for each value in vs, and runs a simple job to make sure
+        the values are the same when read in the executors.  Also makes sure there are no task
+        failures.
+        """
+        bs = [self.sc.broadcast(value=v) for v in vs]
+        exec_values = self.sc.parallelize(range(2)).map(lambda x: [b.value for b in bs]).collect()
+        for ev in exec_values:
+            self.assertEqual(ev, vs)
+        # make sure there are no task failures
+        status = self.sc.statusTracker()
+        for jid in status.getJobIdsForGroup():
+            for sid in status.getJobInfo(jid).stageIds:
+                stage_info = status.getStageInfo(sid)
+                self.assertEqual(0, stage_info.numFailedTasks)
+
+    def _test_multiple_broadcasts(self, *extra_confs):
+        """
+        Test that broadcast variables make it to the executors intact.  Tests multiple
+        broadcast variables, and also multiple jobs.
+        """
+        conf = SparkConf()
+        for key, value in extra_confs:
+            conf.set(key, value)
+        conf.setMaster("local-cluster[2,1,1024]")
+        self.sc = SparkContext(conf=conf)
+        self._test_encryption_helper([5])
+        self._test_encryption_helper([5, 10, 20])
+
+    def test_broadcast_with_encryption(self):
+        self._test_multiple_broadcasts(("spark.io.encryption.enabled", "true"))
+
+    def test_broadcast_no_encryption(self):
+        self._test_multiple_broadcasts()
+
+
+class BroadcastFrameProtocolTest(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        gateway = launch_gateway(SparkConf())
+        cls._jvm = gateway.jvm
+        cls.longMessage = True
+        random.seed(42)
+
+    def _test_chunked_stream(self, data, py_buf_size):
+        # write data using the chunked protocol from python.
+        chunked_file = tempfile.NamedTemporaryFile(delete=False)
+        dechunked_file = tempfile.NamedTemporaryFile(delete=False)
+        dechunked_file.close()
+        try:
+            out = ChunkedStream(chunked_file, py_buf_size)
+            out.write(data)
+            out.close()
+            # now try to read it in java
+            jin = self._jvm.java.io.FileInputStream(chunked_file.name)
+            jout = self._jvm.java.io.FileOutputStream(dechunked_file.name)
+            self._jvm.DechunkedInputStream.dechunkAndCopyToOutput(jin, jout)
+            # java should have decoded it back to the original data
+            self.assertEqual(len(data), os.stat(dechunked_file.name).st_size)
+            with open(dechunked_file.name, "rb") as f:
+                byte = f.read(1)
+                idx = 0
+                while byte:
+                    self.assertEqual(data[idx], bytearray(byte)[0], msg="idx = " + str(idx))
+                    byte = f.read(1)
+                    idx += 1
+        finally:
+            os.unlink(chunked_file.name)
+            os.unlink(dechunked_file.name)
+
+    def test_chunked_stream(self):
+        def random_bytes(n):
+            return bytearray(random.getrandbits(8) for _ in range(n))
+        for data_length in [1, 10, 100, 10000]:
+            for buffer_length in [1, 2, 5, 8192]:
+                self._test_chunked_stream(random_bytes(data_length), buffer_length)
+
+if __name__ == '__main__':
+    from pyspark.test_broadcast import *
+    if xmlrunner:
+        unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    else:
+        unittest.main(verbosity=2)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/test_serializers.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/test_serializers.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b43729f9ebb1a75ad7ee42997c450cd56f3f926
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/test_serializers.py
@@ -0,0 +1,90 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import io
+import math
+import struct
+import sys
+import unittest
+
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+from pyspark import serializers
+
+
+def read_int(b):
+    return struct.unpack("!i", b)[0]
+
+
+def write_int(i):
+    return struct.pack("!i", i)
+
+
+class SerializersTest(unittest.TestCase):
+
+    def test_chunked_stream(self):
+        original_bytes = bytearray(range(100))
+        for data_length in [1, 10, 100]:
+            for buffer_length in [1, 2, 3, 5, 20, 99, 100, 101, 500]:
+                dest = ByteArrayOutput()
+                stream_out = serializers.ChunkedStream(dest, buffer_length)
+                stream_out.write(original_bytes[:data_length])
+                stream_out.close()
+                num_chunks = int(math.ceil(float(data_length) / buffer_length))
+                # length for each chunk, and a final -1 at the very end
+                exp_size = (num_chunks + 1) * 4 + data_length
+                self.assertEqual(len(dest.buffer), exp_size)
+                dest_pos = 0
+                data_pos = 0
+                for chunk_idx in range(num_chunks):
+                    chunk_length = read_int(dest.buffer[dest_pos:(dest_pos + 4)])
+                    if chunk_idx == num_chunks - 1:
+                        exp_length = data_length % buffer_length
+                        if exp_length == 0:
+                            exp_length = buffer_length
+                    else:
+                        exp_length = buffer_length
+                    self.assertEqual(chunk_length, exp_length)
+                    dest_pos += 4
+                    dest_chunk = dest.buffer[dest_pos:dest_pos + chunk_length]
+                    orig_chunk = original_bytes[data_pos:data_pos + chunk_length]
+                    self.assertEqual(dest_chunk, orig_chunk)
+                    dest_pos += chunk_length
+                    data_pos += chunk_length
+                # ends with a -1
+                self.assertEqual(dest.buffer[-4:], write_int(-1))
+
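+# Worked example of the framing verified above (one of the loop combinations, shown only
+# for illustration): with data_length=10 and buffer_length=3 the stream is laid out as
+#   [int 3][3 bytes][int 3][3 bytes][int 3][3 bytes][int 1][1 byte][int -1]
+# i.e. 4 chunks, (4 + 1) * 4 framing bytes plus 10 payload bytes = 30 bytes in total.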
+
+class ByteArrayOutput(object):
+    def __init__(self):
+        self.buffer = bytearray()
+
+    def write(self, b):
+        self.buffer += b
+
+    def close(self):
+        pass
+
+if __name__ == '__main__':
+    from pyspark.test_serializers import *
+    if xmlrunner:
+        unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    else:
+        unittest.main(verbosity=2)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/tests.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..050c2dd018360893f55241c32b420281b6370fcd
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/tests.py
@@ -0,0 +1,2487 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Unit tests for PySpark; additional tests are implemented as doctests in
+individual modules.
+"""
+
+from array import array
+from glob import glob
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+import zipfile
+import random
+import threading
+import hashlib
+
+from py4j.protocol import Py4JJavaError
+try:
+    import xmlrunner
+except ImportError:
+    xmlrunner = None
+
+if sys.version_info[:2] <= (2, 6):
+    try:
+        import unittest2 as unittest
+    except ImportError:
+        sys.stderr.write('Please install unittest2 to test with Python 2.6 or earlier')
+        sys.exit(1)
+else:
+    import unittest
+    if sys.version_info[0] >= 3:
+        xrange = range
+        basestring = str
+
+if sys.version >= "3":
+    from io import StringIO
+else:
+    from StringIO import StringIO
+
+
+from pyspark import keyword_only
+from pyspark.conf import SparkConf
+from pyspark.context import SparkContext
+from pyspark.rdd import RDD
+from pyspark.files import SparkFiles
+from pyspark.serializers import read_int, BatchedSerializer, MarshalSerializer, PickleSerializer, \
+    CloudPickleSerializer, CompressedSerializer, UTF8Deserializer, NoOpSerializer, \
+    PairDeserializer, CartesianDeserializer, AutoBatchedSerializer, AutoSerializer, \
+    FlattenedValuesSerializer
+from pyspark.shuffle import Aggregator, ExternalMerger, ExternalSorter
+from pyspark import shuffle
+from pyspark.profiler import BasicProfiler
+from pyspark.taskcontext import BarrierTaskContext, TaskContext
+
+_have_scipy = False
+_have_numpy = False
+try:
+    import scipy.sparse
+    _have_scipy = True
+except:
+    # No SciPy, but that's okay, we'll skip those tests
+    pass
+try:
+    import numpy as np
+    _have_numpy = True
+except:
+    # No NumPy, but that's okay, we'll skip those tests
+    pass
+
+
+SPARK_HOME = os.environ["SPARK_HOME"]
+
+
+class MergerTests(unittest.TestCase):
+
+    def setUp(self):
+        self.N = 1 << 12
+        self.l = [i for i in xrange(self.N)]
+        self.data = list(zip(self.l, self.l))
+        self.agg = Aggregator(lambda x: [x],
+                              lambda x, y: x.append(y) or x,
+                              lambda x, y: x.extend(y) or x)
+
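+    # The Aggregator above is the usual (createCombiner, mergeValue, mergeCombiners)
+    # triple: each value starts as a one-element list, new values are appended, and
+    # partial lists are concatenated, so merging 1, 2, 3 under one key yields [1, 2, 3].
+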
+    def test_small_dataset(self):
+        m = ExternalMerger(self.agg, 1000)
+        m.mergeValues(self.data)
+        self.assertEqual(m.spills, 0)
+        self.assertEqual(sum(sum(v) for k, v in m.items()),
+                         sum(xrange(self.N)))
+
+        m = ExternalMerger(self.agg, 1000)
+        m.mergeCombiners(map(lambda x_y1: (x_y1[0], [x_y1[1]]), self.data))
+        self.assertEqual(m.spills, 0)
+        self.assertEqual(sum(sum(v) for k, v in m.items()),
+                         sum(xrange(self.N)))
+
+    def test_medium_dataset(self):
+        m = ExternalMerger(self.agg, 20)
+        m.mergeValues(self.data)
+        self.assertTrue(m.spills >= 1)
+        self.assertEqual(sum(sum(v) for k, v in m.items()),
+                         sum(xrange(self.N)))
+
+        m = ExternalMerger(self.agg, 10)
+        m.mergeCombiners(map(lambda x_y2: (x_y2[0], [x_y2[1]]), self.data * 3))
+        self.assertTrue(m.spills >= 1)
+        self.assertEqual(sum(sum(v) for k, v in m.items()),
+                         sum(xrange(self.N)) * 3)
+
+    def test_huge_dataset(self):
+        m = ExternalMerger(self.agg, 5, partitions=3)
+        m.mergeCombiners(map(lambda k_v: (k_v[0], [str(k_v[1])]), self.data * 10))
+        self.assertTrue(m.spills >= 1)
+        self.assertEqual(sum(len(v) for k, v in m.items()),
+                         self.N * 10)
+        m._cleanup()
+
+    def test_group_by_key(self):
+
+        def gen_data(N, step):
+            for i in range(1, N + 1, step):
+                for j in range(i):
+                    yield (i, [j])
+
+        def gen_gs(N, step=1):
+            return shuffle.GroupByKey(gen_data(N, step))
+
+        self.assertEqual(1, len(list(gen_gs(1))))
+        self.assertEqual(2, len(list(gen_gs(2))))
+        self.assertEqual(100, len(list(gen_gs(100))))
+        self.assertEqual(list(range(1, 101)), [k for k, _ in gen_gs(100)])
+        self.assertTrue(all(list(range(k)) == list(vs) for k, vs in gen_gs(100)))
+
+        for k, vs in gen_gs(50002, 10000):
+            self.assertEqual(k, len(vs))
+            self.assertEqual(list(range(k)), list(vs))
+
+        ser = PickleSerializer()
+        l = ser.loads(ser.dumps(list(gen_gs(50002, 30000))))
+        for k, vs in l:
+            self.assertEqual(k, len(vs))
+            self.assertEqual(list(range(k)), list(vs))
+
+    def test_stopiteration_is_raised(self):
+
+        def stopit(*args, **kwargs):
+            raise StopIteration()
+
+        def legit_create_combiner(x):
+            return [x]
+
+        def legit_merge_value(x, y):
+            return x.append(y) or x
+
+        def legit_merge_combiners(x, y):
+            return x.extend(y) or x
+
+        data = [(x % 2, x) for x in range(100)]
+
+        # wrong create combiner
+        m = ExternalMerger(Aggregator(stopit, legit_merge_value, legit_merge_combiners), 20)
+        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
+            m.mergeValues(data)
+
+        # wrong merge value
+        m = ExternalMerger(Aggregator(legit_create_combiner, stopit, legit_merge_combiners), 20)
+        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
+            m.mergeValues(data)
+
+        # wrong merge combiners
+        m = ExternalMerger(Aggregator(legit_create_combiner, legit_merge_value, stopit), 20)
+        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
+            m.mergeCombiners(map(lambda x_y1: (x_y1[0], [x_y1[1]]), data))
+
+
+class SorterTests(unittest.TestCase):
+    def test_in_memory_sort(self):
+        l = list(range(1024))
+        random.shuffle(l)
+        sorter = ExternalSorter(1024)
+        self.assertEqual(sorted(l), list(sorter.sorted(l)))
+        self.assertEqual(sorted(l, reverse=True), list(sorter.sorted(l, reverse=True)))
+        self.assertEqual(sorted(l, key=lambda x: -x), list(sorter.sorted(l, key=lambda x: -x)))
+        self.assertEqual(sorted(l, key=lambda x: -x, reverse=True),
+                         list(sorter.sorted(l, key=lambda x: -x, reverse=True)))
+
+    def test_external_sort(self):
+        class CustomizedSorter(ExternalSorter):
+            def _next_limit(self):
+                return self.memory_limit
+        l = list(range(1024))
+        random.shuffle(l)
+        sorter = CustomizedSorter(1)
+        self.assertEqual(sorted(l), list(sorter.sorted(l)))
+        self.assertGreater(shuffle.DiskBytesSpilled, 0)
+        last = shuffle.DiskBytesSpilled
+        self.assertEqual(sorted(l, reverse=True), list(sorter.sorted(l, reverse=True)))
+        self.assertGreater(shuffle.DiskBytesSpilled, last)
+        last = shuffle.DiskBytesSpilled
+        self.assertEqual(sorted(l, key=lambda x: -x), list(sorter.sorted(l, key=lambda x: -x)))
+        self.assertGreater(shuffle.DiskBytesSpilled, last)
+        last = shuffle.DiskBytesSpilled
+        self.assertEqual(sorted(l, key=lambda x: -x, reverse=True),
+                         list(sorter.sorted(l, key=lambda x: -x, reverse=True)))
+        self.assertGreater(shuffle.DiskBytesSpilled, last)
+
+    def test_external_sort_in_rdd(self):
+        conf = SparkConf().set("spark.python.worker.memory", "1m")
+        sc = SparkContext(conf=conf)
+        l = list(range(10240))
+        random.shuffle(l)
+        rdd = sc.parallelize(l, 4)
+        self.assertEqual(sorted(l), rdd.sortBy(lambda x: x).collect())
+        sc.stop()
+
+
+class SerializationTestCase(unittest.TestCase):
+
+    def test_namedtuple(self):
+        from collections import namedtuple
+        from pickle import dumps, loads
+        P = namedtuple("P", "x y")
+        p1 = P(1, 3)
+        p2 = loads(dumps(p1, 2))
+        self.assertEqual(p1, p2)
+
+        from pyspark.cloudpickle import dumps
+        P2 = loads(dumps(P))
+        p3 = P2(1, 3)
+        self.assertEqual(p1, p3)
+
+    def test_itemgetter(self):
+        from operator import itemgetter
+        ser = CloudPickleSerializer()
+        d = range(10)
+        getter = itemgetter(1)
+        getter2 = ser.loads(ser.dumps(getter))
+        self.assertEqual(getter(d), getter2(d))
+
+        getter = itemgetter(0, 3)
+        getter2 = ser.loads(ser.dumps(getter))
+        self.assertEqual(getter(d), getter2(d))
+
+    def test_function_module_name(self):
+        ser = CloudPickleSerializer()
+        func = lambda x: x
+        func2 = ser.loads(ser.dumps(func))
+        self.assertEqual(func.__module__, func2.__module__)
+
+    def test_attrgetter(self):
+        from operator import attrgetter
+        ser = CloudPickleSerializer()
+
+        class C(object):
+            def __getattr__(self, item):
+                return item
+        d = C()
+        getter = attrgetter("a")
+        getter2 = ser.loads(ser.dumps(getter))
+        self.assertEqual(getter(d), getter2(d))
+        getter = attrgetter("a", "b")
+        getter2 = ser.loads(ser.dumps(getter))
+        self.assertEqual(getter(d), getter2(d))
+
+        d.e = C()
+        getter = attrgetter("e.a")
+        getter2 = ser.loads(ser.dumps(getter))
+        self.assertEqual(getter(d), getter2(d))
+        getter = attrgetter("e.a", "e.b")
+        getter2 = ser.loads(ser.dumps(getter))
+        self.assertEqual(getter(d), getter2(d))
+
+    # Regression test for SPARK-3415
+    def test_pickling_file_handles(self):
+        # to be corrected with SPARK-11160
+        if not xmlrunner:
+            ser = CloudPickleSerializer()
+            out1 = sys.stderr
+            out2 = ser.loads(ser.dumps(out1))
+            self.assertEqual(out1, out2)
+
+    def test_func_globals(self):
+
+        class Unpicklable(object):
+            def __reduce__(self):
+                raise Exception("not picklable")
+
+        global exit
+        exit = Unpicklable()
+
+        ser = CloudPickleSerializer()
+        self.assertRaises(Exception, lambda: ser.dumps(exit))
+
+        def foo():
+            sys.exit(0)
+
+        self.assertTrue("exit" in foo.__code__.co_names)
+        ser.dumps(foo)
+
+    def test_compressed_serializer(self):
+        ser = CompressedSerializer(PickleSerializer())
+        try:
+            from StringIO import StringIO
+        except ImportError:
+            from io import BytesIO as StringIO
+        io = StringIO()
+        ser.dump_stream(["abc", u"123", range(5)], io)
+        io.seek(0)
+        self.assertEqual(["abc", u"123", range(5)], list(ser.load_stream(io)))
+        ser.dump_stream(range(1000), io)
+        io.seek(0)
+        self.assertEqual(["abc", u"123", range(5)] + list(range(1000)), list(ser.load_stream(io)))
+        io.close()
+
+    def test_hash_serializer(self):
+        hash(NoOpSerializer())
+        hash(UTF8Deserializer())
+        hash(PickleSerializer())
+        hash(MarshalSerializer())
+        hash(AutoSerializer())
+        hash(BatchedSerializer(PickleSerializer()))
+        hash(AutoBatchedSerializer(MarshalSerializer()))
+        hash(PairDeserializer(NoOpSerializer(), UTF8Deserializer()))
+        hash(CartesianDeserializer(NoOpSerializer(), UTF8Deserializer()))
+        hash(CompressedSerializer(PickleSerializer()))
+        hash(FlattenedValuesSerializer(PickleSerializer()))
+
+
+class QuietTest(object):
+    def __init__(self, sc):
+        self.log4j = sc._jvm.org.apache.log4j
+
+    def __enter__(self):
+        self.old_level = self.log4j.LogManager.getRootLogger().getLevel()
+        self.log4j.LogManager.getRootLogger().setLevel(self.log4j.Level.FATAL)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.log4j.LogManager.getRootLogger().setLevel(self.old_level)
+
+
+class PySparkTestCase(unittest.TestCase):
+
+    def setUp(self):
+        self._old_sys_path = list(sys.path)
+        class_name = self.__class__.__name__
+        self.sc = SparkContext('local[4]', class_name)
+
+    def tearDown(self):
+        self.sc.stop()
+        sys.path = self._old_sys_path
+
+
+class ReusedPySparkTestCase(unittest.TestCase):
+
+    @classmethod
+    def conf(cls):
+        """
+        Override this in subclasses to supply a more specific conf
+        """
+        return SparkConf()
+
+    @classmethod
+    def setUpClass(cls):
+        cls.sc = SparkContext('local[4]', cls.__name__, conf=cls.conf())
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.sc.stop()
+
+
+class CheckpointTests(ReusedPySparkTestCase):
+
+    def setUp(self):
+        self.checkpointDir = tempfile.NamedTemporaryFile(delete=False)
+        os.unlink(self.checkpointDir.name)
+        self.sc.setCheckpointDir(self.checkpointDir.name)
+
+    def tearDown(self):
+        shutil.rmtree(self.checkpointDir.name)
+
+    def test_basic_checkpointing(self):
+        parCollection = self.sc.parallelize([1, 2, 3, 4])
+        flatMappedRDD = parCollection.flatMap(lambda x: range(1, x + 1))
+
+        self.assertFalse(flatMappedRDD.isCheckpointed())
+        self.assertTrue(flatMappedRDD.getCheckpointFile() is None)
+
+        flatMappedRDD.checkpoint()
+        result = flatMappedRDD.collect()
+        time.sleep(1)  # 1 second
+        self.assertTrue(flatMappedRDD.isCheckpointed())
+        self.assertEqual(flatMappedRDD.collect(), result)
+        self.assertEqual("file:" + self.checkpointDir.name,
+                         os.path.dirname(os.path.dirname(flatMappedRDD.getCheckpointFile())))
+
+    def test_checkpoint_and_restore(self):
+        parCollection = self.sc.parallelize([1, 2, 3, 4])
+        flatMappedRDD = parCollection.flatMap(lambda x: [x])
+
+        self.assertFalse(flatMappedRDD.isCheckpointed())
+        self.assertTrue(flatMappedRDD.getCheckpointFile() is None)
+
+        flatMappedRDD.checkpoint()
+        flatMappedRDD.count()  # forces a checkpoint to be computed
+        time.sleep(1)  # 1 second
+
+        self.assertTrue(flatMappedRDD.getCheckpointFile() is not None)
+        recovered = self.sc._checkpointFile(flatMappedRDD.getCheckpointFile(),
+                                            flatMappedRDD._jrdd_deserializer)
+        self.assertEqual([1, 2, 3, 4], recovered.collect())
+
+
+class LocalCheckpointTests(ReusedPySparkTestCase):
+
+    def test_basic_localcheckpointing(self):
+        parCollection = self.sc.parallelize([1, 2, 3, 4])
+        flatMappedRDD = parCollection.flatMap(lambda x: range(1, x + 1))
+
+        self.assertFalse(flatMappedRDD.isCheckpointed())
+        self.assertFalse(flatMappedRDD.isLocallyCheckpointed())
+
+        flatMappedRDD.localCheckpoint()
+        result = flatMappedRDD.collect()
+        time.sleep(1)  # 1 second
+        self.assertTrue(flatMappedRDD.isCheckpointed())
+        self.assertTrue(flatMappedRDD.isLocallyCheckpointed())
+        self.assertEqual(flatMappedRDD.collect(), result)
+
+
+class AddFileTests(PySparkTestCase):
+
+    def test_add_py_file(self):
+        # To ensure that we're actually testing addPyFile's effects, check that
+        # this job fails due to `userlibrary` not being on the Python path:
+        # disable logging in log4j temporarily
+        def func(x):
+            from userlibrary import UserClass
+            return UserClass().hello()
+        with QuietTest(self.sc):
+            self.assertRaises(Exception, self.sc.parallelize(range(2)).map(func).first)
+
+        # Add the file, so the job should now succeed:
+        path = os.path.join(SPARK_HOME, "python/test_support/userlibrary.py")
+        self.sc.addPyFile(path)
+        res = self.sc.parallelize(range(2)).map(func).first()
+        self.assertEqual("Hello World!", res)
+
+    def test_add_file_locally(self):
+        path = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt")
+        self.sc.addFile(path)
+        download_path = SparkFiles.get("hello.txt")
+        self.assertNotEqual(path, download_path)
+        with open(download_path) as test_file:
+            self.assertEqual("Hello World!\n", test_file.readline())
+
+    def test_add_file_recursively_locally(self):
+        path = os.path.join(SPARK_HOME, "python/test_support/hello")
+        self.sc.addFile(path, True)
+        download_path = SparkFiles.get("hello")
+        self.assertNotEqual(path, download_path)
+        with open(download_path + "/hello.txt") as test_file:
+            self.assertEqual("Hello World!\n", test_file.readline())
+        with open(download_path + "/sub_hello/sub_hello.txt") as test_file:
+            self.assertEqual("Sub Hello World!\n", test_file.readline())
+
+    def test_add_py_file_locally(self):
+        # To ensure that we're actually testing addPyFile's effects, check that
+        # this fails due to `userlibrary` not being on the Python path:
+        def func():
+            from userlibrary import UserClass
+        self.assertRaises(ImportError, func)
+        path = os.path.join(SPARK_HOME, "python/test_support/userlibrary.py")
+        self.sc.addPyFile(path)
+        from userlibrary import UserClass
+        self.assertEqual("Hello World!", UserClass().hello())
+
+    def test_add_egg_file_locally(self):
+        # To ensure that we're actually testing addPyFile's effects, check that
+        # this fails due to `userlibrary` not being on the Python path:
+        def func():
+            from userlib import UserClass
+        self.assertRaises(ImportError, func)
+        path = os.path.join(SPARK_HOME, "python/test_support/userlib-0.1.zip")
+        self.sc.addPyFile(path)
+        from userlib import UserClass
+        self.assertEqual("Hello World from inside a package!", UserClass().hello())
+
+    def test_overwrite_system_module(self):
+        self.sc.addPyFile(os.path.join(SPARK_HOME, "python/test_support/SimpleHTTPServer.py"))
+
+        import SimpleHTTPServer
+        self.assertEqual("My Server", SimpleHTTPServer.__name__)
+
+        def func(x):
+            import SimpleHTTPServer
+            return SimpleHTTPServer.__name__
+
+        self.assertEqual(["My Server"], self.sc.parallelize(range(1)).map(func).collect())
+
+
+class TaskContextTests(PySparkTestCase):
+
+    def setUp(self):
+        self._old_sys_path = list(sys.path)
+        class_name = self.__class__.__name__
+        # Allow retries even though they are normally disabled in local mode
+        self.sc = SparkContext('local[4, 2]', class_name)
+
+    def test_stage_id(self):
+        """Test the stage ids are available and incrementing as expected."""
+        rdd = self.sc.parallelize(range(10))
+        stage1 = rdd.map(lambda x: TaskContext.get().stageId()).take(1)[0]
+        stage2 = rdd.map(lambda x: TaskContext.get().stageId()).take(1)[0]
+        # Test using the constructor directly rather than the get()
+        stage3 = rdd.map(lambda x: TaskContext().stageId()).take(1)[0]
+        self.assertEqual(stage1 + 1, stage2)
+        self.assertEqual(stage1 + 2, stage3)
+        self.assertEqual(stage2 + 1, stage3)
+
+    def test_partition_id(self):
+        """Test the partition id."""
+        rdd1 = self.sc.parallelize(range(10), 1)
+        rdd2 = self.sc.parallelize(range(10), 2)
+        pids1 = rdd1.map(lambda x: TaskContext.get().partitionId()).collect()
+        pids2 = rdd2.map(lambda x: TaskContext.get().partitionId()).collect()
+        self.assertEqual(0, pids1[0])
+        self.assertEqual(0, pids1[9])
+        self.assertEqual(0, pids2[0])
+        self.assertEqual(1, pids2[9])
+
+    def test_attempt_number(self):
+        """Verify the attempt numbers are correctly reported."""
+        rdd = self.sc.parallelize(range(10))
+        # Verify a simple job with no failures
+        attempt_numbers = rdd.map(lambda x: TaskContext.get().attemptNumber()).collect()
+        # map() is lazy on Python 3, so iterate explicitly to run the assertions
+        for attempt in attempt_numbers:
+            self.assertEqual(0, attempt)
+
+        def fail_on_first(x):
+            """Fail on the first attempt so we get a positive attempt number"""
+            tc = TaskContext.get()
+            attempt_number = tc.attemptNumber()
+            partition_id = tc.partitionId()
+            attempt_id = tc.taskAttemptId()
+            if attempt_number == 0 and partition_id == 0:
+                raise Exception("Failing on first attempt")
+            else:
+                return [x, partition_id, attempt_number, attempt_id]
+        result = rdd.map(fail_on_first).collect()
+        # The first partition should have been re-submitted, while the other partitions
+        # should still be attempt 0
+        self.assertEqual([0, 0, 1], result[0][0:3])
+        self.assertEqual([9, 3, 0], result[9][0:3])
+        # filter()/map() are lazy on Python 3, so iterate explicitly to run the assertions
+        for x in result:
+            if x[1] == 0:
+                self.assertEqual(1, x[2])
+            else:
+                self.assertEqual(0, x[2])
+        # The task attempt id should be different
+        self.assertTrue(result[0][3] != result[9][3])
+
+    def test_tc_on_driver(self):
+        """Verify that getting the TaskContext on the driver returns None."""
+        tc = TaskContext.get()
+        self.assertTrue(tc is None)
+
+    def test_get_local_property(self):
+        """Verify that local properties set on the driver are available in TaskContext."""
+        key = "testkey"
+        value = "testvalue"
+        self.sc.setLocalProperty(key, value)
+        try:
+            rdd = self.sc.parallelize(range(1), 1)
+            prop1 = rdd.map(lambda _: TaskContext.get().getLocalProperty(key)).collect()[0]
+            self.assertEqual(prop1, value)
+            prop2 = rdd.map(lambda _: TaskContext.get().getLocalProperty("otherkey")).collect()[0]
+            self.assertTrue(prop2 is None)
+        finally:
+            self.sc.setLocalProperty(key, None)
+
+    def test_barrier(self):
+        """
+        Verify that BarrierTaskContext.barrier() performs global sync among all barrier tasks
+        within a stage.
+        """
+        rdd = self.sc.parallelize(range(10), 4)
+
+        def f(iterator):
+            yield sum(iterator)
+
+        def context_barrier(x):
+            tc = BarrierTaskContext.get()
+            time.sleep(random.randint(1, 10))
+            tc.barrier()
+            return time.time()
+
+        times = rdd.barrier().mapPartitions(f).map(context_barrier).collect()
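+        # barrier() should release all tasks at roughly the same time despite the random
+        # sleeps, so the recorded exit times should fall within a one-second window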
+        self.assertTrue(max(times) - min(times) < 1)
+
+    def test_barrier_infos(self):
+        """
+        Verify that BarrierTaskContext.getTaskInfos() returns a list of all task infos in the
+        barrier stage.
+        """
+        rdd = self.sc.parallelize(range(10), 4)
+
+        def f(iterator):
+            yield sum(iterator)
+
+        taskInfos = rdd.barrier().mapPartitions(f).map(lambda x: BarrierTaskContext.get()
+                                                       .getTaskInfos()).collect()
+        self.assertTrue(len(taskInfos) == 4)
+        self.assertTrue(len(taskInfos[0]) == 4)
+
+
+class RDDTests(ReusedPySparkTestCase):
+
+    def test_range(self):
+        self.assertEqual(self.sc.range(1, 1).count(), 0)
+        self.assertEqual(self.sc.range(1, 0, -1).count(), 1)
+        self.assertEqual(self.sc.range(0, 1 << 40, 1 << 39).count(), 2)
+
+    def test_id(self):
+        rdd = self.sc.parallelize(range(10))
+        id = rdd.id()
+        self.assertEqual(id, rdd.id())
+        rdd2 = rdd.map(str).filter(bool)
+        id2 = rdd2.id()
+        self.assertEqual(id + 1, id2)
+        self.assertEqual(id2, rdd2.id())
+
+    def test_empty_rdd(self):
+        rdd = self.sc.emptyRDD()
+        self.assertTrue(rdd.isEmpty())
+
+    def test_sum(self):
+        self.assertEqual(0, self.sc.emptyRDD().sum())
+        self.assertEqual(6, self.sc.parallelize([1, 2, 3]).sum())
+
+    def test_to_localiterator(self):
+        from time import sleep
+        rdd = self.sc.parallelize([1, 2, 3])
+        it = rdd.toLocalIterator()
+        sleep(5)
+        self.assertEqual([1, 2, 3], sorted(it))
+
+        rdd2 = rdd.repartition(1000)
+        it2 = rdd2.toLocalIterator()
+        sleep(5)
+        self.assertEqual([1, 2, 3], sorted(it2))
+
+    def test_save_as_textfile_with_unicode(self):
+        # Regression test for SPARK-970
+        x = u"\u00A1Hola, mundo!"
+        data = self.sc.parallelize([x])
+        tempFile = tempfile.NamedTemporaryFile(delete=True)
+        tempFile.close()
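+        # closing the file deletes it (delete=True), freeing the path for saveAsTextFile
+        # to create a directory there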
+        data.saveAsTextFile(tempFile.name)
+        raw_contents = b''.join(open(p, 'rb').read()
+                                for p in glob(tempFile.name + "/part-0000*"))
+        self.assertEqual(x, raw_contents.strip().decode("utf-8"))
+
+    def test_save_as_textfile_with_utf8(self):
+        x = u"\u00A1Hola, mundo!"
+        data = self.sc.parallelize([x.encode("utf-8")])
+        tempFile = tempfile.NamedTemporaryFile(delete=True)
+        tempFile.close()
+        data.saveAsTextFile(tempFile.name)
+        raw_contents = b''.join(open(p, 'rb').read()
+                                for p in glob(tempFile.name + "/part-0000*"))
+        self.assertEqual(x, raw_contents.strip().decode('utf8'))
+
+    def test_transforming_cartesian_result(self):
+        # Regression test for SPARK-1034
+        rdd1 = self.sc.parallelize([1, 2])
+        rdd2 = self.sc.parallelize([3, 4])
+        cart = rdd1.cartesian(rdd2)
+        result = cart.map(lambda x_y3: x_y3[0] + x_y3[1]).collect()
+
+    def test_transforming_pickle_file(self):
+        # Regression test for SPARK-2601
+        data = self.sc.parallelize([u"Hello", u"World!"])
+        tempFile = tempfile.NamedTemporaryFile(delete=True)
+        tempFile.close()
+        data.saveAsPickleFile(tempFile.name)
+        pickled_file = self.sc.pickleFile(tempFile.name)
+        pickled_file.map(lambda x: x).collect()
+
+    def test_cartesian_on_textfile(self):
+        # Regression test for
+        path = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt")
+        a = self.sc.textFile(path)
+        result = a.cartesian(a).collect()
+        (x, y) = result[0]
+        self.assertEqual(u"Hello World!", x.strip())
+        self.assertEqual(u"Hello World!", y.strip())
+
+    def test_cartesian_chaining(self):
+        # Tests for SPARK-16589
+        rdd = self.sc.parallelize(range(10), 2)
+        self.assertSetEqual(
+            set(rdd.cartesian(rdd).cartesian(rdd).collect()),
+            set([((x, y), z) for x in range(10) for y in range(10) for z in range(10)])
+        )
+
+        self.assertSetEqual(
+            set(rdd.cartesian(rdd.cartesian(rdd)).collect()),
+            set([(x, (y, z)) for x in range(10) for y in range(10) for z in range(10)])
+        )
+
+        self.assertSetEqual(
+            set(rdd.cartesian(rdd.zip(rdd)).collect()),
+            set([(x, (y, y)) for x in range(10) for y in range(10)])
+        )
+
+    def test_zip_chaining(self):
+        # Tests for SPARK-21985
+        rdd = self.sc.parallelize('abc', 2)
+        self.assertSetEqual(
+            set(rdd.zip(rdd).zip(rdd).collect()),
+            set([((x, x), x) for x in 'abc'])
+        )
+        self.assertSetEqual(
+            set(rdd.zip(rdd.zip(rdd)).collect()),
+            set([(x, (x, x)) for x in 'abc'])
+        )
+
+    def test_deleting_input_files(self):
+        # Regression test for SPARK-1025
+        tempFile = tempfile.NamedTemporaryFile(delete=False)
+        tempFile.write(b"Hello World!")
+        tempFile.close()
+        data = self.sc.textFile(tempFile.name)
+        filtered_data = data.filter(lambda x: True)
+        self.assertEqual(1, filtered_data.count())
+        os.unlink(tempFile.name)
+        with QuietTest(self.sc):
+            self.assertRaises(Exception, lambda: filtered_data.count())
+
+    def test_sampling_default_seed(self):
+        # Test for SPARK-3995 (default seed setting)
+        data = self.sc.parallelize(xrange(1000), 1)
+        subset = data.takeSample(False, 10)
+        self.assertEqual(len(subset), 10)
+
+    def test_aggregate_mutable_zero_value(self):
+        # Test for SPARK-9021; uses aggregate and treeAggregate to build dict
+        # representing a counter of ints
+        # NOTE: dict is used instead of collections.Counter for Python 2.6
+        # compatibility
+        from collections import defaultdict
+
+        # Show that single or multiple partitions work
+        data1 = self.sc.range(10, numSlices=1)
+        data2 = self.sc.range(10, numSlices=2)
+
+        def seqOp(x, y):
+            x[y] += 1
+            return x
+
+        def comboOp(x, y):
+            for key, val in y.items():
+                x[key] += val
+            return x
+
+        counts1 = data1.aggregate(defaultdict(int), seqOp, comboOp)
+        counts2 = data2.aggregate(defaultdict(int), seqOp, comboOp)
+        counts3 = data1.treeAggregate(defaultdict(int), seqOp, comboOp, 2)
+        counts4 = data2.treeAggregate(defaultdict(int), seqOp, comboOp, 2)
+
+        ground_truth = defaultdict(int, dict((i, 1) for i in range(10)))
+        self.assertEqual(counts1, ground_truth)
+        self.assertEqual(counts2, ground_truth)
+        self.assertEqual(counts3, ground_truth)
+        self.assertEqual(counts4, ground_truth)
+
+    def test_aggregate_by_key_mutable_zero_value(self):
+        # Test for SPARK-9021; uses aggregateByKey to make a pair RDD that
+        # contains lists of all values for each key in the original RDD
+
+        # list(range(...)) for Python 3.x compatibility (can't use * operator
+        # on a range object)
+        # list(zip(...)) for Python 3.x compatibility (want to parallelize a
+        # collection, not a zip object)
+        tuples = list(zip(list(range(10))*2, [1]*20))
+        # Show that single or multiple partitions work
+        data1 = self.sc.parallelize(tuples, 1)
+        data2 = self.sc.parallelize(tuples, 2)
+
+        def seqOp(x, y):
+            x.append(y)
+            return x
+
+        def comboOp(x, y):
+            x.extend(y)
+            return x
+
+        values1 = data1.aggregateByKey([], seqOp, comboOp).collect()
+        values2 = data2.aggregateByKey([], seqOp, comboOp).collect()
+        # Sort lists to ensure clean comparison with ground_truth
+        values1.sort()
+        values2.sort()
+
+        ground_truth = [(i, [1]*2) for i in range(10)]
+        self.assertEqual(values1, ground_truth)
+        self.assertEqual(values2, ground_truth)
+
+    def test_fold_mutable_zero_value(self):
+        # Test for SPARK-9021; uses fold to merge an RDD of dict counters into
+        # a single dict
+        # NOTE: dict is used instead of collections.Counter for Python 2.6
+        # compatibility
+        from collections import defaultdict
+
+        counts1 = defaultdict(int, dict((i, 1) for i in range(10)))
+        counts2 = defaultdict(int, dict((i, 1) for i in range(3, 8)))
+        counts3 = defaultdict(int, dict((i, 1) for i in range(4, 7)))
+        counts4 = defaultdict(int, dict((i, 1) for i in range(5, 6)))
+        all_counts = [counts1, counts2, counts3, counts4]
+        # Show that single or multiple partitions work
+        data1 = self.sc.parallelize(all_counts, 1)
+        data2 = self.sc.parallelize(all_counts, 2)
+
+        def comboOp(x, y):
+            for key, val in y.items():
+                x[key] += val
+            return x
+
+        fold1 = data1.fold(defaultdict(int), comboOp)
+        fold2 = data2.fold(defaultdict(int), comboOp)
+
+        ground_truth = defaultdict(int)
+        for counts in all_counts:
+            for key, val in counts.items():
+                ground_truth[key] += val
+        self.assertEqual(fold1, ground_truth)
+        self.assertEqual(fold2, ground_truth)
+
+    def test_fold_by_key_mutable_zero_value(self):
+        # Test for SPARK-9021; uses foldByKey to make a pair RDD that contains
+        # lists of all values for each key in the original RDD
+
+        tuples = [(i, range(i)) for i in range(10)]*2
+        # Show that single or multiple partitions work
+        data1 = self.sc.parallelize(tuples, 1)
+        data2 = self.sc.parallelize(tuples, 2)
+
+        def comboOp(x, y):
+            x.extend(y)
+            return x
+
+        values1 = data1.foldByKey([], comboOp).collect()
+        values2 = data2.foldByKey([], comboOp).collect()
+        # Sort lists to ensure clean comparison with ground_truth
+        values1.sort()
+        values2.sort()
+
+        # list(range(...)) for Python 3.x compatibility
+        ground_truth = [(i, list(range(i))*2) for i in range(10)]
+        self.assertEqual(values1, ground_truth)
+        self.assertEqual(values2, ground_truth)
+
+    def test_aggregate_by_key(self):
+        data = self.sc.parallelize([(1, 1), (1, 1), (3, 2), (5, 1), (5, 3)], 2)
+
+        def seqOp(x, y):
+            x.add(y)
+            return x
+
+        def combOp(x, y):
+            x |= y
+            return x
+
+        sets = dict(data.aggregateByKey(set(), seqOp, combOp).collect())
+        self.assertEqual(3, len(sets))
+        self.assertEqual(set([1]), sets[1])
+        self.assertEqual(set([2]), sets[3])
+        self.assertEqual(set([1, 3]), sets[5])
+
+    def test_itemgetter(self):
+        rdd = self.sc.parallelize([range(10)])
+        from operator import itemgetter
+        self.assertEqual([1], rdd.map(itemgetter(1)).collect())
+        self.assertEqual([(2, 3)], rdd.map(itemgetter(2, 3)).collect())
+
+    def test_namedtuple_in_rdd(self):
+        from collections import namedtuple
+        Person = namedtuple("Person", "id firstName lastName")
+        jon = Person(1, "Jon", "Doe")
+        jane = Person(2, "Jane", "Doe")
+        theDoes = self.sc.parallelize([jon, jane])
+        self.assertEqual([jon, jane], theDoes.collect())
+
+    def test_large_broadcast(self):
+        N = 10000
+        data = [[float(i) for i in range(300)] for i in range(N)]
+        bdata = self.sc.broadcast(data)  # 27MB
+        m = self.sc.parallelize(range(1), 1).map(lambda x: len(bdata.value)).sum()
+        self.assertEqual(N, m)
+
+    def test_unpersist(self):
+        N = 1000
+        data = [[float(i) for i in range(300)] for i in range(N)]
+        bdata = self.sc.broadcast(data)  # 3MB
+        bdata.unpersist()
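+        # unpersist() only drops cached copies on the executors; the broadcast is re-sent
+        # when used again, so the job below should still succeed. destroy() below releases
+        # it for good, so any further use should fail.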
+        m = self.sc.parallelize(range(1), 1).map(lambda x: len(bdata.value)).sum()
+        self.assertEqual(N, m)
+        bdata.destroy()
+        try:
+            self.sc.parallelize(range(1), 1).map(lambda x: len(bdata.value)).sum()
+        except Exception:
+            pass
+        else:
+            raise Exception("job should fail after destroying the broadcast")
+
+    def test_multiple_broadcasts(self):
+        N = 1 << 21
+        b1 = self.sc.broadcast(set(range(N)))  # multiple blocks in JVM
+        r = list(range(1 << 15))
+        random.shuffle(r)
+        s = str(r).encode()
+        checksum = hashlib.md5(s).hexdigest()
+        b2 = self.sc.broadcast(s)
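+        # every task should observe the same broadcast values, so the 10 partitions should
+        # collapse to a single distinct (size, checksum) pair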
+        r = list(set(self.sc.parallelize(range(10), 10).map(
+            lambda x: (len(b1.value), hashlib.md5(b2.value).hexdigest())).collect()))
+        self.assertEqual(1, len(r))
+        size, csum = r[0]
+        self.assertEqual(N, size)
+        self.assertEqual(checksum, csum)
+
+        random.shuffle(r)
+        s = str(r).encode()
+        checksum = hashlib.md5(s).hexdigest()
+        b2 = self.sc.broadcast(s)
+        r = list(set(self.sc.parallelize(range(10), 10).map(
+            lambda x: (len(b1.value), hashlib.md5(b2.value).hexdigest())).collect()))
+        self.assertEqual(1, len(r))
+        size, csum = r[0]
+        self.assertEqual(N, size)
+        self.assertEqual(checksum, csum)
+
+    def test_multithread_broadcast_pickle(self):
+        import threading
+
+        b1 = self.sc.broadcast(list(range(3)))
+        b2 = self.sc.broadcast(list(range(3)))
+
+        def f1():
+            return b1.value
+
+        def f2():
+            return b2.value
+
+        funcs_num_pickled = {f1: None, f2: None}
+
+        def do_pickle(f, sc):
+            command = (f, None, sc.serializer, sc.serializer)
+            ser = CloudPickleSerializer()
+            ser.dumps(command)
+
+        def process_vars(sc):
+            broadcast_vars = list(sc._pickled_broadcast_vars)
+            num_pickled = len(broadcast_vars)
+            sc._pickled_broadcast_vars.clear()
+            return num_pickled
+
+        def run(f, sc):
+            do_pickle(f, sc)
+            funcs_num_pickled[f] = process_vars(sc)
+
+        # pickle f1, adds b1 to sc._pickled_broadcast_vars in main thread local storage
+        do_pickle(f1, self.sc)
+
+        # run all for f2, should only add/count/clear b2 from worker thread local storage
+        t = threading.Thread(target=run, args=(f2, self.sc))
+        t.start()
+        t.join()
+
+        # count number of vars pickled in main thread, only b1 should be counted and cleared
+        funcs_num_pickled[f1] = process_vars(self.sc)
+
+        self.assertEqual(funcs_num_pickled[f1], 1)
+        self.assertEqual(funcs_num_pickled[f2], 1)
+        self.assertEqual(len(list(self.sc._pickled_broadcast_vars)), 0)
+
+    def test_large_closure(self):
+        N = 200000
+        data = [float(i) for i in xrange(N)]
+        rdd = self.sc.parallelize(range(1), 1).map(lambda x: len(data))
+        self.assertEqual(N, rdd.first())
+        # regression test for SPARK-6886
+        self.assertEqual(1, rdd.map(lambda x: (x, 1)).groupByKey().count())
+
+    def test_zip_with_different_serializers(self):
+        a = self.sc.parallelize(range(5))
+        b = self.sc.parallelize(range(100, 105))
+        self.assertEqual(a.zip(b).collect(), [(0, 100), (1, 101), (2, 102), (3, 103), (4, 104)])
+        a = a._reserialize(BatchedSerializer(PickleSerializer(), 2))
+        b = b._reserialize(MarshalSerializer())
+        self.assertEqual(a.zip(b).collect(), [(0, 100), (1, 101), (2, 102), (3, 103), (4, 104)])
+        # regression test for SPARK-4841
+        path = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt")
+        t = self.sc.textFile(path)
+        cnt = t.count()
+        self.assertEqual(cnt, t.zip(t).count())
+        rdd = t.map(str)
+        self.assertEqual(cnt, t.zip(rdd).count())
+        # regression test for a bug in _reserialize()
+        self.assertEqual(cnt, t.zip(rdd).count())
+
+    def test_zip_with_different_object_sizes(self):
+        # regression test for SPARK-5973
+        a = self.sc.parallelize(xrange(10000)).map(lambda i: '*' * i)
+        b = self.sc.parallelize(xrange(10000, 20000)).map(lambda i: '*' * i)
+        self.assertEqual(10000, a.zip(b).count())
+
+    def test_zip_with_different_number_of_items(self):
+        a = self.sc.parallelize(range(5), 2)
+        # different number of partitions
+        b = self.sc.parallelize(range(100, 106), 3)
+        self.assertRaises(ValueError, lambda: a.zip(b))
+        with QuietTest(self.sc):
+            # different number of batched items in JVM
+            b = self.sc.parallelize(range(100, 104), 2)
+            self.assertRaises(Exception, lambda: a.zip(b).count())
+            # different number of items in one pair
+            b = self.sc.parallelize(range(100, 106), 2)
+            self.assertRaises(Exception, lambda: a.zip(b).count())
+            # same total number of items, but different distributions
+            a = self.sc.parallelize([2, 3], 2).flatMap(range)
+            b = self.sc.parallelize([3, 2], 2).flatMap(range)
+            self.assertEqual(a.count(), b.count())
+            self.assertRaises(Exception, lambda: a.zip(b).count())
+
+    def test_count_approx_distinct(self):
+        rdd = self.sc.parallelize(xrange(1000))
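+        # with a relative standard deviation of 0.03 the estimates should stay within
+        # roughly 5% of the true distinct count of 1000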
+        self.assertTrue(950 < rdd.countApproxDistinct(0.03) < 1050)
+        self.assertTrue(950 < rdd.map(float).countApproxDistinct(0.03) < 1050)
+        self.assertTrue(950 < rdd.map(str).countApproxDistinct(0.03) < 1050)
+        self.assertTrue(950 < rdd.map(lambda x: (x, -x)).countApproxDistinct(0.03) < 1050)
+
+        rdd = self.sc.parallelize([i % 20 for i in range(1000)], 7)
+        self.assertTrue(18 < rdd.countApproxDistinct() < 22)
+        self.assertTrue(18 < rdd.map(float).countApproxDistinct() < 22)
+        self.assertTrue(18 < rdd.map(str).countApproxDistinct() < 22)
+        self.assertTrue(18 < rdd.map(lambda x: (x, -x)).countApproxDistinct() < 22)
+
+        self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.00000001))
+
+    def test_histogram(self):
+        # empty
+        rdd = self.sc.parallelize([])
+        self.assertEqual([0], rdd.histogram([0, 10])[1])
+        self.assertEqual([0, 0], rdd.histogram([0, 4, 10])[1])
+        self.assertRaises(ValueError, lambda: rdd.histogram(1))
+
+        # out of range
+        rdd = self.sc.parallelize([10.01, -0.01])
+        self.assertEqual([0], rdd.histogram([0, 10])[1])
+        self.assertEqual([0, 0], rdd.histogram((0, 4, 10))[1])
+
+        # in range with one bucket
+        rdd = self.sc.parallelize(range(1, 5))
+        self.assertEqual([4], rdd.histogram([0, 10])[1])
+        self.assertEqual([3, 1], rdd.histogram([0, 4, 10])[1])
+
+        # in range with one bucket exact match
+        self.assertEqual([4], rdd.histogram([1, 4])[1])
+
+        # out of range with two buckets
+        rdd = self.sc.parallelize([10.01, -0.01])
+        self.assertEqual([0, 0], rdd.histogram([0, 5, 10])[1])
+
+        # out of range with two uneven buckets
+        rdd = self.sc.parallelize([10.01, -0.01])
+        self.assertEqual([0, 0], rdd.histogram([0, 4, 10])[1])
+
+        # in range with two buckets
+        rdd = self.sc.parallelize([1, 2, 3, 5, 6])
+        self.assertEqual([3, 2], rdd.histogram([0, 5, 10])[1])
+
+        # in range with two buckets, ignoring None and NaN values
+        rdd = self.sc.parallelize([1, 2, 3, 5, 6, None, float('nan')])
+        self.assertEqual([3, 2], rdd.histogram([0, 5, 10])[1])
+
+        # in range with two uneven buckets
+        rdd = self.sc.parallelize([1, 2, 3, 5, 6])
+        self.assertEqual([3, 2], rdd.histogram([0, 5, 11])[1])
+
+        # mixed range with two uneven buckets
+        rdd = self.sc.parallelize([-0.01, 0.0, 1, 2, 3, 5, 6, 11.0, 11.01])
+        self.assertEqual([4, 3], rdd.histogram([0, 5, 11])[1])
+
+        # mixed range with four uneven buckets
+        rdd = self.sc.parallelize([-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0, 199.0, 200.0, 200.1])
+        self.assertEqual([4, 2, 1, 3], rdd.histogram([0.0, 5.0, 11.0, 12.0, 200.0])[1])
+
+        # mixed range with uneven buckets and NaN
+        rdd = self.sc.parallelize([-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0,
+                                   199.0, 200.0, 200.1, None, float('nan')])
+        self.assertEqual([4, 2, 1, 3], rdd.histogram([0.0, 5.0, 11.0, 12.0, 200.0])[1])
+
+        # out of range with infinite buckets
+        rdd = self.sc.parallelize([10.01, -0.01, float('nan'), float("inf")])
+        self.assertEqual([1, 2], rdd.histogram([float('-inf'), 0, float('inf')])[1])
+
+        # invalid buckets
+        self.assertRaises(ValueError, lambda: rdd.histogram([]))
+        self.assertRaises(ValueError, lambda: rdd.histogram([1]))
+        self.assertRaises(ValueError, lambda: rdd.histogram(0))
+        self.assertRaises(TypeError, lambda: rdd.histogram({}))
+
+        # without buckets
+        rdd = self.sc.parallelize(range(1, 5))
+        self.assertEqual(([1, 4], [4]), rdd.histogram(1))
+
+        # without buckets single element
+        rdd = self.sc.parallelize([1])
+        self.assertEqual(([1, 1], [1]), rdd.histogram(1))
+
+        # without buckets, data with no range (all values identical)
+        rdd = self.sc.parallelize([1] * 4)
+        self.assertEqual(([1, 1], [4]), rdd.histogram(1))
+
+        # without buckets basic two
+        rdd = self.sc.parallelize(range(1, 5))
+        self.assertEqual(([1, 2.5, 4], [2, 2]), rdd.histogram(2))
+
+        # without buckets with more requested than elements
+        rdd = self.sc.parallelize([1, 2])
+        buckets = [1 + 0.2 * i for i in range(6)]
+        hist = [1, 0, 0, 0, 1]
+        self.assertEqual((buckets, hist), rdd.histogram(5))
+
+        # invalid RDDs
+        rdd = self.sc.parallelize([1, float('inf')])
+        self.assertRaises(ValueError, lambda: rdd.histogram(2))
+        rdd = self.sc.parallelize([float('nan')])
+        self.assertRaises(ValueError, lambda: rdd.histogram(2))
+
+        # string
+        rdd = self.sc.parallelize(["ab", "ac", "b", "bd", "ef"], 2)
+        self.assertEqual([2, 2], rdd.histogram(["a", "b", "c"])[1])
+        self.assertEqual((["ab", "ef"], [5]), rdd.histogram(1))
+        self.assertRaises(TypeError, lambda: rdd.histogram(2))
+
+    def test_repartitionAndSortWithinPartitions_asc(self):
+        rdd = self.sc.parallelize([(0, 5), (3, 8), (2, 6), (0, 8), (3, 8), (1, 3)], 2)
+
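+        # key % 2 sends even keys to partition 0 and odd keys to partition 1; within each
+        # partition the pairs should come back sorted by key in ascending order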
+        repartitioned = rdd.repartitionAndSortWithinPartitions(2, lambda key: key % 2, True)
+        partitions = repartitioned.glom().collect()
+        self.assertEqual(partitions[0], [(0, 5), (0, 8), (2, 6)])
+        self.assertEqual(partitions[1], [(1, 3), (3, 8), (3, 8)])
+
+    def test_repartitionAndSortWithinPartitions_desc(self):
+        rdd = self.sc.parallelize([(0, 5), (3, 8), (2, 6), (0, 8), (3, 8), (1, 3)], 2)
+
+        repartitioned = rdd.repartitionAndSortWithinPartitions(2, lambda key: key % 2, False)
+        partitions = repartitioned.glom().collect()
+        self.assertEqual(partitions[0], [(2, 6), (0, 5), (0, 8)])
+        self.assertEqual(partitions[1], [(3, 8), (3, 8), (1, 3)])
+
+    def test_repartition_no_skewed(self):
+        num_partitions = 20
+        a = self.sc.parallelize(range(int(1000)), 2)
+        l = a.repartition(num_partitions).glom().map(len).collect()
+        zeros = len([x for x in l if x == 0])
+        self.assertTrue(zeros == 0)
+        l = a.coalesce(num_partitions, True).glom().map(len).collect()
+        zeros = len([x for x in l if x == 0])
+        self.assertTrue(zeros == 0)
+
+    def test_repartition_on_textfile(self):
+        path = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt")
+        rdd = self.sc.textFile(path)
+        result = rdd.repartition(1).collect()
+        self.assertEqual(u"Hello World!", result[0])
+
+    def test_distinct(self):
+        rdd = self.sc.parallelize((1, 2, 3)*10, 10)
+        self.assertEqual(rdd.getNumPartitions(), 10)
+        self.assertEqual(rdd.distinct().count(), 3)
+        result = rdd.distinct(5)
+        self.assertEqual(result.getNumPartitions(), 5)
+        self.assertEqual(result.count(), 3)
+
+    def test_external_group_by_key(self):
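+        # With only 1m of Python worker memory, groupByKey should spill to disk, so the
+        # grouped values end up backed by an ExternalListOfList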
+        self.sc._conf.set("spark.python.worker.memory", "1m")
+        N = 200001
+        kv = self.sc.parallelize(xrange(N)).map(lambda x: (x % 3, x))
+        gkv = kv.groupByKey().cache()
+        self.assertEqual(3, gkv.count())
+        filtered = gkv.filter(lambda kv: kv[0] == 1)
+        self.assertEqual(1, filtered.count())
+        self.assertEqual([(1, N // 3)], filtered.mapValues(len).collect())
+        self.assertEqual([(N // 3, N // 3)],
+                         filtered.values().map(lambda x: (len(x), len(list(x)))).collect())
+        result = filtered.collect()[0][1]
+        self.assertEqual(N // 3, len(result))
+        self.assertTrue(isinstance(result.data, shuffle.ExternalListOfList))
+
+    def test_sort_on_empty_rdd(self):
+        self.assertEqual([], self.sc.parallelize(zip([], [])).sortByKey().collect())
+
+    def test_sample(self):
+        rdd = self.sc.parallelize(range(0, 100), 4)
+        wo = rdd.sample(False, 0.1, 2).collect()
+        wo_dup = rdd.sample(False, 0.1, 2).collect()
+        self.assertSetEqual(set(wo), set(wo_dup))
+        wr = rdd.sample(True, 0.2, 5).collect()
+        wr_dup = rdd.sample(True, 0.2, 5).collect()
+        self.assertSetEqual(set(wr), set(wr_dup))
+        wo_s10 = rdd.sample(False, 0.3, 10).collect()
+        wo_s20 = rdd.sample(False, 0.3, 20).collect()
+        self.assertNotEqual(set(wo_s10), set(wo_s20))
+        wr_s11 = rdd.sample(True, 0.4, 11).collect()
+        wr_s21 = rdd.sample(True, 0.4, 21).collect()
+        self.assertNotEqual(set(wr_s11), set(wr_s21))
+
+    def test_null_in_rdd(self):
+        jrdd = self.sc._jvm.PythonUtils.generateRDDWithNull(self.sc._jsc)
+        rdd = RDD(jrdd, self.sc, UTF8Deserializer())
+        self.assertEqual([u"a", None, u"b"], rdd.collect())
+        rdd = RDD(jrdd, self.sc, NoOpSerializer())
+        self.assertEqual([b"a", None, b"b"], rdd.collect())
+
+    def test_multiple_python_java_RDD_conversions(self):
+        # Regression test for SPARK-5361
+        data = [
+            (u'1', {u'director': u'David Lean'}),
+            (u'2', {u'director': u'Andrew Dominik'})
+        ]
+        data_rdd = self.sc.parallelize(data)
+        data_java_rdd = data_rdd._to_java_object_rdd()
+        data_python_rdd = self.sc._jvm.SerDeUtil.javaToPython(data_java_rdd)
+        converted_rdd = RDD(data_python_rdd, self.sc)
+        self.assertEqual(2, converted_rdd.count())
+
+        # a second Python <-> Java RDD conversion round trip used to throw exceptions
+        data_java_rdd = converted_rdd._to_java_object_rdd()
+        data_python_rdd = self.sc._jvm.SerDeUtil.javaToPython(data_java_rdd)
+        converted_rdd = RDD(data_python_rdd, self.sc)
+        self.assertEqual(2, converted_rdd.count())
+
+    def test_narrow_dependency_in_join(self):
+        rdd = self.sc.parallelize(range(10)).map(lambda x: (x, x))
+        parted = rdd.partitionBy(2)
+        self.assertEqual(2, parted.union(parted).getNumPartitions())
+        self.assertEqual(rdd.getNumPartitions() + 2, parted.union(rdd).getNumPartitions())
+        self.assertEqual(rdd.getNumPartitions() + 2, rdd.union(parted).getNumPartitions())
+
+        tracker = self.sc.statusTracker()
+
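+        # Joining/cogrouping two copies of the same partitioned RDD is a narrow dependency,
+        # so those jobs should run in 2 stages; mixing in the unpartitioned rdd needs an
+        # extra shuffle stage, hence 3 stages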
+        self.sc.setJobGroup("test1", "test", True)
+        d = sorted(parted.join(parted).collect())
+        self.assertEqual(10, len(d))
+        self.assertEqual((0, (0, 0)), d[0])
+        jobId = tracker.getJobIdsForGroup("test1")[0]
+        self.assertEqual(2, len(tracker.getJobInfo(jobId).stageIds))
+
+        self.sc.setJobGroup("test2", "test", True)
+        d = sorted(parted.join(rdd).collect())
+        self.assertEqual(10, len(d))
+        self.assertEqual((0, (0, 0)), d[0])
+        jobId = tracker.getJobIdsForGroup("test2")[0]
+        self.assertEqual(3, len(tracker.getJobInfo(jobId).stageIds))
+
+        self.sc.setJobGroup("test3", "test", True)
+        d = sorted(parted.cogroup(parted).collect())
+        self.assertEqual(10, len(d))
+        self.assertEqual([[0], [0]], list(map(list, d[0][1])))
+        jobId = tracker.getJobIdsForGroup("test3")[0]
+        self.assertEqual(2, len(tracker.getJobInfo(jobId).stageIds))
+
+        self.sc.setJobGroup("test4", "test", True)
+        d = sorted(parted.cogroup(rdd).collect())
+        self.assertEqual(10, len(d))
+        self.assertEqual([[0], [0]], list(map(list, d[0][1])))
+        jobId = tracker.getJobIdsForGroup("test4")[0]
+        self.assertEqual(3, len(tracker.getJobInfo(jobId).stageIds))
+
+    # Regression test for SPARK-6294
+    def test_take_on_jrdd(self):
+        rdd = self.sc.parallelize(xrange(1 << 20)).map(lambda x: str(x))
+        rdd._jrdd.first()
+
+    def test_sortByKey_uses_all_partitions_not_only_first_and_last(self):
+        # Regression test for SPARK-5969
+        seq = [(i * 59 % 101, i) for i in range(101)]  # unsorted sequence
+        rdd = self.sc.parallelize(seq)
+        for ascending in [True, False]:
+            sort = rdd.sortByKey(ascending=ascending, numPartitions=5)
+            self.assertEqual(sort.collect(), sorted(seq, reverse=not ascending))
+            sizes = sort.glom().map(len).collect()
+            for size in sizes:
+                self.assertGreater(size, 0)
+
+    def test_pipe_functions(self):
+        data = ['1', '2', '3']
+        rdd = self.sc.parallelize(data)
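+        # 'cc' should exit with a non-zero status here; without checkCode the failure is
+        # ignored and yields no output, while checkCode=True should surface it as an error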
+        with QuietTest(self.sc):
+            self.assertEqual([], rdd.pipe('cc').collect())
+            self.assertRaises(Py4JJavaError, rdd.pipe('cc', checkCode=True).collect)
+        result = rdd.pipe('cat').collect()
+        result.sort()
+        for x, y in zip(data, result):
+            self.assertEqual(x, y)
+        self.assertRaises(Py4JJavaError, rdd.pipe('grep 4', checkCode=True).collect)
+        self.assertEqual([], rdd.pipe('grep 4').collect())
+
+    def test_pipe_unicode(self):
+        # Regression test for SPARK-20947
+        data = [u'\u6d4b\u8bd5', '1']
+        rdd = self.sc.parallelize(data)
+        result = rdd.pipe('cat').collect()
+        self.assertEqual(data, result)
+
+    def test_stopiteration_in_user_code(self):
+
+        def stopit(*x):
+            raise StopIteration()
+
+        seq_rdd = self.sc.parallelize(range(10))
+        keyed_rdd = self.sc.parallelize((x % 2, x) for x in range(10))
+        msg = "Caught StopIteration thrown from user's code; failing the task"
+
+        self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.map(stopit).collect)
+        self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.filter(stopit).collect)
+        self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.foreach, stopit)
+        self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.reduce, stopit)
+        self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.fold, 0, stopit)
+        self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.foreach, stopit)
+        self.assertRaisesRegexp(Py4JJavaError, msg,
+                                seq_rdd.cartesian(seq_rdd).flatMap(stopit).collect)
+
+        # These methods call the user function both in the driver and in the executor.
+        # The exception raised differs according to where the StopIteration happens:
+        # a RuntimeError if it happens in the driver, or a Py4JJavaError (wrapping the
+        # RuntimeError raised in the worker) if it happens in the executor.
+        self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg,
+                                keyed_rdd.reduceByKeyLocally, stopit)
+        self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg,
+                                seq_rdd.aggregate, 0, stopit, lambda *x: 1)
+        self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg,
+                                seq_rdd.aggregate, 0, lambda *x: 1, stopit)
+
+
+class ProfilerTests(PySparkTestCase):
+
+    def setUp(self):
+        self._old_sys_path = list(sys.path)
+        class_name = self.__class__.__name__
+        conf = SparkConf().set("spark.python.profile", "true")
+        self.sc = SparkContext('local[4]', class_name, conf=conf)
+
+    def test_profiler(self):
+        self.do_computation()
+
+        profilers = self.sc.profiler_collector.profilers
+        self.assertEqual(1, len(profilers))
+        id, profiler, _ = profilers[0]
+        stats = profiler.stats()
+        self.assertTrue(stats is not None)
+        width, stat_list = stats.get_print_list([])
+        func_names = [func_name for fname, n, func_name in stat_list]
+        self.assertTrue("heavy_foo" in func_names)
+
+        old_stdout = sys.stdout
+        sys.stdout = io = StringIO()
+        self.sc.show_profiles()
+        self.assertTrue("heavy_foo" in io.getvalue())
+        sys.stdout = old_stdout
+
+        d = tempfile.gettempdir()
+        self.sc.dump_profiles(d)
+        self.assertTrue("rdd_%d.pstats" % id in os.listdir(d))
+
+    def test_custom_profiler(self):
+        class TestCustomProfiler(BasicProfiler):
+            def show(self, id):
+                self.result = "Custom formatting"
+
+        self.sc.profiler_collector.profiler_cls = TestCustomProfiler
+
+        self.do_computation()
+
+        profilers = self.sc.profiler_collector.profilers
+        self.assertEqual(1, len(profilers))
+        _, profiler, _ = profilers[0]
+        self.assertTrue(isinstance(profiler, TestCustomProfiler))
+
+        self.sc.show_profiles()
+        self.assertEqual("Custom formatting", profiler.result)
+
+    def do_computation(self):
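+        # run a job whose mapper (heavy_foo) does enough work to show up in the profile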
+        def heavy_foo(x):
+            for i in range(1 << 18):
+                x = 1
+
+        rdd = self.sc.parallelize(range(100))
+        rdd.foreach(heavy_foo)
+
+
+class ProfilerTests2(unittest.TestCase):
+    def test_profiler_disabled(self):
+        sc = SparkContext(conf=SparkConf().set("spark.python.profile", "false"))
+        try:
+            self.assertRaisesRegexp(
+                RuntimeError,
+                "'spark.python.profile' configuration must be set",
+                lambda: sc.show_profiles())
+            self.assertRaisesRegexp(
+                RuntimeError,
+                "'spark.python.profile' configuration must be set",
+                lambda: sc.dump_profiles("/tmp/abc"))
+        finally:
+            sc.stop()
+
+
+class InputFormatTests(ReusedPySparkTestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        ReusedPySparkTestCase.setUpClass()
+        cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
+        os.unlink(cls.tempdir.name)
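+        # the temp file is deleted above so the JVM-side generator can create a directory
+        # of SequenceFile test data at that path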
+        cls.sc._jvm.WriteInputFormatTestDataGenerator.generateData(cls.tempdir.name, cls.sc._jsc)
+
+    @classmethod
+    def tearDownClass(cls):
+        ReusedPySparkTestCase.tearDownClass()
+        shutil.rmtree(cls.tempdir.name)
+
+    @unittest.skipIf(sys.version >= "3", "serialization of byte arrays")
+    def test_sequencefiles(self):
+        basepath = self.tempdir.name
+        ints = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sfint/",
+                                           "org.apache.hadoop.io.IntWritable",
+                                           "org.apache.hadoop.io.Text").collect())
+        ei = [(1, u'aa'), (1, u'aa'), (2, u'aa'), (2, u'bb'), (2, u'bb'), (3, u'cc')]
+        self.assertEqual(ints, ei)
+
+        doubles = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sfdouble/",
+                                              "org.apache.hadoop.io.DoubleWritable",
+                                              "org.apache.hadoop.io.Text").collect())
+        ed = [(1.0, u'aa'), (1.0, u'aa'), (2.0, u'aa'), (2.0, u'bb'), (2.0, u'bb'), (3.0, u'cc')]
+        self.assertEqual(doubles, ed)
+
+        bytes = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sfbytes/",
+                                            "org.apache.hadoop.io.IntWritable",
+                                            "org.apache.hadoop.io.BytesWritable").collect())
+        ebs = [(1, bytearray('aa', 'utf-8')),
+               (1, bytearray('aa', 'utf-8')),
+               (2, bytearray('aa', 'utf-8')),
+               (2, bytearray('bb', 'utf-8')),
+               (2, bytearray('bb', 'utf-8')),
+               (3, bytearray('cc', 'utf-8'))]
+        self.assertEqual(bytes, ebs)
+
+        text = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sftext/",
+                                           "org.apache.hadoop.io.Text",
+                                           "org.apache.hadoop.io.Text").collect())
+        et = [(u'1', u'aa'),
+              (u'1', u'aa'),
+              (u'2', u'aa'),
+              (u'2', u'bb'),
+              (u'2', u'bb'),
+              (u'3', u'cc')]
+        self.assertEqual(text, et)
+
+        bools = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sfbool/",
+                                            "org.apache.hadoop.io.IntWritable",
+                                            "org.apache.hadoop.io.BooleanWritable").collect())
+        eb = [(1, False), (1, True), (2, False), (2, False), (2, True), (3, True)]
+        self.assertEqual(bools, eb)
+
+        nulls = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sfnull/",
+                                            "org.apache.hadoop.io.IntWritable",
+                                            "org.apache.hadoop.io.BooleanWritable").collect())
+        en = [(1, None), (1, None), (2, None), (2, None), (2, None), (3, None)]
+        self.assertEqual(nulls, en)
+
+        maps = self.sc.sequenceFile(basepath + "/sftestdata/sfmap/",
+                                    "org.apache.hadoop.io.IntWritable",
+                                    "org.apache.hadoop.io.MapWritable").collect()
+        em = [(1, {}),
+              (1, {3.0: u'bb'}),
+              (2, {1.0: u'aa'}),
+              (2, {1.0: u'cc'}),
+              (3, {2.0: u'dd'})]
+        for v in maps:
+            self.assertTrue(v in em)
+
+        # arrays get pickled to tuples by default
+        tuples = sorted(self.sc.sequenceFile(
+            basepath + "/sftestdata/sfarray/",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.spark.api.python.DoubleArrayWritable").collect())
+        et = [(1, ()),
+              (2, (3.0, 4.0, 5.0)),
+              (3, (4.0, 5.0, 6.0))]
+        self.assertEqual(tuples, et)
+
+        # with custom converters, primitive arrays can stay as arrays
+        arrays = sorted(self.sc.sequenceFile(
+            basepath + "/sftestdata/sfarray/",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.spark.api.python.DoubleArrayWritable",
+            valueConverter="org.apache.spark.api.python.WritableToDoubleArrayConverter").collect())
+        ea = [(1, array('d')),
+              (2, array('d', [3.0, 4.0, 5.0])),
+              (3, array('d', [4.0, 5.0, 6.0]))]
+        self.assertEqual(arrays, ea)
+
+        clazz = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sfclass/",
+                                            "org.apache.hadoop.io.Text",
+                                            "org.apache.spark.api.python.TestWritable").collect())
+        cname = u'org.apache.spark.api.python.TestWritable'
+        ec = [(u'1', {u'__class__': cname, u'double': 1.0, u'int': 1, u'str': u'test1'}),
+              (u'2', {u'__class__': cname, u'double': 2.3, u'int': 2, u'str': u'test2'}),
+              (u'3', {u'__class__': cname, u'double': 3.1, u'int': 3, u'str': u'test3'}),
+              (u'4', {u'__class__': cname, u'double': 4.2, u'int': 4, u'str': u'test4'}),
+              (u'5', {u'__class__': cname, u'double': 5.5, u'int': 5, u'str': u'test56'})]
+        self.assertEqual(clazz, ec)
+
+        unbatched_clazz = sorted(self.sc.sequenceFile(basepath + "/sftestdata/sfclass/",
+                                                      "org.apache.hadoop.io.Text",
+                                                      "org.apache.spark.api.python.TestWritable",
+                                                      ).collect())
+        self.assertEqual(unbatched_clazz, ec)
+
+    def test_oldhadoop(self):
+        basepath = self.tempdir.name
+        ints = sorted(self.sc.hadoopFile(basepath + "/sftestdata/sfint/",
+                                         "org.apache.hadoop.mapred.SequenceFileInputFormat",
+                                         "org.apache.hadoop.io.IntWritable",
+                                         "org.apache.hadoop.io.Text").collect())
+        ei = [(1, u'aa'), (1, u'aa'), (2, u'aa'), (2, u'bb'), (2, u'bb'), (3, u'cc')]
+        self.assertEqual(ints, ei)
+
+        hellopath = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt")
+        oldconf = {"mapreduce.input.fileinputformat.inputdir": hellopath}
+        hello = self.sc.hadoopRDD("org.apache.hadoop.mapred.TextInputFormat",
+                                  "org.apache.hadoop.io.LongWritable",
+                                  "org.apache.hadoop.io.Text",
+                                  conf=oldconf).collect()
+        result = [(0, u'Hello World!')]
+        self.assertEqual(hello, result)
+
+    def test_newhadoop(self):
+        basepath = self.tempdir.name
+        ints = sorted(self.sc.newAPIHadoopFile(
+            basepath + "/sftestdata/sfint/",
+            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text").collect())
+        ei = [(1, u'aa'), (1, u'aa'), (2, u'aa'), (2, u'bb'), (2, u'bb'), (3, u'cc')]
+        self.assertEqual(ints, ei)
+
+        hellopath = os.path.join(SPARK_HOME, "python/test_support/hello/hello.txt")
+        newconf = {"mapreduce.input.fileinputformat.inputdir": hellopath}
+        hello = self.sc.newAPIHadoopRDD("org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
+                                        "org.apache.hadoop.io.LongWritable",
+                                        "org.apache.hadoop.io.Text",
+                                        conf=newconf).collect()
+        result = [(0, u'Hello World!')]
+        self.assertEqual(hello, result)
+
+    def test_newolderror(self):
+        basepath = self.tempdir.name
+        self.assertRaises(Exception, lambda: self.sc.hadoopFile(
+            basepath + "/sftestdata/sfint/",
+            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text"))
+
+        self.assertRaises(Exception, lambda: self.sc.newAPIHadoopFile(
+            basepath + "/sftestdata/sfint/",
+            "org.apache.hadoop.mapred.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text"))
+
+    def test_bad_inputs(self):
+        basepath = self.tempdir.name
+        self.assertRaises(Exception, lambda: self.sc.sequenceFile(
+            basepath + "/sftestdata/sfint/",
+            "org.apache.hadoop.io.NotValidWritable",
+            "org.apache.hadoop.io.Text"))
+        self.assertRaises(Exception, lambda: self.sc.hadoopFile(
+            basepath + "/sftestdata/sfint/",
+            "org.apache.hadoop.mapred.NotValidInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text"))
+        self.assertRaises(Exception, lambda: self.sc.newAPIHadoopFile(
+            basepath + "/sftestdata/sfint/",
+            "org.apache.hadoop.mapreduce.lib.input.NotValidInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text"))
+
+    def test_converters(self):
+        # use of custom converters
+        basepath = self.tempdir.name
+        maps = sorted(self.sc.sequenceFile(
+            basepath + "/sftestdata/sfmap/",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.MapWritable",
+            keyConverter="org.apache.spark.api.python.TestInputKeyConverter",
+            valueConverter="org.apache.spark.api.python.TestInputValueConverter").collect())
+        em = [(u'\x01', []),
+              (u'\x01', [3.0]),
+              (u'\x02', [1.0]),
+              (u'\x02', [1.0]),
+              (u'\x03', [2.0])]
+        self.assertEqual(maps, em)
+
+    def test_binary_files(self):
+        path = os.path.join(self.tempdir.name, "binaryfiles")
+        os.mkdir(path)
+        data = b"short binary data"
+        with open(os.path.join(path, "part-0000"), 'wb') as f:
+            f.write(data)
+        [(p, d)] = self.sc.binaryFiles(path).collect()
+        self.assertTrue(p.endswith("part-0000"))
+        self.assertEqual(d, data)
+
+    def test_binary_records(self):
+        path = os.path.join(self.tempdir.name, "binaryrecords")
+        os.mkdir(path)
+        with open(os.path.join(path, "part-0000"), 'w') as f:
+            for i in range(100):
+                f.write('%04d' % i)
+        result = self.sc.binaryRecords(path, 4).map(int).collect()
+        self.assertEqual(list(range(100)), result)
+
+
+class OutputFormatTests(ReusedPySparkTestCase):
+
+    def setUp(self):
+        self.tempdir = tempfile.NamedTemporaryFile(delete=False)
+        os.unlink(self.tempdir.name)
+
+    def tearDown(self):
+        shutil.rmtree(self.tempdir.name, ignore_errors=True)
+
+    @unittest.skipIf(sys.version >= "3", "serialization of byte arrays")
+    def test_sequencefiles(self):
+        basepath = self.tempdir.name
+        ei = [(1, u'aa'), (1, u'aa'), (2, u'aa'), (2, u'bb'), (2, u'bb'), (3, u'cc')]
+        self.sc.parallelize(ei).saveAsSequenceFile(basepath + "/sfint/")
+        ints = sorted(self.sc.sequenceFile(basepath + "/sfint/").collect())
+        self.assertEqual(ints, ei)
+
+        ed = [(1.0, u'aa'), (1.0, u'aa'), (2.0, u'aa'), (2.0, u'bb'), (2.0, u'bb'), (3.0, u'cc')]
+        self.sc.parallelize(ed).saveAsSequenceFile(basepath + "/sfdouble/")
+        doubles = sorted(self.sc.sequenceFile(basepath + "/sfdouble/").collect())
+        self.assertEqual(doubles, ed)
+
+        ebs = [(1, bytearray(b'\x00\x07spam\x08')), (2, bytearray(b'\x00\x07spam\x08'))]
+        self.sc.parallelize(ebs).saveAsSequenceFile(basepath + "/sfbytes/")
+        bytes = sorted(self.sc.sequenceFile(basepath + "/sfbytes/").collect())
+        self.assertEqual(bytes, ebs)
+
+        et = [(u'1', u'aa'),
+              (u'2', u'bb'),
+              (u'3', u'cc')]
+        self.sc.parallelize(et).saveAsSequenceFile(basepath + "/sftext/")
+        text = sorted(self.sc.sequenceFile(basepath + "/sftext/").collect())
+        self.assertEqual(text, et)
+
+        eb = [(1, False), (1, True), (2, False), (2, False), (2, True), (3, True)]
+        self.sc.parallelize(eb).saveAsSequenceFile(basepath + "/sfbool/")
+        bools = sorted(self.sc.sequenceFile(basepath + "/sfbool/").collect())
+        self.assertEqual(bools, eb)
+
+        en = [(1, None), (1, None), (2, None), (2, None), (2, None), (3, None)]
+        self.sc.parallelize(en).saveAsSequenceFile(basepath + "/sfnull/")
+        nulls = sorted(self.sc.sequenceFile(basepath + "/sfnull/").collect())
+        self.assertEqual(nulls, en)
+
+        em = [(1, {}),
+              (1, {3.0: u'bb'}),
+              (2, {1.0: u'aa'}),
+              (2, {1.0: u'cc'}),
+              (3, {2.0: u'dd'})]
+        self.sc.parallelize(em).saveAsSequenceFile(basepath + "/sfmap/")
+        maps = self.sc.sequenceFile(basepath + "/sfmap/").collect()
+        for v in maps:
+            self.assertTrue(v in em)
+
+    def test_oldhadoop(self):
+        basepath = self.tempdir.name
+        dict_data = [(1, {}),
+                     (1, {"row1": 1.0}),
+                     (2, {"row2": 2.0})]
+        self.sc.parallelize(dict_data).saveAsHadoopFile(
+            basepath + "/oldhadoop/",
+            "org.apache.hadoop.mapred.SequenceFileOutputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.MapWritable")
+        result = self.sc.hadoopFile(
+            basepath + "/oldhadoop/",
+            "org.apache.hadoop.mapred.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.MapWritable").collect()
+        for v in result:
+            self.assertTrue(v in dict_data)
+
+        conf = {
+            "mapred.output.format.class": "org.apache.hadoop.mapred.SequenceFileOutputFormat",
+            "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable",
+            "mapreduce.job.output.value.class": "org.apache.hadoop.io.MapWritable",
+            "mapreduce.output.fileoutputformat.outputdir": basepath + "/olddataset/"
+        }
+        self.sc.parallelize(dict_data).saveAsHadoopDataset(conf)
+        input_conf = {"mapreduce.input.fileinputformat.inputdir": basepath + "/olddataset/"}
+        result = self.sc.hadoopRDD(
+            "org.apache.hadoop.mapred.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.MapWritable",
+            conf=input_conf).collect()
+        for v in result:
+            self.assertTrue(v in dict_data)
+
+    def test_newhadoop(self):
+        basepath = self.tempdir.name
+        data = [(1, ""),
+                (1, "a"),
+                (2, "bcdf")]
+        self.sc.parallelize(data).saveAsNewAPIHadoopFile(
+            basepath + "/newhadoop/",
+            "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text")
+        result = sorted(self.sc.newAPIHadoopFile(
+            basepath + "/newhadoop/",
+            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text").collect())
+        self.assertEqual(result, data)
+
+        conf = {
+            "mapreduce.job.outputformat.class":
+                "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat",
+            "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable",
+            "mapreduce.job.output.value.class": "org.apache.hadoop.io.Text",
+            "mapreduce.output.fileoutputformat.outputdir": basepath + "/newdataset/"
+        }
+        self.sc.parallelize(data).saveAsNewAPIHadoopDataset(conf)
+        input_conf = {"mapreduce.input.fileinputformat.inputdir": basepath + "/newdataset/"}
+        new_dataset = sorted(self.sc.newAPIHadoopRDD(
+            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.hadoop.io.Text",
+            conf=input_conf).collect())
+        self.assertEqual(new_dataset, data)
+
+    @unittest.skipIf(sys.version >= "3", "serialization of arrays")
+    def test_newhadoop_with_array(self):
+        basepath = self.tempdir.name
+        # use custom ArrayWritable types and converters to handle arrays
+        array_data = [(1, array('d')),
+                      (1, array('d', [1.0, 2.0, 3.0])),
+                      (2, array('d', [3.0, 4.0, 5.0]))]
+        self.sc.parallelize(array_data).saveAsNewAPIHadoopFile(
+            basepath + "/newhadoop/",
+            "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.spark.api.python.DoubleArrayWritable",
+            valueConverter="org.apache.spark.api.python.DoubleArrayToWritableConverter")
+        result = sorted(self.sc.newAPIHadoopFile(
+            basepath + "/newhadoop/",
+            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.spark.api.python.DoubleArrayWritable",
+            valueConverter="org.apache.spark.api.python.WritableToDoubleArrayConverter").collect())
+        self.assertEqual(result, array_data)
+
+        conf = {
+            "mapreduce.job.outputformat.class":
+                "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat",
+            "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable",
+            "mapreduce.job.output.value.class": "org.apache.spark.api.python.DoubleArrayWritable",
+            "mapreduce.output.fileoutputformat.outputdir": basepath + "/newdataset/"
+        }
+        self.sc.parallelize(array_data).saveAsNewAPIHadoopDataset(
+            conf,
+            valueConverter="org.apache.spark.api.python.DoubleArrayToWritableConverter")
+        input_conf = {"mapreduce.input.fileinputformat.inputdir": basepath + "/newdataset/"}
+        new_dataset = sorted(self.sc.newAPIHadoopRDD(
+            "org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat",
+            "org.apache.hadoop.io.IntWritable",
+            "org.apache.spark.api.python.DoubleArrayWritable",
+            valueConverter="org.apache.spark.api.python.WritableToDoubleArrayConverter",
+            conf=input_conf).collect())
+        self.assertEqual(new_dataset, array_data)
+
+    def test_newolderror(self):
+        basepath = self.tempdir.name
+        rdd = self.sc.parallelize(range(1, 4)).map(lambda x: (x, "a" * x))
+        self.assertRaises(Exception, lambda: rdd.saveAsHadoopFile(
+            basepath + "/newolderror/saveAsHadoopFile/",
+            "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"))
+        self.assertRaises(Exception, lambda: rdd.saveAsNewAPIHadoopFile(
+            basepath + "/newolderror/saveAsNewAPIHadoopFile/",
+            "org.apache.hadoop.mapred.SequenceFileOutputFormat"))
+
+    def test_bad_inputs(self):
+        basepath = self.tempdir.name
+        rdd = self.sc.parallelize(range(1, 4)).map(lambda x: (x, "a" * x))
+        self.assertRaises(Exception, lambda: rdd.saveAsHadoopFile(
+            basepath + "/badinputs/saveAsHadoopFile/",
+            "org.apache.hadoop.mapred.NotValidOutputFormat"))
+        self.assertRaises(Exception, lambda: rdd.saveAsNewAPIHadoopFile(
+            basepath + "/badinputs/saveAsNewAPIHadoopFile/",
+            "org.apache.hadoop.mapreduce.lib.output.NotValidOutputFormat"))
+
+    def test_converters(self):
+        # use of custom converters
+        basepath = self.tempdir.name
+        data = [(1, {3.0: u'bb'}),
+                (2, {1.0: u'aa'}),
+                (3, {2.0: u'dd'})]
+        self.sc.parallelize(data).saveAsNewAPIHadoopFile(
+            basepath + "/converters/",
+            "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat",
+            keyConverter="org.apache.spark.api.python.TestOutputKeyConverter",
+            valueConverter="org.apache.spark.api.python.TestOutputValueConverter")
+        converted = sorted(self.sc.sequenceFile(basepath + "/converters/").collect())
+        expected = [(u'1', 3.0),
+                    (u'2', 1.0),
+                    (u'3', 2.0)]
+        self.assertEqual(converted, expected)
+
+    def test_reserialization(self):
+        basepath = self.tempdir.name
+        x = range(1, 5)
+        y = range(1001, 1005)
+        data = list(zip(x, y))
+        rdd = self.sc.parallelize(x).zip(self.sc.parallelize(y))
+        rdd.saveAsSequenceFile(basepath + "/reserialize/sequence")
+        result1 = sorted(self.sc.sequenceFile(basepath + "/reserialize/sequence").collect())
+        self.assertEqual(result1, data)
+
+        rdd.saveAsHadoopFile(
+            basepath + "/reserialize/hadoop",
+            "org.apache.hadoop.mapred.SequenceFileOutputFormat")
+        result2 = sorted(self.sc.sequenceFile(basepath + "/reserialize/hadoop").collect())
+        self.assertEqual(result2, data)
+
+        rdd.saveAsNewAPIHadoopFile(
+            basepath + "/reserialize/newhadoop",
+            "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat")
+        result3 = sorted(self.sc.sequenceFile(basepath + "/reserialize/newhadoop").collect())
+        self.assertEqual(result3, data)
+
+        conf4 = {
+            "mapred.output.format.class": "org.apache.hadoop.mapred.SequenceFileOutputFormat",
+            "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable",
+            "mapreduce.job.output.value.class": "org.apache.hadoop.io.IntWritable",
+            "mapreduce.output.fileoutputformat.outputdir": basepath + "/reserialize/dataset"}
+        rdd.saveAsHadoopDataset(conf4)
+        result4 = sorted(self.sc.sequenceFile(basepath + "/reserialize/dataset").collect())
+        self.assertEqual(result4, data)
+
+        conf5 = {"mapreduce.job.outputformat.class":
+                 "org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat",
+                 "mapreduce.job.output.key.class": "org.apache.hadoop.io.IntWritable",
+                 "mapreduce.job.output.value.class": "org.apache.hadoop.io.IntWritable",
+                 "mapreduce.output.fileoutputformat.outputdir": basepath + "/reserialize/newdataset"
+                 }
+        rdd.saveAsNewAPIHadoopDataset(conf5)
+        result5 = sorted(self.sc.sequenceFile(basepath + "/reserialize/newdataset").collect())
+        self.assertEqual(result5, data)
+
+    def test_malformed_RDD(self):
+        basepath = self.tempdir.name
+        # non-batch-serialized RDD[[(K, V)]] should be rejected
+        data = [[(1, "a")], [(2, "aa")], [(3, "aaa")]]
+        rdd = self.sc.parallelize(data, len(data))
+        self.assertRaises(Exception, lambda: rdd.saveAsSequenceFile(
+            basepath + "/malformed/sequence"))
+
+
+class DaemonTests(unittest.TestCase):
+    def connect(self, port):
+        from socket import socket, AF_INET, SOCK_STREAM
+        sock = socket(AF_INET, SOCK_STREAM)
+        sock.connect(('127.0.0.1', port))
+        # send a split index of -1 to shut down the worker
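+        # (b"\xFF\xFF\xFF\xFF" is -1 encoded as a 4-byte signed integer)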
+        sock.send(b"\xFF\xFF\xFF\xFF")
+        sock.close()
+        return True
+
+    def do_termination_test(self, terminator):
+        from subprocess import Popen, PIPE
+        from errno import ECONNREFUSED
+
+        # start daemon
+        daemon_path = os.path.join(os.path.dirname(__file__), "daemon.py")
+        python_exec = sys.executable or os.environ.get("PYSPARK_PYTHON")
+        daemon = Popen([python_exec, daemon_path], stdin=PIPE, stdout=PIPE)
+
+        # read the port number
+        port = read_int(daemon.stdout)
+
+        # daemon should accept connections
+        self.assertTrue(self.connect(port))
+
+        # request shutdown
+        terminator(daemon)
+        time.sleep(1)
+
+        # daemon should no longer accept connections
+        try:
+            self.connect(port)
+        except EnvironmentError as exception:
+            self.assertEqual(exception.errno, ECONNREFUSED)
+        else:
+            self.fail("Expected EnvironmentError to be raised")
+
+    def test_termination_stdin(self):
+        """Ensure that daemon and workers terminate when stdin is closed."""
+        self.do_termination_test(lambda daemon: daemon.stdin.close())
+
+    def test_termination_sigterm(self):
+        """Ensure that daemon and workers terminate on SIGTERM."""
+        from signal import SIGTERM
+        self.do_termination_test(lambda daemon: os.kill(daemon.pid, SIGTERM))
+
+
+class WorkerTests(ReusedPySparkTestCase):
+    def test_cancel_task(self):
+        temp = tempfile.NamedTemporaryFile(delete=True)
+        temp.close()
+        path = temp.name
+
+        def sleep(x):
+            import os
+            import time
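+            # record the daemon (parent) and worker PIDs so the test can inspect them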
+            with open(path, 'w') as f:
+                f.write("%d %d" % (os.getppid(), os.getpid()))
+            time.sleep(100)
+
+        # start job in background thread
+        def run():
+            try:
+                self.sc.parallelize(range(1), 1).foreach(sleep)
+            except Exception:
+                pass
+        import threading
+        t = threading.Thread(target=run)
+        t.daemon = True
+        t.start()
+
+        daemon_pid, worker_pid = 0, 0
+        while True:
+            if os.path.exists(path):
+                with open(path) as f:
+                    data = f.read().split(' ')
+                daemon_pid, worker_pid = map(int, data)
+                break
+            time.sleep(0.1)
+
+        # cancel jobs
+        self.sc.cancelAllJobs()
+        t.join()
+
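+        # poll with signal 0 (a no-op liveness check) until the worker process disappears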
+        for i in range(50):
+            try:
+                os.kill(worker_pid, 0)
+                time.sleep(0.1)
+            except OSError:
+                break  # worker was killed
+        else:
+            self.fail("worker has not been killed after 5 seconds")
+
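+        # the daemon itself should survive the job cancellation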
+        try:
+            os.kill(daemon_pid, 0)
+        except OSError:
+            self.fail("daemon has been killed")
+
+        # run a normal job
+        rdd = self.sc.parallelize(xrange(100), 1)
+        self.assertEqual(100, rdd.map(str).count())
+
+    def test_after_exception(self):
+        def raise_exception(_):
+            raise Exception()
+        rdd = self.sc.parallelize(xrange(100), 1)
+        with QuietTest(self.sc):
+            self.assertRaises(Exception, lambda: rdd.foreach(raise_exception))
+        self.assertEqual(100, rdd.map(str).count())
+
+    def test_after_jvm_exception(self):
+        tempFile = tempfile.NamedTemporaryFile(delete=False)
+        tempFile.write(b"Hello World!")
+        tempFile.close()
+        data = self.sc.textFile(tempFile.name, 1)
+        filtered_data = data.filter(lambda x: True)
+        self.assertEqual(1, filtered_data.count())
+        os.unlink(tempFile.name)
+        with QuietTest(self.sc):
+            self.assertRaises(Exception, lambda: filtered_data.count())
+
+        rdd = self.sc.parallelize(xrange(100), 1)
+        self.assertEqual(100, rdd.map(str).count())
+
+    def test_accumulator_when_reuse_worker(self):
+        from pyspark.accumulators import INT_ACCUMULATOR_PARAM
+        acc1 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
+        self.sc.parallelize(xrange(100), 20).foreach(lambda x: acc1.add(x))
+        self.assertEqual(sum(range(100)), acc1.value)
+
+        acc2 = self.sc.accumulator(0, INT_ACCUMULATOR_PARAM)
+        self.sc.parallelize(xrange(100), 20).foreach(lambda x: acc2.add(x))
+        self.assertEqual(sum(range(100)), acc2.value)
+        self.assertEqual(sum(range(100)), acc1.value)
+
+    def test_reuse_worker_after_take(self):
+        rdd = self.sc.parallelize(xrange(100000), 1)
+        self.assertEqual(0, rdd.first())
+
+        def count():
+            try:
+                rdd.count()
+            except Exception:
+                pass
+
+        t = threading.Thread(target=count)
+        t.daemon = True
+        t.start()
+        t.join(5)
+        self.assertTrue(not t.isAlive())
+        self.assertEqual(100000, rdd.count())
+
+    def test_with_different_versions_of_python(self):
+        rdd = self.sc.parallelize(range(10))
+        rdd.count()
+        version = self.sc.pythonVer
+        self.sc.pythonVer = "2.0"
+        try:
+            with QuietTest(self.sc):
+                self.assertRaises(Py4JJavaError, lambda: rdd.count())
+        finally:
+            self.sc.pythonVer = version
+
+
+class SparkSubmitTests(unittest.TestCase):
+
+    def setUp(self):
+        self.programDir = tempfile.mkdtemp()
+        tmp_dir = tempfile.gettempdir()
+        self.sparkSubmit = [
+            os.path.join(os.environ.get("SPARK_HOME"), "bin", "spark-submit"),
+            "--conf", "spark.driver.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir),
+            "--conf", "spark.executor.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir),
+        ]
+
+    def tearDown(self):
+        shutil.rmtree(self.programDir)
+
+    def createTempFile(self, name, content, dir=None):
+        """
+        Create a temp file with the given name and content and return its path.
+        Strips the leading whitespace and '|' margin prefix from each line of content.
+        """
+        pattern = re.compile(r'^ *\|', re.MULTILINE)
+        content = re.sub(pattern, '', content.strip())
+        if dir is None:
+            path = os.path.join(self.programDir, name)
+        else:
+            os.makedirs(os.path.join(self.programDir, dir))
+            path = os.path.join(self.programDir, dir, name)
+        with open(path, "w") as f:
+            f.write(content)
+        return path
+
+    def createFileInZip(self, name, content, ext=".zip", dir=None, zip_name=None):
+        """
+        Create a zip archive containing a file with the given content and return its path.
+        Strips the leading whitespace and '|' margin prefix from each line of content.
+        """
+        pattern = re.compile(r'^ *\|', re.MULTILINE)
+        content = re.sub(pattern, '', content.strip())
+        if dir is None:
+            path = os.path.join(self.programDir, name + ext)
+        else:
+            path = os.path.join(self.programDir, dir, zip_name + ext)
+        zip = zipfile.ZipFile(path, 'w')
+        zip.writestr(name, content)
+        zip.close()
+        return path
+
+    def create_spark_package(self, artifact_name):
+        group_id, artifact_id, version = artifact_name.split(":")
+        self.createTempFile("%s-%s.pom" % (artifact_id, version), ("""
+            |<?xml version="1.0" encoding="UTF-8"?>
+            |<project xmlns="http://maven.apache.org/POM/4.0.0"
+            |       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+            |       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+            |       http://maven.apache.org/xsd/maven-4.0.0.xsd">
+            |   <modelVersion>4.0.0</modelVersion>
+            |   <groupId>%s</groupId>
+            |   <artifactId>%s</artifactId>
+            |   <version>%s</version>
+            |</project>
+            """ % (group_id, artifact_id, version)).lstrip(),
+            os.path.join(group_id, artifact_id, version))
+        self.createFileInZip("%s.py" % artifact_id, """
+            |def myfunc(x):
+            |    return x + 1
+            """, ".jar", os.path.join(group_id, artifact_id, version),
+                             "%s-%s" % (artifact_id, version))
+
+    def test_single_script(self):
+        """Submit and test a single script file"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkContext
+            |
+            |sc = SparkContext()
+            |print(sc.parallelize([1, 2, 3]).map(lambda x: x * 2).collect())
+            """)
+        proc = subprocess.Popen(self.sparkSubmit + [script], stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("[2, 4, 6]", out.decode('utf-8'))
+
+    def test_script_with_local_functions(self):
+        """Submit and test a single script file calling a global function"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkContext
+            |
+            |def foo(x):
+            |    return x * 3
+            |
+            |sc = SparkContext()
+            |print(sc.parallelize([1, 2, 3]).map(foo).collect())
+            """)
+        proc = subprocess.Popen(self.sparkSubmit + [script], stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("[3, 6, 9]", out.decode('utf-8'))
+
+    def test_module_dependency(self):
+        """Submit and test a script with a dependency on another module"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkContext
+            |from mylib import myfunc
+            |
+            |sc = SparkContext()
+            |print(sc.parallelize([1, 2, 3]).map(myfunc).collect())
+            """)
+        zip = self.createFileInZip("mylib.py", """
+            |def myfunc(x):
+            |    return x + 1
+            """)
+        proc = subprocess.Popen(self.sparkSubmit + ["--py-files", zip, script],
+                                stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("[2, 3, 4]", out.decode('utf-8'))
+
+    def test_module_dependency_on_cluster(self):
+        """Submit and test a script with a dependency on another module on a cluster"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkContext
+            |from mylib import myfunc
+            |
+            |sc = SparkContext()
+            |print(sc.parallelize([1, 2, 3]).map(myfunc).collect())
+            """)
+        zip = self.createFileInZip("mylib.py", """
+            |def myfunc(x):
+            |    return x + 1
+            """)
+        proc = subprocess.Popen(self.sparkSubmit + ["--py-files", zip, "--master",
+                                "local-cluster[1,1,1024]", script],
+                                stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("[2, 3, 4]", out.decode('utf-8'))
+
+    def test_package_dependency(self):
+        """Submit and test a script with a dependency on a Spark Package"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkContext
+            |from mylib import myfunc
+            |
+            |sc = SparkContext()
+            |print(sc.parallelize([1, 2, 3]).map(myfunc).collect())
+            """)
+        self.create_spark_package("a:mylib:0.1")
+        proc = subprocess.Popen(
+            self.sparkSubmit + ["--packages", "a:mylib:0.1", "--repositories",
+                                "file:" + self.programDir, script],
+            stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("[2, 3, 4]", out.decode('utf-8'))
+
+    def test_package_dependency_on_cluster(self):
+        """Submit and test a script with a dependency on a Spark Package on a cluster"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkContext
+            |from mylib import myfunc
+            |
+            |sc = SparkContext()
+            |print(sc.parallelize([1, 2, 3]).map(myfunc).collect())
+            """)
+        self.create_spark_package("a:mylib:0.1")
+        proc = subprocess.Popen(
+            self.sparkSubmit + ["--packages", "a:mylib:0.1", "--repositories",
+                                "file:" + self.programDir, "--master", "local-cluster[1,1,1024]",
+                                script],
+            stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("[2, 3, 4]", out.decode('utf-8'))
+
+    def test_single_script_on_cluster(self):
+        """Submit and test a single script on a cluster"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkContext
+            |
+            |def foo(x):
+            |    return x * 2
+            |
+            |sc = SparkContext()
+            |print(sc.parallelize([1, 2, 3]).map(foo).collect())
+            """)
+        # this will fail if a different spark.executor.memory is set
+        # in conf/spark-defaults.conf
+        proc = subprocess.Popen(
+            self.sparkSubmit + ["--master", "local-cluster[1,1,1024]", script],
+            stdout=subprocess.PIPE)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode)
+        self.assertIn("[2, 4, 6]", out.decode('utf-8'))
+
+    def test_user_configuration(self):
+        """Make sure user configuration is respected (SPARK-19307)"""
+        script = self.createTempFile("test.py", """
+            |from pyspark import SparkConf, SparkContext
+            |
+            |conf = SparkConf().set("spark.test_config", "1")
+            |sc = SparkContext(conf = conf)
+            |try:
+            |    if sc._conf.get("spark.test_config") != "1":
+            |        raise Exception("Cannot find spark.test_config in SparkContext's conf.")
+            |finally:
+            |    sc.stop()
+            """)
+        proc = subprocess.Popen(
+            self.sparkSubmit + ["--master", "local", script],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT)
+        out, err = proc.communicate()
+        self.assertEqual(0, proc.returncode, msg="Process failed with error:\n {0}".format(out))
+
+
+class ContextTests(unittest.TestCase):
+
+    def test_failed_sparkcontext_creation(self):
+        # Regression test for SPARK-1550
+        self.assertRaises(Exception, lambda: SparkContext("an-invalid-master-name"))
+
+    def test_get_or_create(self):
+        with SparkContext.getOrCreate() as sc:
+            self.assertTrue(SparkContext.getOrCreate() is sc)
+
+    def test_parallelize_eager_cleanup(self):
+        with SparkContext() as sc:
+            temp_files = os.listdir(sc._temp_dir)
+            rdd = sc.parallelize([0, 1, 2])
+            post_parallalize_temp_files = os.listdir(sc._temp_dir)
+            self.assertEqual(temp_files, post_parallalize_temp_files)
+
+    def test_set_conf(self):
+        # This is for an internal use case. When there is an existing SparkContext,
+        # SparkSession's builder needs to set configs into SparkContext's conf.
+        sc = SparkContext()
+        sc._conf.set("spark.test.SPARK16224", "SPARK16224")
+        self.assertEqual(sc._jsc.sc().conf().get("spark.test.SPARK16224"), "SPARK16224")
+        sc.stop()
+
+    def test_stop(self):
+        sc = SparkContext()
+        self.assertNotEqual(SparkContext._active_spark_context, None)
+        sc.stop()
+        self.assertEqual(SparkContext._active_spark_context, None)
+
+    def test_with(self):
+        with SparkContext() as sc:
+            self.assertNotEqual(SparkContext._active_spark_context, None)
+        self.assertEqual(SparkContext._active_spark_context, None)
+
+    def test_with_exception(self):
+        try:
+            with SparkContext() as sc:
+                self.assertNotEqual(SparkContext._active_spark_context, None)
+                raise Exception()
+        except:
+            pass
+        self.assertEqual(SparkContext._active_spark_context, None)
+
+    def test_with_stop(self):
+        with SparkContext() as sc:
+            self.assertNotEqual(SparkContext._active_spark_context, None)
+            sc.stop()
+        self.assertEqual(SparkContext._active_spark_context, None)
+
+    def test_progress_api(self):
+        with SparkContext() as sc:
+            sc.setJobGroup('test_progress_api', '', True)
+            rdd = sc.parallelize(range(10)).map(lambda x: time.sleep(100))
+
+            def run():
+                try:
+                    rdd.count()
+                except Exception:
+                    pass
+            t = threading.Thread(target=run)
+            t.daemon = True
+            t.start()
+            # wait for scheduler to start
+            time.sleep(1)
+
+            tracker = sc.statusTracker()
+            jobIds = tracker.getJobIdsForGroup('test_progress_api')
+            self.assertEqual(1, len(jobIds))
+            job = tracker.getJobInfo(jobIds[0])
+            self.assertEqual(1, len(job.stageIds))
+            stage = tracker.getStageInfo(job.stageIds[0])
+            self.assertEqual(rdd.getNumPartitions(), stage.numTasks)
+
+            sc.cancelAllJobs()
+            t.join()
+            # wait for event listener to update the status
+            time.sleep(1)
+
+            job = tracker.getJobInfo(jobIds[0])
+            self.assertEqual('FAILED', job.status)
+            self.assertEqual([], tracker.getActiveJobsIds())
+            self.assertEqual([], tracker.getActiveStageIds())
+
+            sc.stop()
+
+    def test_startTime(self):
+        with SparkContext() as sc:
+            self.assertGreater(sc.startTime, 0)
+
+
+class ConfTests(unittest.TestCase):
+    def test_memory_conf(self):
+        memoryList = ["1T", "1G", "1M", "1024K"]
+        for memory in memoryList:
+            sc = SparkContext(conf=SparkConf().set("spark.python.worker.memory", memory))
+            l = list(range(1024))
+            random.shuffle(l)
+            rdd = sc.parallelize(l, 4)
+            self.assertEqual(sorted(l), rdd.sortBy(lambda x: x).collect())
+            sc.stop()
+
+
+class KeywordOnlyTests(unittest.TestCase):
+    class Wrapped(object):
+        @keyword_only
+        def set(self, x=None, y=None):
+            if "x" in self._input_kwargs:
+                self._x = self._input_kwargs["x"]
+            if "y" in self._input_kwargs:
+                self._y = self._input_kwargs["y"]
+            return x, y
+
+    def test_keywords(self):
+        w = self.Wrapped()
+        x, y = w.set(y=1)
+        self.assertEqual(y, 1)
+        self.assertEqual(y, w._y)
+        self.assertIsNone(x)
+        self.assertFalse(hasattr(w, "_x"))
+
+    def test_non_keywords(self):
+        w = self.Wrapped()
+        self.assertRaises(TypeError, lambda: w.set(0, y=1))
+
+    def test_kwarg_ownership(self):
+        # test _input_kwargs is owned by each class instance and not a shared static variable
+        class Setter(object):
+            @keyword_only
+            def set(self, x=None, other=None, other_x=None):
+                if "other" in self._input_kwargs:
+                    self._input_kwargs["other"].set(x=self._input_kwargs["other_x"])
+                self._x = self._input_kwargs["x"]
+
+        a = Setter()
+        b = Setter()
+        a.set(x=1, other=b, other_x=2)
+        self.assertEqual(a._x, 1)
+        self.assertEqual(b._x, 2)
+
+
+class UtilTests(PySparkTestCase):
+    def test_py4j_exception_message(self):
+        from pyspark.util import _exception_message
+
+        with self.assertRaises(Py4JJavaError) as context:
+            # This attempts java.lang.String(null) which throws an NPE.
+            self.sc._jvm.java.lang.String(None)
+
+        self.assertTrue('NullPointerException' in _exception_message(context.exception))
+
+    def test_parsing_version_string(self):
+        from pyspark.util import VersionUtils
+        self.assertRaises(ValueError, lambda: VersionUtils.majorMinorVersion("abced"))
+
+
+@unittest.skipIf(not _have_scipy, "SciPy not installed")
+class SciPyTests(PySparkTestCase):
+
+    """General PySpark tests that depend on scipy """
+
+    def test_serialize(self):
+        from scipy.special import gammaln
+        x = range(1, 5)
+        expected = list(map(gammaln, x))
+        observed = self.sc.parallelize(x).map(gammaln).collect()
+        self.assertEqual(expected, observed)
+
+
+@unittest.skipIf(not _have_numpy, "NumPy not installed")
+class NumPyTests(PySparkTestCase):
+
+    """General PySpark tests that depend on numpy """
+
+    def test_statcounter_array(self):
+        x = self.sc.parallelize([np.array([1.0, 1.0]), np.array([2.0, 2.0]), np.array([3.0, 3.0])])
+        s = x.stats()
+        self.assertSequenceEqual([2.0, 2.0], s.mean().tolist())
+        self.assertSequenceEqual([1.0, 1.0], s.min().tolist())
+        self.assertSequenceEqual([3.0, 3.0], s.max().tolist())
+        self.assertSequenceEqual([1.0, 1.0], s.sampleStdev().tolist())
+
+        stats_dict = s.asDict()
+        self.assertEqual(3, stats_dict['count'])
+        self.assertSequenceEqual([2.0, 2.0], stats_dict['mean'].tolist())
+        self.assertSequenceEqual([1.0, 1.0], stats_dict['min'].tolist())
+        self.assertSequenceEqual([3.0, 3.0], stats_dict['max'].tolist())
+        self.assertSequenceEqual([6.0, 6.0], stats_dict['sum'].tolist())
+        self.assertSequenceEqual([1.0, 1.0], stats_dict['stdev'].tolist())
+        self.assertSequenceEqual([1.0, 1.0], stats_dict['variance'].tolist())
+
+        stats_sample_dict = s.asDict(sample=True)
+        self.assertEqual(3, stats_dict['count'])
+        self.assertSequenceEqual([2.0, 2.0], stats_sample_dict['mean'].tolist())
+        self.assertSequenceEqual([1.0, 1.0], stats_sample_dict['min'].tolist())
+        self.assertSequenceEqual([3.0, 3.0], stats_sample_dict['max'].tolist())
+        self.assertSequenceEqual([6.0, 6.0], stats_sample_dict['sum'].tolist())
+        self.assertSequenceEqual(
+            [0.816496580927726, 0.816496580927726], stats_sample_dict['stdev'].tolist())
+        self.assertSequenceEqual(
+            [0.6666666666666666, 0.6666666666666666], stats_sample_dict['variance'].tolist())
+
+
+if __name__ == "__main__":
+    from pyspark.tests import *
+    if xmlrunner:
+        unittest.main(testRunner=xmlrunner.XMLTestRunner(output='target/test-reports'), verbosity=2)
+    else:
+        unittest.main(verbosity=2)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/traceback_utils.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/traceback_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb8646df2b0bf2fa100279a2ee572ed465ea4d4b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/traceback_utils.py
@@ -0,0 +1,78 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from collections import namedtuple
+import os
+import traceback
+
+
+CallSite = namedtuple("CallSite", "function file linenum")
+
+
+def first_spark_call():
+    """
+    Return a CallSite representing the first Spark call in the current call stack.
+    """
+    tb = traceback.extract_stack()
+    if len(tb) == 0:
+        return None
+    file, line, module, what = tb[len(tb) - 1]
+    sparkpath = os.path.dirname(file)
+    first_spark_frame = len(tb) - 1
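+    # extract_stack() lists frames outermost-first, so the first frame whose file lives
+    # under the pyspark package marks the boundary between user code and Spark code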
+    for i in range(0, len(tb)):
+        file, line, fun, what = tb[i]
+        if file.startswith(sparkpath):
+            first_spark_frame = i
+            break
+    if first_spark_frame == 0:
+        file, line, fun, what = tb[0]
+        return CallSite(function=fun, file=file, linenum=line)
+    sfile, sline, sfun, swhat = tb[first_spark_frame]
+    ufile, uline, ufun, uwhat = tb[first_spark_frame - 1]
+    return CallSite(function=sfun, file=ufile, linenum=uline)
+
+
+class SCCallSiteSync(object):
+    """
+    Helper for setting the spark context call site.
+
+    Example usage:
+    from pyspark.context import SCCallSiteSync
+    with SCCallSiteSync(<relevant SparkContext>) as css:
+        <a Spark call>
+    """
+
+    _spark_stack_depth = 0
+
+    def __init__(self, sc):
+        call_site = first_spark_call()
+        if call_site is not None:
+            self._call_site = "%s at %s:%s" % (
+                call_site.function, call_site.file, call_site.linenum)
+        else:
+            self._call_site = "Error! Could not extract traceback info"
+        self._context = sc
+
+    def __enter__(self):
+        if SCCallSiteSync._spark_stack_depth == 0:
+            self._context._jsc.setCallSite(self._call_site)
+        SCCallSiteSync._spark_stack_depth += 1
+
+    def __exit__(self, type, value, tb):
+        SCCallSiteSync._spark_stack_depth -= 1
+        if SCCallSiteSync._spark_stack_depth == 0:
+            self._context._jsc.setCallSite(None)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/util.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..f906f49595438ce64dfb82577ab273d6efd8c0c6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/util.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import re
+import sys
+import inspect
+from py4j.protocol import Py4JJavaError
+
+__all__ = []
+
+
+def _exception_message(excp):
+    """Return the message from an exception as either a str or unicode object.  Supports both
+    Python 2 and Python 3.
+
+    >>> msg = "Exception message"
+    >>> excp = Exception(msg)
+    >>> msg == _exception_message(excp)
+    True
+
+    >>> msg = u"unicöde"
+    >>> excp = Exception(msg)
+    >>> msg == _exception_message(excp)
+    True
+    """
+    if isinstance(excp, Py4JJavaError):
+        # In Python 2, the 'message' attribute of 'Py4JJavaError' does not contain the stack
+        # trace available on the Java side. Calling 'str' on the exception would be the usual
+        # approach, but 'Py4JJavaError' has an issue with non-ASCII strings, so we work around
+        # it by calling '__str__()' directly. Please see SPARK-23517.
+        return excp.__str__()
+    if hasattr(excp, "message"):
+        return excp.message
+    return str(excp)
+
+
+def _get_argspec(f):
+    """
+    Get argspec of a function. Supports both Python 2 and Python 3.
+    """
+    if sys.version_info[0] < 3:
+        argspec = inspect.getargspec(f)
+    else:
+        # `getargspec` has been deprecated since Python 3.0 (incompatible with function annotations).
+        # See SPARK-23569.
+        argspec = inspect.getfullargspec(f)
+    return argspec
+
+
+class VersionUtils(object):
+    """
+    Provides utility methods for parsing Spark version strings.
+    """
+    @staticmethod
+    def majorMinorVersion(sparkVersion):
+        """
+        Given a Spark version string, return the (major version number, minor version number).
+        E.g., for 2.0.1-SNAPSHOT, return (2, 0).
+
+        >>> sparkVersion = "2.4.0"
+        >>> VersionUtils.majorMinorVersion(sparkVersion)
+        (2, 4)
+        >>> sparkVersion = "2.3.0-SNAPSHOT"
+        >>> VersionUtils.majorMinorVersion(sparkVersion)
+        (2, 3)
+
+        """
+        m = re.search(r'^(\d+)\.(\d+)(\..*)?$', sparkVersion)
+        if m is not None:
+            return (int(m.group(1)), int(m.group(2)))
+        else:
+            raise ValueError("Spark tried to parse '%s' as a Spark" % sparkVersion +
+                             " version string, but it could not find the major and minor" +
+                             " version numbers.")
+
+
+def fail_on_stopiteration(f):
+    """
+    Wraps the input function so that a 'StopIteration' raised inside it is re-raised as a
+    'RuntimeError'. This prevents silent loss of data when 'f' is used in a for loop in Spark code.
+    """
+    def wrapper(*args, **kwargs):
+        try:
+            return f(*args, **kwargs)
+        except StopIteration as exc:
+            raise RuntimeError(
+                "Caught StopIteration thrown from user's code; failing the task",
+                exc
+            )
+
+    return wrapper
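+
+# Illustrative usage of fail_on_stopiteration (a minimal sketch; `risky` below is a
+# hypothetical user function, not part of this module):
+#
+#     def risky(x):
+#         raise StopIteration  # e.g. leaked from an exhausted generator in user code
+#
+#     safe = fail_on_stopiteration(risky)
+#     safe(1)  # raises RuntimeError instead of silently ending the enclosing loop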
+
+
+if __name__ == "__main__":
+    import doctest
+    (failure_count, test_count) = doctest.testmod()
+    if failure_count:
+        sys.exit(-1)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/version.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..16aa6d3fb9ef7359d038a84b411122f70a6422db
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/version.py
@@ -0,0 +1 @@
+__version__='2.4.0'
diff --git a/spark-2.4.0-bin-hadoop2.7/python/pyspark/worker.py b/spark-2.4.0-bin-hadoop2.7/python/pyspark/worker.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c59f1f999f18205fa592aefff56db84ba4f51c9
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/pyspark/worker.py
@@ -0,0 +1,410 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Worker that receives input from Piped RDD.
+"""
+from __future__ import print_function
+import os
+import sys
+import time
+import resource
+import socket
+import traceback
+
+from pyspark.accumulators import _accumulatorRegistry
+from pyspark.broadcast import Broadcast, _broadcastRegistry
+from pyspark.java_gateway import local_connect_and_auth
+from pyspark.taskcontext import BarrierTaskContext, TaskContext
+from pyspark.files import SparkFiles
+from pyspark.rdd import PythonEvalType
+from pyspark.serializers import write_with_length, write_int, read_long, read_bool, \
+    write_long, read_int, SpecialLengths, UTF8Deserializer, PickleSerializer, \
+    BatchedSerializer, ArrowStreamPandasSerializer
+from pyspark.sql.types import to_arrow_type
+from pyspark.util import _get_argspec, fail_on_stopiteration
+from pyspark import shuffle
+
+if sys.version >= '3':
+    basestring = str
+
+pickleSer = PickleSerializer()
+utf8_deserializer = UTF8Deserializer()
+
+
+def report_times(outfile, boot, init, finish):
+    write_int(SpecialLengths.TIMING_DATA, outfile)
+    write_long(int(1000 * boot), outfile)
+    write_long(int(1000 * init), outfile)
+    write_long(int(1000 * finish), outfile)
+
+
+def add_path(path):
+    # the worker may be reused, so do not add the path multiple times
+    if path not in sys.path:
+        # overwrite system packages
+        sys.path.insert(1, path)
+
+
+def read_command(serializer, file):
+    command = serializer._read_with_length(file)
+    if isinstance(command, Broadcast):
+        command = serializer.loads(command.value)
+    return command
+
+
+def chain(f, g):
+    """chain two functions together """
+    return lambda *a: g(f(*a))
+
+
+def wrap_udf(f, return_type):
+    if return_type.needConversion():
+        toInternal = return_type.toInternal
+        return lambda *a: toInternal(f(*a))
+    else:
+        return lambda *a: f(*a)
+
+
+def wrap_scalar_pandas_udf(f, return_type):
+    arrow_return_type = to_arrow_type(return_type)
+
+    def verify_result_length(*a):
+        result = f(*a)
+        if not hasattr(result, "__len__"):
+            raise TypeError("Return type of the user-defined function should be "
+                            "pandas.Series, but is {}".format(type(result)))
+        if len(result) != len(a[0]):
+            raise RuntimeError("Result vector from pandas_udf was not the required length: "
+                               "expected %d, got %d" % (len(a[0]), len(result)))
+        return result
+
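+    # pair each result with its Arrow return type so the Arrow serializer knows how to encode it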
+    return lambda *a: (verify_result_length(*a), arrow_return_type)
+
+
+def wrap_grouped_map_pandas_udf(f, return_type, argspec, runner_conf):
+    assign_cols_by_name = runner_conf.get(
+        "spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName", "true")
+    assign_cols_by_name = assign_cols_by_name.lower() == "true"
+
+    def wrapped(key_series, value_series):
+        import pandas as pd
+
+        if len(argspec.args) == 1:
+            result = f(pd.concat(value_series, axis=1))
+        elif len(argspec.args) == 2:
+            key = tuple(s[0] for s in key_series)
+            result = f(key, pd.concat(value_series, axis=1))
+
+        if not isinstance(result, pd.DataFrame):
+            raise TypeError("Return type of the user-defined function should be "
+                            "pandas.DataFrame, but is {}".format(type(result)))
+        if not len(result.columns) == len(return_type):
+            raise RuntimeError(
+                "Number of columns of the returned pandas.DataFrame "
+                "doesn't match specified schema. "
+                "Expected: {} Actual: {}".format(len(return_type), len(result.columns)))
+
+        # Assign result columns by schema name if user labeled with strings, else use position
+        if assign_cols_by_name and any(isinstance(name, basestring) for name in result.columns):
+            return [(result[field.name], to_arrow_type(field.dataType)) for field in return_type]
+        else:
+            return [(result[result.columns[i]], to_arrow_type(field.dataType))
+                    for i, field in enumerate(return_type)]
+
+    return wrapped
+
+
+def wrap_grouped_agg_pandas_udf(f, return_type):
+    arrow_return_type = to_arrow_type(return_type)
+
+    def wrapped(*series):
+        import pandas as pd
+        result = f(*series)
+        return pd.Series([result])
+
+    return lambda *a: (wrapped(*a), arrow_return_type)
+
+
+def wrap_window_agg_pandas_udf(f, return_type):
+    # This is similar to grouped_agg_pandas_udf, the only difference
+    # is that window_agg_pandas_udf needs to repeat the return value
+    # to match the window length, whereas grouped_agg_pandas_udf just returns
+    # the scalar value.
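+    # e.g. for a 3-row window, a scalar result of 5.0 becomes pd.Series([5.0, 5.0, 5.0])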
+    arrow_return_type = to_arrow_type(return_type)
+
+    def wrapped(*series):
+        import pandas as pd
+        result = f(*series)
+        return pd.Series([result]).repeat(len(series[0]))
+
+    return lambda *a: (wrapped(*a), arrow_return_type)
+
+
+def read_single_udf(pickleSer, infile, eval_type, runner_conf):
+    num_arg = read_int(infile)
+    arg_offsets = [read_int(infile) for i in range(num_arg)]
+    row_func = None
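+    # a UDF column may be a chain of functions; compose them so each one consumes the previous result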
+    for i in range(read_int(infile)):
+        f, return_type = read_command(pickleSer, infile)
+        if row_func is None:
+            row_func = f
+        else:
+            row_func = chain(row_func, f)
+
+    # make sure StopIterations raised in the user code are not ignored when the results
+    # are consumed in a for loop; re-raise them as RuntimeErrors instead
+    func = fail_on_stopiteration(row_func)
+
+    # the last returnType will be the return type of UDF
+    if eval_type == PythonEvalType.SQL_SCALAR_PANDAS_UDF:
+        return arg_offsets, wrap_scalar_pandas_udf(func, return_type)
+    elif eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
+        argspec = _get_argspec(row_func)  # signature was lost when wrapping it
+        return arg_offsets, wrap_grouped_map_pandas_udf(func, return_type, argspec, runner_conf)
+    elif eval_type == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF:
+        return arg_offsets, wrap_grouped_agg_pandas_udf(func, return_type)
+    elif eval_type == PythonEvalType.SQL_WINDOW_AGG_PANDAS_UDF:
+        return arg_offsets, wrap_window_agg_pandas_udf(func, return_type)
+    elif eval_type == PythonEvalType.SQL_BATCHED_UDF:
+        return arg_offsets, wrap_udf(func, return_type)
+    else:
+        raise ValueError("Unknown eval type: {}".format(eval_type))
+
+
+def read_udfs(pickleSer, infile, eval_type):
+    runner_conf = {}
+
+    if eval_type in (PythonEvalType.SQL_SCALAR_PANDAS_UDF,
+                     PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF,
+                     PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF,
+                     PythonEvalType.SQL_WINDOW_AGG_PANDAS_UDF):
+
+        # Load conf used for pandas_udf evaluation
+        num_conf = read_int(infile)
+        for i in range(num_conf):
+            k = utf8_deserializer.loads(infile)
+            v = utf8_deserializer.loads(infile)
+            runner_conf[k] = v
+
+        # NOTE: if timezone is set here, that implies respectSessionTimeZone is True
+        timezone = runner_conf.get("spark.sql.session.timeZone", None)
+        ser = ArrowStreamPandasSerializer(timezone)
+    else:
+        ser = BatchedSerializer(PickleSerializer(), 100)
+
+    num_udfs = read_int(infile)
+    udfs = {}
+    call_udf = []
+    mapper_str = ""
+    if eval_type == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
+        # Create function like this:
+        #   lambda a: f([a[0]], [a[0], a[1]])
+
+        # We assume there is only one UDF here because grouped map doesn't
+        # support combining multiple UDFs.
+        assert num_udfs == 1
+
+        # See FlatMapGroupsInPandasExec for how arg_offsets are used to
+        # distinguish between grouping attributes and data attributes
+        arg_offsets, udf = read_single_udf(pickleSer, infile, eval_type, runner_conf)
+        udfs['f'] = udf
+        split_offset = arg_offsets[0] + 1
+        arg0 = ["a[%d]" % o for o in arg_offsets[1: split_offset]]
+        arg1 = ["a[%d]" % o for o in arg_offsets[split_offset:]]
+        mapper_str = "lambda a: f([%s], [%s])" % (", ".join(arg0), ", ".join(arg1))
+    else:
+        # Create function like this:
+        #   lambda a: (f0(a[0]), f1(a[1], a[2]), f2(a[3]))
+        # In the special case of a single UDF this will return a single result rather
+        # than a tuple of results; this is the format that the JVM side expects.
+        for i in range(num_udfs):
+            arg_offsets, udf = read_single_udf(pickleSer, infile, eval_type, runner_conf)
+            udfs['f%d' % i] = udf
+            args = ["a[%d]" % o for o in arg_offsets]
+            call_udf.append("f%d(%s)" % (i, ", ".join(args)))
+        mapper_str = "lambda a: (%s)" % (", ".join(call_udf))
+
+    mapper = eval(mapper_str, udfs)
+    func = lambda _, it: map(mapper, it)
+
+    # profiling is not supported for UDFs
+    return func, None, ser, ser
+
+
+def main(infile, outfile):
+    try:
+        boot_time = time.time()
+        split_index = read_int(infile)
+        if split_index == -1:  # for unit tests
+            sys.exit(-1)
+
+        version = utf8_deserializer.loads(infile)
+        if version != "%d.%d" % sys.version_info[:2]:
+            raise Exception(("Python in worker has different version %s than that in " +
+                             "driver %s, PySpark cannot run with different minor versions. " +
+                             "Please check environment variables PYSPARK_PYTHON and " +
+                             "PYSPARK_DRIVER_PYTHON are correctly set.") %
+                            ("%d.%d" % sys.version_info[:2], version))
+
+        # inputs used only by barrier tasks (these values are always read from the stream)
+        isBarrier = read_bool(infile)
+        boundPort = read_int(infile)
+        secret = UTF8Deserializer().loads(infile)
+
+        # set up memory limits
+        memory_limit_mb = int(os.environ.get('PYSPARK_EXECUTOR_MEMORY_MB', "-1"))
+        total_memory = resource.RLIMIT_AS
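+        # RLIMIT_AS limits the total virtual address space available to this worker process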
+        try:
+            if memory_limit_mb > 0:
+                (soft_limit, hard_limit) = resource.getrlimit(total_memory)
+                msg = "Current mem limits: {0} of max {1}\n".format(soft_limit, hard_limit)
+                print(msg, file=sys.stderr)
+
+                # convert to bytes
+                new_limit = memory_limit_mb * 1024 * 1024
+
+                if soft_limit == resource.RLIM_INFINITY or new_limit < soft_limit:
+                    msg = "Setting mem limits to {0} of max {1}\n".format(new_limit, new_limit)
+                    print(msg, file=sys.stderr)
+                    resource.setrlimit(total_memory, (new_limit, new_limit))
+
+        except (resource.error, OSError, ValueError) as e:
+            # not all systems support resource limits, so warn instead of failing
+            print("WARN: Failed to set memory limit: {0}\n".format(e), file=sys.stderr)
+
+        # initialize global state
+        taskContext = None
+        if isBarrier:
+            taskContext = BarrierTaskContext._getOrCreate()
+            BarrierTaskContext._initialize(boundPort, secret)
+        else:
+            taskContext = TaskContext._getOrCreate()
+        # read inputs for TaskContext info
+        taskContext._stageId = read_int(infile)
+        taskContext._partitionId = read_int(infile)
+        taskContext._attemptNumber = read_int(infile)
+        taskContext._taskAttemptId = read_long(infile)
+        taskContext._localProperties = dict()
+        for i in range(read_int(infile)):
+            k = utf8_deserializer.loads(infile)
+            v = utf8_deserializer.loads(infile)
+            taskContext._localProperties[k] = v
+
+        shuffle.MemoryBytesSpilled = 0
+        shuffle.DiskBytesSpilled = 0
+        _accumulatorRegistry.clear()
+
+        # fetch name of workdir
+        spark_files_dir = utf8_deserializer.loads(infile)
+        SparkFiles._root_directory = spark_files_dir
+        SparkFiles._is_running_on_worker = True
+
+        # fetch names of includes (*.zip and *.egg files) and construct PYTHONPATH
+        add_path(spark_files_dir)  # *.py files that were added will be copied here
+        num_python_includes = read_int(infile)
+        for _ in range(num_python_includes):
+            filename = utf8_deserializer.loads(infile)
+            add_path(os.path.join(spark_files_dir, filename))
+        if sys.version > '3':
+            import importlib
+            importlib.invalidate_caches()
+
+        # fetch names and values of broadcast variables
+        needs_broadcast_decryption_server = read_bool(infile)
+        num_broadcast_variables = read_int(infile)
+        if needs_broadcast_decryption_server:
+            # read the decrypted data from a server in the jvm
+            port = read_int(infile)
+            auth_secret = utf8_deserializer.loads(infile)
+            (broadcast_sock_file, _) = local_connect_and_auth(port, auth_secret)
+
+        for _ in range(num_broadcast_variables):
+            bid = read_long(infile)
+            if bid >= 0:
+                if needs_broadcast_decryption_server:
+                    read_bid = read_long(broadcast_sock_file)
+                    assert(read_bid == bid)
+                    _broadcastRegistry[bid] = \
+                        Broadcast(sock_file=broadcast_sock_file)
+                else:
+                    path = utf8_deserializer.loads(infile)
+                    _broadcastRegistry[bid] = Broadcast(path=path)
+
+            else:
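+                # a negative id means broadcast -(bid + 1) should be removed from the registry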
+                bid = - bid - 1
+                _broadcastRegistry.pop(bid)
+
+        if needs_broadcast_decryption_server:
+            broadcast_sock_file.write(b'1')
+            broadcast_sock_file.close()
+
+        _accumulatorRegistry.clear()
+        eval_type = read_int(infile)
+        if eval_type == PythonEvalType.NON_UDF:
+            func, profiler, deserializer, serializer = read_command(pickleSer, infile)
+        else:
+            func, profiler, deserializer, serializer = read_udfs(pickleSer, infile, eval_type)
+
+        init_time = time.time()
+
+        def process():
+            iterator = deserializer.load_stream(infile)
+            serializer.dump_stream(func(split_index, iterator), outfile)
+
+        if profiler:
+            profiler.profile(process)
+        else:
+            process()
+    except Exception:
+        try:
+            write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, outfile)
+            write_with_length(traceback.format_exc().encode("utf-8"), outfile)
+        except IOError:
+            # the JVM closed the socket
+            pass
+        except Exception:
+            # Write the error to stderr if it happened while serializing
+            print("PySpark worker failed with exception:", file=sys.stderr)
+            print(traceback.format_exc(), file=sys.stderr)
+        sys.exit(-1)
+    finish_time = time.time()
+    report_times(outfile, boot_time, init_time, finish_time)
+    write_long(shuffle.MemoryBytesSpilled, outfile)
+    write_long(shuffle.DiskBytesSpilled, outfile)
+
+    # Mark the beginning of the accumulators section of the output
+    write_int(SpecialLengths.END_OF_DATA_SECTION, outfile)
+    write_int(len(_accumulatorRegistry), outfile)
+    for (aid, accum) in _accumulatorRegistry.items():
+        pickleSer._write_with_length((aid, accum._value), outfile)
+
+    # check end of stream
+    if read_int(infile) == SpecialLengths.END_OF_STREAM:
+        write_int(SpecialLengths.END_OF_STREAM, outfile)
+    else:
+        # write a different value to tell JVM to not reuse this worker
+        write_int(SpecialLengths.END_OF_DATA_SECTION, outfile)
+        sys.exit(-1)
+
+
+if __name__ == '__main__':
+    # Read information about how to connect back to the JVM from the environment.
+    java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"])
+    auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"]
+    (sock_file, _) = local_connect_and_auth(java_port, auth_secret)
+    main(sock_file, sock_file)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/run-tests b/spark-2.4.0-bin-hadoop2.7/python/run-tests
new file mode 100755
index 0000000000000000000000000000000000000000..24949657ed7abb7ffc9a13c9a97b8bb7570fbfb7
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/run-tests
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+FWDIR="$(cd "`dirname $0`"/..; pwd)"
+cd "$FWDIR"
+
+exec python -u ./python/run-tests.py "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/python/run-tests-with-coverage b/spark-2.4.0-bin-hadoop2.7/python/run-tests-with-coverage
new file mode 100755
index 0000000000000000000000000000000000000000..6d74b563e9140bf864e51f9b90f1a944afd9c709
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/run-tests-with-coverage
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -o pipefail
+set -e
+
+# This variable indicates which coverage executable to run to combine coverages
+# and generate HTMLs, for example, 'coverage3' in Python 3.
+COV_EXEC="${COV_EXEC:-coverage}"
+FWDIR="$(cd "`dirname $0`"; pwd)"
+pushd "$FWDIR" > /dev/null
+
+# Ensure that coverage executable is installed.
+if ! hash $COV_EXEC 2>/dev/null; then
+  echo "Missing coverage executable in your path, skipping PySpark coverage"
+  exit 1
+fi
+
+# Set up the directories for coverage results.
+export COVERAGE_DIR="$FWDIR/test_coverage"
+rm -fr "$COVERAGE_DIR/coverage_data"
+rm -fr "$COVERAGE_DIR/htmlcov"
+mkdir -p "$COVERAGE_DIR/coverage_data"
+
+# The current directory is added to the Python path so that Python does not pick up our built
+# pyspark zip library first.
+export PYTHONPATH="$FWDIR:$PYTHONPATH"
+# Also, our sitecustomize.py and coverage_daemon.py are included in the path.
+export PYTHONPATH="$COVERAGE_DIR:$PYTHONPATH"
+
+# We use 'spark.python.daemon.module' configuration to insert the coverage supported workers.
+export SPARK_CONF_DIR="$COVERAGE_DIR/conf"
+
+# This environment variable enables the coverage.
+export COVERAGE_PROCESS_START="$FWDIR/.coveragerc"
+
+# If you'd like to run a specific unittest class, you can do so as follows:
+# SPARK_TESTING=1 ../bin/pyspark pyspark.sql.tests VectorizedUDFTests
+./run-tests "$@"
+
+# Don't run coverage for the coverage command itself
+unset COVERAGE_PROCESS_START
+
+# Coverage can generate empty coverage data files. Remove them to avoid warnings when combining.
+find $COVERAGE_DIR/coverage_data -size 0 -print0 | xargs -0 rm
+echo "Combining collected coverage data under $COVERAGE_DIR/coverage_data"
+$COV_EXEC combine
+echo "Reporting the coverage data at $COVERAGE_DIR/coverage_data/coverage"
+$COV_EXEC report --include "pyspark/*"
+echo "Generating HTML files for PySpark coverage under $COVERAGE_DIR/htmlcov"
+$COV_EXEC html --ignore-errors --include "pyspark/*" --directory "$COVERAGE_DIR/htmlcov"
+
+popd
diff --git a/spark-2.4.0-bin-hadoop2.7/python/run-tests.py b/spark-2.4.0-bin-hadoop2.7/python/run-tests.py
new file mode 100755
index 0000000000000000000000000000000000000000..ccbdfac3f385069a2f4a0f3ff964ff8c7550d63d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/run-tests.py
@@ -0,0 +1,298 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import logging
+from optparse import OptionParser
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from threading import Thread, Lock
+import time
+import uuid
+if sys.version < '3':
+    import Queue
+else:
+    import queue as Queue
+from distutils.version import LooseVersion
+from multiprocessing import Manager
+
+
+# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module
+sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/"))
+
+
+from sparktestsupport import SPARK_HOME  # noqa (suppress pep8 warnings)
+from sparktestsupport.shellutils import which, subprocess_check_output  # noqa
+from sparktestsupport.modules import all_modules, pyspark_sql  # noqa
+
+
+python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root')
+
+
+def print_red(text):
+    print('\033[31m' + text + '\033[0m')
+
+
+SKIPPED_TESTS = Manager().dict()
+LOG_FILE = os.path.join(SPARK_HOME, "python/unit-tests.log")
+FAILURE_REPORTING_LOCK = Lock()
+LOGGER = logging.getLogger()
+
+# Find out where the assembly jars are located.
+# Later, add back 2.12 to this list:
+# for scala in ["2.11", "2.12"]:
+for scala in ["2.11"]:
+    build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala)
+    if os.path.isdir(build_dir):
+        SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*")
+        break
+else:
+    raise Exception("Cannot find assembly build directory, please build Spark first.")
+
+
+def run_individual_python_test(target_dir, test_name, pyspark_python):
+    env = dict(os.environ)
+    env.update({
+        'SPARK_DIST_CLASSPATH': SPARK_DIST_CLASSPATH,
+        'SPARK_TESTING': '1',
+        'SPARK_PREPEND_CLASSES': '1',
+        'PYSPARK_PYTHON': which(pyspark_python),
+        'PYSPARK_DRIVER_PYTHON': which(pyspark_python)
+    })
+
+    # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is
+    # recognized by the tempfile module to override the default system temp directory.
+    tmp_dir = os.path.join(target_dir, str(uuid.uuid4()))
+    while os.path.isdir(tmp_dir):
+        tmp_dir = os.path.join(target_dir, str(uuid.uuid4()))
+    os.mkdir(tmp_dir)
+    env["TMPDIR"] = tmp_dir
+
+    # Also override the JVM's temp directory by setting driver and executor options.
+    spark_args = [
+        "--conf", "spark.driver.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir),
+        "--conf", "spark.executor.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir),
+        "pyspark-shell"
+    ]
+    env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args)
+
+    LOGGER.info("Starting test(%s): %s", pyspark_python, test_name)
+    start_time = time.time()
+    try:
+        per_test_output = tempfile.TemporaryFile()
+        retcode = subprocess.Popen(
+            [os.path.join(SPARK_HOME, "bin/pyspark"), test_name],
+            stderr=per_test_output, stdout=per_test_output, env=env).wait()
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+    except:
+        LOGGER.exception("Got exception while running %s with %s", test_name, pyspark_python)
+        # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if
+        # this code is invoked from a thread other than the main thread.
+        os._exit(1)
+    duration = time.time() - start_time
+    # Exit on the first failure.
+    if retcode != 0:
+        try:
+            with FAILURE_REPORTING_LOCK:
+                with open(LOG_FILE, 'ab') as log_file:
+                    per_test_output.seek(0)
+                    log_file.writelines(per_test_output)
+                per_test_output.seek(0)
+                for line in per_test_output:
+                    decoded_line = line.decode()
+                    if not re.match('[0-9]+', decoded_line):
+                        print(decoded_line, end='')
+                per_test_output.close()
+        except:
+            LOGGER.exception("Got an exception while trying to print failed test output")
+        finally:
+            print_red("\nHad test failures in %s with %s; see logs." % (test_name, pyspark_python))
+            # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if
+            # this code is invoked from a thread other than the main thread.
+            os._exit(-1)
+    else:
+        skipped_counts = 0
+        try:
+            per_test_output.seek(0)
+            # This expects the skipped-test output that unittest prints when the verbosity
+            # level is 2 (i.e. when the --verbose option is enabled).
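+            # For illustration (hypothetical test/class names), a matching skipped line
+            # looks like:
+            #   test_foo (pyspark.sql.tests.FooTests) ... skipped 'pandas not installed'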
+            decoded_lines = map(lambda line: line.decode(), iter(per_test_output))
+            skipped_tests = list(filter(
+                lambda line: re.search(r'test_.* \(pyspark\..*\) ... skipped ', line),
+                decoded_lines))
+            skipped_counts = len(skipped_tests)
+            if skipped_counts > 0:
+                key = (pyspark_python, test_name)
+                SKIPPED_TESTS[key] = skipped_tests
+            per_test_output.close()
+        except:
+            import traceback
+            print_red("\nGot an exception while trying to store "
+                      "skipped test output:\n%s" % traceback.format_exc())
+            # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if
+            # this code is invoked from a thread other than the main thread.
+            os._exit(-1)
+        if skipped_counts != 0:
+            LOGGER.info(
+                "Finished test(%s): %s (%is) ... %s tests were skipped", pyspark_python, test_name,
+                duration, skipped_counts)
+        else:
+            LOGGER.info(
+                "Finished test(%s): %s (%is)", pyspark_python, test_name, duration)
+
+
+def get_default_python_executables():
+    python_execs = [x for x in ["python2.7", "python3.4", "pypy"] if which(x)]
+    if "python2.7" not in python_execs:
+        LOGGER.warning("Not testing against `python2.7` because it could not be found; falling"
+                       " back to `python` instead")
+        python_execs.insert(0, "python")
+    return python_execs
+
+
+def parse_opts():
+    parser = OptionParser(
+        prog="run-tests"
+    )
+    parser.add_option(
+        "--python-executables", type="string", default=','.join(get_default_python_executables()),
+        help="A comma-separated list of Python executables to test against (default: %default)"
+    )
+    parser.add_option(
+        "--modules", type="string",
+        default=",".join(sorted(python_modules.keys())),
+        help="A comma-separated list of Python modules to test (default: %default)"
+    )
+    parser.add_option(
+        "-p", "--parallelism", type="int", default=4,
+        help="The number of suites to test in parallel (default %default)"
+    )
+    parser.add_option(
+        "--verbose", action="store_true",
+        help="Enable additional debug logging"
+    )
+
+    (opts, args) = parser.parse_args()
+    if args:
+        parser.error("Unsupported arguments: %s" % ' '.join(args))
+    if opts.parallelism < 1:
+        parser.error("Parallelism cannot be less than 1")
+    return opts
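+
+
+# A typical invocation (illustrative values; the executables and module names must be
+# available locally):
+#   ./python/run-tests.py --python-executables=python2.7 --modules=pyspark-sql --parallelism=2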
+
+
+def _check_coverage(python_exec):
+    # Make sure coverage is installed.
+    try:
+        subprocess_check_output(
+            [python_exec, "-c", "import coverage"],
+            stderr=open(os.devnull, 'w'))
+    except:
+        print_red("Coverage is not installed in Python executable '%s' "
+                  "but 'COVERAGE_PROCESS_START' environment variable is set, "
+                  "exiting." % python_exec)
+        sys.exit(-1)
+
+
+def main():
+    opts = parse_opts()
+    if (opts.verbose):
+        log_level = logging.DEBUG
+    else:
+        log_level = logging.INFO
+    logging.basicConfig(stream=sys.stdout, level=log_level, format="%(message)s")
+    LOGGER.info("Running PySpark tests. Output is in %s", LOG_FILE)
+    if os.path.exists(LOG_FILE):
+        os.remove(LOG_FILE)
+    python_execs = opts.python_executables.split(',')
+    modules_to_test = []
+    for module_name in opts.modules.split(','):
+        if module_name in python_modules:
+            modules_to_test.append(python_modules[module_name])
+        else:
+            print("Error: unrecognized module '%s'. Supported modules: %s" %
+                  (module_name, ", ".join(python_modules)))
+            sys.exit(-1)
+    LOGGER.info("Will test against the following Python executables: %s", python_execs)
+    LOGGER.info("Will test the following Python modules: %s", [x.name for x in modules_to_test])
+
+    task_queue = Queue.PriorityQueue()
+    for python_exec in python_execs:
+        # Check that the python executable has coverage installed when the 'COVERAGE_PROCESS_START'
+        # environment variable is set.
+        if "COVERAGE_PROCESS_START" in os.environ:
+            _check_coverage(python_exec)
+
+        python_implementation = subprocess_check_output(
+            [python_exec, "-c", "import platform; print(platform.python_implementation())"],
+            universal_newlines=True).strip()
+        LOGGER.debug("%s python_implementation is %s", python_exec, python_implementation)
+        LOGGER.debug("%s version is: %s", python_exec, subprocess_check_output(
+            [python_exec, "--version"], stderr=subprocess.STDOUT, universal_newlines=True).strip())
+        for module in modules_to_test:
+            if python_implementation not in module.blacklisted_python_implementations:
+                for test_goal in module.python_test_goals:
+                    if test_goal in ('pyspark.streaming.tests', 'pyspark.mllib.tests',
+                                     'pyspark.tests', 'pyspark.sql.tests'):
+                        priority = 0
+                    else:
+                        priority = 100
+                    task_queue.put((priority, (python_exec, test_goal)))
+
+    # Create the target directory before starting tasks to avoid races.
+    target_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'target'))
+    if not os.path.isdir(target_dir):
+        os.mkdir(target_dir)
+
+    def process_queue(task_queue):
+        while True:
+            try:
+                (priority, (python_exec, test_goal)) = task_queue.get_nowait()
+            except Queue.Empty:
+                break
+            try:
+                run_individual_python_test(target_dir, test_goal, python_exec)
+            finally:
+                task_queue.task_done()
+
+    start_time = time.time()
+    for _ in range(opts.parallelism):
+        worker = Thread(target=process_queue, args=(task_queue,))
+        worker.daemon = True
+        worker.start()
+    try:
+        task_queue.join()
+    except (KeyboardInterrupt, SystemExit):
+        print_red("Exiting due to interrupt")
+        sys.exit(-1)
+    total_duration = time.time() - start_time
+    LOGGER.info("Tests passed in %i seconds", total_duration)
+
+    for key, lines in sorted(SKIPPED_TESTS.items()):
+        pyspark_python, test_name = key
+        LOGGER.info("\nSkipped tests in %s with %s:" % (test_name, pyspark_python))
+        for line in lines:
+            LOGGER.info("    %s" % line.rstrip())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/setup.cfg b/spark-2.4.0-bin-hadoop2.7/python/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..d100b932bbafc1bafc845d794d81c73b1e37ab35
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/setup.cfg
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+[bdist_wheel]
+universal = 1
+
+[metadata]
+description-file = README.md
diff --git a/spark-2.4.0-bin-hadoop2.7/python/setup.py b/spark-2.4.0-bin-hadoop2.7/python/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..c447f2d40343dce05fb8b40c727dc7827f7a5afa
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/setup.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import glob
+import os
+import sys
+from setuptools import setup, find_packages
+from shutil import copyfile, copytree, rmtree
+
+if sys.version_info < (2, 7):
+    print("Python versions prior to 2.7 are not supported for pip installed PySpark.",
+          file=sys.stderr)
+    sys.exit(-1)
+
+try:
+    exec(open('pyspark/version.py').read())
+except IOError:
+    print("Failed to load PySpark version file for packaging. You must be in Spark's python dir.",
+          file=sys.stderr)
+    sys.exit(-1)
+VERSION = __version__  # noqa
+# A temporary path so we can reach files above the Python project root and fetch the scripts and jars we need
+TEMP_PATH = "deps"
+SPARK_HOME = os.path.abspath("../")
+
+# Provide guidance about how to use setup.py
+incorrect_invocation_message = """
+If you are installing pyspark from the Spark source, you must first build Spark and
+run sdist.
+
+    To build Spark with Maven you can run:
+      ./build/mvn -DskipTests clean package
+    Building the source dist is done in the Python directory:
+      cd python
+      python setup.py sdist
+      pip install dist/*.tar.gz"""
+
+# Figure out where the jars that we need to package with PySpark are located.
+JARS_PATH = glob.glob(os.path.join(SPARK_HOME, "assembly/target/scala-*/jars/"))
+
+if len(JARS_PATH) == 1:
+    JARS_PATH = JARS_PATH[0]
+elif (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1):
+    # Release mode puts the jars in a jars directory
+    JARS_PATH = os.path.join(SPARK_HOME, "jars")
+elif len(JARS_PATH) > 1:
+    print("Assembly jars exist for multiple scalas ({0}), please cleanup assembly/target".format(
+        JARS_PATH), file=sys.stderr)
+    sys.exit(-1)
+elif len(JARS_PATH) == 0 and not os.path.exists(TEMP_PATH):
+    print(incorrect_invocation_message, file=sys.stderr)
+    sys.exit(-1)
+
+EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python")
+SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
+DATA_PATH = os.path.join(SPARK_HOME, "data")
+LICENSES_PATH = os.path.join(SPARK_HOME, "licenses")
+
+SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
+JARS_TARGET = os.path.join(TEMP_PATH, "jars")
+EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")
+DATA_TARGET = os.path.join(TEMP_PATH, "data")
+LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")
+
+# Check whether we are under the Spark source path, in which case we need to build the symlink farm.
+# This is important because we only want to build the symlink farm while under Spark; otherwise we
+# want to use the existing symlink farm. And if the symlink farm already exists while we are under
+# Spark (e.g. a partially built sdist), we should error out and have the user sort it out.
+in_spark = (os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or
+            (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1))
+
+
+def _supports_symlinks():
+    """Check if the system supports symlinks (e.g. *nix) or not."""
+    return getattr(os, "symlink", None) is not None
+
+
+if (in_spark):
+    # Construct links for setup
+    try:
+        os.mkdir(TEMP_PATH)
+    except OSError:
+        print("Temp path for symlinks to the parent already exists: {0}".format(TEMP_PATH),
+              file=sys.stderr)
+        sys.exit(-1)
+
+# If you are changing the versions here, please also change ./python/pyspark/sql/utils.py and
+# ./python/run-tests.py. In case of Arrow, you should also check ./pom.xml.
+_minimum_pandas_version = "0.19.2"
+_minimum_pyarrow_version = "0.8.0"
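+# These minimums back the 'sql' extra declared in extras_require below; end users would
+# typically pull them in with, for example (illustrative command): pip install pyspark[sql]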
+
+try:
+    # We copy the shell script to be under pyspark/python/pyspark so that the launcher scripts
+    # find it where expected. The rest of the files aren't copied because they are accessed
+    # using Python imports instead, which will be resolved correctly.
+    try:
+        os.makedirs("pyspark/python/pyspark")
+    except OSError:
+        # Don't worry if the directory already exists.
+        pass
+    copyfile("pyspark/shell.py", "pyspark/python/pyspark/shell.py")
+
+    if (in_spark):
+        # Construct the symlink farm - this is necessary since we can't refer to paths above the
+        # package root, and we need to include the jars and scripts that live above the python root.
+        if _supports_symlinks():
+            os.symlink(JARS_PATH, JARS_TARGET)
+            os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET)
+            os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
+            os.symlink(DATA_PATH, DATA_TARGET)
+            os.symlink(LICENSES_PATH, LICENSES_TARGET)
+        else:
+            # For Windows, fall back to the slower copytree
+            copytree(JARS_PATH, JARS_TARGET)
+            copytree(SCRIPTS_PATH, SCRIPTS_TARGET)
+            copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
+            copytree(DATA_PATH, DATA_TARGET)
+            copytree(LICENSES_PATH, LICENSES_TARGET)
+    else:
+        # If we are not inside SPARK_HOME, verify we have the required symlink farm
+        if not os.path.exists(JARS_TARGET):
+            print("To build packaging must be in the python directory under the SPARK_HOME.",
+                  file=sys.stderr)
+
+    if not os.path.isdir(SCRIPTS_TARGET):
+        print(incorrect_invocation_message, file=sys.stderr)
+        sys.exit(-1)
+
+    # The scripts directive requires a list of each script path and does not take wildcards.
+    script_names = os.listdir(SCRIPTS_TARGET)
+    scripts = list(map(lambda script: os.path.join(SCRIPTS_TARGET, script), script_names))
+    # We add find_spark_home.py to the bin directory we install so that pip-installed PySpark
+    # can locate SPARK_HOME from Python.
+    scripts.append("pyspark/find_spark_home.py")
+
+    # Parse the README markdown file into rst for PyPI
+    long_description = "!!!!! missing pandoc do not upload to PyPI !!!!"
+    try:
+        import pypandoc
+        long_description = pypandoc.convert('README.md', 'rst')
+    except ImportError:
+        print("Could not import pypandoc - required to package PySpark", file=sys.stderr)
+    except OSError:
+        print("Could not convert - pandoc is not installed", file=sys.stderr)
+
+    setup(
+        name='pyspark',
+        version=VERSION,
+        description='Apache Spark Python API',
+        long_description=long_description,
+        author='Spark Developers',
+        author_email='dev@spark.apache.org',
+        url='https://github.com/apache/spark/tree/master/python',
+        packages=['pyspark',
+                  'pyspark.mllib',
+                  'pyspark.mllib.linalg',
+                  'pyspark.mllib.stat',
+                  'pyspark.ml',
+                  'pyspark.ml.linalg',
+                  'pyspark.ml.param',
+                  'pyspark.sql',
+                  'pyspark.streaming',
+                  'pyspark.bin',
+                  'pyspark.jars',
+                  'pyspark.python.pyspark',
+                  'pyspark.python.lib',
+                  'pyspark.data',
+                  'pyspark.licenses',
+                  'pyspark.examples.src.main.python'],
+        include_package_data=True,
+        package_dir={
+            'pyspark.jars': 'deps/jars',
+            'pyspark.bin': 'deps/bin',
+            'pyspark.python.lib': 'lib',
+            'pyspark.data': 'deps/data',
+            'pyspark.licenses': 'deps/licenses',
+            'pyspark.examples.src.main.python': 'deps/examples',
+        },
+        package_data={
+            'pyspark.jars': ['*.jar'],
+            'pyspark.bin': ['*'],
+            'pyspark.python.lib': ['*.zip'],
+            'pyspark.data': ['*.txt', '*.data'],
+            'pyspark.licenses': ['*.txt'],
+            'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
+        scripts=scripts,
+        license='http://www.apache.org/licenses/LICENSE-2.0',
+        install_requires=['py4j==0.10.7'],
+        setup_requires=['pypandoc'],
+        extras_require={
+            'ml': ['numpy>=1.7'],
+            'mllib': ['numpy>=1.7'],
+            'sql': [
+                'pandas>=%s' % _minimum_pandas_version,
+                'pyarrow>=%s' % _minimum_pyarrow_version,
+            ]
+        },
+        classifiers=[
+            'Development Status :: 5 - Production/Stable',
+            'License :: OSI Approved :: Apache Software License',
+            'Programming Language :: Python :: 2.7',
+            'Programming Language :: Python :: 3',
+            'Programming Language :: Python :: 3.4',
+            'Programming Language :: Python :: 3.5',
+            'Programming Language :: Python :: 3.6',
+            'Programming Language :: Python :: 3.7',
+            'Programming Language :: Python :: Implementation :: CPython',
+            'Programming Language :: Python :: Implementation :: PyPy']
+    )
+finally:
+    # We only clean up the symlink farm if we were in Spark; otherwise we are installing rather
+    # than packaging.
+    if (in_spark):
+        # Clean up either the symlink farm or the copied tree, depending on platform support.
+        if _supports_symlinks():
+            os.remove(os.path.join(TEMP_PATH, "jars"))
+            os.remove(os.path.join(TEMP_PATH, "bin"))
+            os.remove(os.path.join(TEMP_PATH, "examples"))
+            os.remove(os.path.join(TEMP_PATH, "data"))
+            os.remove(os.path.join(TEMP_PATH, "licenses"))
+        else:
+            rmtree(os.path.join(TEMP_PATH, "jars"))
+            rmtree(os.path.join(TEMP_PATH, "bin"))
+            rmtree(os.path.join(TEMP_PATH, "examples"))
+            rmtree(os.path.join(TEMP_PATH, "data"))
+            rmtree(os.path.join(TEMP_PATH, "licenses"))
+        os.rmdir(TEMP_PATH)
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_coverage/conf/spark-defaults.conf b/spark-2.4.0-bin-hadoop2.7/python/test_coverage/conf/spark-defaults.conf
new file mode 100644
index 0000000000000000000000000000000000000000..bf44ea6e7cfec72810e9e5afb13d6f2031dc7dab
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_coverage/conf/spark-defaults.conf
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This is used to generate PySpark coverage results. There seems to be no other
+# way to add this configuration when the SPARK_TESTING environment variable is
+# set, because we directly execute modules via 'python -m'.
+spark.python.daemon.module coverage_daemon
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_coverage/coverage_daemon.py b/spark-2.4.0-bin-hadoop2.7/python/test_coverage/coverage_daemon.py
new file mode 100644
index 0000000000000000000000000000000000000000..c87366a1ac23b7df7657da6df46d4d2e9501f154
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_coverage/coverage_daemon.py
@@ -0,0 +1,45 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import imp
+
+
+# This is a hack to always refer to the main code rather than the built zip.
+main_code_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+daemon = imp.load_source("daemon", "%s/pyspark/daemon.py" % main_code_dir)
+
+if "COVERAGE_PROCESS_START" in os.environ:
+    worker = imp.load_source("worker", "%s/pyspark/worker.py" % main_code_dir)
+
+    def _cov_wrapped(*args, **kwargs):
+        import coverage
+        cov = coverage.coverage(
+            config_file=os.environ["COVERAGE_PROCESS_START"])
+        cov.start()
+        try:
+            worker.main(*args, **kwargs)
+        finally:
+            cov.stop()
+            cov.save()
+    daemon.worker_main = _cov_wrapped
+else:
+    raise RuntimeError("COVERAGE_PROCESS_START environment variable is not set, exiting.")
+
+
+if __name__ == '__main__':
+    daemon.manager()
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_coverage/sitecustomize.py b/spark-2.4.0-bin-hadoop2.7/python/test_coverage/sitecustomize.py
new file mode 100644
index 0000000000000000000000000000000000000000..630237a518126f30e2b71e84a9b59139ea6c3fff
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_coverage/sitecustomize.py
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Note that 'sitecustomize' is a built-in hook in Python: if such a module is
+# importable, it is executed when the Python interpreter starts up.
+# `coverage.process_startup()` checks whether the COVERAGE_PROCESS_START
+# environment variable is set and, if so, starts collecting coverage.
+import coverage
+coverage.process_startup()
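+
+# In this test setup, the variable is typically pointed at the repository's .coveragerc,
+# e.g. (illustrative): export COVERAGE_PROCESS_START="$SPARK_HOME/python/.coveragerc"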
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/SimpleHTTPServer.py b/spark-2.4.0-bin-hadoop2.7/python/test_support/SimpleHTTPServer.py
new file mode 100644
index 0000000000000000000000000000000000000000..eddbd588e02dc6fb357462c8f3073f1bfb072cba
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/SimpleHTTPServer.py
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Used to test overriding the standard SimpleHTTPServer module.
+"""
+
+__name__ = "My Server"
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/hello/hello.txt b/spark-2.4.0-bin-hadoop2.7/python/test_support/hello/hello.txt
new file mode 100755
index 0000000000000000000000000000000000000000..980a0d5f19a64b4b30a87d4206aade58726b60e3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/hello/hello.txt
@@ -0,0 +1 @@
+Hello World!
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/hello/sub_hello/sub_hello.txt b/spark-2.4.0-bin-hadoop2.7/python/test_support/hello/sub_hello/sub_hello.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ce2d435b8c45fa74c9a856b24511243f3036d843
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/hello/sub_hello/sub_hello.txt
@@ -0,0 +1 @@
+Sub Hello World!
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/ages.csv b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/ages.csv
new file mode 100644
index 0000000000000000000000000000000000000000..18991feda788aadd1fc7fb8c6943baeb54139986
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/ages.csv
@@ -0,0 +1,4 @@
+Joe,20
+Tom,30
+Hyukjin,25
+
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/ages_newlines.csv b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/ages_newlines.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d19f6731625fa66517c194f55aff11d5b6627b7b
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/ages_newlines.csv
@@ -0,0 +1,6 @@
+Joe,20,"Hi,
+I am Jeo"
+Tom,30,"My name is Tom"
+Hyukjin,25,"I am Hyukjin
+
+I love Spark!"
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/_SUCCESS b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/_SUCCESS
new file mode 100755
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=0/c=0/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=0/c=0/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc
new file mode 100644
index 0000000000000000000000000000000000000000..834cf0b7f227244a3ccda18809a0bb49d27b59d2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=0/c=0/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=0/c=0/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=0/c=0/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc
new file mode 100755
index 0000000000000000000000000000000000000000..49438018733565be297429b4f9349450441230f9
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=0/c=0/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=1/c=1/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=1/c=1/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc
new file mode 100644
index 0000000000000000000000000000000000000000..693dceeee3ef2ff6fb24e8ac7fd006ffd7463074
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=1/c=1/.part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc.crc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=1/c=1/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=1/c=1/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc
new file mode 100755
index 0000000000000000000000000000000000000000..4cbb95ae0242cd99ed08f5d28ae7531dc55ccee3
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/orc_partitioned/b=1/c=1/part-r-00000-829af031-b970-49d6-ad39-30460a0be2c8.orc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_SUCCESS b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_SUCCESS
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_common_metadata b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_common_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..7ef2320651dee5f2a4d588ecf8d0b281a2bc3018
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_common_metadata differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_metadata b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_metadata
new file mode 100644
index 0000000000000000000000000000000000000000..78a1ca7d38279147e00a826e627f7514d988bd8e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/_metadata differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00008.gz.parquet.crc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00008.gz.parquet.crc
new file mode 100644
index 0000000000000000000000000000000000000000..e93f42ed6f35018df1ae76c9746b26bd299c0339
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/.part-r-00008.gz.parquet.crc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00008.gz.parquet b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00008.gz.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..461c382937ecdf13db1003bd2bf8c9a79c510d3e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2014/month=9/day=1/part-r-00008.gz.parquet differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00002.gz.parquet.crc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00002.gz.parquet.crc
new file mode 100644
index 0000000000000000000000000000000000000000..b63c4d6d1e1dc71969b22cdd24ccd4f0673b22c0
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00002.gz.parquet.crc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00004.gz.parquet.crc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00004.gz.parquet.crc
new file mode 100644
index 0000000000000000000000000000000000000000..5bc0ebd7135638686a8e064ac02fa673b06228d4
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/.part-r-00004.gz.parquet.crc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00002.gz.parquet b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00002.gz.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..62a63915beac2840b31ae7f1e8dec1c26b268b89
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00002.gz.parquet differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00004.gz.parquet b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00004.gz.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..67665a7b55da6413d2a534d6f46061e380f70b2e
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=25/part-r-00004.gz.parquet differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00005.gz.parquet.crc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00005.gz.parquet.crc
new file mode 100644
index 0000000000000000000000000000000000000000..ae94a15d08c816a6fb60117395462aa640da5026
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/.part-r-00005.gz.parquet.crc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00005.gz.parquet b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00005.gz.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6cb8538aa890475f9128c8025e2e9b0f72b1b785
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=10/day=26/part-r-00005.gz.parquet differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00007.gz.parquet.crc b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00007.gz.parquet.crc
new file mode 100644
index 0000000000000000000000000000000000000000..58d9bb5fc5883f0b65b637a8deea2672b9fee8a2
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/.part-r-00007.gz.parquet.crc differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00007.gz.parquet b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00007.gz.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9b00805481e7b311763d48ff965966b4d09daa99
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/parquet_partitioned/year=2015/month=9/day=1/part-r-00007.gz.parquet differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people.json b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people.json
new file mode 100644
index 0000000000000000000000000000000000000000..50a859cbd7ee826a613802a499772cad0907fe67
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people.json
@@ -0,0 +1,3 @@
+{"name":"Michael"}
+{"name":"Andy", "age":30}
+{"name":"Justin", "age":19}
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people1.json b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people1.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d217da77d1550a8212622e9058e409597c86ae6
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people1.json
@@ -0,0 +1,2 @@
+{"name":"Jonathan", "aka": "John"}
+
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people_array.json b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people_array.json
new file mode 100644
index 0000000000000000000000000000000000000000..c27c48fe343e40ec3decc7612fd29b1f7359db85
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people_array.json
@@ -0,0 +1,13 @@
+[
+  {
+    "name": "Michael"
+  },
+  {
+    "name": "Andy",
+    "age": 30
+  },
+  {
+    "name": "Justin",
+    "age": 19
+  }
+]
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people_array_utf16le.json b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people_array_utf16le.json
new file mode 100644
index 0000000000000000000000000000000000000000..9c657fa30ac9c651076ff8aa3676baa400b121fb
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/people_array_utf16le.json differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/streaming/text-test.txt b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/streaming/text-test.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae1e76c9e93a71a6bf3a13256cde999f9030a94a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/streaming/text-test.txt
@@ -0,0 +1,2 @@
+hello
+this
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/text-test.txt b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/text-test.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ae1e76c9e93a71a6bf3a13256cde999f9030a94a
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/sql/text-test.txt
@@ -0,0 +1,2 @@
+hello
+this
\ No newline at end of file
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/userlib-0.1.zip b/spark-2.4.0-bin-hadoop2.7/python/test_support/userlib-0.1.zip
new file mode 100644
index 0000000000000000000000000000000000000000..496e1349aa9674cfa4f232ea8b5e17fcdb49f4fd
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/python/test_support/userlib-0.1.zip differ
diff --git a/spark-2.4.0-bin-hadoop2.7/python/test_support/userlibrary.py b/spark-2.4.0-bin-hadoop2.7/python/test_support/userlibrary.py
new file mode 100755
index 0000000000000000000000000000000000000000..73fd26e71f10d1e0a7d543d0bbe0d0229e0681a5
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/python/test_support/userlibrary.py
@@ -0,0 +1,26 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Used to test shipping of code dependencies with SparkContext.addPyFile().
+"""
+
+
+class UserClass(object):
+
+    def hello(self):
+        return "Hello World!"
diff --git a/spark-2.4.0-bin-hadoop2.7/ref-packages.txt b/spark-2.4.0-bin-hadoop2.7/ref-packages.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b2a8142703c420e7a058144dc01f32f630a0e9c8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/ref-packages.txt
@@ -0,0 +1,116 @@
+apturl==0.5.2
+argcomplete==1.8.1
+asn1crypto==0.24.0
+beautifulsoup4==4.6.0
+blinker==1.4
+Brlapi==0.6.6
+certifi==2018.1.18
+chardet==3.0.4
+checkbox-support==0.22
+cmmnbuild-dep-manager==2.2.4
+command-not-found==0.3
+cryptography==2.1.4
+cssselect==1.1.0
+cupshelpers==1.0
+cycler==0.10.0
+decorator==4.1.2
+defer==1.0.6
+dell-recovery==0.0.0
+distro-info===0.18ubuntu0.18.04.1
+feedparser==5.2.1
+guacamole==0.9.2
+html5lib==0.999999999
+httplib2==0.9.2
+idna==2.6
+Jinja2==2.10
+JPype1==0.7.0
+keyring==10.6.0
+keyrings.alt==3.0
+language-selector==0.1
+launchpadlib==1.10.6
+lazr.restfulclient==0.13.5
+lazr.uri==1.0.3
+louis==3.5.0
+lxml==4.2.1
+macaroonbakery==1.1.3
+Mako==1.0.7
+Markdown==2.6.9
+MarkupSafe==1.0
+matplotlib==2.1.1
+mysql-connector-python==8.0.17
+netifaces==0.10.4
+numexpr==2.6.4
+numpy==1.13.3
+oauth==1.0.1
+oauthlib==2.0.6
+olefile==0.45.1
+onboard==1.4.1
+padme==1.1.1
+pandas==0.22.0
+pexpect==4.2.1
+Pillow==5.1.0
+plainbox==0.25
+powerline-status==2.6
+progressbar==2.3
+protobuf==3.0.0
+psutil==5.4.2
+py4j==0.10.7
+pyasn1==0.4.2
+pycairo==1.16.2
+pyconf==0.5
+pycrypto==2.6.1
+pycups==1.9.73
+pycurl==7.43.0.1
+pyenchant==2.0.0
+Pygments==2.2.0
+pygobject==3.26.1
+PyJWT==1.5.3
+pymacaroons==0.13.0
+PyNaCl==1.1.2
+pyparsing==2.2.0
+pyquery==1.4.0
+pyRFC3339==1.0
+pyspark==2.4.3
+pyspark-stubs==2.4.0.post3
+python-apt==1.6.4
+python-dateutil==2.6.1
+python-debian==0.1.32
+python-gnupg==0.4.1
+pytimber==2.7.0
+pytz==2018.3
+pyxdg==0.25
+PyYAML==3.12
+reportlab==3.4.0
+requests==2.18.4
+requests-unixsocket==0.1.5
+scipy==0.19.1
+SecretStorage==2.3.1
+simplejson==3.13.2
+six==1.11.0
+system-service==0.3
+systemd-python==234
+tables==3.4.2
+ubuntu-drivers-common==0.0.0
+Ubuntu-Make==18.5
+ufw==0.36
+unattended-upgrades==0.1
+unity-scope-calculator==0.1
+unity-scope-chromiumbookmarks==0.1
+unity-scope-colourlovers==0.1
+unity-scope-devhelp==0.1
+unity-scope-manpages==0.1
+unity-scope-openclipart==0.1
+unity-scope-texdoc==0.1
+unity-scope-tomboy==0.1
+unity-scope-virtualbox==0.1
+unity-scope-yelp==0.1
+unity-scope-zotero==0.1
+urllib3==1.22
+usb-creator==0.3.3
+virtualenv==15.1.0
+wadllib==1.3.2
+webencodings==0.5
+xdiagnose==3.8.8
+xkit==0.0.0
+XlsxWriter==0.9.6
+zope.interface==4.3.2
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/slaves.sh b/spark-2.4.0-bin-hadoop2.7/sbin/slaves.sh
new file mode 100755
index 0000000000000000000000000000000000000000..c971aa3296b093050e8e49ccc9477b874cf56ba8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/slaves.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run a shell command on all slave hosts.
+#
+# Environment Variables
+#
+#   SPARK_SLAVES    File naming remote hosts.
+#     Default is ${SPARK_CONF_DIR}/slaves.
+#   SPARK_CONF_DIR  Alternate conf dir. Default is ${SPARK_HOME}/conf.
+#   SPARK_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+#   SPARK_SSH_OPTS Options passed to ssh when running remote commands.
+##
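+
+# Example (illustrative command; assumes a populated conf/slaves file):
+#   ./sbin/slaves.sh uptime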
+
+usage="Usage: slaves.sh [--config <conf-dir>] command..."
+
+# if no args specified, show usage
+if [ $# -le 0 ]; then
+  echo $usage
+  exit 1
+fi
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+# If the slaves file is specified in the command line,
+# then it takes precedence over the definition in
+# spark-env.sh. Save it here.
+if [ -f "$SPARK_SLAVES" ]; then
+  HOSTLIST=`cat "$SPARK_SLAVES"`
+fi
+
+# Check if --config is passed as an argument. It is an optional parameter.
+# Exit if the argument is not a directory.
+if [ "$1" == "--config" ]
+then
+  shift
+  conf_dir="$1"
+  if [ ! -d "$conf_dir" ]
+  then
+    echo "ERROR : $conf_dir is not a directory"
+    echo $usage
+    exit 1
+  else
+    export SPARK_CONF_DIR="$conf_dir"
+  fi
+  shift
+fi
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ "$HOSTLIST" = "" ]; then
+  if [ "$SPARK_SLAVES" = "" ]; then
+    if [ -f "${SPARK_CONF_DIR}/slaves" ]; then
+      HOSTLIST=`cat "${SPARK_CONF_DIR}/slaves"`
+    else
+      HOSTLIST=localhost
+    fi
+  else
+    HOSTLIST=`cat "${SPARK_SLAVES}"`
+  fi
+fi
+
+
+
+# By default disable strict host key checking
+if [ "$SPARK_SSH_OPTS" = "" ]; then
+  SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
+fi
+
+for slave in `echo "$HOSTLIST"|sed  "s/#.*$//;/^$/d"`; do
+  if [ -n "${SPARK_SSH_FOREGROUND}" ]; then
+    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
+      2>&1 | sed "s/^/$slave: /"
+  else
+    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
+      2>&1 | sed "s/^/$slave: /" &
+  fi
+  if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
+    sleep $SPARK_SLAVE_SLEEP
+  fi
+done
+
+wait
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/spark-config.sh b/spark-2.4.0-bin-hadoop2.7/sbin/spark-config.sh
new file mode 100755
index 0000000000000000000000000000000000000000..bf3da18c3706e8f8d5f0934f735935357c27729d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/spark-config.sh
@@ -0,0 +1,33 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Included in all the Spark scripts with the source command;
+# should not be executed directly, and should not be passed
+# any arguments, since we need the original $*.
+
+# Symlinks and absolute paths should rely on SPARK_HOME to resolve.
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
+# Add the PySpark classes to the PYTHONPATH:
+if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
+  export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:${PYTHONPATH}"
+  export PYSPARK_PYTHONPATH_SET=1
+fi
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/spark-daemon.sh b/spark-2.4.0-bin-hadoop2.7/sbin/spark-daemon.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6de67e039b48fb1fbd5cb98f437981f048296a3c
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/spark-daemon.sh
@@ -0,0 +1,242 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Runs a Spark command as a daemon.
+#
+# Environment Variables
+#
+#   SPARK_CONF_DIR  Alternate conf dir. Default is ${SPARK_HOME}/conf.
+#   SPARK_LOG_DIR   Where log files are stored. ${SPARK_HOME}/logs by default.
+#   SPARK_MASTER    host:path from which Spark code should be rsync'd
+#   SPARK_PID_DIR   Where the pid files are stored. /tmp by default.
+#   SPARK_IDENT_STRING   A string representing this instance of spark. $USER by default
+#   SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
+#   SPARK_NO_DAEMONIZE   If set, runs the given command in the foreground and does not write a PID file.
+##
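+
+# Example (illustrative; the class name and options are assumptions about a typical standalone setup):
+#   ./sbin/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --host localhost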
+
+usage="Usage: spark-daemon.sh [--config <conf-dir>] (start|stop|submit|status) <spark-command> <spark-instance-number> <args...>"
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+# get arguments
+
+# Check if --config is passed as an argument. It is an optional parameter.
+# Exit if the argument is not a directory.
+
+if [ "$1" == "--config" ]
+then
+  shift
+  conf_dir="$1"
+  if [ ! -d "$conf_dir" ]
+  then
+    echo "ERROR : $conf_dir is not a directory"
+    echo $usage
+    exit 1
+  else
+    export SPARK_CONF_DIR="$conf_dir"
+  fi
+  shift
+fi
+
+option=$1
+shift
+command=$1
+shift
+instance=$1
+shift
+
+spark_rotate_log ()
+{
+    log=$1;
+    num=5;
+    if [ -n "$2" ]; then
+	num=$2
+    fi
+    if [ -f "$log" ]; then # rotate logs
+	while [ $num -gt 1 ]; do
+	    prev=`expr $num - 1`
+	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+	    num=$prev
+	done
+	mv "$log" "$log.$num";
+    fi
+}
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ "$SPARK_IDENT_STRING" = "" ]; then
+  export SPARK_IDENT_STRING="$USER"
+fi
+
+
+export SPARK_PRINT_LAUNCH_COMMAND="1"
+
+# get log directory
+if [ "$SPARK_LOG_DIR" = "" ]; then
+  export SPARK_LOG_DIR="${SPARK_HOME}/logs"
+fi
+mkdir -p "$SPARK_LOG_DIR"
+touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
+TEST_LOG_DIR=$?
+if [ "${TEST_LOG_DIR}" = "0" ]; then
+  rm -f "$SPARK_LOG_DIR"/.spark_test
+else
+  chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
+fi
+
+if [ "$SPARK_PID_DIR" = "" ]; then
+  SPARK_PID_DIR=/tmp
+fi
+
+# some variables
+log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
+pid="$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid"
+
+# Set default scheduling priority
+if [ "$SPARK_NICENESS" = "" ]; then
+    export SPARK_NICENESS=0
+fi
+
+execute_command() {
+  if [ -z ${SPARK_NO_DAEMONIZE+set} ]; then
+      nohup -- "$@" >> $log 2>&1 < /dev/null &
+      newpid="$!"
+
+      echo "$newpid" > "$pid"
+
+      # Poll for up to 5 seconds for the java process to start
+      for i in {1..10}
+      do
+        if [[ $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
+           break
+        fi
+        sleep 0.5
+      done
+
+      sleep 2
+      # Check if the process has died; in that case we'll tail the log so the user can see
+      if [[ ! $(ps -p "$newpid" -o comm=) =~ "java" ]]; then
+        echo "failed to launch: $@"
+        tail -10 "$log" | sed 's/^/  /'
+        echo "full log in $log"
+      fi
+  else
+      "$@"
+  fi
+}
+
+run_command() {
+  mode="$1"
+  shift
+
+  mkdir -p "$SPARK_PID_DIR"
+
+  if [ -f "$pid" ]; then
+    TARGET_ID="$(cat "$pid")"
+    if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
+      echo "$command running as process $TARGET_ID.  Stop it first."
+      exit 1
+    fi
+  fi
+
+  if [ "$SPARK_MASTER" != "" ]; then
+    echo rsync from "$SPARK_MASTER"
+    rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
+  fi
+
+  spark_rotate_log "$log"
+  echo "starting $command, logging to $log"
+
+  case "$mode" in
+    (class)
+      execute_command nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class "$command" "$@"
+      ;;
+
+    (submit)
+      execute_command nice -n "$SPARK_NICENESS" bash "${SPARK_HOME}"/bin/spark-submit --class "$command" "$@"
+      ;;
+
+    (*)
+      echo "unknown mode: $mode"
+      exit 1
+      ;;
+  esac
+
+}
+
+case $option in
+
+  (submit)
+    run_command submit "$@"
+    ;;
+
+  (start)
+    run_command class "$@"
+    ;;
+
+  (stop)
+
+    if [ -f $pid ]; then
+      TARGET_ID="$(cat "$pid")"
+      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
+        echo "stopping $command"
+        kill "$TARGET_ID" && rm -f "$pid"
+      else
+        echo "no $command to stop"
+      fi
+    else
+      echo "no $command to stop"
+    fi
+    ;;
+
+  (status)
+
+    if [ -f $pid ]; then
+      TARGET_ID="$(cat "$pid")"
+      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]]; then
+        echo $command is running.
+        exit 0
+      else
+        echo $pid file is present but $command not running
+        exit 1
+      fi
+    else
+      echo $command not running.
+      exit 2
+    fi
+    ;;
+
+  (*)
+    echo "$usage"
+    exit 1
+    ;;
+
+esac
+
+
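spark-daemon.sh is normally driven by the start-*/stop-* wrappers that follow, but it can also be invoked directly. A minimal sketch, assuming the standalone Master class and instance number 1:

    # start a Master as daemon instance 1, check it, then stop it
    $ ./sbin/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --host localhost --port 7077
    $ ./sbin/spark-daemon.sh status org.apache.spark.deploy.master.Master 1
    $ ./sbin/spark-daemon.sh stop org.apache.spark.deploy.master.Master 1
    # setting SPARK_NO_DAEMONIZE to any value makes execute_command run the process in the foreground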
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/spark-daemons.sh b/spark-2.4.0-bin-hadoop2.7/sbin/spark-daemons.sh
new file mode 100755
index 0000000000000000000000000000000000000000..dec2f4432df39ca7da223bb7d42aceb785f4f928
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/spark-daemons.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Run a Spark command on all slave hosts.
+
+usage="Usage: spark-daemons.sh [--config <conf-dir>] [start|stop] command instance-number args..."
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo "$usage"
+  exit 1
+fi
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+exec "${SPARK_HOME}/sbin/slaves.sh" cd "${SPARK_HOME}" \; "${SPARK_HOME}/sbin/spark-daemon.sh" "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-all.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-all.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a5d30d274ea6ea02f53fa3e9bec1f992915c06c8
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-all.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Start all spark daemons.
+# Starts the master on this node.
+# Starts a worker on each node specified in conf/slaves
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+# Load the Spark configuration
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+# Start Master
+"${SPARK_HOME}/sbin"/start-master.sh
+
+# Start Workers
+"${SPARK_HOME}/sbin"/start-slaves.sh
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-history-server.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-history-server.sh
new file mode 100755
index 0000000000000000000000000000000000000000..38a43b98c3992c92d9e96d1c4fecc8dd512337a2
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-history-server.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts the history server on the machine this script is executed on.
+#
+# Usage: start-history-server.sh
+#
+# Use the SPARK_HISTORY_OPTS environment variable to set history server configuration.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+exec "${SPARK_HOME}/sbin"/spark-daemon.sh start org.apache.spark.deploy.history.HistoryServer 1 "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-master.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-master.sh
new file mode 100755
index 0000000000000000000000000000000000000000..97ee32159b6defa076829e57932bbadab006a028
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-master.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts the master on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+# NOTE: This exact class name is matched downstream by SparkSubmit.
+# Any changes need to be reflected there.
+CLASS="org.apache.spark.deploy.master.Master"
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  echo "Usage: ./sbin/start-master.sh [options]"
+  pattern="Usage:"
+  pattern+="\|Using Spark's default log4j profile:"
+  pattern+="\|Registered signal handlers for"
+
+  "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+  exit 1
+fi
+
+ORIGINAL_ARGS="$@"
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ "$SPARK_MASTER_PORT" = "" ]; then
+  SPARK_MASTER_PORT=7077
+fi
+
+if [ "$SPARK_MASTER_HOST" = "" ]; then
+  case `uname` in
+      (SunOS)
+	  SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`"
+	  ;;
+      (*)
+	  SPARK_MASTER_HOST="`hostname -f`"
+	  ;;
+  esac
+fi
+
+if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
+  SPARK_MASTER_WEBUI_PORT=8080
+fi
+
+"${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS 1 \
+  --host $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \
+  $ORIGINAL_ARGS
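The defaults chosen above (port 7077, web UI port 8080, the local host name) can be overridden by exporting the corresponding variables before launch; a sketch with an illustrative host name:

    $ export SPARK_MASTER_HOST=master.example.com
    $ export SPARK_MASTER_PORT=7077
    $ export SPARK_MASTER_WEBUI_PORT=8080
    $ ./sbin/start-master.sh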
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-mesos-dispatcher.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-mesos-dispatcher.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ecaad7ad09634c3850a4f1dcfcc466e413b05224
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-mesos-dispatcher.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Starts the Mesos Cluster Dispatcher on the machine this script is executed on.
+# The Mesos Cluster Dispatcher is responsible for launching the Mesos framework and
+# Rest server to handle driver requests for Mesos cluster mode.
+# Only one cluster dispatcher is needed per Mesos cluster.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ "$SPARK_MESOS_DISPATCHER_PORT" = "" ]; then
+  SPARK_MESOS_DISPATCHER_PORT=7077
+fi
+
+if [ "$SPARK_MESOS_DISPATCHER_HOST" = "" ]; then
+  case `uname` in
+      (SunOS)
+	  SPARK_MESOS_DISPATCHER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`"
+	  ;;
+      (*)
+	  SPARK_MESOS_DISPATCHER_HOST="`hostname -f`"
+	  ;;
+  esac
+fi
+
+if [ "$SPARK_MESOS_DISPATCHER_NUM" = "" ]; then
+  SPARK_MESOS_DISPATCHER_NUM=1
+fi
+
+"${SPARK_HOME}/sbin"/spark-daemon.sh start org.apache.spark.deploy.mesos.MesosClusterDispatcher $SPARK_MESOS_DISPATCHER_NUM --host $SPARK_MESOS_DISPATCHER_HOST --port $SPARK_MESOS_DISPATCHER_PORT "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-mesos-shuffle-service.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-mesos-shuffle-service.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1845845676029f0571602822d49d19e2b8b8f5cc
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-mesos-shuffle-service.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts the Mesos external shuffle service on the machine this script is executed on.
+# The Mesos external shuffle service detects when an application exits and automatically
+# cleans up its shuffle files.
+#
+# Usage: start-mesos-shuffle-service.sh
+#
+# Use the SPARK_SHUFFLE_OPTS environment variable to set shuffle service configuration.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+exec "${SPARK_HOME}/sbin"/spark-daemon.sh start org.apache.spark.deploy.mesos.MesosExternalShuffleService 1
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-shuffle-service.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-shuffle-service.sh
new file mode 100755
index 0000000000000000000000000000000000000000..793e165be6c78c15be19f1a48590c994ce186696
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-shuffle-service.sh
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts the external shuffle service on the machine this script is executed on.
+#
+# Usage: start-shuffle-service.sh
+#
+# Use the SPARK_SHUFFLE_OPTS environment variable to set shuffle service configuration.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+exec "${SPARK_HOME}/sbin"/spark-daemon.sh start org.apache.spark.deploy.ExternalShuffleService 1
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-slave.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-slave.sh
new file mode 100755
index 0000000000000000000000000000000000000000..8c268b8859155c2314c3d23616e49bdeeca9baeb
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-slave.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts a slave on the machine this script is executed on.
+#
+# Environment Variables
+#
+#   SPARK_WORKER_INSTANCES  The number of worker instances to run on this
+#                           slave.  Default is 1.
+#   SPARK_WORKER_PORT       The base port number for the first worker. If set,
+#                           subsequent workers will increment this number.  If
+#                           unset, Spark will find a valid port number, but
+#                           with no guarantee of a predictable pattern.
+#   SPARK_WORKER_WEBUI_PORT The base port for the web interface of the first
+#                           worker.  Subsequent workers will increment this
+#                           number.  Default is 8081.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+# NOTE: This exact class name is matched downstream by SparkSubmit.
+# Any changes need to be reflected there.
+CLASS="org.apache.spark.deploy.worker.Worker"
+
+if [[ $# -lt 1 ]] || [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  echo "Usage: ./sbin/start-slave.sh [options] <master>"
+  pattern="Usage:"
+  pattern+="\|Using Spark's default log4j profile:"
+  pattern+="\|Registered signal handlers for"
+
+  "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+  exit 1
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+# First argument should be the master; we need to store it aside because we may
+# need to insert arguments between it and the other arguments
+MASTER=$1
+shift
+
+# Determine desired worker port
+if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
+  SPARK_WORKER_WEBUI_PORT=8081
+fi
+
+# Start up the appropriate number of workers on this machine.
+# quick local function to start a worker
+function start_instance {
+  WORKER_NUM=$1
+  shift
+
+  if [ "$SPARK_WORKER_PORT" = "" ]; then
+    PORT_FLAG=
+    PORT_NUM=
+  else
+    PORT_FLAG="--port"
+    PORT_NUM=$(( $SPARK_WORKER_PORT + $WORKER_NUM - 1 ))
+  fi
+  WEBUI_PORT=$(( $SPARK_WORKER_WEBUI_PORT + $WORKER_NUM - 1 ))
+
+  "${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS $WORKER_NUM \
+     --webui-port "$WEBUI_PORT" $PORT_FLAG $PORT_NUM $MASTER "$@"
+}
+
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+  start_instance 1 "$@"
+else
+  for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+    start_instance $(( 1 + $i )) "$@"
+  done
+fi
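Combining the environment variables documented in the header, a sketch that starts two workers on one machine against a hypothetical master URL:

    $ export SPARK_WORKER_INSTANCES=2
    $ export SPARK_WORKER_PORT=35000        # workers bind ports 35000 and 35001
    $ export SPARK_WORKER_WEBUI_PORT=8081   # web UIs on 8081 and 8082
    $ ./sbin/start-slave.sh spark://master.example.com:7077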
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-slaves.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-slaves.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f5269df523dac3a26e34f71659182ad492465e60
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-slaves.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts a slave instance on each machine specified in the conf/slaves file.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+# Find the port number for the master
+if [ "$SPARK_MASTER_PORT" = "" ]; then
+  SPARK_MASTER_PORT=7077
+fi
+
+if [ "$SPARK_MASTER_HOST" = "" ]; then
+  case `uname` in
+      (SunOS)
+	  SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`"
+	  ;;
+      (*)
+	  SPARK_MASTER_HOST="`hostname -f`"
+	  ;;
+  esac
+fi
+
+# Launch the slaves
+"${SPARK_HOME}/sbin/slaves.sh" cd "${SPARK_HOME}" \; "${SPARK_HOME}/sbin/start-slave.sh" "spark://$SPARK_MASTER_HOST:$SPARK_MASTER_PORT"
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/start-thriftserver.sh b/spark-2.4.0-bin-hadoop2.7/sbin/start-thriftserver.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f02f31793e3467763307c1a6a18c05e549a247ec
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/start-thriftserver.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Shell script for starting the Spark SQL Thrift server
+
+# Enter posix mode for bash
+set -o posix
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+# NOTE: This exact class name is matched downstream by SparkSubmit.
+# Any changes need to be reflected there.
+CLASS="org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
+
+function usage {
+  echo "Usage: ./sbin/start-thriftserver [options] [thrift server options]"
+  pattern="usage"
+  pattern+="\|Spark assembly has been built with Hive"
+  pattern+="\|NOTE: SPARK_PREPEND_CLASSES is set"
+  pattern+="\|Spark Command: "
+  pattern+="\|======="
+  pattern+="\|--help"
+
+  "${SPARK_HOME}"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  echo
+  echo "Thrift server options:"
+  "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
+  exit 0
+fi
+
+export SUBMIT_USAGE_FUNCTION=usage
+
+exec "${SPARK_HOME}"/sbin/spark-daemon.sh submit $CLASS 1 --name "Thrift JDBC/ODBC Server" "$@"
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-all.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-all.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4e476ca05cb0555aa267f01e53ce48d98afc2d16
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-all.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Stop all spark daemons.
+# Run this on the master node.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+# Load the Spark configuration
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+# Stop the slaves, then the master
+"${SPARK_HOME}/sbin"/stop-slaves.sh
+"${SPARK_HOME}/sbin"/stop-master.sh
+
+if [ "$1" == "--wait" ]
+then
+  printf "Waiting for workers to shut down..."
+  while true
+  do
+    running=`"${SPARK_HOME}/sbin/slaves.sh" ps -ef | grep -v grep | grep deploy.worker.Worker`
+    if [ -z "$running" ]
+    then
+      printf "\nAll workers successfully shut down.\n"
+      break
+    else
+      printf "."
+      sleep 10
+    fi
+  done
+fi
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-history-server.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-history-server.sh
new file mode 100755
index 0000000000000000000000000000000000000000..14e3af4be910acd621594823948a487bbcf73369
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-history-server.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Stops the history server on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+"${SPARK_HOME}/sbin/spark-daemon.sh" stop org.apache.spark.deploy.history.HistoryServer 1
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-master.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-master.sh
new file mode 100755
index 0000000000000000000000000000000000000000..14644ea72d43be41a256ce2ae898ec7167764f5f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-master.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Stops the master on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+"${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.deploy.master.Master 1
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-mesos-dispatcher.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-mesos-dispatcher.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b13e018c7d41ed2363b16049fff502980d950c80
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-mesos-dispatcher.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Stop the Mesos Cluster dispatcher on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+if [ "$SPARK_MESOS_DISPATCHER_NUM" = "" ]; then
+  SPARK_MESOS_DISPATCHER_NUM=1
+fi
+
+"${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.deploy.mesos.MesosClusterDispatcher \
+    $SPARK_MESOS_DISPATCHER_NUM
+
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-mesos-shuffle-service.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-mesos-shuffle-service.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d23cad375e1bdefef9e37b5e9145052e5d7c821d
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-mesos-shuffle-service.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Stops the Mesos external shuffle service on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+"${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.deploy.mesos.MesosExternalShuffleService 1
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-shuffle-service.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-shuffle-service.sh
new file mode 100755
index 0000000000000000000000000000000000000000..50d69cf34e0a58e5854f2a614971ac5cdea6456f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-shuffle-service.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Stops the external shuffle service on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+"${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.deploy.ExternalShuffleService 1
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-slave.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-slave.sh
new file mode 100755
index 0000000000000000000000000000000000000000..685bcf59b33aac64a1324a22f454687ab909981e
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-slave.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# A shell script to stop all workers on a single slave
+#
+# Environment variables
+#
+#   SPARK_WORKER_INSTANCES The number of worker instances that should be
+#                          running on this slave.  Default is 1.
+
+# Usage: stop-slave.sh
+#   Stops all slaves on this worker machine
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+  "${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker 1
+else
+  for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+    "${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker $(( $i + 1 ))
+  done
+fi
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-slaves.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-slaves.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a57441b52a04a0101a8b035fd5b04f1cbcfcf94f
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-slaves.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+"${SPARK_HOME}/sbin/slaves.sh" cd "${SPARK_HOME}" \; "${SPARK_HOME}/sbin"/stop-slave.sh
diff --git a/spark-2.4.0-bin-hadoop2.7/sbin/stop-thriftserver.sh b/spark-2.4.0-bin-hadoop2.7/sbin/stop-thriftserver.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cf45058f882a0c32ed5f854d01a210ae2daa4d00
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/sbin/stop-thriftserver.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Stops the thrift server on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+"${SPARK_HOME}/sbin"/spark-daemon.sh stop org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 1
diff --git a/spark-2.4.0-bin-hadoop2.7/source-me.sh b/spark-2.4.0-bin-hadoop2.7/source-me.sh
new file mode 100755
index 0000000000000000000000000000000000000000..6e3ac553feadd56245a3de50d64a990c4abfe7d3
--- /dev/null
+++ b/spark-2.4.0-bin-hadoop2.7/source-me.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+echo "*** Preparing python virtual environment for NXCALS ***"
+
+nxcals_python_env_name=nxcals-python3-env
+
+echo "Check if Python3 is present on the system..."
+python3_executable_path=`command -v python3`
+if [ -z "$python3_executable_path" ]; then
+        echo "ERROR: Python3 is not present, supported version 3.6.x"
+        return 1
+fi
+
+version=$(python3 -V 2>&1)
+# Compare only the major.minor components so that 3.6.0 and patch releases >= 3.6.10 are handled correctly
+parsedVersion=$(python3 -c 'import sys; print(sys.version_info.major * 100 + sys.version_info.minor)')
+
+if [ "$parsedVersion" -eq 306 ]; then
+  echo "Valid Python3 version $version"
+else
+  echo "WARN: Unsupported Python3 version: $version; the supported version is 3.6.x"
+fi
+
+pip_installed=`command -v pip`
+
+if [ -n "$pip_installed" ]; then
+  echo "Saving current pip packages to ref-packages.txt"
+  pip freeze > ref-packages.txt
+else
+  echo "pip is not installed; cannot re-install the current set of packages into the new virtual env..."
+fi
+
+python3 -m venv $nxcals_python_env_name
+if [ $? -eq 0 ]; then
+  echo "...python virtual environment created!"
+else
+  echo "ERROR: Cannot create virtual environment!"
+  return 2
+fi
+
+echo "Loading python virtual environment..."
+source $nxcals_python_env_name/bin/activate
+if [ $? -eq 0 ]; then
+  echo "...done!"
+else
+  echo "ERROR: Cannot load Python3 virtual environment"
+  return 3
+fi
+
+echo "Installing nxcals sources..."
+pip -v install ./extra-packages/nxcals_data_access_python3-0.1.164-py3-none-any.whl --no-index --find-links ./extra-packages
+if [ $? -eq 0 ]; then
+  echo "...done!"
+else
+  echo "ERROR: Cannot install nxcals sources!"
+  return 4
+fi
+
+read -p "Do you want to install packages from source Python3 environment (requires internet connection or proxy)? [Y/n]:" install_libs
+if [ "$install_libs" == "Y" ]; then
+  pip install -r ref-packages.txt
+else
+  echo "Not installing packages from previous Python3 env. You can always do it by executing 'pip install -r ref-packages.txt'"
+fi
+
+read -p "Do you want to install pandas and pyarrow for pandas_udf (requires internet connection or proxy)? [Y/n]:" install_pandas
+if [ "$install_pandas" == "Y" ]; then
+  pip install pyarrow pandas
+else
+  echo "Not installing pandas and pyarrow. You can always do it by executing 'pip install pyarrow pandas'"
+fi
+
+
+echo "*** Hints: ***"
+echo "*** Use pySpark: '$ ./bin/pySpark <options>'"
+echo "*** Use '$ deactivate' command to exit virtual env ***"
+echo "*** Use '$ source $nxcals_python_env_name/bin/activate' command to activate virtual env again ***"
diff --git a/spark-2.4.0-bin-hadoop2.7/yarn/spark-2.4.0-yarn-shuffle.jar b/spark-2.4.0-bin-hadoop2.7/yarn/spark-2.4.0-yarn-shuffle.jar
new file mode 100644
index 0000000000000000000000000000000000000000..f36ccd8731c594c3b3843d0124638a7c0c810192
Binary files /dev/null and b/spark-2.4.0-bin-hadoop2.7/yarn/spark-2.4.0-yarn-shuffle.jar differ