Merge branch 'develop' into check-rtd-build
dcrankshaw authored Sep 2, 2017
2 parents e9d90d9 + b6ea450 commit 8d27a87
Showing 6 changed files with 3 additions and 31 deletions.
8 changes: 1 addition & 7 deletions ClipperTestsDockerfile
@@ -17,13 +17,7 @@ RUN echo 'export PATH=/opt/conda/bin:$PATH' > /etc/profile.d/conda.sh \
 ENV PATH "/opt/conda/bin:$PATH"
 RUN conda install -y libgcc pyzmq

-RUN pip install requests subprocess32 scikit-learn numpy pyyaml docker kubernetes pyspark findspark
-
-RUN mkdir /spark && cd /spark \
-  && curl -o spark.tgz https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz \
-  && tar zxf spark.tgz && rm spark.tgz
-
-ENV SPARK_HOME /spark/spark-2.1.1-bin-hadoop2.7
+RUN pip install requests subprocess32 scikit-learn numpy pyyaml docker kubernetes pyspark

 # Install maven
 ARG MAVEN_VERSION=3.5.0
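The hunk above replaces a hand-downloaded Spark distribution (and its SPARK_HOME export) with the pip-installable pyspark package, which ships its own Spark runtime. A minimal sketch, assuming pyspark was installed with pip as in the new RUN line, of how code inside the image can locate the bundled distribution:

    import os
    import pyspark  # importable directly from site-packages; no findspark needed

    # The bundled Spark runtime lives under the package's install directory.
    print(os.path.dirname(pyspark.__file__))
    print(pyspark.__version__)  # e.g. "2.1.1" if pyspark==2.1.1 is pinned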
8 changes: 2 additions & 6 deletions PySparkContainerDockerfile
@@ -3,21 +3,17 @@ FROM clipper/py-rpc:${CODE_VERSION}

 COPY clipper_admin/clipper_admin/python_container_conda_deps.txt /lib/

-RUN curl -o /spark.tgz https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz \
-  && cd / && tar zxf /spark.tgz && mv /spark-2.1.1-bin-hadoop2.7 /spark \
-  && echo deb http://ftp.de.debian.org/debian jessie-backports main >> /etc/apt/sources.list \
+RUN echo deb http://ftp.de.debian.org/debian jessie-backports main >> /etc/apt/sources.list \
   && apt-get update --fix-missing \
   && apt-get install -yqq -t jessie-backports openjdk-8-jdk \
   && conda install -y --file /lib/python_container_conda_deps.txt \
-  && pip install findspark
+  && pip install pyspark

 COPY containers/python/pyspark_container.py containers/python/pyspark_container_entry.sh /container/
 COPY VERSION.txt /lib/
 COPY clipper_admin/ /lib/clipper_admin
 RUN pip install /lib/clipper_admin

-ENV SPARK_HOME="/spark"
-
 CMD ["/container/pyspark_container_entry.sh"]

 # vim: set filetype=dockerfile:
12 changes: 0 additions & 12 deletions bin/run_unittests.sh
@@ -126,18 +126,6 @@ function run_frontend_tests {
 function run_integration_tests {
     echo -e "\nRunning integration tests\n\n"
     cd $DIR
-    # Check if SPARK_HOME is set
-    if [ -z ${SPARK_HOME+x} ]; then
-        # Check if this script has downloaded spark previously
-        if [ ! -d "spark" ]; then
-            echo "Downloading Spark"
-            curl -o spark.tgz https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz
-            tar zxf spark.tgz && mv spark-2.1.1-bin-hadoop2.7 spark
-        fi
-        export SPARK_HOME=`pwd`/spark
-    else
-        echo "Found Spark at $SPARK_HOME"
-    fi
     python ../integration-tests/clipper_admin_tests.py
     python ../integration-tests/many_apps_many_models.py 2 3
     python ../integration-tests/deploy_pyspark_models.py
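With pyspark installed into the test image via pip, the SPARK_HOME bootstrap deleted above becomes unnecessary: the integration tests resolve pyspark from site-packages. A minimal sketch, assuming the pip-installed package, of the import-time check the tests now rely on instead:

    try:
        import pyspark  # resolved from site-packages; no SPARK_HOME required
    except ImportError:
        raise SystemExit("pyspark missing; install it with `pip install pyspark`")
    print("Using pyspark", pyspark.__version__)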
2 changes: 0 additions & 2 deletions containers/python/pyspark_container.py
@@ -9,8 +9,6 @@
 # sys.path.append(os.path.abspath("/lib/"))
 from clipper_admin.deployers import cloudpickle

-import findspark
-findspark.init()
 import pyspark
 from pyspark import SparkConf, SparkContext
 from pyspark.sql import SparkSession
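findspark.init() existed only to put a downloaded Spark's Python bindings on sys.path via SPARK_HOME; with a pip-installed pyspark the imports above resolve directly. A minimal sketch, assuming pip-installed pyspark and a Java runtime in the image (the PySparkContainerDockerfile above installs openjdk-8-jdk), of starting a session without findspark:

    from pyspark.sql import SparkSession

    # Runs Spark locally inside this process; requires a Java runtime.
    spark = (SparkSession.builder
             .master("local[*]")
             .appName("pyspark-sketch")
             .getOrCreate())
    df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])
    print(df.count())  # 2
    spark.stop()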
2 changes: 0 additions & 2 deletions integration-tests/deploy_pyspark_models.py
@@ -13,8 +13,6 @@
 from util_package import mock_module_in_package as mmip
 import mock_module as mm

-import findspark
-findspark.init()
 from pyspark.mllib.classification import LogisticRegressionWithSGD
 from pyspark.mllib.classification import SVMWithSGD
 from pyspark.mllib.tree import RandomForest
2 changes: 0 additions & 2 deletions integration-tests/deploy_pyspark_pipeline_models.py
@@ -7,8 +7,6 @@
 import time
 import logging

-import findspark
-findspark.init()
 from pyspark.ml import Pipeline
 from pyspark.ml.classification import LogisticRegression
 from pyspark.ml.feature import HashingTF, Tokenizer
