From f70024f5c7c785309a3f7b62101ae28c8ff51387 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 13 Nov 2021 21:41:30 -0800 Subject: [PATCH 1/2] [SPARK-XXX][K8S] Support K8s image building with Java 17 --- bin/docker-image-tool.sh | 11 +++- .../main/dockerfiles/spark/Dockerfile.java17 | 62 +++++++++++++++++++ .../kubernetes/integration-tests/pom.xml | 2 +- 3 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 83b13b83341d0..ad31bd1e7b7ab 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -232,7 +232,8 @@ Commands: push Push a pre-built image to a registry. Requires a repository address to be provided. Options: - -f file Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark. + -f file (Optional) Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark. + For Java 17, use `-f kubernetes/dockerfiles/spark/Dockerfile.java17` -p file (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark. Skips building PySpark docker image if not specified. -R file (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark. 
@@ -267,15 +268,19 @@ Examples: $0 -r docker.io/myrepo -t v2.3.0 build $0 -r docker.io/myrepo -t v2.3.0 push - - Build and push JDK11-based image with tag "v3.0.0" to docker.io/myrepo + - Build and push Java11-based image with tag "v3.0.0" to docker.io/myrepo $0 -r docker.io/myrepo -t v3.0.0 -b java_image_tag=11-jre-slim build $0 -r docker.io/myrepo -t v3.0.0 push - - Build and push JDK11-based image for multiple archs to docker.io/myrepo + - Build and push Java11-based image for multiple archs to docker.io/myrepo $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build # Note: buildx, which does cross building, needs to do the push during build # So there is no separate push step with -X + - Build and push Java17-based image with tag "v3.3.0" to docker.io/myrepo + $0 -r docker.io/myrepo -t v3.3.0 -f kubernetes/dockerfiles/spark/Dockerfile.java17 build + $0 -r docker.io/myrepo -t v3.3.0 push + EOF } diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 new file mode 100644 index 0000000000000..f9ab64e94a54c --- /dev/null +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 @@ -0,0 +1,62 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# We build from debian:bullseye-slim instead of an openjdk image because the official openjdk
+# images switched their underlying OS from Debian to Oracle Linux starting with openjdk:12.
+FROM debian:bullseye-slim
+
+ARG spark_uid=185
+
+# Before building the docker image, first build and make a Spark distribution following
+# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
+# If this docker file is being used in the context of building your images from a Spark
+# distribution, the docker build command should be invoked from the top level directory
+# of the Spark distribution. E.g.:
+#   docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile.java17 .
+
+RUN set -ex && \
+    apt-get update && \
+    ln -s /lib /lib64 && \
+    apt-get install -y bash tini libc6 libpam-modules krb5-user libnss3 procps openjdk-17-jre && \
+    mkdir -p /opt/spark && \
+    mkdir -p /opt/spark/examples && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/apt/* /var/lib/apt/lists/*
+
+COPY jars /opt/spark/jars
+COPY bin /opt/spark/bin
+COPY sbin /opt/spark/sbin
+COPY kubernetes/dockerfiles/spark/entrypoint.sh /opt/
+COPY kubernetes/dockerfiles/spark/decom.sh /opt/
+COPY examples /opt/spark/examples
+COPY kubernetes/tests /opt/spark/tests
+COPY data /opt/spark/data
+
+ENV SPARK_HOME=/opt/spark
+
+WORKDIR /opt/spark/work-dir
+RUN chmod g+w /opt/spark/work-dir
+RUN chmod a+x /opt/decom.sh
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
+
+# Specify the User that the actual main process will run as
+USER ${spark_uid}
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index d281e38ebf05d..35ac88507b875 100644
--- 
a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -34,7 +34,7 @@ N/A ${project.build.directory}/spark-dist-unpacked - N/A + kubernetes/dockerfiles/spark/Dockerfile.java17 8-jre-slim ${project.build.directory}/imageTag.txt minikube From 8ffa233775a9e85e1570b6e8dc8ec3b9cd0e8c6b Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 14 Nov 2021 01:00:50 -0800 Subject: [PATCH 2/2] revert the test change --- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 35ac88507b875..d281e38ebf05d 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -34,7 +34,7 @@ N/A ${project.build.directory}/spark-dist-unpacked - kubernetes/dockerfiles/spark/Dockerfile.java17 + N/A 8-jre-slim ${project.build.directory}/imageTag.txt minikube