 name: master
 
 on:
-  push:
-    branches:
-    - branch-3.0
   pull_request:
     branches:
     - branch-3.0
 
 jobs:
+  # TODO(SPARK-32248): Recover JDK 11 builds
+  # Build: build Spark and run the tests for specified modules.
   build:
-
+    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        java: [ '1.8', '11' ]
-        hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
-        hive: [ 'hive-1.2', 'hive-2.3' ]
-        exclude:
-        - java: '11'
-          hive: 'hive-1.2'
-        - hadoop: 'hadoop-3.2'
-          hive: 'hive-1.2'
-    name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}
-
+        java:
+          - 1.8
+        hadoop:
+          - hadoop3.2
+        hive:
+          - hive2.3
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depend on the external Amazon Kinesis service.
+        # Note that the modules below are from sparktestsupport/modules.py.
+        modules:
+          - |-
+            core, unsafe, kvstore, avro,
+            network_common, network_shuffle, repl, launcher,
+            examples, sketch, graphx
+          - |-
+            catalyst, hive-thriftserver
+          - |-
+            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            mllib-local, mllib,
+            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
+          - |-
+            pyspark-sql, pyspark-mllib
+          - |-
+            pyspark-core, pyspark-streaming, pyspark-ml
+          - |-
+            sparkr
+        # Here, we split the Hive and SQL tests into the slow ones and the rest.
+        included-tags: [""]
+        excluded-tags: [""]
+        comment: [""]
+        include:
+          # Hive tests
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- slow tests"
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- other tests"
+          # SQL tests
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- slow tests"
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- other tests"
+    env:
+      TEST_ONLY_MODULES: ${{ matrix.modules }}
+      TEST_ONLY_EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      TEST_ONLY_INCLUDED_TAGS: ${{ matrix.included-tags }}
+      HADOOP_PROFILE: ${{ matrix.hadoop }}
+      HIVE_PROFILE: ${{ matrix.hive }}
+      # GitHub Actions' default miniconda to use in pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
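
Two mechanics worth noting in the matrix above. First, each `modules` entry is a YAML block scalar (`|-`), so an entire comma-separated group reaches the job as a single string (interior newlines kept, final newline stripped), which `./dev/run-tests` receives through TEST_ONLY_MODULES and, presumably, splits back into individual module names. Second, each `include` entry adds one standalone job rather than combining with the default axes; that is how the Hive and SQL suites get split into "slow" and "other" runs. As an illustration (not part of the diff), the first Hive include entry resolves the env block to roughly:

    env:
      TEST_ONLY_MODULES: hive
      TEST_ONLY_INCLUDED_TAGS: org.apache.spark.tags.SlowHiveTest
      TEST_ONLY_EXCLUDED_TAGS: ""        # not set by this include entry
      HADOOP_PROFILE: hadoop3.2
      HIVE_PROFILE: hive2.3
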
     steps:
-    - uses: actions/checkout@master
-    # We split caches because GitHub Action Cache has a 400MB-size limit.
-    - uses: actions/cache@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v1
       with:
         path: build
         key: build-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
           build-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/com
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
-    - uses: actions/cache@v1
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/org
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/net
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
+        path: ~/.m2/repository
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
-    - uses: actions/cache@v1
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/io
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
+        path: ~/.ivy2/cache
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
         restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
-    - name: Set up JDK ${{ matrix.java }}
+          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+    - name: Install JDK ${{ matrix.java }}
       uses: actions/setup-java@v1
       with:
         java-version: ${{ matrix.java }}
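
The per-groupId Maven caches removed above (~/.m2/repository/com, org, net, io) existed to stay under the old 400MB per-cache limit cited in the deleted comment; with the 2G limit noted in the new comment, a single ~/.m2/repository cache suffices. actions/cache looks for an exact `key` hit first and otherwise restores the newest cache whose key starts with one of the `restore-keys` prefixes, so a pom.xml change still warm-starts from the previous cache:

    # illustration with hypothetical hashes:
    #   key:          1.8-hadoop3.2-maven-3f2a91   -> miss after pom.xml changes
    #   restore-keys: 1.8-hadoop3.2-maven-         -> restores 1.8-hadoop3.2-maven-9c41d7
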
-    - name: Build with Maven
-      run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
-        rm -rf ~/.m2/repository/org/apache/spark
-
-
-  lint:
-    runs-on: ubuntu-latest
-    name: Linters (Java/Scala/Python), licenses, dependencies
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
+    # PySpark
+    - name: Install PyPy3
+      # The SQL component also has Python-related tests, for example, IntegratedUDFTestUtils.
+      # Note that the order of Python installations here matters because the default
+      # python3 is overridden by pypy3.
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       with:
-        java-version: '11'
-    - uses: actions/setup-python@v1
+        python-version: pypy3
+        architecture: x64
+    - name: Install Python 2.7
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - name: Scala
-      run: ./dev/lint-scala
-    - name: Java
-      run: ./dev/lint-java
-    - name: Python
-      run: |
-        pip install flake8 sphinx numpy
-        ./dev/lint-python
-    - name: License
-      run: ./dev/check-license
-    - name: Dependencies
-      run: ./dev/test-dependencies.sh
-
-  lintr:
-    runs-on: ubuntu-latest
-    name: Linter (R)
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
+        python-version: 2.7
+        architecture: x64
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       with:
-        java-version: '11'
-    - uses: r-lib/actions/setup-r@v1
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python packages
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      # PyArrow is not supported in PyPy yet, see ARROW-2651.
+      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
+      run: |
+        python3 -m pip install numpy pyarrow pandas scipy
+        python3 -m pip list
+        python2 -m pip install numpy pyarrow pandas scipy
+        python2 -m pip list
+        pypy3 -m pip install numpy pandas
+        pypy3 -m pip list
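
The repeated `if:` guard installs the Python toolchain only for module groups that need it. `contains()` is a plain substring test, so the `!contains(matrix.modules, 'sql-')` clause stops groups that merely include hyphenated SQL modules (such as sql-kafka-0-10) from matching. How it evaluates against the groups above (sketch):

    #   "pyspark-sql, pyspark-mllib"      -> contains 'pyspark'            -> install
    #   "sql" (from the include entries)  -> contains 'sql' but not 'sql-' -> install
    #   "streaming, sql-kafka-0-10, ..."  -> contains 'sql-'               -> skip
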
+    # SparkR
+    - name: Install R 3.6
+      uses: r-lib/actions/setup-r@v1
+      if: contains(matrix.modules, 'sparkr')
       with:
-        r-version: '3.6.2'
-    - name: Install lib
+        r-version: 3.6
+    - name: Install R packages
+      if: contains(matrix.modules, 'sparkr')
       run: |
         sudo apt-get install -y libcurl4-openssl-dev
-    - name: install R packages
+        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
+        # Show installed packages in R.
+        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
+    # Run the tests.
+    - name: "Run tests: ${{ matrix.modules }}"
       run: |
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
-        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
-    - name: package and install SparkR
-      run: ./R/install-dev.sh
-    - name: lint-r
-      run: ./dev/lint-r
+        # Hive tests become flaky when run in parallel, as they are too intensive.
+        if [[ "$TEST_ONLY_MODULES" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
+        mkdir -p ~/.m2
+        ./dev/run-tests --parallelism 2
+        rm -rf ~/.m2/repository/org/apache/spark
 
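
The "Run tests" step is driven by the job-level `env` block: `./dev/run-tests` picks up the TEST_ONLY_* variables and the HADOOP_PROFILE/HIVE_PROFILE settings, and the trailing `rm -rf` plausibly keeps just-built Spark snapshots out of the ~/.m2/repository cache saved above. A rough local equivalent of one matrix job (illustrative values; this assumes run-tests honors the variables exactly as wired here):

    TEST_ONLY_MODULES=hive \
    TEST_ONLY_INCLUDED_TAGS=org.apache.spark.tags.SlowHiveTest \
    HADOOP_PROFILE=hadoop3.2 HIVE_PROFILE=hive2.3 \
    ./dev/run-tests --parallelism 2
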
-  docs:
+  # Static analysis and documentation build
+  lint:
+    name: Linters, licenses, dependencies and documentation generation
     runs-on: ubuntu-latest
-    name: Generate documents
     steps:
-    - uses: actions/checkout@master
-    - uses: actions/cache@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
         path: ~/.m2/repository
         key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
-          docs-maven-repo-
-    - uses: actions/setup-java@v1
+          docs-maven-
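
Note the restore key shortens from `docs-maven-repo-` to `docs-maven-` while the save key keeps its longer prefix; since restore-keys match by prefix, caches saved under the old key still restore:

    #   key:          docs-maven-repo-<pom.xml hash>
    #   restore-keys: docs-maven-   (prefix-matches docs-maven-repo-* as well)
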
+    - name: Install JDK 1.8
+      uses: actions/setup-java@v1
       with:
-        java-version: '1.8'
-    - uses: actions/setup-python@v1
+        java-version: 1.8
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
       with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - uses: actions/setup-ruby@v1
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python linter dependencies
+      run: |
+        pip3 install flake8 sphinx numpy
+    - name: Install R 3.6
+      uses: r-lib/actions/setup-r@v1
       with:
-        ruby-version: '2.7'
-    - uses: r-lib/actions/setup-r@v1
+        r-version: 3.6
+    - name: Install R linter dependencies and SparkR
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev
+        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
+        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
+        ./R/install-dev.sh
+    - name: Install Ruby 2.7 for documentation generation
+      uses: actions/setup-ruby@v1
       with:
-        r-version: '3.6.2'
-    - name: Install lib and pandoc
+        ruby-version: 2.7
+    - name: Install dependencies for documentation generation
       run: |
         sudo apt-get install -y libcurl4-openssl-dev pandoc
-    - name: Install packages
-      run: |
         pip install sphinx mkdocs numpy
         gem install jekyll jekyll-redirect-from rouge
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
-    - name: Run jekyll build
+        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
+    - name: Scala linter
+      run: ./dev/lint-scala
+    - name: Java linter
+      run: ./dev/lint-java
+    - name: Python linter
+      run: ./dev/lint-python
+    - name: R linter
+      run: ./dev/lint-r
+    - name: License test
+      run: ./dev/check-license
+    - name: Dependencies test
+      run: ./dev/test-dependencies.sh
+    - name: Run documentation build
       run: |
         cd docs
         jekyll build
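
The renamed `lint` job folds the old `lint`, `lintr`, and `docs` jobs into one runner so the R, Ruby, and Python setup happens once. To reproduce the docs build locally, the last step is all that is needed once the gems and pip packages above are installed (sketch; `jekyll build` writes the generated site under docs/_site by default):

    cd docs
    jekyll build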