name: master

on:
  pull_request:
    branches:
    - branch-2.4

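# Note: despite the workflow file's name ('master'), the trigger above fires only
# for pull requests whose base branch is branch-2.4.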
jobs:
  # TODO(SPARK-32248): Recover JDK 11 builds
  # Build: build Spark and run the tests for the specified modules.
  build:
    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        java:
        - 1.8
        hadoop:
        - hadoop2.6
        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
        # Kinesis tests depend on the external Amazon Kinesis service.
        # Note that the modules below are from sparktestsupport/modules.py.
        modules:
        - |-
          core, unsafe, kvstore, avro,
          network_common, network_shuffle, repl, launcher,
          examples, sketch, graphx
        - |-
          catalyst, hive-thriftserver
        - |-
          streaming, sql-kafka-0-10, streaming-kafka-0-10,
          mllib-local, mllib,
          yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
        - |-
          pyspark-sql, pyspark-mllib
        - |-
          pyspark-core, pyspark-streaming, pyspark-ml
        - |-
          sparkr
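        # Each '|-' entry above is a YAML block scalar, so a single matrix cell holds
        # an entire comma-separated module list; each list becomes one build job.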
        # Here, we split the Hive and SQL tests into the slow ones and the rest.
        included-tags: [""]
        excluded-tags: [""]
        comment: [""]
        include:
        # Hive tests
        - modules: hive
          java: 1.8
          hadoop: hadoop2.6
          included-tags: org.apache.spark.tags.SlowHiveTest
          comment: "- slow tests"
        - modules: hive
          java: 1.8
          hadoop: hadoop2.6
          excluded-tags: org.apache.spark.tags.SlowHiveTest
          comment: "- other tests"
        # SQL tests
        - modules: sql
          java: 1.8
          hadoop: hadoop2.6
          included-tags: org.apache.spark.tags.ExtendedSQLTest
          comment: "- slow tests"
        - modules: sql
          java: 1.8
          hadoop: hadoop2.6
          excluded-tags: org.apache.spark.tags.ExtendedSQLTest
          comment: "- other tests"
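        # 'include' does not expand the cross-product; it appends these four extra
        # combinations, splitting the hive and sql modules by JUnit test tag.
        # (Note: '${{ matrix.hive }}' in the job name has no matching matrix axis
        # here, so it renders empty.)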
    env:
      TEST_ONLY_MODULES: ${{ matrix.modules }}
      TEST_ONLY_EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
      TEST_ONLY_INCLUDED_TAGS: ${{ matrix.included-tags }}
      HADOOP_PROFILE: ${{ matrix.hadoop }}
      # GitHub Actions' default Miniconda, used in the pip packaging test.
      CONDA_PREFIX: /usr/share/miniconda
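    # Presumably (per this branch's test scripts), dev/run-tests reads the
    # TEST_ONLY_* variables to narrow the module set and to include or exclude
    # tagged tests, while HADOOP_PROFILE picks the Hadoop build profile.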
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    # Cache local repositories. Note that the GitHub Actions cache has a 2 GB limit.
    - name: Cache Scala, SBT, Maven and Zinc
      uses: actions/cache@v1
      with:
        path: build
        key: build-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          build-
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
    - name: Cache Ivy local repository
      uses: actions/cache@v2
      with:
        path: ~/.ivy2/cache
        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
        restore-keys: |
          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
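    # For all three caches above: when no cache matches the exact key, GitHub
    # Actions restores the most recent cache whose key starts with a restore-keys
    # prefix.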
    - name: Install JDK ${{ matrix.java }}
      uses: actions/setup-java@v1
      with:
        java-version: ${{ matrix.java }}
    # PySpark
    - name: Install PyPy3
      # The SQL component also has Python-related tests, for example, IntegratedUDFTestUtils.
      # Note that the order of the Python installations here matters because the
      # default python3 is overridden by pypy3.
      uses: actions/setup-python@v2
      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      with:
        python-version: pypy3
        architecture: x64
    - name: Install Python 2.7
      uses: actions/setup-python@v2
      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      with:
        python-version: 2.7
        architecture: x64
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      with:
        python-version: 3.6
        architecture: x64
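    # The 'if' expression shared by the Python steps matches the pyspark-* module
    # groups and the plain 'sql' module, while the 'sql-' check keeps the group
    # containing sql-kafka-0-10 out.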
    - name: Install Python packages
      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
      # PyArrow is not supported in PyPy yet, see ARROW-2651.
      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
      run: |
        python3 -m pip install numpy pyarrow pandas scipy
        python3 -m pip list
        python2 -m pip install numpy pyarrow pandas scipy
        python2 -m pip list
        pypy3 -m pip install numpy pandas
        pypy3 -m pip list
    # SparkR
    - name: Install R 3.6
      uses: r-lib/actions/setup-r@v1
      if: contains(matrix.modules, 'sparkr')
      with:
        r-version: 3.6
    - name: Install R packages
      if: contains(matrix.modules, 'sparkr')
      run: |
        sudo apt-get install -y libcurl4-openssl-dev
        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
        # Show installed packages in R.
        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
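        # (installed.packages() columns 1, 3 and 4 are Package, Version and Priority;
        # rows with NA Priority are the user-installed, non-base packages.)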
    # Run the tests.
    - name: "Run tests: ${{ matrix.modules }}"
      run: |
        # Hive tests become flaky when run in parallel because they are too resource-intensive.
        if [[ "$TEST_ONLY_MODULES" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
        mkdir -p ~/.m2
        ./dev/run-tests --parallelism 2
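        # Drop Spark's own artifacts so the post-job Maven cache does not persist
        # jars built during this run.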
        rm -rf ~/.m2/repository/org/apache/spark

  # Static analysis and documentation build
  lint:
    name: Linters, licenses, dependencies and documentation generation
    runs-on: ubuntu-latest
    steps:
    - name: Checkout Spark repository
      uses: actions/checkout@v2
    - name: Cache Maven local repository
      uses: actions/cache@v2
      with:
        path: ~/.m2/repository
        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          docs-maven-
    - name: Install JDK 1.8
      uses: actions/setup-java@v1
      with:
        java-version: 1.8
    - name: Install Python 3.6
      uses: actions/setup-python@v2
      with:
        python-version: 3.6
        architecture: x64
    - name: Install Python linter dependencies
      run: |
        pip3 install flake8 sphinx numpy
    - name: Install R 3.6
      uses: r-lib/actions/setup-r@v1
      with:
        r-version: 3.6
    - name: Install R linter dependencies and SparkR
      run: |
        sudo apt-get install -y libcurl4-openssl-dev
        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
        sudo Rscript -e "devtools::install_github('jimhester/lintr')"
        ./R/install-dev.sh
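    # R/install-dev.sh builds the SparkR package from source and installs it under
    # R/lib so that ./dev/lint-r below can run against it.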
    - name: Install Ruby 2.7 for documentation generation
      uses: actions/setup-ruby@v1
      with:
        ruby-version: 2.7
    - name: Install dependencies for documentation generation
      run: |
        sudo apt-get install -y libcurl4-openssl-dev pandoc
        pip install sphinx mkdocs numpy
        gem install jekyll jekyll-redirect-from rouge
        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
    - name: Scala linter
      run: ./dev/lint-scala
    - name: Java linter
      run: ./dev/lint-java
    - name: Python linter
      run: ./dev/lint-python
    - name: R linter
      run: ./dev/lint-r
    - name: License test
      run: ./dev/check-license
    - name: Dependencies test
      run: ./dev/test-dependencies.sh
    - name: Run documentation build
      run: |
        cd docs
        jekyll build
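        # Jekyll writes the rendered site to docs/_site by default.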