
Commit 63baeab

Merge branch 'master' into np_upgrade
2 parents: fa2676c + 27d4a77

File tree: 256 files changed, +12642 −4186 lines


.asf.yaml

Lines changed: 2 additions & 0 deletions
@@ -31,6 +31,8 @@ github:
     merge: false
     squash: true
     rebase: true
+  ghp_branch: master
+  ghp_path: /docs

 notifications:
   pullrequests: [email protected]

.github/workflows/build_python_connect.yml

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ jobs:
         python packaging/connect/setup.py sdist
         cd dist
         pip install pyspark*connect-*.tar.gz
-        pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting
+        pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8'
     - name: Run tests
       env:
         SPARK_TESTING: 1

.github/workflows/maven_test.yml

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ on:
       description: OS to run this build.
       required: false
       type: string
-      default: ubuntu-22.04
+      default: ubuntu-latest
     envs:
       description: Additional environment variables to set when running the tests. Should be in JSON format.
       required: false

.github/workflows/pages.yml

Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,97 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: GitHub Pages deployment
+
+on:
+  push:
+    branches:
+      - master
+
+concurrency:
+  group: 'docs preview'
+  cancel-in-progress: false
+
+jobs:
+  docs:
+    name: Build and deploy documentation
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      pages: write
+    environment:
+      name: github-pages # https://github.com/actions/deploy-pages/issues/271
+    env:
+      SPARK_TESTING: 1 # Reduce some noise in the logs
+      RELEASE_VERSION: 'In-Progress'
+    steps:
+      - name: Checkout Spark repository
+        uses: actions/checkout@v4
+        with:
+          repository: apache/spark
+          ref: 'master'
+      - name: Install Java 17
+        uses: actions/setup-java@v4
+        with:
+          distribution: zulu
+          java-version: 17
+      - name: Install Python 3.9
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+          architecture: x64
+          cache: 'pip'
+      - name: Install Python dependencies
+        run: |
+          pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
+            ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.2.2' 'plotly>=4.8' 'docutils<0.18.0' \
+            'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
+            'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+            'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
+      - name: Install Ruby for documentation generation
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '3.3'
+          bundler-cache: true
+      - name: Install Pandoc
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install pandoc
+      - name: Install dependencies for documentation generation
+        run: |
+          cd docs
+          gem install bundler -v 2.4.22 -n /usr/local/bin
+          bundle install --retry=100
+      - name: Run documentation build
+        run: |
+          sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml
+          sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml
+          sed -i".tmp3" "s/'facetFilters':.*$/'facetFilters': [\"version:$RELEASE_VERSION\"]/g" docs/_config.yml
+          sed -i".tmp4" 's/__version__: str = .*$/__version__: str = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py
+          cd docs
+          SKIP_RDOC=1 bundle exec jekyll build
+      - name: Setup Pages
+        uses: actions/configure-pages@v5
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: 'docs/_site'
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4

common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java

Lines changed: 2 additions & 2 deletions
@@ -109,7 +109,7 @@ private static int lowercaseMatchLengthFrom(
     }
     // Compare the characters in the target and pattern strings.
     int matchLength = 0, codePointBuffer = -1, targetCodePoint, patternCodePoint;
-    while (targetIterator.hasNext() && patternIterator.hasNext()) {
+    while ((targetIterator.hasNext() || codePointBuffer != -1) && patternIterator.hasNext()) {
       if (codePointBuffer != -1) {
         targetCodePoint = codePointBuffer;
         codePointBuffer = -1;
@@ -211,7 +211,7 @@ private static int lowercaseMatchLengthUntil(
     }
     // Compare the characters in the target and pattern strings.
     int matchLength = 0, codePointBuffer = -1, targetCodePoint, patternCodePoint;
-    while (targetIterator.hasNext() && patternIterator.hasNext()) {
+    while ((targetIterator.hasNext() || codePointBuffer != -1) && patternIterator.hasNext()) {
       if (codePointBuffer != -1) {
         targetCodePoint = codePointBuffer;
         codePointBuffer = -1;
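
The guard change matters when lowercasing a single target code point expands into two code points: the second one is parked in codePointBuffer for the next iteration, and if the target iterator happens to be exhausted at that moment, the old condition `targetIterator.hasNext() && patternIterator.hasNext()` left the loop before the buffered code point was ever compared, so patterns ending in such a sequence failed to match. A minimal, self-contained Java sketch of the pattern (the class and method names are hypothetical, and the logic is simplified relative to Spark's CollationAwareUTF8String):

import java.util.Iterator;
import java.util.Locale;

public class BufferedMatchSketch {
  static boolean lowercaseEquals(String target, String pattern) {
    Iterator<Integer> targetIterator = target.codePoints().iterator();
    Iterator<Integer> patternIterator =
        pattern.toLowerCase(Locale.ROOT).codePoints().iterator();
    int codePointBuffer = -1;
    // Fixed guard: a buffered code point counts as remaining target input.
    while ((targetIterator.hasNext() || codePointBuffer != -1)
        && patternIterator.hasNext()) {
      int targetCodePoint;
      if (codePointBuffer != -1) {
        targetCodePoint = codePointBuffer;
        codePointBuffer = -1;
      } else {
        // Lowercasing one code point can yield two (e.g. 'İ' -> 'i' + U+0307);
        // emit the first now and buffer the second for the next iteration.
        int[] lower = new String(Character.toChars(targetIterator.next()))
            .toLowerCase(Locale.ROOT).codePoints().toArray();
        targetCodePoint = lower[0];
        if (lower.length > 1) codePointBuffer = lower[1];
      }
      if (targetCodePoint != patternIterator.next()) return false;
    }
    return !targetIterator.hasNext() && codePointBuffer == -1
        && !patternIterator.hasNext();
  }

  public static void main(String[] args) {
    // Lowercasing the final 'İ' buffers U+0307 after the iterator is drained;
    // the fixed guard still compares it, so this prints true. With the old
    // guard the buffered code point is dropped and the match fails.
    System.out.println(lowercaseEquals("oİ", "oi\u0307")); // true
  }
}

Reverting the loop condition to `targetIterator.hasNext() && patternIterator.hasNext()` in this sketch makes main print false, mirroring the new startsWith/endsWith test cases below.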

common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java

Lines changed: 4 additions & 0 deletions
@@ -629,6 +629,8 @@ public void testStartsWith() throws SparkException {
     assertStartsWith("İonic", "Io", "UTF8_LCASE", false);
     assertStartsWith("İonic", "i\u0307o", "UTF8_LCASE", true);
     assertStartsWith("İonic", "İo", "UTF8_LCASE", true);
+    assertStartsWith("oİ", "oİ", "UTF8_LCASE", true);
+    assertStartsWith("oİ", "oi̇", "UTF8_LCASE", true);
     // Conditional case mapping (e.g. Greek sigmas).
     assertStartsWith("σ", "σ", "UTF8_BINARY", true);
     assertStartsWith("σ", "ς", "UTF8_BINARY", false);
@@ -880,6 +882,8 @@ public void testEndsWith() throws SparkException {
     assertEndsWith("the İo", "Io", "UTF8_LCASE", false);
     assertEndsWith("the İo", "i\u0307o", "UTF8_LCASE", true);
     assertEndsWith("the İo", "İo", "UTF8_LCASE", true);
+    assertEndsWith("İo", "İo", "UTF8_LCASE", true);
+    assertEndsWith("İo", "i̇o", "UTF8_LCASE", true);
     // Conditional case mapping (e.g. Greek sigmas).
     assertEndsWith("σ", "σ", "UTF8_BINARY", true);
     assertEndsWith("σ", "ς", "UTF8_BINARY", false);
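
The new assertions pin down the case mapping that exercises the buffered-code-point path: 'İ' (U+0130, Latin capital letter I with dot above) lowercases to two code points, 'i' (U+0069) followed by combining dot above (U+0307), so under UTF8_LCASE the strings "oİ" and "oi̇" must compare as equal. A quick JDK-only check of that mapping (plain Java, not the Spark test harness):

import java.util.Locale;

public class DottedUppercaseIDemo {
  public static void main(String[] args) {
    String dottedI = "\u0130"; // 'İ'
    // Lowercasing expands one code point into two: U+0069 then U+0307.
    dottedI.toLowerCase(Locale.ROOT).codePoints()
        .forEach(cp -> System.out.printf("U+%04X%n", cp));
    // Hence "oİ" and "oi̇" agree after lowercasing, matching the
    // UTF8_LCASE expectations asserted above.
    System.out.println("o\u0130".toLowerCase(Locale.ROOT).equals("oi\u0307")); // true
  }
}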

connector/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala

Lines changed: 0 additions & 93 deletions
This file was deleted.

connector/connect/client/jvm/pom.xml

Lines changed: 7 additions & 0 deletions
@@ -88,6 +88,13 @@
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql-api_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-common-utils_${scala.binary.version}</artifactId>

connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 1 addition & 6 deletions
@@ -1035,12 +1035,7 @@ class Dataset[T] private[sql] (
     new MergeIntoWriterImpl[T](table, this, condition)
   }

-  /**
-   * Interface for saving the content of the streaming Dataset out into external storage.
-   *
-   * @group basic
-   * @since 3.5.0
-   */
+  /** @inheritdoc */
   def writeStream: DataStreamWriter[T] = {
     new DataStreamWriter[T](this)
   }
