Skip to content

Commit 8db3551

Browse files
committed
Merge branch 'upmaster' into SPARK-35253-FOLLOWUP
2 parents 527e102 + 746d80d commit 8db3551

File tree

173 files changed

+2972
-1450
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

173 files changed

+2972
-1450
lines changed

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
import com.google.common.collect.Maps;
4848
import com.google.common.primitives.Ints;
4949
import com.google.common.primitives.Longs;
50+
import org.apache.commons.lang3.builder.ToStringBuilder;
51+
import org.apache.commons.lang3.builder.ToStringStyle;
5052
import org.roaringbitmap.RoaringBitmap;
5153
import org.slf4j.Logger;
5254
import org.slf4j.LoggerFactory;
@@ -770,9 +772,9 @@ public int hashCode() {
770772

771773
@Override
772774
public String toString() {
773-
return Objects.toStringHelper(this)
774-
.add("appId", appId)
775-
.add("shuffleId", shuffleId)
775+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
776+
.append("appId", appId)
777+
.append("shuffleId", shuffleId)
776778
.toString();
777779
}
778780
}

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
import com.google.common.base.Objects;
2121
import io.netty.buffer.ByteBuf;
22+
import org.apache.commons.lang3.builder.ToStringBuilder;
23+
import org.apache.commons.lang3.builder.ToStringStyle;
2224

2325
import org.apache.spark.network.protocol.Encoders;
2426

@@ -51,9 +53,9 @@ public int hashCode() {
5153

5254
@Override
5355
public String toString() {
54-
return Objects.toStringHelper(this)
55-
.add("appId", appId)
56-
.add("shuffleId", shuffleId)
56+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
57+
.append("appId", appId)
58+
.append("shuffleId", shuffleId)
5759
.toString();
5860
}
5961

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
import com.google.common.base.Objects;
2323
import io.netty.buffer.ByteBuf;
24+
import org.apache.commons.lang3.builder.ToStringBuilder;
25+
import org.apache.commons.lang3.builder.ToStringStyle;
2426
import org.roaringbitmap.RoaringBitmap;
2527

2628
import org.apache.spark.network.protocol.Encoders;
@@ -76,9 +78,9 @@ public int hashCode() {
7678

7779
@Override
7880
public String toString() {
79-
return Objects.toStringHelper(this)
80-
.add("shuffleId", shuffleId)
81-
.add("reduceId size", reduceIds.length)
81+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
82+
.append("shuffleId", shuffleId)
83+
.append("reduceId size", reduceIds.length)
8284
.toString();
8385
}
8486

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
import com.google.common.base.Objects;
2121
import io.netty.buffer.ByteBuf;
22+
import org.apache.commons.lang3.builder.ToStringBuilder;
23+
import org.apache.commons.lang3.builder.ToStringStyle;
2224

2325
import org.apache.spark.network.protocol.Encoders;
2426

@@ -59,12 +61,12 @@ public int hashCode() {
5961

6062
@Override
6163
public String toString() {
62-
return Objects.toStringHelper(this)
63-
.add("appId", appId)
64-
.add("shuffleId", shuffleId)
65-
.add("mapIndex", mapIndex)
66-
.add("reduceId", reduceId)
67-
.add("index", index)
64+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
65+
.append("appId", appId)
66+
.append("shuffleId", shuffleId)
67+
.append("mapIndex", mapIndex)
68+
.append("reduceId", reduceId)
69+
.append("index", index)
6870
.toString();
6971
}
7072

dev/create-release/spark-rm/Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y"
4040
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
4141
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
4242
# We should use the latest Sphinx version once this is fixed.
43-
ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0"
43+
# TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx.
44+
# See also https://issues.apache.org/jira/browse/SPARK-35375.
45+
ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3"
4446
ARG GEM_PKGS="bundler:2.2.9"
4547

4648
# Install extra needed repos and refresh.

docs/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,13 @@ To generate API docs for any language, you'll need to install these libraries:
6767
<!--
6868
TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
6969
See also https://github.com/sphinx-doc/sphinx/issues/7551.
70+
71+
TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx.
72+
See also https://issues.apache.org/jira/browse/SPARK-35375.
7073
-->
7174

7275
```sh
73-
$ sudo pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc
76+
$ sudo pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0'
7477
```
7578

7679
## Generating the Documentation HTML

docs/configuration.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -943,8 +943,8 @@ Apart from these, the following properties are also available, and may be useful
943943
<td>false</td>
944944
<td>
945945
Enables the external shuffle service. This service preserves the shuffle files written by
946-
executors so the executors can be safely removed. The external shuffle service
947-
must be set up in order to enable it. See
946+
executors e.g. so that executors can be safely removed, or so that shuffle fetches can continue in
947+
the event of executor failure. The external shuffle service must be set up in order to enable it. See
948948
<a href="job-scheduling.html#configuration-and-setup">dynamic allocation
949949
configuration and setup documentation</a> for more information.
950950
</td>

docs/job-scheduling.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,13 +142,12 @@ an executor should not be idle if there are still pending tasks to be scheduled.
142142

143143
### Graceful Decommission of Executors
144144

145-
Before dynamic allocation, a Spark executor exits either on failure or when the associated
146-
application has also exited. In both scenarios, all state associated with the executor is no
147-
longer needed and can be safely discarded. With dynamic allocation, however, the application
148-
is still running when an executor is explicitly removed. If the application attempts to access
149-
state stored in or written by the executor, it will have to perform a recompute the state. Thus,
150-
Spark needs a mechanism to decommission an executor gracefully by preserving its state before
151-
removing it.
145+
Before dynamic allocation, if a Spark executor exits when the associated application has also exited
146+
then all state associated with the executor is no longer needed and can be safely discarded.
147+
With dynamic allocation, however, the application is still running when an executor is explicitly
148+
removed. If the application attempts to access state stored in or written by the executor, it will
149+
have to recompute the state. Thus, Spark needs a mechanism to decommission an executor
150+
gracefully by preserving its state before removing it.
152151

153152
This requirement is especially important for shuffles. During a shuffle, the Spark executor first
154153
writes its own map outputs locally to disk, and then acts as the server for those files when other

python/docs/source/migration_guide/pyspark_3.1_to_3.2.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,5 @@ Upgrading from PySpark 3.1 to 3.2
2121
=================================
2222

2323
* In Spark 3.2, the PySpark methods from sql, ml, spark_on_pandas modules raise the ``TypeError`` instead of ``ValueError`` when they are applied to a param of an inappropriate type.
24+
25+
* In Spark 3.2, the traceback from Python UDFs, pandas UDFs and pandas function APIs is simplified by default, without the traceback from the internal Python workers. In Spark 3.1 or earlier, the traceback from Python workers was printed out. To restore the behavior before Spark 3.2, you can set ``spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled`` to ``false``.

python/mypy.ini

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ disallow_untyped_defs = False
8585
[mypy-pyspark.sql.utils]
8686
disallow_untyped_defs = False
8787

88+
[mypy-pyspark.pandas.*]
89+
strict_optional = False
90+
disallow_untyped_defs = False
91+
8892
[mypy-pyspark.tests.*]
8993
disallow_untyped_defs = False
9094

@@ -125,6 +129,14 @@ ignore_missing_imports = True
125129
[mypy-psutil.*]
126130
ignore_missing_imports = True
127131

128-
# TODO(SPARK-34941): Enable mypy for pandas-on-Spark
129-
[mypy-pyspark.pandas.*]
130-
ignore_errors = True
132+
[mypy-matplotlib.*]
133+
ignore_missing_imports = True
134+
135+
[mypy-plotly.*]
136+
ignore_missing_imports = True
137+
138+
[mypy-mlflow.*]
139+
ignore_missing_imports = True
140+
141+
[mypy-sklearn.*]
142+
ignore_missing_imports = True

0 commit comments

Comments
 (0)