Skip to content

Commit 72f35b1

Browse files
committed
DataFrame.withColumn can replace original column with identical column name.
1 parent de4fa6b commit 72f35b1

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,19 @@ class DataFrame private[sql](
747747
* Returns a new [[DataFrame]] by adding a column.
748748
* @group dfops
749749
*/
750-
def withColumn(colName: String, col: Column): DataFrame = select(Column("*"), col.as(colName))
750+
def withColumn(colName: String, col: Column): DataFrame = {
751+
val resolver = sqlContext.analyzer.resolver
752+
val replaced = schema.exists(f => resolver(f.name, colName))
753+
if (replaced) {
754+
val colNames = schema.map { field =>
755+
val name = field.name
756+
if (resolver(name, colName)) col.as(colName) else Column(name)
757+
}
758+
select(colNames :_*)
759+
} else {
760+
select(Column("*"), col.as(colName))
761+
}
762+
}
751763

752764
/**
753765
* Returns a new [[DataFrame]] with a column renamed.

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,10 @@ class DataFrameSuite extends QueryTest {
457457
Row(key, value, key + 1)
458458
}.toSeq)
459459
assert(df.schema.map(_.name).toSeq === Seq("key", "value", "newCol"))
460+
461+
val df2 = TestSQLContext.sparkContext.parallelize(Array(1, 2, 3)).toDF("x")
462+
val df3 = df2.withColumn("x", df2("x") + 1)
463+
assert(df3.select("x").collect().toSeq === Seq(Row(2), Row(3), Row(4)))
460464
}
461465

462466
test("withColumnRenamed") {

0 commit comments

Comments
 (0)