diff --git a/NuGet.config b/NuGet.config
index 7b7b765e2..9d2866825 100644
--- a/NuGet.config
+++ b/NuGet.config
@@ -6,5 +6,7 @@
+
+
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 20215a7b2..8ba73e0c1 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -11,9 +11,9 @@ variables:
_SignType: real
_TeamName: DotNetSpark
MSBUILDSINGLELOADCONTEXT: 1
- # backwardCompatibleRelease/forwardCompatibleRelease is the "oldest" releases that work with the current release
- backwardCompatibleRelease: '0.9.0'
+  # forwardCompatibleRelease/backwardCompatibleRelease are the "oldest" releases that work with the current release
forwardCompatibleRelease: '0.9.0'
+ backwardCompatibleRelease: '0.9.0'
TestsToFilterOut: "(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameGroupedMapUdf)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.DataFrameTests.TestDataFrameVectorUdf)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.BroadcastTests.TestDestroy)&\
@@ -22,7 +22,8 @@ variables:
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithReturnAsTimestampType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.UdfTests.UdfSimpleTypesTests.TestUdfWithTimestampType)&\
(FullyQualifiedName!=Microsoft.Spark.E2ETest.IpcTests.SparkSessionTests.TestCreateDataFrameWithTimestamp)"
- LatestDotnetWorkerDir: '$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Worker\netcoreapp3.1\win-x64'
+ ArtifactPath: '$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries'
+ CurrentDotnetWorkerDir: '$(ArtifactPath)\Microsoft.Spark.Worker\netcoreapp3.1\win-x64'
BackwardCompatibleDotnetWorkerDir: $(Build.BinariesDirectory)\Microsoft.Spark.Worker-$(backwardCompatibleRelease)
# Azure DevOps variables are transformed into environment variables, with these variables we
@@ -38,404 +39,131 @@ resources:
name: dotnet/spark
ref: refs/tags/v$(forwardCompatibleRelease)
-jobs:
-- job: Build
- displayName: Build and Test Sources
- pool: Hosted VS2017
-
- variables:
- ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
- _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER)
- HADOOP_HOME: $(Build.BinariesDirectory)\hadoop
-
- steps:
- - checkout: self
- path: s\master
- - checkout: forwardCompatibleRelease
- path: s\$(forwardCompatibleRelease)
-
- - task: Maven@3
- displayName: 'Maven build src'
- inputs:
- mavenPomFile: master/src/scala/pom.xml
-
- - task: Maven@3
- displayName: 'Maven build benchmark'
- inputs:
- mavenPomFile: master/benchmark/scala/pom.xml
-
- - task: BatchScript@1
- displayName: Download Spark Distros & Winutils.exe
- inputs:
- filename: master\script\download-spark-distros.cmd
- arguments: $(Build.BinariesDirectory)
-
- - task: BatchScript@1
- displayName: Download backward compatible worker v$(backwardCompatibleRelease)
- inputs:
- filename: master\script\download-worker-release.cmd
- arguments: '$(Build.BinariesDirectory) $(backwardCompatibleRelease)'
-
- - script: master\build.cmd -pack
- -c $(buildConfiguration)
- -ci
- $(_OfficialBuildIdArgs)
- /p:PublishSparkWorker=true
- /p:SparkWorkerPublishDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Worker
- displayName: '.NET build'
-
- - task: DotNetCoreCLI@2
- displayName: '.NET unit tests'
- inputs:
- command: test
- projects: 'master/**/*UnitTest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.0'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.1'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.2'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.3'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.4'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.0'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.1'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.3'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.4'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.5'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.0 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.1 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.2 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.3 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.4 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.0 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.1 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.3 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.4 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.5 with backward compatible worker v$(backwardCompatibleRelease)'
- inputs:
- command: test
- projects: 'master/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
-
- - task: Maven@3
- displayName: 'Maven build src for forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- mavenPomFile: $(forwardCompatibleRelease)/src/scala/pom.xml
-
- - script: $(forwardCompatibleRelease)\build.cmd
- -c $(buildConfiguration)
- -ci
- $(_OfficialBuildIdArgs)
- /p:PublishSparkWorker=false
- displayName: '.NET build for forward compatible release v$(forwardCompatibleRelease)'
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.0 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.1 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.2 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.3 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.3.4 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.0 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.1 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.3 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.4 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
-
- - task: DotNetCoreCLI@2
- displayName: 'E2E tests for Spark 2.4.5 from forward compatible release v$(forwardCompatibleRelease)'
- inputs:
- command: test
- projects: '$(forwardCompatibleRelease)/**/Microsoft.Spark*.E2ETest/*.csproj'
- arguments: '--configuration $(buildConfiguration)'
- env:
- SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7
- DOTNET_WORKER_DIR: $(LatestDotnetWorkerDir)
+stages:
+- stage: Build
+ displayName: Build Sources
+ jobs:
+ - job: Build
+ pool: Hosted VS2017
- - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
- - task: CopyFiles@2
- displayName: Stage .NET artifacts
+ variables:
+ ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+ _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER)
+
+ steps:
+ - task: Maven@3
+ displayName: 'Maven build src'
+ inputs:
+ mavenPomFile: src/scala/pom.xml
+
+ - task: Maven@3
+ displayName: 'Maven build benchmark'
+ inputs:
+ mavenPomFile: benchmark/scala/pom.xml
+
+ - script: build.cmd -pack
+ -c $(buildConfiguration)
+ -ci
+ $(_OfficialBuildIdArgs)
+ /p:PublishSparkWorker=true
+ /p:SparkWorkerPublishDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Worker
+ displayName: '.NET build'
+
+ - task: DotNetCoreCLI@2
+ displayName: '.NET unit tests'
inputs:
- sourceFolder: $(Build.SourcesDirectory)/master/artifacts/packages/$(buildConfiguration)/Shipping
- contents: |
- **/*.nupkg
- **/*.snupkg
- targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/packages/$(buildConfiguration)/Shipping
+ command: test
+ projects: '**/*UnitTest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
- task: CopyFiles@2
- displayName: Stage build logs
+ displayName: Stage Maven build jars
inputs:
- sourceFolder: $(Build.SourcesDirectory)/master/artifacts/log
- targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/log
+ sourceFolder: $(Build.SourcesDirectory)/src/scala
+ contents: '**/*.jar'
+ targetFolder: $(Build.ArtifactStagingDirectory)/Jars
+
+ - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+ - task: CopyFiles@2
+ displayName: Stage .NET artifacts
+ inputs:
+ sourceFolder: $(Build.SourcesDirectory)/artifacts/packages/$(buildConfiguration)/Shipping
+ contents: |
+ **/*.nupkg
+ **/*.snupkg
+ targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/packages/$(buildConfiguration)/Shipping
+
+ - task: CopyFiles@2
+ displayName: Stage build logs
+ inputs:
+ sourceFolder: $(Build.SourcesDirectory)/artifacts/log
+ targetFolder: $(Build.ArtifactStagingDirectory)/BuildArtifacts/artifacts/log
- task: PublishBuildArtifacts@1
inputs:
pathtoPublish: '$(Build.ArtifactStagingDirectory)'
artifactName: Microsoft.Spark.Binaries
-- ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
- - job: SignPublish
- dependsOn:
- - Build
- displayName: Sign and Publish Artifacts
- pool:
- name: NetCoreInternal-Pool
- queue: buildpool.windows.10.amd64.vs2017
+ - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+ - job: SignPublish
+ dependsOn:
+ - Build
+ displayName: Sign and Publish Artifacts
+ pool:
+ name: NetCoreInternal-Pool
+ queue: buildpool.windows.10.amd64.vs2017
+
+ variables:
+ ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+ _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER)
+
+ steps:
+ - task: DownloadBuildArtifacts@0
+ displayName: Download Build Artifacts
+ inputs:
+ artifactName: Microsoft.Spark.Binaries
+ downloadPath: $(Build.ArtifactStagingDirectory)
+
+ - task: MicroBuildSigningPlugin@2
+ displayName: Install MicroBuild plugin
+ inputs:
+ signType: $(_SignType)
+ zipSources: false
+ feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json
+ env:
+ TeamName: $(_TeamName)
+ condition: and(succeeded(), in(variables['_SignType'], 'real', 'test'), eq(variables['Agent.Os'], 'Windows_NT'))
+
+ - task: PowerShell@2
+ displayName: Sign artifacts and Package Microsoft.Spark.Worker
+ inputs:
+ filePath: eng\common\build.ps1
+ arguments: -restore -sign -publish
+ -c $(buildConfiguration)
+ -ci
+ $(_OfficialBuildIdArgs)
+ /p:DotNetSignType=$(_SignType)
+ /p:SparkPackagesDir=$(ArtifactPath)\BuildArtifacts\artifacts\packages
+ /p:SparkWorkerPublishDir=$(ArtifactPath)\Microsoft.Spark.Worker
+ /p:SparkWorkerPackageOutputDir=$(ArtifactPath)
+
+ - task: PublishBuildArtifacts@1
+ inputs:
+ pathtoPublish: '$(ArtifactPath)'
+ artifactName: Microsoft.Spark.Binaries
+
+- stage: Test
+ displayName: E2E Tests
+ dependsOn: Build
+ jobs:
+ - job: Run
+ pool: Hosted VS2017
variables:
${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
_OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER)
+ HADOOP_HOME: $(Build.BinariesDirectory)\hadoop
+ DOTNET_WORKER_DIR: $(CurrentDotnetWorkerDir)
steps:
- task: DownloadBuildArtifacts@0
@@ -443,31 +171,378 @@ jobs:
inputs:
artifactName: Microsoft.Spark.Binaries
downloadPath: $(Build.ArtifactStagingDirectory)
-
- - task: MicroBuildSigningPlugin@2
- displayName: Install MicroBuild plugin
+
+ - task: CopyFiles@2
+ displayName: Copy jars
+ inputs:
+ sourceFolder: $(ArtifactPath)/Jars
+ contents: '**/*.jar'
+ targetFolder: $(Build.SourcesDirectory)/src/scala
+
+ - task: BatchScript@1
+ displayName: Download Spark Distros & Winutils.exe
+ inputs:
+ filename: script\download-spark-distros.cmd
+ arguments: $(Build.BinariesDirectory)
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.0'
inputs:
- signType: $(_SignType)
- zipSources: false
- feedSource: https://dnceng.pkgs.visualstudio.com/_packaging/MicroBuildToolset/nuget/v3/index.json
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
env:
- TeamName: $(_TeamName)
- condition: and(succeeded(), in(variables['_SignType'], 'real', 'test'), eq(variables['Agent.Os'], 'Windows_NT'))
-
- - task: PowerShell@2
- displayName: Sign artifacts and Package Microsoft.Spark.Worker
- inputs:
- filePath: eng\common\build.ps1
- arguments: -restore -sign -publish
- -c $(buildConfiguration)
- -ci
- $(_OfficialBuildIdArgs)
- /p:DotNetSignType=$(_SignType)
- /p:SparkPackagesDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries\BuildArtifacts\artifacts\packages
- /p:SparkWorkerPublishDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries\Microsoft.Spark.Worker
- /p:SparkWorkerPackageOutputDir=$(Build.ArtifactStagingDirectory)\Microsoft.Spark.Binaries
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7
- - task: PublishBuildArtifacts@1
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.1'
inputs:
- pathtoPublish: '$(Build.ArtifactStagingDirectory)/Microsoft.Spark.Binaries'
- artifactName: Microsoft.Spark.Binaries
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.2'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.3'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.4'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.0'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.1'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.3'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.4'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.5'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.6'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.6-bin-hadoop2.7
+
+- stage: ForwardCompatibility
+ displayName: E2E Forward Compatibility Tests
+ dependsOn: Build
+ jobs:
+ - job: Run
+ pool: Hosted VS2017
+
+ variables:
+ ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+ _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER)
+ HADOOP_HOME: $(Build.BinariesDirectory)\hadoop
+ DOTNET_WORKER_DIR: $(CurrentDotnetWorkerDir)
+
+ steps:
+ - checkout: forwardCompatibleRelease
+ path: s\$(forwardCompatibleRelease)
+
+ - task: Maven@3
+ displayName: 'Maven build src for forward compatible release v$(forwardCompatibleRelease)'
+ inputs:
+ mavenPomFile: src/scala/pom.xml
+
+ - task: DownloadBuildArtifacts@0
+ displayName: Download Build Artifacts
+ inputs:
+ artifactName: Microsoft.Spark.Binaries
+ downloadPath: $(Build.ArtifactStagingDirectory)
+
+ - task: BatchScript@1
+ displayName: Download Spark Distros & Winutils.exe
+ inputs:
+ filename: script\download-spark-distros.cmd
+ arguments: $(Build.BinariesDirectory)
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.0'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.1'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.2'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.3'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.4'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.0'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.1'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.3'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.4'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.5'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7
+
+- stage: BackwardCompatibility
+ displayName: E2E Backward Compatibility Tests
+ dependsOn: Build
+ jobs:
+ - job: Run
+ pool: Hosted VS2017
+
+ variables:
+ ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}:
+ _OfficialBuildIdArgs: /p:OfficialBuildId=$(BUILD.BUILDNUMBER)
+ HADOOP_HOME: $(Build.BinariesDirectory)\hadoop
+ DOTNET_WORKER_DIR: $(BackwardCompatibleDotnetWorkerDir)
+
+ steps:
+ - task: DownloadBuildArtifacts@0
+ displayName: Download Build Artifacts
+ inputs:
+ artifactName: Microsoft.Spark.Binaries
+ downloadPath: $(Build.ArtifactStagingDirectory)
+
+ - task: CopyFiles@2
+ displayName: Copy jars
+ inputs:
+ sourceFolder: $(ArtifactPath)/Jars
+ contents: '**/*.jar'
+ targetFolder: $(Build.SourcesDirectory)/src/scala
+
+ - task: BatchScript@1
+ displayName: Download Spark Distros & Winutils.exe
+ inputs:
+ filename: script\download-spark-distros.cmd
+ arguments: $(Build.BinariesDirectory)
+
+ - task: BatchScript@1
+ displayName: Download backward compatible worker v$(backwardCompatibleRelease)
+ inputs:
+ filename: script\download-worker-release.cmd
+ arguments: '$(Build.BinariesDirectory) $(backwardCompatibleRelease)'
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.0'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.0-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.1'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.1-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.2'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.2-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.3'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.3-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.3.4'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.3.4-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.0'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.0-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.1'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.1-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.3'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.3-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.4'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.4-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.5'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.5-bin-hadoop2.7
+
+ - task: DotNetCoreCLI@2
+ displayName: 'E2E tests for Spark 2.4.6'
+ inputs:
+ command: test
+ projects: '**/Microsoft.Spark*.E2ETest/*.csproj'
+ arguments: '--configuration $(buildConfiguration) --filter $(TestsToFilterOut)'
+ env:
+ SPARK_HOME: $(Build.BinariesDirectory)\spark-2.4.6-bin-hadoop2.7
+
diff --git a/docs/broadcast-guide.md b/docs/broadcast-guide.md
new file mode 100644
index 000000000..c3026516b
--- /dev/null
+++ b/docs/broadcast-guide.md
@@ -0,0 +1,92 @@
+# Guide to using Broadcast Variables
+
+This is a guide to show how to use broadcast variables in .NET for Apache Spark.
+
+## What are Broadcast Variables
+
+[Broadcast variables in Apache Spark](https://spark.apache.org/docs/2.2.0/rdd-programming-guide.html#broadcast-variables) are a mechanism for sharing read-only variables across executors. They allow the programmer to keep a read-only variable cached on each machine rather than shipping a copy of it with tasks. They can be used, for example, to give every node a copy of a large input dataset in an efficient manner.
+
+### How to use broadcast variables in .NET for Apache Spark
+
+Broadcast variables are created from a variable `v` by calling `SparkContext.Broadcast(v)`. The broadcast variable is a wrapper around `v`, and its value can be accessed by calling the `Value()` method.
+
+Example:
+
+```csharp
+string v = "Variable to be broadcasted";
+Broadcast<string> bv = SparkContext.Broadcast(v);
+
+// Using the broadcast variable in a UDF:
+Func<Column, Column> udf = Udf<string, string>(
+    str => $"{str}: {bv.Value()}");
+```
+
+The type parameter for `Broadcast` should be the type of the variable being broadcasted.
+
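+Once created, the broadcast variable can be used inside UDFs applied to a DataFrame. The following is a minimal sketch (assuming a `DataFrame` named `df` with a string column `_1`, as in the later examples in this guide) of applying the `udf` defined above:
+
+```csharp
+// df is assumed to be an existing DataFrame with a string column "_1".
+// Each output row contains the column value followed by the broadcasted string.
+DataFrame result = df.Select(udf(df["_1"]));
+result.Show();
+```
+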
+### Deleting broadcast variables
+
+The broadcast variable can be deleted from all executors by calling the `Destroy()` method on it.
+
+```csharp
+// Destroying the broadcast variable bv:
+bv.Destroy();
+```
+
+> Note: `Destroy()` deletes all data and metadata related to the broadcast variable. Use this with caution - once a broadcast variable has been destroyed, it cannot be used again.
+
+#### Caveat of using Destroy
+
+One important thing to keep in mind while using broadcast variables in UDFs is to limit the scope of the variable to only the UDF that is referencing it. The [guide to using UDFs](udf-guide.md) describes this phenomenon in detail. This is especially crucial when calling `Destroy` on the broadcast variable. If the broadcast variable that has been destroyed is visible to or accessible from other UDFs, it gets picked up for serialization by all those UDFs, even if it is not being referenced by them. This will throw an error as .NET for Apache Spark is not able to serialize the destroyed broadcast variable.
+
+Example to demonstrate:
+
+```csharp
+string v = "Variable to be broadcasted";
+Broadcast<string> bv = SparkContext.Broadcast(v);
+
+// Using the broadcast variable in a UDF:
+Func<Column, Column> udf1 = Udf<string, string>(
+    str => $"{str}: {bv.Value()}");
+
+// Destroying bv
+bv.Destroy();
+
+// Calling udf1 after destroying bv throws the following expected exception:
+// org.apache.spark.SparkException: Attempted to use Broadcast(0) after it was destroyed
+df.Select(udf1(df["_1"])).Show();
+
+// Different UDF udf2 that is not referencing bv
+Func<Column, Column> udf2 = Udf<string, string>(
+    str => $"{str}: not referencing broadcast variable");
+
+// Calling udf2 throws the following (unexpected) exception:
+// [Error] [JvmBridge] org.apache.spark.SparkException: Task not serializable
+df.Select(udf2(df["_1"])).Show();
+```
+
+The recommended way of implementing the desired behavior described above:
+
+```csharp
+string v = "Variable to be broadcasted";
+// Restricting the visibility of bv to only the UDF referencing it
+{
+    Broadcast<string> bv = SparkContext.Broadcast(v);
+
+    // Using the broadcast variable in a UDF:
+    Func<Column, Column> udf1 = Udf<string, string>(
+        str => $"{str}: {bv.Value()}");
+
+    // Destroying bv
+    bv.Destroy();
+}
+
+// Different UDF udf2 that is not referencing bv
+Func<Column, Column> udf2 = Udf<string, string>(
+    str => $"{str}: not referencing broadcast variable");
+
+// Calling udf2 works fine as expected
+df.Select(udf2(df["_1"])).Show();
+```
+This ensures that destroying `bv` doesn't affect calls to `udf2` because of unexpected serialization behavior.
+
+Broadcast variables are useful for transmitting read-only data to all executors: the data is sent only once, which can give performance benefits compared with using local variables that get shipped to the executors with each task. Please refer to the [official documentation](https://spark.apache.org/docs/2.2.0/rdd-programming-guide.html#broadcast-variables) to get a deeper understanding of broadcast variables and why they are used.
\ No newline at end of file
diff --git a/docs/building/ubuntu-instructions.md b/docs/building/ubuntu-instructions.md
index 8bb11b163..0e3dbdf40 100644
--- a/docs/building/ubuntu-instructions.md
+++ b/docs/building/ubuntu-instructions.md
@@ -35,14 +35,14 @@ If you already have all the pre-requisites, skip to the [build](ubuntu-instructi
```bash
sudo update-alternatives --config java
```
- 3. Install **[Apache Maven 3.6.0+](https://maven.apache.org/download.cgi)**
+ 3. Install **[Apache Maven 3.6.3+](https://maven.apache.org/download.cgi)**
- Run the following command:
```bash
mkdir -p ~/bin/maven
cd ~/bin/maven
- wget https://www-us.apache.org/dist/maven/maven-3/3.6.0/binaries/apache-maven-3.6.0-bin.tar.gz
- tar -xvzf apache-maven-3.6.0-bin.tar.gz
- ln -s apache-maven-3.6.0 current
+ wget https://www-us.apache.org/dist/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz
+ tar -xvzf apache-maven-3.6.3-bin.tar.gz
+ ln -s apache-maven-3.6.3 current
export M2_HOME=~/bin/maven/current
export PATH=${M2_HOME}/bin:${PATH}
source ~/.bashrc
@@ -54,11 +54,11 @@ If you already have all the pre-requisites, skip to the [build](ubuntu-instructi
📙 Click to see sample mvn -version output
```
- Apache Maven 3.6.0 (97c98ec64a1fdfee7767ce5ffb20918da4f719f3; 2018-10-24T18:41:47Z)
- Maven home: ~/bin/apache-maven-3.6.0
- Java version: 1.8.0_191, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-8-openjdk-amd64/jre
- Default locale: en, platform encoding: UTF-8
- OS name: "linux", version: "4.4.0-17763-microsoft", arch: "amd64", family: "unix"
+ Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f)
+ Maven home: ~/bin/apache-maven-3.6.3
+ Java version: 1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-8-openjdk-amd64/jre
+ Default locale: en_US, platform encoding: ANSI_X3.4-1968
+ OS name: "linux", version: "4.4.0-142-generic", arch: "amd64", family: "unix"
```
4. Install **[Apache Spark 2.3+](https://spark.apache.org/downloads.html)**
- Download [Apache Spark 2.3+](https://spark.apache.org/downloads.html) and extract it into a local folder (e.g., `~/bin/spark-2.3.2-bin-hadoop2.7`)
diff --git a/docs/udf-guide.md b/docs/udf-guide.md
new file mode 100644
index 000000000..6a2905bf4
--- /dev/null
+++ b/docs/udf-guide.md
@@ -0,0 +1,171 @@
+# Guide to User-Defined Functions (UDFs)
+
+This is a guide to show how to use UDFs in .NET for Apache Spark.
+
+## What are UDFs
+
+[User-Defined Functions (UDFs)](https://spark.apache.org/docs/latest/api/java/org/apache/spark/sql/expressions/UserDefinedFunction.html) are a feature of Spark that allow developers to use custom functions to extend the system's built-in functionality. They transform values from a single row within a table to produce a single corresponding output value per row based on the logic defined in the UDF.
+
+Let's take the following as an example for a UDF definition:
+
+```csharp
+string s1 = "hello";
+Func<Column, Column> udf = Udf<string, string>(
+    str => $"{s1} {str}");
+
+```
+The above defined UDF takes a `string` as an input (in the form of a [Column](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Sql/Column.cs#L14) of a [DataFrame](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Sql/DataFrame.cs#L24)), and returns a `string` with `hello` prepended to the input.
+
+For a sample DataFrame, let's take the following DataFrame `df`:
+
+```text
++-------+
+| name|
++-------+
+|Michael|
+| Andy|
+| Justin|
++-------+
+```
+
+Now let's apply the above defined `udf` to the DataFrame `df`:
+
+```csharp
+DataFrame udfResult = df.Select(udf(df["name"]));
+```
+
+This would return the following as the DataFrame `udfResult`:
+
+```text
++-------------+
+| name|
++-------------+
+|hello Michael|
+| hello Andy|
+| hello Justin|
++-------------+
+```
+To get a better understanding of how to implement UDFs, please take a look at the [UDF helper functions](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Sql/Functions.cs#L3616) and some [test examples](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark.E2ETest/UdfTests/UdfSimpleTypesTests.cs#L49).
+
+## UDF serialization
+
+Since UDFs are functions that need to be executed on the workers, they have to be serialized and sent to the workers as part of the payload from the driver. This involves serializing the [delegate](https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/delegates/) which is a reference to the method, along with its [target](https://docs.microsoft.com/en-us/dotnet/api/system.delegate.target?view=netframework-4.8) which is the class instance on which the current delegate invokes the instance method. Please take a look at this [code](https://github.com/dotnet/spark/blob/master/src/csharp/Microsoft.Spark/Utils/CommandSerDe.cs#L149) to get a better understanding of how UDF serialization is being done.
+
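+As a rough, standalone illustration of what gets captured (this is not .NET for Apache Spark code, and the printed names below are only examples that depend on the compiler), a delegate carries both a method reference and a target object, and for a capturing lambda the target is the compiler-generated closure instance:
+
+```csharp
+using System;
+
+class Example
+{
+    static void Main()
+    {
+        string s1 = "hello";
+        Func<string, string> f = str => $"{s1} {str}";
+
+        // A delegate is a method reference plus the object it is invoked on.
+        // For a lambda capturing s1, the target is a compiler-generated
+        // closure instance that holds the captured variable.
+        Console.WriteLine(f.Method.Name);           // e.g. "<Main>b__0_0"
+        Console.WriteLine(f.Target.GetType().Name); // e.g. "<>c__DisplayClass0_0"
+    }
+}
+```
+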
+## Good to know while implementing UDFs
+
+One behavior to be aware of while implementing UDFs in .NET for Apache Spark is how the target of the UDF gets serialized. .NET for Apache Spark uses .NET Core, which does not support serializing delegates, so serialization is instead done using reflection on the target object where the delegate is defined. When multiple delegates are defined in a common scope, they share a single closure, and that closure becomes the object that gets serialized via reflection. Let's take an example to illustrate what that means.
+
+The following code snippet defines two string variables that are referenced by two function delegates, each of which returns the corresponding string:
+
+```csharp
+using System;
+
+public class C {
+    public void M() {
+        string s1 = "s1";
+        string s2 = "s2";
+        Func<string, string> a = str => s1;
+        Func<string, string> b = str => s2;
+    }
+}
+```
+
+The above C# code generates the following C# disassembly from the compiler (decompiled with [sharplab.io](https://sharplab.io)):
+
+```csharp
+public class C
+{
+    [CompilerGenerated]
+    private sealed class <>c__DisplayClass0_0
+    {
+        public string s1;
+
+        public string s2;
+
+        internal string <M>b__0(string str)
+        {
+            return s1;
+        }
+
+        internal string <M>b__1(string str)
+        {
+            return s2;
+        }
+    }
+
+    public void M()
+    {
+        <>c__DisplayClass0_0 <>c__DisplayClass0_ = new <>c__DisplayClass0_0();
+        <>c__DisplayClass0_.s1 = "s1";
+        <>c__DisplayClass0_.s2 = "s2";
+        Func<string, string> func = new Func<string, string>(<>c__DisplayClass0_.<M>b__0);
+        Func<string, string> func2 = new Func<string, string>(<>c__DisplayClass0_.<M>b__1);
+    }
+}
+```
+As can be seen in the above decompiled code, both `func` and `func2` share the same closure `<>c__DisplayClass0_0`, which is the target that is serialized when serializing the delegates `func` and `func2`. Hence, even though `Func<string, string> a` is only referencing `s1`, `s2` also gets serialized when sending over the bytes to the workers.
+
+This can lead to some unexpected behaviors at runtime (like in the case of using [broadcast variables](broadcast-guide.md)), which is why we recommend restricting the visibility of the variables used in a function to that function's scope.
+
+Going back to the above example, the following is the recommended way to implement the desired behavior of the previous code snippet:
+
+```csharp
+using System;
+
+public class C {
+    public void M() {
+        {
+            string s1 = "s1";
+            Func<string, string> a = str => s1;
+        }
+        {
+            string s2 = "s2";
+            Func<string, string> b = str => s2;
+        }
+    }
+}
+```
+
+The above C# code generates the following C# disassembly from the compiler (decompiled with [sharplab.io](https://sharplab.io)):
+
+```csharp
+public class C
+{
+    [CompilerGenerated]
+    private sealed class <>c__DisplayClass0_0
+    {
+        public string s1;
+
+        internal string <M>b__0(string str)
+        {
+            return s1;
+        }
+    }
+
+    [CompilerGenerated]
+    private sealed class <>c__DisplayClass0_1
+    {
+        public string s2;
+
+        internal string <M>b__1(string str)
+        {
+            return s2;
+        }
+    }
+
+    public void M()
+    {
+        <>c__DisplayClass0_0 <>c__DisplayClass0_ = new <>c__DisplayClass0_0();
+        <>c__DisplayClass0_.s1 = "s1";
+        Func<string, string> func = new Func<string, string>(<>c__DisplayClass0_.<M>b__0);
+        <>c__DisplayClass0_1 <>c__DisplayClass0_2 = new <>c__DisplayClass0_1();
+        <>c__DisplayClass0_2.s2 = "s2";
+        Func<string, string> func2 = new Func<string, string>(<>c__DisplayClass0_2.<M>b__1);
+    }
+}
+```
+
+Here we see that `func` and `func2` no longer share a closure and have their own separate closures `<>c__DisplayClass0_0` and `<>c__DisplayClass0_1` respectively. When used as the target for serialization, nothing other than the referenced variables will get serialized for the delegate.
+
+This behavior is important to keep in mind while implementing multiple UDFs in a common scope.
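+
+Applied to Spark UDFs, one way to follow this guidance is to give each UDF its own scope so that each delegate's closure captures only the state that UDF uses. The following is a minimal sketch that assumes the `df` DataFrame and the `Udf` helper shown earlier in this guide:
+
+```csharp
+{
+    string prefix = "hello";
+    Func<Column, Column> udf1 = Udf<string, string>(str => $"{prefix} {str}");
+    df.Select(udf1(df["name"])).Show();
+}
+{
+    string suffix = "!";
+    Func<Column, Column> udf2 = Udf<string, string>(str => $"{str}{suffix}");
+    df.Select(udf2(df["name"])).Show();
+}
+// udf1's closure captures only prefix, and udf2's closure captures only suffix,
+// so serializing one UDF never drags along the other's state.
+```
+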
+To learn more about UDFs in general, please review the following articles that explain UDFs and how to use them: [UDFs in Databricks (Scala)](https://docs.databricks.com/spark/latest/spark-sql/udf-scala.html) and [Spark UDFs and some gotchas](https://medium.com/@achilleus/spark-udfs-we-can-use-them-but-should-we-use-them-2c5a561fde6d).
\ No newline at end of file
diff --git a/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs b/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs
index efb85e468..51f63078d 100644
--- a/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs
+++ b/examples/Microsoft.Spark.CSharp.Examples/MachineLearning/Sentiment/Program.cs
@@ -27,7 +27,7 @@ public void Run(string[] args)
SparkSession spark = SparkSession
.Builder()
- .AppName(".NET for Apache Spark Sentiment Analysis")
+ .AppName("Sentiment Analysis using .NET for Apache Spark")
.GetOrCreate();
// Read in and display Yelp reviews
diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs
index 6ef95eefa..fe57f7d1b 100644
--- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs
+++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs
@@ -26,7 +26,7 @@ public void Run(string[] args)
SparkSession spark = SparkSession
.Builder()
- .AppName(".NET Spark SQL basic example")
+ .AppName("SQL basic example using .NET for Apache Spark")
.Config("spark.some.config.option", "some-value")
.GetOrCreate();
diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs
index cf41eeceb..0945df791 100644
--- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs
+++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Datasource.cs
@@ -32,7 +32,7 @@ public void Run(string[] args)
SparkSession spark = SparkSession
.Builder()
- .AppName(".NET Spark SQL Datasource example")
+ .AppName("SQL Datasource example using .NET for Apache Spark")
.Config("spark.some.config.option", "some-value")
.GetOrCreate();
diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs
index 697301733..aafea7256 100644
--- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs
+++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorDataFrameUdfs.cs
@@ -31,7 +31,7 @@ public void Run(string[] args)
.Builder()
// Lower the shuffle partitions to speed up groupBy() operations.
.Config("spark.sql.shuffle.partitions", "3")
- .AppName(".NET Spark SQL VectorUdfs example")
+ .AppName("SQL VectorUdfs example using .NET for Apache Spark")
.GetOrCreate();
DataFrame df = spark.Read().Schema("age INT, name STRING").Json(args[0]);
diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs
index 369cc3aff..2497d5ef3 100644
--- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs
+++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs
@@ -29,7 +29,7 @@ public void Run(string[] args)
.Builder()
// Lower the shuffle partitions to speed up groupBy() operations.
.Config("spark.sql.shuffle.partitions", "3")
- .AppName(".NET Spark SQL VectorUdfs example")
+ .AppName("SQL VectorUdfs example using .NET for Apache Spark")
.GetOrCreate();
DataFrame df = spark.Read().Schema("age INT, name STRING").Json(args[0]);
diff --git a/script/download-spark-distros.cmd b/script/download-spark-distros.cmd
index d02bb49a7..0d2435a00 100644
--- a/script/download-spark-distros.cmd
+++ b/script/download-spark-distros.cmd
@@ -23,5 +23,7 @@ curl -k -L -o spark-2.4.1.tgz https://archive.apache.org/dist/spark/spark-2.4.1/
curl -k -L -o spark-2.4.3.tgz https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz && tar xzvf spark-2.4.3.tgz
curl -k -L -o spark-2.4.4.tgz https://archive.apache.org/dist/spark/spark-2.4.4/spark-2.4.4-bin-hadoop2.7.tgz && tar xzvf spark-2.4.4.tgz
curl -k -L -o spark-2.4.5.tgz https://archive.apache.org/dist/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz && tar xzvf spark-2.4.5.tgz
-endlocal
\ No newline at end of file
+curl -k -L -o spark-2.4.6.tgz https://archive.apache.org/dist/spark/spark-2.4.6/spark-2.4.6-bin-hadoop2.7.tgz && tar xzvf spark-2.4.6.tgz
+
+endlocal
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs
index 69249d8c5..fab7c74dc 100644
--- a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/DeltaTableTests.cs
@@ -11,6 +11,7 @@
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Streaming;
using Microsoft.Spark.Sql.Types;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.Extensions.Delta.E2ETest
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj
new file mode 100644
index 000000000..391582751
--- /dev/null
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj
@@ -0,0 +1,23 @@
+
+
+
+ netcoreapp3.1
+ Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs
new file mode 100644
index 000000000..219c533ff
--- /dev/null
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/PackageResolverTests.cs
@@ -0,0 +1,95 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Microsoft.DotNet.Interactive.Utility;
+using Microsoft.Spark.UnitTest.TestUtils;
+using Microsoft.Spark.Utils;
+using Moq;
+using Xunit;
+
+namespace Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest
+{
+ public class PackageResolverTests
+ {
+ [Fact]
+ public void TestPackageResolver()
+ {
+ using var tempDir = new TemporaryDirectory();
+
+ string packageName = "package.name";
+ string packageVersion = "0.1.0";
+ string packageRootPath =
+ Path.Combine(tempDir.Path, "path", "to", "packages", packageName, packageVersion);
+ string packageFrameworkPath = Path.Combine(packageRootPath, "lib", "framework");
+
+ Directory.CreateDirectory(packageRootPath);
+ var nugetFile = new FileInfo(
+ Path.Combine(packageRootPath, $"{packageName}.{packageVersion}.nupkg"));
+ using (File.Create(nugetFile.FullName))
+ {
+ }
+
+            var assemblyPaths = new List<FileInfo>
+ {
+ new FileInfo(Path.Combine(packageFrameworkPath, "1.dll")),
+ new FileInfo(Path.Combine(packageFrameworkPath, "2.dll"))
+ };
+            var probingPaths = new List<DirectoryInfo> { new DirectoryInfo(packageRootPath) };
+
+            var mockSupportNugetWrapper = new Mock<SupportNugetWrapper>();
+ mockSupportNugetWrapper
+ .SetupGet(m => m.ResolvedPackageReferences)
+ .Returns(new ResolvedPackageReference[]
+ {
+ new ResolvedPackageReference(
+ packageName,
+ packageVersion,
+ assemblyPaths,
+ new DirectoryInfo(packageRootPath),
+ probingPaths)
+ });
+
+ var packageResolver = new PackageResolver(mockSupportNugetWrapper.Object);
+            IEnumerable<string> actualFiles = packageResolver.GetFiles(tempDir.Path);
+
+ string metadataFilePath =
+ Path.Combine(tempDir.Path, DependencyProviderUtils.CreateFileName(1));
+ var expectedFiles = new string[]
+ {
+ nugetFile.FullName,
+ metadataFilePath
+ };
+ Assert.True(expectedFiles.SequenceEqual(actualFiles));
+ Assert.True(File.Exists(metadataFilePath));
+
+ DependencyProviderUtils.Metadata actualMetadata =
+ DependencyProviderUtils.Metadata.Deserialize(metadataFilePath);
+ var expectedMetadata = new DependencyProviderUtils.Metadata
+ {
+ AssemblyProbingPaths = new string[]
+ {
+ Path.Combine(packageName, packageVersion, "lib", "framework", "1.dll"),
+ Path.Combine(packageName, packageVersion, "lib", "framework", "2.dll")
+ },
+ NativeProbingPaths = new string[]
+ {
+ Path.Combine(packageName, packageVersion)
+ },
+ NuGets = new DependencyProviderUtils.NuGetMetadata[]
+ {
+ new DependencyProviderUtils.NuGetMetadata
+ {
+ FileName = $"{packageName}.{packageVersion}.nupkg",
+ PackageName = packageName,
+ PackageVersion = packageVersion
+ }
+ }
+ };
+ Assert.True(expectedMetadata.Equals(actualMetadata));
+ }
+ }
+}
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs
new file mode 100644
index 000000000..2deff5869
--- /dev/null
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/AssemblyKernelExtension.cs
@@ -0,0 +1,156 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading.Tasks;
+using Microsoft.CodeAnalysis;
+using Microsoft.DotNet.Interactive;
+using Microsoft.DotNet.Interactive.Commands;
+using Microsoft.DotNet.Interactive.CSharp;
+using Microsoft.DotNet.Interactive.Utility;
+using Microsoft.Spark.Interop;
+using Microsoft.Spark.Sql;
+using Microsoft.Spark.Utils;
+
+namespace Microsoft.Spark.Extensions.DotNet.Interactive
+{
+    /// <summary>
+    /// A kernel extension when using .NET for Apache Spark with Microsoft.DotNet.Interactive
+    /// Adds nuget and assembly dependencies to the default <see cref="SparkSession"/>
+    /// using <see cref="SparkContext.AddFile(string, bool)"/>.
+    /// </summary>
+ public class AssemblyKernelExtension : IKernelExtension
+ {
+ private const string TempDirEnvVar = "DOTNET_SPARK_EXTENSION_INTERACTIVE_TMPDIR";
+
+ private readonly PackageResolver _packageResolver =
+ new PackageResolver(new SupportNugetWrapper());
+
+        /// <summary>
+        /// Called by the Microsoft.DotNet.Interactive Assembly Extension Loader.
+        /// </summary>
+        /// <param name="kernel">The kernel calling this method.</param>
+        /// <returns><see cref="Task.CompletedTask"/> when extension is loaded.</returns>
+ public Task OnLoadAsync(IKernel kernel)
+ {
+ if (kernel is CompositeKernel kernelBase)
+ {
+ Environment.SetEnvironmentVariable(Constants.RunningREPLEnvVar, "true");
+
+ DirectoryInfo tempDir = CreateTempDirectory();
+ kernelBase.RegisterForDisposal(new DisposableDirectory(tempDir));
+
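+                // On every code submission, emit the REPL compilation to a DLL and ship it,
+                // along with any newly resolved nuget files, to the executors via AddFile.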
+ kernelBase.AddMiddleware(async (command, context, next) =>
+ {
+                    if ((context.HandlingKernel is CSharpKernel csharpKernel) &&
+                        (command is SubmitCode) &&
+                        TryGetSparkSession(out SparkSession sparkSession) &&
+                        TryEmitAssembly(csharpKernel, tempDir.FullName, out string assemblyPath))
+ {
+ sparkSession.SparkContext.AddFile(assemblyPath);
+
+ foreach (string filePath in GetPackageFiles(tempDir.FullName))
+ {
+ sparkSession.SparkContext.AddFile(filePath);
+ }
+ }
+
+ await next(command, context);
+ });
+ }
+
+ return Task.CompletedTask;
+ }
+
+ private DirectoryInfo CreateTempDirectory()
+ {
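+            // The base path defaults to the current working directory but can be overridden
+            // through the DOTNET_SPARK_EXTENSION_INTERACTIVE_TMPDIR environment variable.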
+ string envTempDir = Environment.GetEnvironmentVariable(TempDirEnvVar);
+ string tempDirBasePath = string.IsNullOrEmpty(envTempDir) ?
+ Directory.GetCurrentDirectory() :
+ envTempDir;
+
+ if (!IsPathValid(tempDirBasePath))
+ {
+ throw new Exception($"[{GetType().Name}] Spaces in " +
+ $"'{tempDirBasePath}' is unsupported. Set the {TempDirEnvVar} " +
+ "environment variable to control the base path. Please see " +
+ "https://issues.apache.org/jira/browse/SPARK-30126 and " +
+ "https://github.com/apache/spark/pull/26773 for more details.");
+ }
+
+ return Directory.CreateDirectory(
+ Path.Combine(tempDirBasePath, Path.GetRandomFileName()));
+ }
+
+ private bool TryEmitAssembly(CSharpKernel kernel, string dstPath, out string assemblyPath)
+ {
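+            // Compile the accumulated interactive submissions into an assembly; the
+            // normalized compilation name is expected to be unique per submission.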
+ Compilation compilation = kernel.ScriptState.Script.GetCompilation();
+ string assemblyName =
+ AssemblyLoader.NormalizeAssemblyName(compilation.AssemblyName);
+ assemblyPath = Path.Combine(dstPath, $"{assemblyName}.dll");
+ if (!File.Exists(assemblyPath))
+ {
+ FileSystemExtensions.Emit(compilation, assemblyPath);
+ return true;
+ }
+
+ throw new Exception(
+ $"TryEmitAssembly() unexpected duplicate assembly: ${assemblyPath}");
+ }
+
+ private bool TryGetSparkSession(out SparkSession sparkSession)
+ {
+ sparkSession = SparkSession.GetDefaultSession();
+ return sparkSession != null;
+ }
+
+        private IEnumerable<string> GetPackageFiles(string path)
+ {
+ foreach (string filePath in _packageResolver.GetFiles(path))
+ {
+ if (IsPathValid(filePath))
+ {
+ yield return filePath;
+ }
+ else
+ {
+ // Copy file to a path without spaces.
+ string fileDestPath = Path.Combine(
+ path,
+ Path.GetFileName(filePath).Replace(" ", string.Empty));
+ File.Copy(filePath, fileDestPath);
+ yield return fileDestPath;
+ }
+ }
+ }
+
+        /// <summary>
+        /// In some versions of Spark, spaces are unsupported when using
+        /// <see cref="SparkContext.AddFile(string, bool)"/>.
+        ///
+        /// For more details please see:
+        /// - https://issues.apache.org/jira/browse/SPARK-30126
+        /// - https://github.com/apache/spark/pull/26773
+        /// </summary>
+        /// <param name="path">The path to validate.</param>
+        /// <returns>true if the path is supported by Spark, false otherwise.</returns>
+ private bool IsPathValid(string path)
+ {
+ if (!path.Contains(" "))
+ {
+ return true;
+ }
+
+ Version version = SparkEnvironment.SparkVersion;
+ return (version.Major, version.Minor, version.Build) switch
+ {
+ (2, _, _) => false,
+ (3, 0, _) => true,
+ _ => throw new NotSupportedException($"Spark {version} not supported.")
+ };
+ }
+ }
+}
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj
new file mode 100644
index 000000000..da330c762
--- /dev/null
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj
@@ -0,0 +1,38 @@
+
+
+
+ Library
+ netcoreapp3.1
+ Microsoft.Spark.Extensions.DotNet.Interactive
+ true
+ true
+
+ NU5100;$(NoWarn)
+
+ DotNet Interactive Extension for .NET for Apache Spark
+ https://github.com/dotnet/spark/tree/master/docs/release-notes
+ spark;dotnet;csharp;interactive;dotnet-interactive
+
+
+
+
+
+
+
+
+
+ all
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs
new file mode 100644
index 000000000..f9a76e43f
--- /dev/null
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/PackageResolver.cs
@@ -0,0 +1,165 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.IO;
+using System.Threading;
+using Microsoft.DotNet.Interactive.Utility;
+using Microsoft.Spark.Utils;
+
+namespace Microsoft.Spark.Extensions.DotNet.Interactive
+{
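+    /// <summary>
+    /// Tracks the nuget packages resolved by the interactive kernel and produces the list
+    /// of files (nupkg files plus a DependencyProviderUtils.Metadata file) that needs to be
+    /// shipped to the Spark cluster.
+    /// </summary>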
+ internal class PackageResolver
+ {
+ private readonly SupportNugetWrapper _supportNugetWrapper;
+        private readonly ConcurrentDictionary<string, int> _filesCopied;
+ private long _metadataCounter;
+
+ internal PackageResolver(SupportNugetWrapper supportNugetWrapper)
+ {
+ _supportNugetWrapper = supportNugetWrapper;
+            _filesCopied = new ConcurrentDictionary<string, int>();
+ _metadataCounter = 0;
+ }
+
+        /// <summary>
+        /// Generates and serializes a <see cref="DependencyProviderUtils.Metadata"/> to
+        /// <paramref name="writePath"/>. Returns a list of file paths which include the
+        /// serialized <see cref="DependencyProviderUtils.Metadata"/> and nuget file
+        /// dependencies.
+        /// </summary>
+        /// <param name="writePath">Path to write metadata.</param>
+        /// <returns>
+        /// List of file paths of the serialized <see cref="DependencyProviderUtils.Metadata"/>
+        /// and nuget file dependencies.
+        /// </returns>
+        internal IEnumerable<string> GetFiles(string writePath)
+ {
+            IEnumerable<ResolvedNuGetPackage> nugetPackagesToCopy = GetNewPackages();
+
+            var assemblyProbingPaths = new List<string>();
+            var nativeProbingPaths = new List<string>();
+            var nugetMetadata = new List<DependencyProviderUtils.NuGetMetadata>();
+
+ foreach (ResolvedNuGetPackage package in nugetPackagesToCopy)
+ {
+ ResolvedPackageReference resolvedPackage = package.ResolvedPackage;
+
+ foreach (FileInfo asmPath in resolvedPackage.AssemblyPaths)
+ {
+ // asmPath.FullName
+ // /path/to/packages/package.name/package.version/lib/framework/1.dll
+ // resolvedPackage.PackageRoot
+ // /path/to/packages/package.name/package.version/
+ // GetRelativeToPackages(..)
+ // package.name/package.version/lib/framework/1.dll
+ assemblyProbingPaths.Add(
+ GetPathRelativeToPackages(
+ asmPath.FullName,
+ resolvedPackage.PackageRoot));
+ }
+
+ foreach (DirectoryInfo probePath in resolvedPackage.ProbingPaths)
+ {
+ // probePath.FullName
+ // /path/to/packages/package.name/package.version/
+ // resolvedPackage.PackageRoot
+ // /path/to/packages/package.name/package.version/
+ // GetRelativeToPackages(..)
+ // package.name/package.version
+ nativeProbingPaths.Add(
+ GetPathRelativeToPackages(
+ probePath.FullName,
+ resolvedPackage.PackageRoot));
+ }
+
+ nugetMetadata.Add(
+ new DependencyProviderUtils.NuGetMetadata
+ {
+ FileName = package.NuGetFile.Name,
+ PackageName = resolvedPackage.PackageName,
+ PackageVersion = resolvedPackage.PackageVersion
+ });
+
+ yield return package.NuGetFile.FullName;
+ }
+
+ if (nugetMetadata.Count > 0)
+ {
+ var metadataPath =
+ Path.Combine(
+ writePath,
+ DependencyProviderUtils.CreateFileName(
+ Interlocked.Increment(ref _metadataCounter)));
+ new DependencyProviderUtils.Metadata
+ {
+ AssemblyProbingPaths = assemblyProbingPaths.ToArray(),
+ NativeProbingPaths = nativeProbingPaths.ToArray(),
+ NuGets = nugetMetadata.ToArray()
+ }.Serialize(metadataPath);
+
+ yield return metadataPath;
+ }
+ }
+
+        /// <summary>
+        /// Return the delta of the list of packages that have been introduced
+        /// since the last call.
+        /// </summary>
+        /// <returns>The delta of the list of packages.</returns>
+        private IEnumerable<ResolvedNuGetPackage> GetNewPackages()
+ {
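+            // _filesCopied records every nupkg already handed out, so only packages that
+            // appeared since the previous call are yielded.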
+            IEnumerable<ResolvedPackageReference> packages =
+ _supportNugetWrapper.ResolvedPackageReferences;
+ foreach (ResolvedPackageReference package in packages)
+ {
+                IEnumerable<FileInfo> files =
+ package.PackageRoot.EnumerateFiles("*.nupkg", SearchOption.AllDirectories);
+
+ foreach (FileInfo file in files)
+ {
+ if (_filesCopied.TryAdd(file.Name, 1))
+ {
+ yield return new ResolvedNuGetPackage
+ {
+ ResolvedPackage = package,
+ NuGetFile = file
+ };
+ }
+ }
+ }
+ }
+
+        /// <summary>
+        /// Given a <paramref name="path"/>, get the relative path to the packages directory.
+        /// The package is a subfolder within the packages directory.
+ ///
+ /// Examples:
+ /// path:
+ /// /path/to/packages/package.name/package.version/lib/framework/1.dll
+ /// directory:
+ /// /path/to/packages/package.name/package.version/
+ /// relative path:
+ /// package.name/package.version/lib/framework/1.dll
+ ///
+ /// path:
+ /// /path/to/packages/package.name/package.version/
+ /// directory:
+ /// /path/to/packages/package.name/package.version/
+ /// relative path:
+ /// package.name/package.version
+        /// </summary>
+        /// <param name="path">The full path used to determine the relative path.</param>
+        /// <param name="directory">The package directory.</param>
+        /// <returns>The relative path to the packages directory.</returns>
+ private string GetPathRelativeToPackages(string path, DirectoryInfo directory)
+ {
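+            // directory.Parent is the package-name folder and directory is the version folder,
+            // so the relative path always begins with "package.name/package.version".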
+ string strippedRoot = path
+ .Substring(directory.FullName.Length)
+ .Trim(Path.DirectorySeparatorChar, Path.AltDirectorySeparatorChar);
+ return Path.Combine(directory.Parent.Name, directory.Name, strippedRoot);
+ }
+ }
+}
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs
new file mode 100644
index 000000000..57106c16a
--- /dev/null
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/ResolvedNugetPackage.cs
@@ -0,0 +1,15 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.IO;
+using Microsoft.DotNet.Interactive.Utility;
+
+namespace Microsoft.Spark.Extensions.DotNet.Interactive
+{
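+    /// <summary>
+    /// Associates a resolved package reference with one of the .nupkg files found under
+    /// its package root.
+    /// </summary>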
+ internal class ResolvedNuGetPackage
+ {
+ public ResolvedPackageReference ResolvedPackage { get; set; }
+ public FileInfo NuGetFile { get; set; }
+ }
+}
diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/SupportNugetWrapper.cs b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/SupportNugetWrapper.cs
new file mode 100644
index 000000000..489e39e94
--- /dev/null
+++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/SupportNugetWrapper.cs
@@ -0,0 +1,13 @@
+using System.Collections.Generic;
+using Microsoft.DotNet.Interactive;
+using Microsoft.DotNet.Interactive.Utility;
+
+namespace Microsoft.Spark.Extensions.DotNet.Interactive
+{
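+    /// <summary>
+    /// Thin wrapper over the interactive kernel's ISupportNuget resolved package references,
+    /// kept virtual so that package resolution can be mocked in unit tests.
+    /// </summary>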
+ internal class SupportNugetWrapper
+ {
+        internal virtual IEnumerable<ResolvedPackageReference> ResolvedPackageReferences =>
+ ((ISupportNuget)KernelInvocationContext.Current.HandlingKernel)
+ .ResolvedPackageReferences;
+ }
+}
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs
index 000c8f27e..511f5a122 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/BroadcastTests.cs
@@ -1,10 +1,8 @@
using System;
-using System.Collections.Generic;
using System.Linq;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.Sql;
-using static Microsoft.Spark.Sql.Functions;
using Xunit;
+using static Microsoft.Spark.Sql.Functions;
namespace Microsoft.Spark.E2ETest.IpcTests
{
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs
index 11037bc6d..a075334de 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/BucketizerTests.cs
@@ -4,9 +4,9 @@
using System.Collections.Generic;
using System.IO;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.ML.Feature;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs
index 7b6882bea..df459ed7a 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/HashingTFTests.cs
@@ -2,13 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System;
-using System.Collections.Generic;
using System.IO;
-using System.Linq;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.ML.Feature;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs
index 623b7322c..202187809 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFModelTests.cs
@@ -3,9 +3,9 @@
// See the LICENSE file in the project root for more information.
using System.IO;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.ML.Feature;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs
index 3dea63de7..72da97887 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/IDFTests.cs
@@ -3,9 +3,9 @@
// See the LICENSE file in the project root for more information.
using System.IO;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.ML.Feature;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs
index 8cdb4e03a..4b1998f50 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/TokenizerTests.cs
@@ -3,9 +3,9 @@
// See the LICENSE file in the project root for more information.
using System.IO;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.ML.Feature;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs
index 4845e011a..a5227149b 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecModelTests.cs
@@ -2,11 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System;
using System.IO;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.ML.Feature;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs
index 30e14ed28..1d5da5335 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/Word2VecTests.cs
@@ -3,9 +3,9 @@
// See the LICENSE file in the project root for more information.
using System.IO;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.ML.Feature;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs
index 07fbf2372..ca752570a 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs
@@ -3,7 +3,7 @@
// See the LICENSE file in the project root for more information.
using System;
-using Microsoft.Spark.E2ETest.Utils;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs
index 7359bdb6b..46e899a87 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameTests.cs
@@ -3,13 +3,13 @@
// See the LICENSE file in the project root for more information.
using System;
-using System.Collections.Generic;
using System.Linq;
using Apache.Arrow;
using Microsoft.Data.Analysis;
using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Types;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
using static Microsoft.Spark.Sql.Functions;
using static Microsoft.Spark.UnitTest.TestUtils.ArrowTestUtils;
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs
index a7e214160..4f0d06742 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/DataFrameWriterTests.cs
@@ -3,8 +3,8 @@
// See the LICENSE file in the project root for more information.
using System.Collections.Generic;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest.IpcTests
diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs
index 4e87dc6c6..15c2a22a7 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/Streaming/DataStreamWriterTests.cs
@@ -10,6 +10,7 @@
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Streaming;
using Microsoft.Spark.Sql.Types;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
using static Microsoft.Spark.Sql.Functions;
diff --git a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj
index abe436ec9..e03519853 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj
+++ b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj
@@ -23,6 +23,7 @@
+
diff --git a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs
index fc8272c5b..6d8dadbac 100644
--- a/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs
+++ b/src/csharp/Microsoft.Spark.E2ETest/SparkFixture.cs
@@ -7,9 +7,9 @@
using System.IO;
using System.Reflection;
using System.Runtime.InteropServices;
-using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.Interop.Ipc;
using Microsoft.Spark.Sql;
+using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;
namespace Microsoft.Spark.E2ETest
diff --git a/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs b/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs
index da7d05197..f2f0dd30e 100644
--- a/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs
+++ b/src/csharp/Microsoft.Spark.UnitTest/AssemblyLoaderTests.cs
@@ -9,17 +9,19 @@
namespace Microsoft.Spark.UnitTest
{
+ [Collection("Spark Unit Tests")]
public class AssemblyLoaderTests
{
[Fact]
public void TestAssemblySearchPathResolver()
{
+ string sparkFilesDir = SparkFiles.GetRootDirectory();
string curDir = Directory.GetCurrentDirectory();
string appDir = AppDomain.CurrentDomain.BaseDirectory;
// Test the default scenario.
string[] searchPaths = AssemblySearchPathResolver.GetAssemblySearchPaths();
- Assert.Equal(new[] { curDir, appDir }, searchPaths);
+ Assert.Equal(new[] { sparkFilesDir, curDir, appDir }, searchPaths);
// Test the case where DOTNET_ASSEMBLY_SEARCH_PATHS is defined.
char sep = Path.PathSeparator;
@@ -34,6 +36,7 @@ public void TestAssemblySearchPathResolver()
"mydir2",
Path.Combine(curDir, $".{sep}mydir3"),
Path.Combine(curDir, $".{sep}mydir4"),
+ sparkFilesDir,
curDir,
appDir },
searchPaths);
diff --git a/src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs b/src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs
new file mode 100644
index 000000000..9a723b2b5
--- /dev/null
+++ b/src/csharp/Microsoft.Spark.UnitTest/CollectionUtilsTests.cs
@@ -0,0 +1,26 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Spark.Utils;
+using Xunit;
+
+namespace Microsoft.Spark.UnitTest
+{
+ public class CollectionUtilsTests
+ {
+ [Fact]
+ public void TestArrayEquals()
+ {
+ Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, null));
+ Assert.False(CollectionUtils.ArrayEquals(null, new int[] { 1 }));
+ Assert.False(CollectionUtils.ArrayEquals(new int[] { }, new int[] { 1 }));
+ Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { }));
+ Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 1, 2 }));
+ Assert.False(CollectionUtils.ArrayEquals(new int[] { 1 }, new int[] { 2 }));
+
+ Assert.True(CollectionUtils.ArrayEquals