Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/building/windows-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ If you already have all the pre-requisites, skip to the [build](windows-instruct
3. Install **[Java 1.8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)**
- Select the appropriate version for your operating system, e.g., jdk-8u201-windows-x64.exe for a Windows x64 machine.
- Install using the installer and verify you are able to run `java` from your command-line
4. Install **[Apache Maven 3.6.0+](https://maven.apache.org/download.cgi)**
- Download [Apache Maven 3.6.0](http://mirror.metrocast.net/apache/maven/maven-3/3.6.0/binaries/apache-maven-3.6.0-bin.zip)
- Extract to a local directory e.g., `c:\bin\apache-maven-3.6.0\`
- Add Apache Maven to your [PATH environment variable](https://www.java.com/en/download/help/path.xml) e.g., `c:\bin\apache-maven-3.6.0\bin`
4. Install **[Apache Maven 3.6.3+](https://maven.apache.org/download.cgi)**
- Download [Apache Maven 3.6.3](http://mirror.metrocast.net/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.zip)
- Extract to a local directory e.g., `c:\bin\apache-maven-3.6.3\`
- Add Apache Maven to your [PATH environment variable](https://www.java.com/en/download/help/path.xml) e.g., `c:\bin\apache-maven-3.6.3\bin`
- Verify you are able to run `mvn` from your command-line
5. Install **[Apache Spark 2.3+](https://spark.apache.org/downloads.html)**
- Download [Apache Spark 2.3+](https://spark.apache.org/downloads.html) and extract it into a local folder (e.g., `c:\bin\spark-2.3.2-bin-hadoop2.7\`) using [7-zip](https://www.7-zip.org/).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest
{
/// <summary>
/// Constants related to the Hyperspace test suite.
/// </summary>
/// <remarks>
/// Declared <c>static</c> because the type only groups compile-time constants
/// and is never meant to be instantiated.
/// </remarks>
internal static class Constants
{
    /// <summary>
    /// Name of the xUnit test collection used by the Hyperspace tests. It is
    /// referenced from both the [CollectionDefinition] and [Collection]
    /// attributes, so the two must use this same value.
    /// </summary>
    public const string HyperspaceTestContainerName = "Hyperspace Tests";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.Spark.E2ETest;
using Xunit;

namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest
{
/// <summary>
/// Test fixture that exposes a <see cref="SparkFixture"/> created after the
/// Hyperspace package has been added to the extra spark-submit arguments.
/// </summary>
public class HyperspaceFixture
{
    // Value written to the ExtraSparkSubmitArgs environment variable; it asks
    // spark-submit to pull in the Hyperspace core package.
    private const string HyperspaceSubmitArgs =
        "--packages com.microsoft.hyperspace:hyperspace-core_2.11:0.1.0";

    /// <summary>
    /// The underlying Spark fixture created by this fixture's constructor.
    /// </summary>
    public SparkFixture SparkFixture { get; private set; }

    /// <summary>
    /// Sets the extra spark-submit arguments and then creates the
    /// <see cref="SparkFixture"/>.
    /// </summary>
    public HyperspaceFixture()
    {
        // The environment variable is set first; presumably SparkFixture reads
        // it while starting up, so the order of these two statements matters.
        string variableName = SparkFixture.EnvironmentVariableNames.ExtraSparkSubmitArgs;
        Environment.SetEnvironmentVariable(variableName, HyperspaceSubmitArgs);

        SparkFixture = new SparkFixture();
    }
}

/// <summary>
/// xUnit collection definition that associates <see cref="HyperspaceFixture"/>
/// with the collection named by <see cref="Constants.HyperspaceTestContainerName"/>.
/// Test classes opt in by applying [Collection] with the same name.
/// </summary>
[CollectionDefinition(Constants.HyperspaceTestContainerName)]
public class HyperspaceTestCollection : ICollectionFixture<HyperspaceFixture>
{
// This class has no code, and is never created. Its purpose is simply
// to be the place to apply [CollectionDefinition] and all the
// ICollectionFixture<> interfaces.
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.Extensions.Hyperspace.Index;
using Microsoft.Spark.Sql;
using Microsoft.Spark.UnitTest.TestUtils;
using Xunit;

namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest
{
/// <summary>
/// Test suite for Hyperspace index management APIs.
/// </summary>
/// <remarks>
/// The constructor points Hyperspace at a fresh temporary system directory and
/// creates one sample index; <see cref="Dispose"/> disposes that directory.
/// </remarks>
[Collection(Constants.HyperspaceTestContainerName)]
public class HyperspaceTests : IDisposable
{
    private readonly SparkSession _spark;
    private readonly TemporaryDirectory _hyperspaceSystemDirectory;
    private readonly Hyperspace _hyperspace;

    // Fields needed for sample DataFrame.
    private readonly DataFrame _sampleDataFrame;
    private readonly string _sampleIndexName;
    private readonly IndexConfig _sampleIndexConfig;

    /// <summary>
    /// Configures the Hyperspace system path, loads the sample CSV and creates
    /// the sample index used by the tests.
    /// </summary>
    /// <param name="fixture">Collection fixture providing the Spark session.</param>
    public HyperspaceTests(HyperspaceFixture fixture)
    {
        _spark = fixture.SparkFixture.Spark;
        _hyperspaceSystemDirectory = new TemporaryDirectory();

        try
        {
            _spark.Conf().Set("spark.hyperspace.system.path", _hyperspaceSystemDirectory.Path);
            _hyperspace = new Hyperspace(_spark);

            // Build the path with Path.Combine so the directory separator is
            // correct on non-Windows platforms as well; a hard-coded backslash
            // is only a separator on Windows.
            _sampleDataFrame = _spark.Read()
                .Option("header", true)
                .Option("delimiter", ";")
                .Csv(System.IO.Path.Combine("Resources", "people.csv"));
            _sampleIndexName = "sample_dataframe";
            _sampleIndexConfig = new IndexConfig(_sampleIndexName, new[] { "job" }, new[] { "name" });
            _hyperspace.CreateIndex(_sampleDataFrame, _sampleIndexConfig);
        }
        catch
        {
            // Dispose() is never called on an instance whose constructor threw,
            // so release the temporary directory here before propagating.
            _hyperspaceSystemDirectory.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Clean up the Hyperspace system directory in between tests.
    /// </summary>
    public void Dispose()
    {
        _hyperspaceSystemDirectory.Dispose();
    }

    /// <summary>
    /// Test the method signatures for all Hyperspace APIs.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestSignatures()
    {
        // Indexes API.
        Assert.IsType<DataFrame>(_hyperspace.Indexes());

        // Delete and Restore APIs.
        _hyperspace.DeleteIndex(_sampleIndexName);
        _hyperspace.RestoreIndex(_sampleIndexName);

        // Refresh API.
        _hyperspace.RefreshIndex(_sampleIndexName);

        // Cancel API. The test expects Cancel to throw for the sample index.
        Assert.Throws<Exception>(() => _hyperspace.Cancel(_sampleIndexName));

        // Explain API.
        _hyperspace.Explain(_sampleDataFrame, true);
        _hyperspace.Explain(_sampleDataFrame, true, s => Console.WriteLine(s));

        // Delete and Vacuum APIs.
        _hyperspace.DeleteIndex(_sampleIndexName);
        _hyperspace.VacuumIndex(_sampleIndexName);

        // Enable and disable Hyperspace.
        Assert.IsType<SparkSession>(_spark.EnableHyperspace());
        Assert.IsType<SparkSession>(_spark.DisableHyperspace());
        Assert.IsType<bool>(_spark.IsHyperspaceEnabled());
    }

    /// <summary>
    /// Test E2E functionality of index CRUD APIs.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestIndexCreateAndDelete()
    {
        // Should be one active index.
        DataFrame indexes = _hyperspace.Indexes();
        Assert.Equal(1, indexes.Count());
        Assert.Equal(_sampleIndexName, indexes.SelectExpr("name").First()[0]);
        Assert.Equal(States.Active, indexes.SelectExpr("state").First()[0]);

        // Delete the index then verify it has been deleted.
        _hyperspace.DeleteIndex(_sampleIndexName);
        indexes = _hyperspace.Indexes();
        Assert.Equal(1, indexes.Count());
        Assert.Equal(States.Deleted, indexes.SelectExpr("state").First()[0]);

        // Restore the index to active state and verify it is back.
        _hyperspace.RestoreIndex(_sampleIndexName);
        indexes = _hyperspace.Indexes();
        Assert.Equal(1, indexes.Count());
        Assert.Equal(States.Active, indexes.SelectExpr("state").First()[0]);

        // Delete and vacuum the index, then verify it is gone.
        _hyperspace.DeleteIndex(_sampleIndexName);
        _hyperspace.VacuumIndex(_sampleIndexName);
        Assert.Equal(0, _hyperspace.Indexes().Count());
    }

    /// <summary>
    /// Test that the explain API generates a non-empty string.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestExplainAPI()
    {
        // Run a query that hits the index.
        DataFrame queryDataFrame = _sampleDataFrame
            .Where("job == 'Developer'")
            .Select("name");

        // Capture the explain output through the redirect callback.
        string explainString = string.Empty;
        _hyperspace.Explain(queryDataFrame, true, s => explainString = s);
        Assert.False(string.IsNullOrEmpty(explainString));
    }

    /// <summary>
    /// Index states used in testing.
    /// </summary>
    private static class States
    {
        public const string Active = "ACTIVE";
        public const string Deleted = "DELETED";
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.Linq;
using Microsoft.Spark.E2ETest.Utils;
using Microsoft.Spark.Extensions.Hyperspace.Index;
using Xunit;

namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest.Index
{
/// <summary>
/// Tests for the Hyperspace IndexConfig type and its Builder.
/// </summary>
[Collection(Constants.HyperspaceTestContainerName)]
public class IndexConfigTests
{
    /// <summary>
    /// Accepts the collection fixture; the fixture itself is not used here.
    /// </summary>
    /// <param name="fixture">Shared Hyperspace collection fixture.</param>
    public IndexConfigTests(HyperspaceFixture fixture)
    {
    }

    /// <summary>
    /// Checks the return types exposed by IndexConfig and its Builder.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestSignatures()
    {
        // IndexConfig members.
        const string name = "testIndexName";
        string[] indexed = { "Id" };
        string[] included = new string[0];
        IndexConfig config = new IndexConfig(name, indexed, included);

        Assert.IsType<string>(config.IndexName);
        Assert.IsType<List<string>>(config.IndexedColumns);
        Assert.IsType<List<string>>(config.IncludedColumns);
        Assert.IsType<Builder>(IndexConfig.Builder());
        Assert.IsType<bool>(config.Equals(config));
        Assert.IsType<int>(config.GetHashCode());
        Assert.IsType<string>(config.ToString());

        // Builder members; each call is made on the same builder instance.
        Builder configBuilder = IndexConfig.Builder();
        Assert.IsType<Builder>(configBuilder);
        Assert.IsType<Builder>(configBuilder.IndexName("indexName"));
        Assert.IsType<Builder>(configBuilder.IndexBy("indexed1", "indexed2"));
        Assert.IsType<Builder>(configBuilder.Include("included1"));
        Assert.IsType<IndexConfig>(configBuilder.Create());
    }

    /// <summary>
    /// Builds an IndexConfig through its constructor and checks that the
    /// name and both column lists come back as supplied.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestIndexConfigConstructor()
    {
        const string expectedName = "indexName";
        var expectedIndexed = new[] { "idx1" };
        var expectedIncluded = new[] { "inc1", "inc2", "inc3" };

        IndexConfig actual = new IndexConfig(expectedName, expectedIndexed, expectedIncluded);

        // The config must echo back what the constructor was given.
        Assert.Equal(expectedName, actual.IndexName);
        Assert.Equal(expectedIndexed, actual.IndexedColumns);
        Assert.Equal(expectedIncluded, actual.IncludedColumns);
    }

    /// <summary>
    /// Builds an IndexConfig through the builder and checks that the name and
    /// both column lists come back as supplied.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestIndexConfigBuilder()
    {
        const string expectedName = "indexName";
        var expectedIndexed = new[] { "idx1" };
        var expectedIncluded = new[] { "inc1", "inc2", "inc3" };

        // Populate the builder: name first, then included, then indexed columns.
        Builder configBuilder = IndexConfig.Builder();
        configBuilder.IndexName(expectedName);
        configBuilder.Include(expectedIncluded[0], expectedIncluded[1], expectedIncluded[2]);
        configBuilder.IndexBy(expectedIndexed[0]);

        IndexConfig actual = configBuilder.Create();

        // The created config must echo back what the builder was given.
        Assert.Equal(expectedName, actual.IndexName);
        Assert.Equal(expectedIndexed, actual.IndexedColumns);
        Assert.Equal(expectedIncluded, actual.IncludedColumns);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: targets .NET Core 3.1 and is not packable (IsPackable is false). -->
<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>

<!-- Project references: the shared Spark E2E test project, the core Microsoft.Spark
     project, and the Hyperspace extension project. -->
<ItemGroup>
<ProjectReference Include="..\..\Microsoft.Spark.E2ETest\Microsoft.Spark.E2ETest.csproj" />
<ProjectReference Include="..\..\Microsoft.Spark\Microsoft.Spark.csproj" />
<ProjectReference Include="..\Microsoft.Spark.Extensions.Hyperspace\Microsoft.Spark.Extensions.Hyperspace.csproj" />
</ItemGroup>
</Project>
Loading