Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -346,9 +346,6 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt
switch (insertExistingPartitionsBehavior) {
case APPEND:
checkState(!immutablePartitions);
if (bucketNumber.isPresent()) {
Comment thread
electrum marked this conversation as resolved.
Outdated
throw new PrestoException(HIVE_TABLE_READ_ONLY, "Cannot insert into bucketed unpartitioned Hive table");
}
updateMode = UpdateMode.APPEND;
writeInfo = locationService.getTableWriteInfo(locationHandle, false);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,113 @@ public void testCreateTableNonSupportedVarcharColumn()
assertUpdate("CREATE TABLE test_create_table_non_supported_varchar_column (apple varchar(65536))");
}

@Test
public void testEmptyBucketedTable()
{
// go through all storage formats to make sure the empty buckets are correctly created
Comment thread
electrum marked this conversation as resolved.
Outdated
testWithAllStorageFormats(this::testEmptyBucketedTable);
}

/**
 * Runs the empty bucketed table scenario for one storage format twice:
 * first with {@code createEmpty = true}, then with {@code createEmpty = false}.
 */
private void testEmptyBucketedTable(Session session, HiveStorageFormat storageFormat)
{
    for (boolean createEmpty : new boolean[] {true, false}) {
        testEmptyBucketedTable(session, storageFormat, createEmpty);
    }
}

/**
 * Creates an empty bucketed table without partition columns, checks its reported
 * table properties, inserts two single-row batches through a parallel-writer session
 * and verifies that both rows can be read back.
 *
 * @param session session used when dropping the table and checking that it is gone
 * @param storageFormat value interpolated into the table's {@code format} property
 * @param createEmpty value assigned to the {@code create_empty_bucket_files} catalog
 *        session property of the session that performs the inserts
 */
private void testEmptyBucketedTable(Session session, HiveStorageFormat storageFormat, boolean createEmpty)
{
    String tableName = "test_empty_bucketed_table";

    @Language("SQL") String createTable = "" +
            "CREATE TABLE " + tableName + " " +
            "(bucket_key VARCHAR, col_1 VARCHAR, col_2 VARCHAR) " +
            "WITH (" +
            "format = '" + storageFormat + "', " +
            "bucketed_by = ARRAY[ 'bucket_key' ], " +
            "bucket_count = 11 " +
            ") ";

    try {
        assertUpdate(createTable);

        TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
        assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);

        assertNull(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY));
        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("bucket_key"));
        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);

        // the table was created without data, so it must read back as empty
        assertEquals(computeActual("SELECT * from " + tableName).getRowCount(), 0);

        // make sure that we will get one file per bucket regardless of writer count configured
        Session parallelWriter = Session.builder(getParallelWriteSession())
                .setCatalogSessionProperty(catalog, "create_empty_bucket_files", String.valueOf(createEmpty))
                .build();
        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a0', 'b0', 'c0')", 1);
        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a1', 'b1', 'c1')", 1);

        // the second insert must append to, not replace, the rows of the first one
        assertQuery("SELECT * from " + tableName, "VALUES ('a0', 'b0', 'c0'), ('a1', 'b1', 'c1')");
    }
    finally {
        // Clean up even when an assertion above fails; otherwise the next invocation,
        // which reuses the same table name, would fail on CREATE TABLE.
        assertUpdate(session, "DROP TABLE IF EXISTS " + tableName);
    }
    assertFalse(getQueryRunner().tableExists(session, tableName));
}

/**
 * Entry point for the populated bucketed table scenario; delegates the
 * (session, storage format) overload to {@code testWithAllStorageFormats}.
 */
@Test
public void testBucketedTable()
{
    // every storage format is exercised so that empty bucket creation is checked for each of them
    testWithAllStorageFormats((session, storageFormat) -> testBucketedTable(session, storageFormat));
}

/**
 * Runs the populated bucketed table scenario for one storage format twice:
 * first with {@code createEmpty = true}, then with {@code createEmpty = false}.
 */
private void testBucketedTable(Session session, HiveStorageFormat storageFormat)
{
    for (boolean createEmpty : new boolean[] {true, false}) {
        testBucketedTable(session, storageFormat, createEmpty);
    }
}

/**
 * Creates a bucketed table without partition columns via CREATE TABLE AS with three rows,
 * checks its reported table properties and contents, then inserts two more single-row
 * batches and verifies that all five rows are present.
 *
 * @param session session used when dropping the table and checking that it is gone
 * @param storageFormat value interpolated into the table's {@code format} property
 * @param createEmpty value assigned to the {@code create_empty_bucket_files} catalog
 *        session property of the session that performs the writes
 */
private void testBucketedTable(Session session, HiveStorageFormat storageFormat, boolean createEmpty)
{
    String tableName = "test_bucketed_table";

    @Language("SQL") String createTable = "" +
            "CREATE TABLE " + tableName + " " +
            "WITH (" +
            "format = '" + storageFormat + "', " +
            "bucketed_by = ARRAY[ 'bucket_key' ], " +
            "bucket_count = 11 " +
            ") " +
            "AS " +
            "SELECT * " +
            "FROM (" +
            "VALUES " +
            "  (VARCHAR 'a', VARCHAR 'b', VARCHAR 'c'), " +
            "  ('aa', 'bb', 'cc'), " +
            "  ('aaa', 'bbb', 'ccc')" +
            ") t (bucket_key, col_1, col_2)";

    try {
        // make sure that we will get one file per bucket regardless of writer count configured
        Session parallelWriter = Session.builder(getParallelWriteSession())
                .setCatalogSessionProperty(catalog, "create_empty_bucket_files", String.valueOf(createEmpty))
                .build();
        assertUpdate(parallelWriter, createTable, 3);

        TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
        assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);

        assertNull(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY));
        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("bucket_key"));
        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);

        assertQuery("SELECT * from " + tableName, "VALUES ('a', 'b', 'c'), ('aa', 'bb', 'cc'), ('aaa', 'bbb', 'ccc')");

        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a0', 'b0', 'c0')", 1);
        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a1', 'b1', 'c1')", 1);

        // the inserts must append to the rows written by CREATE TABLE AS, not replace them
        assertQuery("SELECT * from " + tableName, "VALUES ('a', 'b', 'c'), ('aa', 'bb', 'cc'), ('aaa', 'bbb', 'ccc'), ('a0', 'b0', 'c0'), ('a1', 'b1', 'c1')");
    }
    finally {
        // Clean up even when an assertion above fails; otherwise the next invocation,
        // which reuses the same table name, would fail on CREATE TABLE.
        assertUpdate(session, "DROP TABLE IF EXISTS " + tableName);
    }
    assertFalse(getQueryRunner().tableExists(session, tableName));
}

@Test
public void testCreatePartitionedBucketedTableAsFewRows()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,13 +278,17 @@ public void testInsertBucketed()
assertThat(statisticsAfterCreate.getNumRows().getAsLong()).isEqualTo(25);
assertThat(statisticsAfterCreate.getNumFiles().getAsLong()).isEqualTo(50);

// Insert into bucketed unpartitioned table is unsupported
assertThatThrownBy(() -> insertNationData(onPresto(), tableName))
.hasMessageContaining("Cannot insert into bucketed unpartitioned Hive table");
insertNationData(onPresto(), tableName);

BasicStatistics statisticsAfterInsert = getBasicStatisticsForTable(onHive(), tableName);
assertThat(statisticsAfterInsert.getNumRows().getAsLong()).isEqualTo(25);
assertThat(statisticsAfterCreate.getNumFiles().getAsLong()).isEqualTo(50);
assertThat(statisticsAfterInsert.getNumRows().getAsLong()).isEqualTo(50);
assertThat(statisticsAfterInsert.getNumFiles().getAsLong()).isEqualTo(100);

insertNationData(onPresto(), tableName);

BasicStatistics statisticsAfterInsert2 = getBasicStatisticsForTable(onHive(), tableName);
assertThat(statisticsAfterInsert2.getNumRows().getAsLong()).isEqualTo(75);
assertThat(statisticsAfterInsert2.getNumFiles().getAsLong()).isEqualTo(150);
}
finally {
onPresto().executeQuery(format("DROP TABLE IF EXISTS %s", tableName));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,12 @@ public void testInsertIntoBucketedTables()
{
String tableName = mutableTablesState().get(BUCKETED_NATION).getNameInDatabase();

assertThat(() -> query(format("INSERT INTO %s SELECT * FROM %s", tableName, NATION.getName())))
.failsWithMessage("Cannot insert into bucketed unpartitioned Hive table");
query(format("INSERT INTO %s SELECT * FROM %s", tableName, NATION.getName()));
Comment thread
electrum marked this conversation as resolved.
Outdated
// make sure that insert will not overwrite existing data
query(format("INSERT INTO %s SELECT * FROM %s", tableName, NATION.getName()));

assertThat(query(format("SELECT count(*) FROM %s", tableName))).containsExactly(row(50));
assertThat(query(format("SELECT count(*) FROM %s WHERE n_regionkey=0", tableName))).containsExactly(row(10));
}

@Test
Expand Down