trinodb · kokosing · Aug 19, 2019 · Jul 15, 2019
diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveWriterFactory.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveWriterFactory.java
@@ -346,9 +346,6 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt
                     switch (insertExistingPartitionsBehavior) {
                         case APPEND:
                             checkState(!immutablePartitions);
-                            if (bucketNumber.isPresent()) {
-                                throw new PrestoException(HIVE_TABLE_READ_ONLY, "Cannot insert into bucketed unpartitioned Hive table");
-                            }
                             updateMode = UpdateMode.APPEND;
                             writeInfo = locationService.getTableWriteInfo(locationHandle, false);
                             break;

diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveIntegrationSmokeTest.java
@@ -746,6 +746,113 @@ public void testCreateTableNonSupportedVarcharColumn()
         assertUpdate("CREATE TABLE test_create_table_non_supported_varchar_column (apple varchar(65536))");
     }
 
+    @Test
+    public void testEmptyBucketedTable()
+    {
+        // go through all storage formats (via testWithAllStorageFormats) to make sure empty buckets are created correctly for each one
+        testWithAllStorageFormats(this::testEmptyBucketedTable);
+    }
+
+    private void testEmptyBucketedTable(Session session, HiveStorageFormat storageFormat)
+    {
+        testEmptyBucketedTable(session, storageFormat, true); // createEmpty = true: create_empty_bucket_files is set to "true"
+        testEmptyBucketedTable(session, storageFormat, false); // createEmpty = false: same scenario with the property set to "false"
+    }
+
+    private void testEmptyBucketedTable(Session session, HiveStorageFormat storageFormat, boolean createEmpty)
+    {
+        String tableName = "test_empty_bucketed_table";
+
+        @Language("SQL") String createTable = "" +
+                "CREATE TABLE " + tableName + " " +
+                "(bucket_key VARCHAR, col_1 VARCHAR, col2 VARCHAR) " +
+                "WITH (" +
+                "format = '" + storageFormat + "', " +
+                "bucketed_by = ARRAY[ 'bucket_key' ], " +
+                "bucket_count = 11 " +
+                ") ";
+
+        assertUpdate(createTable); // NOTE(review): no session is passed here, unlike the DROP below - confirm this is intended
+
+        TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
+        assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
+
+        assertNull(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY)); // bucketed, but not partitioned
+        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("bucket_key"));
+        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);
+
+        assertEquals(computeActual("SELECT * from " + tableName).getRowCount(), 0); // plain CREATE TABLE, so no rows yet
+
+        // insert through the parallel-write session with create_empty_bucket_files set from createEmpty; the intent is that bucket files do not depend on the configured writer count
+        Session parallelWriter = Session.builder(getParallelWriteSession())
+                .setCatalogSessionProperty(catalog, "create_empty_bucket_files", String.valueOf(createEmpty))
+                .build();
+        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a0', 'b0', 'c0')", 1);
+        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a1', 'b1', 'c1')", 1); // second insert must not replace the first row
+
+        assertQuery("SELECT * from " + tableName, "VALUES ('a0', 'b0', 'c0'), ('a1', 'b1', 'c1')");
+
+        assertUpdate(session, "DROP TABLE " + tableName); // clean up so the next invocation can reuse the table name
+        assertFalse(getQueryRunner().tableExists(session, tableName));
+    }
+
+    @Test
+    public void testBucketedTable()
+    {
+        // go through all storage formats; each run creates a bucketed table with CTAS (3 rows, 11 buckets) and then inserts into it
+        testWithAllStorageFormats(this::testBucketedTable);
+    }
+
+    private void testBucketedTable(Session session, HiveStorageFormat storageFormat)
+    {
+        testBucketedTable(session, storageFormat, true); // createEmpty = true: create_empty_bucket_files is set to "true"
+        testBucketedTable(session, storageFormat, false); // createEmpty = false: same scenario with the property set to "false"
+    }
+
+    private void testBucketedTable(Session session, HiveStorageFormat storageFormat, boolean createEmpty)
+    {
+        String tableName = "test_bucketed_table";
+
+        @Language("SQL") String createTable = "" +
+                "CREATE TABLE " + tableName + " " +
+                "WITH (" +
+                "format = '" + storageFormat + "', " +
+                "bucketed_by = ARRAY[ 'bucket_key' ], " +
+                "bucket_count = 11 " +
+                ") " +
+                "AS " +
+                "SELECT * " +
+                "FROM (" +
+                "VALUES " +
+                "  (VARCHAR 'a', VARCHAR 'b', VARCHAR 'c'), " +
+                "  ('aa', 'bb', 'cc'), " +
+                "  ('aaa', 'bbb', 'ccc')" +
+                ") t (bucket_key, col_1, col_2)";
+
+        // write through the parallel-write session with create_empty_bucket_files set from createEmpty; the intent is that bucket files do not depend on the configured writer count
+        Session parallelWriter = Session.builder(getParallelWriteSession())
+                .setCatalogSessionProperty(catalog, "create_empty_bucket_files", String.valueOf(createEmpty))
+                .build();
+        assertUpdate(parallelWriter, createTable, 3); // CTAS writes the three VALUES rows
+
+        TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);
+        assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
+
+        assertNull(tableMetadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY)); // bucketed, but not partitioned
+        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("bucket_key"));
+        assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);
+
+        assertQuery("SELECT * from " + tableName, "VALUES ('a', 'b', 'c'), ('aa', 'bb', 'cc'), ('aaa', 'bbb', 'ccc')");
+
+        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a0', 'b0', 'c0')", 1); // insert into the already-populated bucketed table
+        assertUpdate(parallelWriter, "INSERT INTO " + tableName + " VALUES ('a1', 'b1', 'c1')", 1);
+
+        assertQuery("SELECT * from " + tableName, "VALUES ('a', 'b', 'c'), ('aa', 'bb', 'cc'), ('aaa', 'bbb', 'ccc'), ('a0', 'b0', 'c0'), ('a1', 'b1', 'c1')"); // CTAS rows plus both inserted rows
+
+        assertUpdate(session, "DROP TABLE " + tableName); // clean up so the next invocation can reuse the table name
+        assertFalse(getQueryRunner().tableExists(session, tableName));
+    }
+
     @Test
     public void testCreatePartitionedBucketedTableAsFewRows()
     {

diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBasicTableStatistics.java
@@ -278,13 +278,17 @@ public void testInsertBucketed()
             assertThat(statisticsAfterCreate.getNumRows().getAsLong()).isEqualTo(25);
             assertThat(statisticsAfterCreate.getNumFiles().getAsLong()).isEqualTo(50);
 
-            // Insert into bucketed unpartitioned table is unsupported
-            assertThatThrownBy(() -> insertNationData(onPresto(), tableName))
-                    .hasMessageContaining("Cannot insert into bucketed unpartitioned Hive table");
+            insertNationData(onPresto(), tableName);
 
             BasicStatistics statisticsAfterInsert = getBasicStatisticsForTable(onHive(), tableName);
-            assertThat(statisticsAfterInsert.getNumRows().getAsLong()).isEqualTo(25);
-            assertThat(statisticsAfterCreate.getNumFiles().getAsLong()).isEqualTo(50);
+            assertThat(statisticsAfterInsert.getNumRows().getAsLong()).isEqualTo(50);
+            assertThat(statisticsAfterInsert.getNumFiles().getAsLong()).isEqualTo(100);
+
+            insertNationData(onPresto(), tableName);
+
+            BasicStatistics statisticsAfterInsert2 = getBasicStatisticsForTable(onHive(), tableName);
+            assertThat(statisticsAfterInsert2.getNumRows().getAsLong()).isEqualTo(75);
+            assertThat(statisticsAfterInsert2.getNumFiles().getAsLong()).isEqualTo(150);
         }
         finally {
             onPresto().executeQuery(format("DROP TABLE IF EXISTS %s", tableName));

diff --git a/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBucketedTables.java b/presto-product-tests/src/main/java/io/prestosql/tests/hive/TestHiveBucketedTables.java
@@ -242,8 +242,12 @@ public void testInsertIntoBucketedTables()
     {
         String tableName = mutableTablesState().get(BUCKETED_NATION).getNameInDatabase();
 
-        assertThat(() -> query(format("INSERT INTO %s SELECT * FROM %s", tableName, NATION.getName())))
-                .failsWithMessage("Cannot insert into bucketed unpartitioned Hive table");
+        query(format("INSERT INTO %s SELECT * FROM %s", tableName, NATION.getName()));
+        // make sure that insert will not overwrite existing data
+        query(format("INSERT INTO %s SELECT * FROM %s", tableName, NATION.getName()));
+
+        assertThat(query(format("SELECT count(*) FROM %s", tableName))).containsExactly(row(50));
+        assertThat(query(format("SELECT count(*) FROM %s WHERE n_regionkey=0", tableName))).containsExactly(row(10));
     }
 
     @Test