Skip to content

Commit a672803

Browse files
committed
Conv2d enabled for Intel with disabled collectives, disabled for Apple
1 parent aa47258 commit a672803

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3039,7 +3039,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
30393039
uint32_t conv2d_BS_K = 128;
30403040
uint32_t conv2d_BS_CRS = 16;
30413041
uint32_t use_collectives = 0; // Enables subgroup ops for preventing the re-calculation of indices.
3042-
if(device->subgroup_shuffle){
3042+
if(device->subgroup_shuffle && device->vendor_id != VK_VENDOR_ID_INTEL){ // Do not enable collectives on Intel, see PR 14316
30433043
use_collectives = 1;
30443044
conv2d_BS_CRS = std::min(device->subgroup_size, conv2d_BS_CRS); // CRS block size should be capped at subgroup size for correctness when shuffle is used.
30453045
}
@@ -3048,7 +3048,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
30483048
uint32_t conv2d_shmem_req = (conv2d_BS_K*(conv2d_BS_CRS+1) + conv2d_BS_CRS*(conv2d_BS_NPQ+1))*sizeof(float);
30493049
if(device->properties.limits.maxComputeSharedMemorySize < conv2d_shmem_req){
30503050
conv2d_BS_CRS = 8;
3051-
if(device->subgroup_shuffle){
3051+
if(use_collectives){
30523052
conv2d_BS_CRS = std::min(device->subgroup_size, conv2d_BS_CRS);
30533053
}
30543054
}
@@ -10816,19 +10816,19 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
1081610816
return true;
1081710817
case GGML_OP_CONV_TRANSPOSE_1D:
1081810818
return op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32;
10819-
case GGML_OP_CONV_2D:
10819+
case GGML_OP_CONV_2D:
1082010820
{
10821-
// Op is disabled for Intel
10821+
// Op is disabled for Apple because it segfaults at pipeline create time on MoltenVK
1082210822
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
10823-
const vk_device& device = ggml_vk_get_device(ctx->device);
10824-
bool is_Intel = ggml_vk_get_device(ctx->device)->vendor_id == VK_VENDOR_ID_INTEL;
10823+
const vk_device& device = ggml_vk_get_device(ctx->device);
10824+
bool is_Apple = ggml_vk_get_device(ctx->device)->vendor_id == VK_VENDOR_ID_APPLE;
1082510825
// Channel-contiguous format is not supported yet.
1082610826
return (op->src[0]->type == GGML_TYPE_F32 &&
1082710827
op->src[1]->type == GGML_TYPE_F32 &&
1082810828
op->type == GGML_TYPE_F32 &&
1082910829
ggml_is_contiguous(op->src[0]) &&
1083010830
ggml_is_contiguous(op->src[1]) &&
10831-
ggml_is_contiguous(op)) && !is_Intel;
10831+
ggml_is_contiguous(op)) && !is_Apple;
1083210832
}
1083310833
default:
1083410834
return false;

0 commit comments

Comments
 (0)