Skip to content

Commit 4c20538

Browse files
committed
sycl: GGML_SYCL_DISABLE_OPT on by default for all Intel Devices
1 parent 093e3f1 commit 4c20538

File tree

3 files changed

+11
-24
lines changed

3 files changed

+11
-24
lines changed

docs/backend/SYCL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -757,7 +757,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
757757
| Name | Value | Function |
758758
|-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------|
759759
| GGML_SYCL_DEBUG | 0 (default) or 1 | Enable debug logging through the GGML_SYCL_DEBUG macro |
760-
| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase |
760+
| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimization features for Intel GPUs. (Setting this to 1 is recommended for Intel devices older than Gen 10.) |
761761
| GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. |
762762
| GGML_SYCL_DISABLE_DNN | 0 (default) or 1 | Disable running computations through oneDNN and always use oneMKL. |
763763
| ZES_ENABLE_SYSMAN | 0 (default) or 1 | Enable querying the GPU's free memory via sycl::aspect::ext_intel_free_memory.<br>Recommended when --split-mode = layer |

ggml/src/ggml-sycl/common.hpp

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,6 @@ struct sycl_device_info {
201201
// size_t smpb; // max. shared memory per block
202202
bool vmm; // virtual memory support
203203
size_t total_vram;
204-
sycl_hw_info hw_info;
205204
optimize_feature opt_feature;
206205
};
207206

@@ -288,26 +287,16 @@ struct ggml_tensor_extra_gpu {
288287

289288
void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});
290289

291-
inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) {
290+
inline optimize_feature check_gpu_optimize_feature(const std::string& name) {
292291
optimize_feature opt;
293292

294-
opt.reorder =
295-
(arch == syclex::architecture::intel_gpu_dg1 ||
296-
arch == syclex::architecture::intel_gpu_acm_g10 ||
297-
arch == syclex::architecture::intel_gpu_acm_g11 ||
298-
arch == syclex::architecture::intel_gpu_acm_g12 ||
299-
arch == syclex::architecture::intel_gpu_pvc ||
300-
arch == syclex::architecture::intel_gpu_pvc_vg ||
301-
arch == syclex::architecture::intel_gpu_mtl_u ||
302-
arch == syclex::architecture::intel_gpu_mtl_s ||
303-
arch == syclex::architecture::intel_gpu_mtl_h ||
304-
arch == syclex::architecture::intel_gpu_arl_u ||
305-
arch == syclex::architecture::intel_gpu_arl_s ||
306-
arch == syclex::architecture::intel_gpu_arl_h ||
307-
arch == syclex::architecture::intel_gpu_bmg_g21 ||
308-
arch == syclex::architecture::intel_gpu_lnl_m
309-
);
310-
293+
// enable reorder optimization only on Intel devices
294+
if (name.find("Intel") != std::string::npos) {
295+
opt.reorder = true;
296+
}
297+
else {
298+
opt.reorder = false;
299+
}
311300
return opt;
312301
}
313302

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,7 @@ static ggml_sycl_device_info ggml_sycl_init() {
8383

8484
info.devices[i].cc =
8585
100 * prop.get_major_version() + 10 * prop.get_minor_version();
86-
info.devices[i].hw_info = get_device_hw_info(&device);
87-
info.devices[i].opt_feature = check_gpu_optimize_feature(info.devices[i].hw_info.arch);
88-
86+
info.devices[i].opt_feature = check_gpu_optimize_feature(prop.get_name());
8987
info.max_work_group_sizes[i] = prop.get_max_work_group_size();
9088
}
9189

@@ -195,7 +193,7 @@ static void ggml_check_sycl() try {
195193

196194
if (!initialized) {
197195
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
198-
g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 1);
196+
g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
199197
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
200198
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
201199
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);

0 commit comments

Comments
 (0)