Skip to content

Commit cb4a63a

Browse files
authored
sycl: fixed semantics of block offset calculation (#14814)
1 parent 86f5623 commit cb4a63a

File tree

2 files changed

+10
-15
lines changed

2 files changed

+10
-15
lines changed

ggml/src/ggml-sycl/quants.hpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ template <> struct block_q_t<GGML_TYPE_Q4_0> {
4848
};
4949

5050
static constexpr std::pair<int, int> get_block_offset(const int block_index, const int /* nblocks */) {
51-
return { block_index * (traits::qk / traits::qr), 0 };
51+
return { block_index * (QK4_0 / QR4_0), 0 };
5252
}
5353

5454
static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
55-
return { (ncols / traits::qr * nrows) + block_index * sizeof(ggml_half), 0 };
55+
return { (ncols / QR4_0 * nrows) + block_index * sizeof(ggml_half), 0 };
5656
}
5757

5858
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
@@ -71,14 +71,12 @@ template <> struct block_q_t<GGML_TYPE_Q4_K> {
7171
}
7272

7373
static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
74-
auto nblocks = (nrows * (ncols / traits::qk));
75-
return { nblocks * (QK_K / 2),
74+
auto nblocks = (nrows * (ncols / QK_K));
75+
return { nblocks * (QK_K / 2) + (block_index * K_SCALE_SIZE),
7676
(nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2)) };
7777
}
7878

7979
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
80-
81-
constexpr size_t get_total_qs_bytes(int nblocks) { return nblocks * QK_K / 2; }
8280
};
8381

8482
template <> struct block_q_t<GGML_TYPE_Q6_K> {
@@ -90,22 +88,23 @@ template <> struct block_q_t<GGML_TYPE_Q6_K> {
9088
};
9189

9290
static constexpr std::pair<int, int> get_block_offset(const int block_index, const int n_blocks) {
93-
auto low_bits_index = block_index * (traits::qk / traits::qr);
91+
auto low_bits_index = block_index * (QK_K / QR6_K);
9492
// the index of high bits it's after all low bits
9593
auto high_bits_index = n_blocks * (QK_K / 2) + (block_index * (QK_K / 4));
9694
return { low_bits_index, high_bits_index };
9795
}
9896

9997
static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
100-
auto nblocks = (nrows * (ncols / traits::qk));
98+
auto nblocks = (nrows * (ncols / QK_K));
10199
auto total_qs_bytes = nblocks * (QK_K / 2) + nblocks * (QK_K / 4);
102100
auto block_scales = total_qs_bytes + block_index * (QK_K / 16);
103-
auto sb_scale = total_qs_bytes + nblocks * (QK_K / 16);
101+
auto sb_scale = total_qs_bytes + nblocks * (QK_K / 16) + block_index * sizeof(ggml_half);
104102
return { block_scales, sb_scale };
105103
}
106104

107105
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
108106
};
107+
109108
} // namespace ggml_sycl_reordered
110109

111110
#endif // GGML_SYCL_QUANTS_HPP

ggml/src/ggml-sycl/vecdotq.hpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -350,11 +350,9 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K> {
350350
__dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
351351
const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr,
352352
const sycl::half2 * q8_1_ds, const int & iqs) {
353-
const int ib = ibx_offset.first / (QK_K / 2);
354-
355353
const uint8_t * base = static_cast<const uint8_t *>(vbq);
356354
const uint8_t * qs = base + ibx_offset.first;
357-
const uint8_t * scs = base + d_offset.first + ib * K_SCALE_SIZE;
355+
const uint8_t * scs = base + d_offset.first;
358356
const ggml_half2 * dms = reinterpret_cast<const ggml_half2 *>(base + d_offset.second);
359357

360358
const int bq8_offset = QR4_K * ((iqs / 2) / (QI8_1 / 2));
@@ -427,13 +425,11 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K> {
427425
__dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
428426
const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr, const sycl::half2 * q8_1_ds,
429427
const int iqs) {
430-
const int ib = ibx_offset.first / (QK_K / 2);
431-
432428
const uint8_t * base = static_cast<const uint8_t *>(vbq);
433429
const uint8_t * ql = base + ibx_offset.first;
434430
const uint8_t * qh = base + ibx_offset.second;
435431
const int8_t * scales = reinterpret_cast<const int8_t *>(base + d_offset.first);
436-
const ggml_half * d = (const ggml_half *) (base + d_offset.second) + ib;
432+
const ggml_half * d = (const ggml_half *) (base + d_offset.second);
437433

438434
const int bq8_offset = 2 * QR6_K * (iqs / (QI6_K / 2)) + (iqs % (QI6_K / 2)) / (QI6_K / 4);
439435
const int scale_offset = (QI6_K / 4) * (iqs / (QI6_K / 2)) + (iqs % (QI6_K / 2)) / (QI6_K / 8);

0 commit comments

Comments
 (0)