Skip to content

Commit cc20079

Browse files
committed
FWHT: Introduce fwht_8
1 parent 119a540 commit cc20079

File tree

1 file changed

+118
-0
lines changed

1 file changed

+118
-0
lines changed

src/engine/fwht.rs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ pub(crate) fn fwht(data: &mut [GfElement; GF_ORDER], m_truncated: usize) {
1010
// Note to self: fwht_8 is slightly faster on x86 (AMD Ryzen 5 3600),
1111
// but slower on ARM (Apple silicon M1).
1212
// fwht_16 is always slower. See branch: AndersTrier/FWHT_8_and_16
13+
14+
if m_truncated >= GF_ORDER {
15+
return fwht_8_full(data);
16+
}
17+
1318
let mut dist = 1;
1419
let mut dist4 = 4;
1520
while dist4 <= GF_ORDER {
@@ -45,6 +50,7 @@ fn fwht_4(data: &mut [GfElement; GF_ORDER], offset: u16, dist: u16) {
4550

4651
let (s0, d0) = fwht_2(data[i0], data[i1]);
4752
let (s1, d1) = fwht_2(data[i2], data[i3]);
53+
4854
let (s2, d2) = fwht_2(s0, s1);
4955
let (s3, d3) = fwht_2(d0, d1);
5056

@@ -54,6 +60,69 @@ fn fwht_4(data: &mut [GfElement; GF_ORDER], offset: u16, dist: u16) {
5460
data[i3] = d3;
5561
}
5662

63+
#[inline(always)]
64+
fn fwht_8_full(data: &mut [GfElement; GF_ORDER]) {
65+
fwht_8_truncated(data, GF_ORDER)
66+
}
67+
68+
#[inline(always)]
69+
fn fwht_8_truncated(data: &mut [GfElement; GF_ORDER], m_truncated: usize) {
70+
let mut dist = 1;
71+
let mut dist8 = 8;
72+
while dist8 <= GF_ORDER {
73+
for r in (0..m_truncated).step_by(dist8) {
74+
for offset in r..r + dist {
75+
fwht_8(data, offset as u16, dist as u16);
76+
}
77+
}
78+
79+
dist = dist8;
80+
dist8 <<= 3;
81+
}
82+
83+
for i in 0..32768 {
84+
let (s0, d0) = fwht_2(data[i], data[i + dist]);
85+
data[i] = s0;
86+
data[i + dist] = d0;
87+
}
88+
}
89+
90+
#[inline(always)]
91+
fn fwht_8(data: &mut [GfElement; GF_ORDER], offset: u16, dist: u16) {
92+
let t0 = usize::from(offset);
93+
let t1 = usize::from(offset + dist);
94+
let t2 = usize::from(offset + dist * 2);
95+
let t3 = usize::from(offset + dist * 3);
96+
let t4 = usize::from(offset + dist * 4);
97+
let t5 = usize::from(offset + dist * 5);
98+
let t6 = usize::from(offset + dist * 6);
99+
let t7 = usize::from(offset + dist * 7);
100+
101+
let (s0, d0) = fwht_2(data[t0], data[t1]);
102+
let (s1, d1) = fwht_2(data[t2], data[t3]);
103+
let (s2, d2) = fwht_2(data[t4], data[t5]);
104+
let (s3, d3) = fwht_2(data[t6], data[t7]);
105+
106+
let (s4, d4) = fwht_2(s0, s1);
107+
let (s5, d5) = fwht_2(s2, s3);
108+
let (s6, d6) = fwht_2(d0, d1);
109+
let (s7, d7) = fwht_2(d2, d3);
110+
111+
let (s8, d8) = fwht_2(s4, s5);
112+
let (s9, d9) = fwht_2(s6, s7);
113+
let (s10, d10) = fwht_2(d4, d5);
114+
let (s11, d11) = fwht_2(d6, d7);
115+
116+
data[t0] = s8;
117+
data[t1] = s9;
118+
data[t2] = s10;
119+
data[t3] = s11;
120+
data[t4] = d8;
121+
data[t5] = d9;
122+
data[t6] = d10;
123+
data[t7] = d11;
124+
}
125+
57126
// ======================================================================
58127
// FWHT - TESTS
59128

@@ -130,4 +199,53 @@ mod tests {
130199
assert_eq!(data1, data2);
131200
}
132201
}
202+
203+
/// `fwht_8_full` must produce the same output as `fwht_naive` on a
/// fully populated array of seeded pseudo-random values.
#[test]
fn test_8_full() {
    // Fixed seed keeps the input reproducible between runs.
    let mut rng = ChaCha8Rng::from_seed([0; 32]);

    let mut data1 = [(); GF_ORDER].map(|_| rng.gen());
    let mut data2 = data1;

    fwht_8_full(&mut data1);
    fwht_naive(&mut data2);

    assert_eq!(data1, data2);
}
215+
216+
/// `fwht_8_truncated` must produce the same output as `fwht_naive` when only
/// the first `nonzero_count` elements of the input are non-zero.
#[test]
fn test_8_truncated() {
    // Fixed seed keeps the input reproducible between runs.
    let mut rng = ChaCha8Rng::from_seed([0; 32]);
    let random: Vec<GfElement> = (0..GF_ORDER).map(|_| rng.gen()).collect();

    for nonzero_count in [
        0,
        1,
        2,
        3,
        4,
        // Radix-8 group sizes (8, 512, 4096) and their neighbours.
        7,
        8,
        9,
        64,
        127,
        511,
        512,
        513,
        4095,
        4096,
        4097,
        16384 - 1,
        16384 + 1,
        GF_ORDER / 2 - 1,
        GF_ORDER / 2,
        GF_ORDER / 2 + 1,
        GF_ORDER - 4,
        GF_ORDER - 3,
        GF_ORDER - 2,
        GF_ORDER - 1,
        GF_ORDER,
    ] {
        let mut data1 = [0; GF_ORDER];

        data1[..nonzero_count].copy_from_slice(&random[..nonzero_count]);
        let mut data2 = data1;

        fwht_8_truncated(&mut data1, nonzero_count);
        fwht_naive(&mut data2);

        // Name the failing prefix length; the array dump alone does not show it.
        assert_eq!(
            data1, data2,
            "mismatch for nonzero_count = {}",
            nonzero_count
        );
    }
}
133251
}

0 commit comments

Comments
 (0)