Skip to content

Commit 99d63b7

Browse files
committed
[codegen] Test and document codegen for FromZeros
Makes progress towards #3079. gherrit-pr-id: Gea71a24b6b02a2d552b4af3e0980e71a50ab8f52
1 parent 8679ffd commit 99d63b7

32 files changed

+1297
-0
lines changed

benches/extend_vec_zeroed.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
use zerocopy::*;
2+
3+
#[path = "formats/coco_static_size.rs"]
4+
mod format;
5+
6+
/// Codegen benchmark entry point for `FromZeros::extend_vec_zeroed`.
///
/// Forwards `v` and `additional` unchanged to `FromZeros::extend_vec_zeroed`
/// (with `format::LocoPacket` as the element type) and converts the returned
/// `Result` into an `Option` with `.ok()`, so callers only see success
/// (`Some(())`) or failure (`None`); the error value itself is discarded.
///
/// `#[unsafe(no_mangle)]` keeps the symbol name as written so the function
/// can be located by name in the generated assembly listing.
#[unsafe(no_mangle)]
7+
fn bench_extend_vec_zeroed(v: &mut Vec<format::LocoPacket>, additional: usize) -> Option<()> {
8+
FromZeros::extend_vec_zeroed(v, additional).ok()
9+
}

benches/extend_vec_zeroed.x86-64

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Generated x86-64 code (Intel syntax) for `bench_extend_vec_zeroed(v, additional)`.
#
# In:   rdi = v, rsi = additional (System V AMD64 argument registers).
# Out:  al  = 1 on the success path, 0 on either failure path.
#
# The three words at [v + 0], [v + 8] and [v + 16] are used below as the
# vector's capacity, buffer pointer and length respectively.
# Every element index is scaled by 6 (3 * 2), i.e. elements are 6 bytes wide.
#
# Register roles: rbx = v, r12 = old length, r15 = new length,
#                 r13 = `additional` saved across the grow call,
#                 r14 = new capacity.
bench_extend_vec_zeroed:
2+
# Prologue: five pushes plus 32 bytes of locals keep rsp 16-byte aligned at the calls.
push r15
3+
push r14
4+
push r13
5+
push r12
6+
push rbx
7+
sub rsp, 32
8+
mov rbx, rdi
9+
# rax = capacity, r12 = length.
mov rax, qword ptr [rdi]
10+
mov r12, qword ptr [rdi + 16]
11+
# rcx = capacity - length (spare room); skip growing when `additional` fits.
mov rcx, rax
12+
sub rcx, r12
13+
cmp rsi, rcx
14+
jbe .LBB6_3
15+
# r15 = length + additional; a carry means the sum overflowed, so fall
# through to the failure return, otherwise go grow the buffer.
mov r15, r12
16+
add r15, rsi
17+
jae .LBB6_6
18+
# Failure return: al = 0.
.LBB6_2:
19+
xor eax, eax
20+
jmp .LBB6_5
21+
# No-grow path: rax = current buffer pointer, r15 = new length.
.LBB6_3:
22+
mov rax, qword ptr [rbx + 8]
23+
lea r15, [r12 + rsi]
24+
# Zero-fill the new tail (shared by the no-grow and grow paths).
# Expects rax = buffer pointer, r12 = old length, rsi = additional.
.LBB6_4:
25+
# rdi = buffer + 6 * old length (first byte past the existing elements).
lea rcx, [r12 + 2*r12]
26+
lea rdi, [rax + 2*rcx]
27+
# rdx = 6 * additional (byte count to clear).
add rsi, rsi
28+
lea rdx, [rsi + 2*rsi]
29+
# esi = 0: fill byte for memset.
xor esi, esi
30+
call qword ptr [rip + memset@GOTPCREL]
31+
# Commit the new length and report success.
mov qword ptr [rbx + 16], r15
32+
mov al, 1
33+
# Common epilogue.
.LBB6_5:
34+
add rsp, 32
35+
pop rbx
36+
pop r12
37+
pop r13
38+
pop r14
39+
pop r15
40+
ret
41+
# Grow path: reached when `additional` exceeds the spare room and the new
# length did not overflow.
.LBB6_6:
42+
# Keep `additional` in a callee-saved register; rsi is reused as an argument.
mov r13, rsi
43+
# rcx = max(2 * capacity, required length).
lea rcx, [rax + rax]
44+
cmp r15, rcx
45+
cmova rcx, r15
46+
# r14 = max(rcx, 4): the new capacity is never below 4 elements.
cmp rcx, 5
47+
mov r14d, 4
48+
cmovae r14, rcx
49+
# Call finish_grow with rdi = result slot at [rsp + 8], rsi = current
# capacity, rdx = current buffer pointer, rcx = new capacity.
# NOTE(review): parameter meanings are inferred from the values passed here;
# confirm against the RawVecInner::finish_grow signature.
mov rdx, qword ptr [rbx + 8]
50+
lea rdi, [rsp + 8]
51+
mov rsi, rax
52+
mov rcx, r14
53+
call <alloc::raw_vec::RawVecInner>::finish_grow
54+
# A tag of 1 in the result slot takes the failure return.
cmp dword ptr [rsp + 8], 1
55+
je .LBB6_2
56+
# Otherwise store the pointer from the result slot and the new capacity,
# restore `additional`, and join the zero-fill path with rax = new buffer.
mov rax, qword ptr [rsp + 16]
57+
mov qword ptr [rbx + 8], rax
58+
mov qword ptr [rbx], r14
59+
mov rsi, r13
60+
jmp .LBB6_4
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
Iterations: 100
2+
Instructions: 5400
3+
Total Cycles: 6595
4+
Total uOps: 6800
5+
6+
Dispatch Width: 4
7+
uOps Per Cycle: 1.03
8+
IPC: 0.82
9+
Block RThroughput: 17.0
10+
11+
12+
Instruction Info:
13+
[1]: #uOps
14+
[2]: Latency
15+
[3]: RThroughput
16+
[4]: MayLoad
17+
[5]: MayStore
18+
[6]: HasSideEffects (U)
19+
20+
[1] [2] [3] [4] [5] [6] Instructions:
21+
2 5 1.00 * push r15
22+
2 5 1.00 * push r14
23+
2 5 1.00 * push r13
24+
2 5 1.00 * push r12
25+
2 5 1.00 * push rbx
26+
1 1 0.33 sub rsp, 32
27+
1 1 0.33 mov rbx, rdi
28+
1 5 0.50 * mov rax, qword ptr [rdi]
29+
1 5 0.50 * mov r12, qword ptr [rdi + 16]
30+
1 1 0.33 mov rcx, rax
31+
1 1 0.33 sub rcx, r12
32+
1 1 0.33 cmp rsi, rcx
33+
1 1 1.00 jbe .LBB6_3
34+
1 1 0.33 mov r15, r12
35+
1 1 0.33 add r15, rsi
36+
1 1 1.00 jae .LBB6_6
37+
1 0 0.25 xor eax, eax
38+
1 1 1.00 jmp .LBB6_5
39+
1 5 0.50 * mov rax, qword ptr [rbx + 8]
40+
1 1 0.50 lea r15, [r12 + rsi]
41+
1 1 0.50 lea rcx, [r12 + 2*r12]
42+
1 1 0.50 lea rdi, [rax + 2*rcx]
43+
1 1 0.33 add rsi, rsi
44+
1 1 0.50 lea rdx, [rsi + 2*rsi]
45+
1 0 0.25 xor esi, esi
46+
4 7 1.00 * call qword ptr [rip + memset@GOTPCREL]
47+
1 1 1.00 * mov qword ptr [rbx + 16], r15
48+
1 1 0.33 mov al, 1
49+
1 1 0.33 add rsp, 32
50+
1 6 0.50 * pop rbx
51+
1 6 0.50 * pop r12
52+
1 6 0.50 * pop r13
53+
1 6 0.50 * pop r14
54+
1 6 0.50 * pop r15
55+
1 1 1.00 U ret
56+
1 1 0.33 mov r13, rsi
57+
1 1 0.50 lea rcx, [rax + rax]
58+
1 1 0.33 cmp r15, rcx
59+
3 3 1.00 cmova rcx, r15
60+
1 1 0.33 cmp rcx, 5
61+
1 1 0.33 mov r14d, 4
62+
2 2 0.67 cmovae r14, rcx
63+
1 5 0.50 * mov rdx, qword ptr [rbx + 8]
64+
1 1 0.50 lea rdi, [rsp + 8]
65+
1 1 0.33 mov rsi, rax
66+
1 1 0.33 mov rcx, r14
67+
3 5 1.00 call <alloc::raw_vec::RawVecInner>::finish_grow
68+
2 6 0.50 * cmp dword ptr [rsp + 8], 1
69+
1 1 1.00 je .LBB6_2
70+
1 5 0.50 * mov rax, qword ptr [rsp + 16]
71+
1 1 1.00 * mov qword ptr [rbx + 8], rax
72+
1 1 1.00 * mov qword ptr [rbx], r14
73+
1 1 0.33 mov rsi, r13
74+
1 1 1.00 jmp .LBB6_4
75+
76+
77+
Resources:
78+
[0] - SBDivider
79+
[1] - SBFPDivider
80+
[2] - SBPort0
81+
[3] - SBPort1
82+
[4] - SBPort4
83+
[5] - SBPort5
84+
[6.0] - SBPort23
85+
[6.1] - SBPort23
86+
87+
88+
Resource pressure per iteration:
89+
[0] [1] [2] [3] [4] [5] [6.0] [6.1]
90+
- - 12.00 12.00 10.00 13.00 11.00 11.00
91+
92+
Resource pressure by instruction:
93+
[0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
94+
- - - - 1.00 - 0.49 0.51 push r15
95+
- - - - 1.00 - 0.51 0.49 push r14
96+
- - - - 1.00 - 0.50 0.50 push r13
97+
- - - - 1.00 - 0.50 0.50 push r12
98+
- - - - 1.00 - 0.50 0.50 push rbx
99+
- - 0.01 0.99 - - - - sub rsp, 32
100+
- - - - - 1.00 - - mov rbx, rdi
101+
- - - - - - 0.50 0.50 mov rax, qword ptr [rdi]
102+
- - - - - - 0.50 0.50 mov r12, qword ptr [rdi + 16]
103+
- - - 1.00 - - - - mov rcx, rax
104+
- - - 0.99 - 0.01 - - sub rcx, r12
105+
- - - - - 1.00 - - cmp rsi, rcx
106+
- - - - - 1.00 - - jbe .LBB6_3
107+
- - 0.01 0.98 - 0.01 - - mov r15, r12
108+
- - 0.99 0.01 - - - - add r15, rsi
109+
- - - - - 1.00 - - jae .LBB6_6
110+
- - - - - - - - xor eax, eax
111+
- - - - - 1.00 - - jmp .LBB6_5
112+
- - - - - - 0.50 0.50 mov rax, qword ptr [rbx + 8]
113+
- - 1.00 - - - - - lea r15, [r12 + rsi]
114+
- - 0.98 0.02 - - - - lea rcx, [r12 + 2*r12]
115+
- - 0.99 0.01 - - - - lea rdi, [rax + 2*rcx]
116+
- - - 1.00 - - - - add rsi, rsi
117+
- - 0.99 0.01 - - - - lea rdx, [rsi + 2*rsi]
118+
- - - - - - - - xor esi, esi
119+
- - - - 1.00 1.00 1.00 1.00 call qword ptr [rip + memset@GOTPCREL]
120+
- - - - 1.00 - 0.50 0.50 mov qword ptr [rbx + 16], r15
121+
- - 0.01 0.99 - - - - mov al, 1
122+
- - 1.00 - - - - - add rsp, 32
123+
- - - - - - 0.50 0.50 pop rbx
124+
- - - - - - 0.50 0.50 pop r12
125+
- - - - - - 0.50 0.50 pop r13
126+
- - - - - - 0.50 0.50 pop r14
127+
- - - - - - 0.50 0.50 pop r15
128+
- - - - - 1.00 - - ret
129+
- - 1.00 - - - - - mov r13, rsi
130+
- - 0.01 0.99 - - - - lea rcx, [rax + rax]
131+
- - 0.99 0.01 - - - - cmp r15, rcx
132+
- - 2.00 0.01 - 0.99 - - cmova rcx, r15
133+
- - 0.01 0.99 - - - - cmp rcx, 5
134+
- - 0.01 0.99 - - - - mov r14d, 4
135+
- - 1.00 0.01 - 0.99 - - cmovae r14, rcx
136+
- - - - - - 0.50 0.50 mov rdx, qword ptr [rbx + 8]
137+
- - 0.01 0.99 - - - - lea rdi, [rsp + 8]
138+
- - - 1.00 - - - - mov rsi, rax
139+
- - - 0.01 - 0.99 - - mov rcx, r14
140+
- - - - 1.00 1.00 0.50 0.50 call <alloc::raw_vec::RawVecInner>::finish_grow
141+
- - - 0.99 - 0.01 0.50 0.50 cmp dword ptr [rsp + 8], 1
142+
- - - - - 1.00 - - je .LBB6_2
143+
- - - - - - 0.50 0.50 mov rax, qword ptr [rsp + 16]
144+
- - - - 1.00 - 0.49 0.51 mov qword ptr [rbx + 8], rax
145+
- - - - 1.00 - 0.51 0.49 mov qword ptr [rbx], r14
146+
- - 0.99 0.01 - - - - mov rsi, r13
147+
- - - - - 1.00 - - jmp .LBB6_4

benches/insert_vec_zeroed.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
use zerocopy::*;
2+
3+
#[path = "formats/coco_static_size.rs"]
4+
mod format;
5+
6+
/// Codegen benchmark entry point for `FromZeros::insert_vec_zeroed`.
///
/// Forwards `v`, `position` and `additional` unchanged to
/// `FromZeros::insert_vec_zeroed` (with `format::LocoPacket` as the element
/// type) and converts the returned `Result` into an `Option` with `.ok()`,
/// so callers only see success (`Some(())`) or failure (`None`); the error
/// value itself is discarded.
///
/// `#[unsafe(no_mangle)]` keeps the symbol name as written so the function
/// can be located by name in the generated assembly listing.
#[unsafe(no_mangle)]
7+
fn bench_insert_vec_zeroed(
8+
v: &mut Vec<format::LocoPacket>,
9+
position: usize,
10+
additional: usize,
11+
) -> Option<()> {
12+
FromZeros::insert_vec_zeroed(v, position, additional).ok()
13+
}

benches/insert_vec_zeroed.x86-64

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Generated x86-64 code (Intel syntax) for
# `bench_insert_vec_zeroed(v, position, additional)`.
#
# In:   rdi = v, rsi = position, rdx = additional (System V AMD64 argument registers).
# Out:  al  = 1 on the success path, 0 on either failure path.
#       Calls core::panicking::panic when position is greater than the length.
#
# The three words at [v + 0], [v + 8] and [v + 16] are used below as the
# vector's capacity, buffer pointer and length respectively.
# Every element index is scaled by 6 (3 * 2), i.e. elements are 6 bytes wide.
#
# Register roles: rbx = v, r12 = length (then new length),
#                 r13 = length - position (elements after the insertion point),
#                 r14 = address of the insertion point, r15 = bytes to insert;
#                 on the grow path r15/rbp temporarily hold position/additional
#                 and r14 holds the new capacity.
bench_insert_vec_zeroed:
2+
# Prologue: six pushes plus 24 bytes of locals keep rsp 16-byte aligned at the calls.
push rbp
3+
push r15
4+
push r14
5+
push r13
6+
push r12
7+
push rbx
8+
sub rsp, 24
9+
# r12 = length; r13 = length - position. A borrow means position > length.
mov r12, qword ptr [rdi + 16]
10+
mov r13, r12
11+
sub r13, rsi
12+
jb .LBB6_10
13+
mov rbx, rdi
14+
# rax = capacity; rcx = capacity - length (spare room).
mov rax, qword ptr [rdi]
15+
mov rcx, rax
16+
sub rcx, r12
17+
# Skip growing when `additional` fits in the spare room.
cmp rdx, rcx
18+
jbe .LBB6_4
19+
# r12 = length + additional; a carry means the sum overflowed, so fall
# through to the failure return, otherwise go grow the buffer.
add r12, rdx
20+
jae .LBB6_7
21+
# Failure return: al = 0.
.LBB6_3:
22+
xor eax, eax
23+
jmp .LBB6_6
24+
# No-grow path: rax = current buffer pointer, r12 = new length.
.LBB6_4:
25+
mov rax, qword ptr [rbx + 8]
26+
add r12, rdx
27+
# Shift the tail and zero the gap (shared by the no-grow and grow paths).
# Expects rax = buffer pointer, rsi = position, rdx = additional.
.LBB6_5:
28+
# r14 = buffer + 6 * position (address of the insertion point).
lea rcx, [rsi + 2*rsi]
29+
lea r14, [rax + 2*rcx]
30+
# r15 = 6 * additional (size of the gap in bytes).
add rdx, rdx
31+
lea r15, [rdx + 2*rdx]
32+
# rdi = insertion point + gap size: where the existing tail moves to.
lea rdi, [r14 + r15]
33+
# rdx = 6 * (length - position): r13 is doubled, then 2*r13 + r13 is formed.
add r13, r13
34+
lea rdx, [2*r13]
35+
add rdx, r13
36+
# memmove(dst = rdi, src = insertion point, n = tail bytes).
mov rsi, r14
37+
call qword ptr [rip + memmove@GOTPCREL]
38+
# memset(insertion point, 0, gap size): zero the newly opened gap.
mov rdi, r14
39+
xor esi, esi
40+
mov rdx, r15
41+
call qword ptr [rip + memset@GOTPCREL]
42+
# Commit the new length and report success.
mov qword ptr [rbx + 16], r12
43+
mov al, 1
44+
# Common epilogue.
.LBB6_6:
45+
add rsp, 24
46+
pop rbx
47+
pop r12
48+
pop r13
49+
pop r14
50+
pop r15
51+
pop rbp
52+
ret
53+
# Grow path: reached when `additional` exceeds the spare room and the new
# length did not overflow.
.LBB6_7:
54+
# Keep position and additional in callee-saved registers; rsi and rdx are
# reused as arguments for the call below.
mov r15, rsi
55+
mov rbp, rdx
56+
# rcx = max(2 * capacity, required length).
lea rcx, [rax + rax]
57+
cmp r12, rcx
58+
cmova rcx, r12
59+
# r14 = max(rcx, 4): the new capacity is never below 4 elements.
cmp rcx, 5
60+
mov r14d, 4
61+
cmovae r14, rcx
62+
# Call finish_grow with rdi = result slot at [rsp], rsi = current capacity,
# rdx = current buffer pointer, rcx = new capacity.
# NOTE(review): parameter meanings are inferred from the values passed here;
# confirm against the RawVecInner::finish_grow signature.
mov rdx, qword ptr [rbx + 8]
63+
mov rdi, rsp
64+
mov rsi, rax
65+
mov rcx, r14
66+
call <alloc::raw_vec::RawVecInner>::finish_grow
67+
# A tag of 1 in the result slot takes the failure return.
cmp dword ptr [rsp], 1
68+
je .LBB6_3
69+
# Otherwise store the pointer from the result slot and the new capacity,
# restore additional/position, and join the shared path with rax = new buffer.
mov rax, qword ptr [rsp + 8]
70+
mov qword ptr [rbx + 8], rax
71+
mov qword ptr [rbx], r14
72+
mov rdx, rbp
73+
mov rsi, r15
74+
jmp .LBB6_5
75+
# Panic path for position > length. No code follows the call in this listing.
# NOTE(review): rdi/esi look like a 37-byte message pointer and length, and rdx
# a second static (presumably the caller location) - confirm against the
# core::panicking::panic signature.
.LBB6_10:
76+
lea rdi, [rip + .Lanon.c6b21fff2f8c453605ed96c7079e1c5c.1]
77+
lea rdx, [rip + .Lanon.c6b21fff2f8c453605ed96c7079e1c5c.3]
78+
mov esi, 37
79+
call qword ptr [rip + core::panicking::panic@GOTPCREL]

0 commit comments

Comments
 (0)