-
Notifications
You must be signed in to change notification settings - Fork 14.5k
Open
Labels
Description
Discussed in depth in https://stackoverflow.com/questions/71019078/efficient-overflow-immune-arithmetic-mean-in-c-c
The optimal code for either
uint32_t avg(uint32_t a, uint32_t b) {
return (a & b) + ((a ^ b) >> 1);
}
mov eax, esi
and eax, edi
xor esi, edi
shr esi
add eax, esi
ret
uint32_t avg2(uint32_t a, uint32_t b) {
return a/2 + b/2 + (a%2 + b%2)/2;
}
mov eax, edi
shr eax
mov ecx, esi
shr ecx
add eax, ecx
and edi, 1
and esi, 1
add esi, edi
shr esi
add eax, esi
ret
uint32_t avg_by_widening(uint32_t a, uint32_t b) {
return ((uint64_t)a + b) >> 1;
}
could use rotate through carry instruction on x64 backend, especially suitable as the replacement of avg_by_widening(uint64_t, uint64_t)
, which would need uint128_t
as the intermediate variable type.
add rax, rbx
rcr rax, 1