Performance of denormal numbers
From
Bonita Montero@21:1/5 to
All on Sat Sep 17 16:35:18 2022
XPost: comp.lang.c++
I wanted to check whether denormal numbers are still slower than normal
numbers on modern CPUs. Intel introduced the DAZ / FTZ bits with SSE1 because
denormals were even handled in microcode. Here is my test program:
#include <atomic>
#include <bit>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <utility>
using namespace std;
using namespace chrono;
// Multiplication kernel implemented in MASM (listing further down in this post).
// rounds: number of rounds requested; each round is a chain of 52 mulsd by 0.5.
// den:    true starts each chain from a denormal value, false starts it from 1.0.
// Returns rounds * 52, the multiplication count the caller divides the elapsed
// time by; returns 0 when rounds is 0.
uint64_t denScale( uint64_t rounds, bool den );
int main()
{
auto bench = []( bool den ) -> double
{
constexpr uint64_t ROUNDS = 25'000'000;
auto start = high_resolution_clock::now();
int64_t nScale = denScale( ROUNDS, den );
return (double)duration_cast<nanoseconds>( high_resolution_clock::now() - start ).count() / nScale;
};
double
tDen = bench( true ),
tNorm = bench( false ),
rel = tDen / tNorm - 1;
cout << tDen << endl;
cout << tNorm << endl;
cout << trunc( 100 * 10 * rel + 0.5 ) / 10 << "%" << endl;
}
MASM code:
; Export the routine under the decorated name the C++ caller links against.
PUBLIC ?denScale@@YA_K_K_N@Z
; Constants used by denScale, stored as raw IEEE-754 double bit patterns.
CONST SEGMENT
; Exponent field zero, only mantissa bit 51 set: a denormal (subnormal) double.
DEN DQ 00008000000000000h
; Bit pattern of 1.0.
ONE DQ 03FF0000000000000h
; Bit pattern of 0.5, the factor used for every multiplication.
P5 DQ 03fe0000000000000h
CONST ENDS
_TEXT SEGMENT
; uint64_t denScale( uint64_t rounds, bool den )
;
; In:  rcx = rounds, dl = den (Microsoft x64 calling convention).
; Out: rax = rounds * 52, the number of mulsd instructions executed;
;      0 when rounds is 0.
;
; Each round reloads the start value (DEN when den is non-zero, otherwise ONE)
; and runs a dependent chain of 52 multiplications by 0.5.
?denScale@@YA_K_K_N@Z PROC
    xor     rax, rax            ; result is 0 if no rounds were requested
    test    rcx, rcx
    jz      byeBye
    mov     r8, ONE             ; default start value: 1.0
    mov     r9, DEN
    test    dl, dl
    cmovnz  r8, r9              ; den != 0 -> start from the denormal value
    movq    xmm1, P5            ; xmm1 = 0.5, constant multiplier
    mov     rax, rcx            ; keep the round count for the result
loopThis:
    movq    xmm0, r8            ; restart the chain from the start value
    REPT 52
    mulsd   xmm0, xmm1
    ENDM
    sub     rcx, 1
    ; Leave the loop once the counter reaches zero so exactly `rounds` rounds
    ; run. The previous `jae` only fell through on the borrow, i.e. after one
    ; extra round that the returned count did not include.
    jnz     loopThis
    mov     rdx, 52
    mul     rdx                 ; rax = rounds * 52 (rdx is clobbered)
byeBye:
    ret
?denScale@@YA_K_K_N@Z ENDP
_TEXT ENDS
END
On my PC, normal numbers have about 25% higher throughput than denormals.
Feel free to post your own results as well.
--- SoupGate-Win32 v1.05
* Origin: fsxNet Usenet Gateway (21:1/5)