Look at the xor r10d, r10d instruction in the heapsort.asm file.and the entire instruction is also 3 bytes long.
This known technique of limiting code length by working on the lower 32 bits of 64-bit registers, which works great for rax-rdx registers, unfortunately does not work for extended registers r8-r15. Even the 32-bit xor r10d, r10d requires a prefix byte
It's a pity, but we can swap the roles of the r10 and rax registers in the sorting loop part of the code to be able to write xor eax, eax (which is 2 bytes long) instead of xor r10d, r10d and get 1 byte shorter code :)
And that's exactly what I did in the patched version of heapsort.asm below. Now the revised assembled code is 181 bytes long. It is really short! When you honestly compare this length with the numbers you saw earlier in this thread remember that this code has a sift code inlined in two places for quicker operation.
Who next of you will shorten this code more or find some improvement to this code? :)
---------- start of file: heapsort.asm, cut below this line ----------
BITS 64
; rdi = array's address
; rsi = array's number of elements = n
heapsort: mov rax, rsi
shr rax, 1
; nothing to sort if there is 0 or 1 element in the array
jz the_end
; now we are sure that always n>=2
; r10 = index of the last parent node
; r10 is also the counter of the heap building loop
lea r10, [rax-1]
; rbx = index of the left child of the last parent node
; rbx is also index of the left child of r10 node in the loop
lea rbx, [rax+rax-1]
; heap building loop
; sift the counter node's value stored in r9 to the proper position
; r9 = array[r10]
build_loop: mov r9, qword [rdi+8*r10]
; r11 is index of target place for sifted values
; we start sifting from the counter node's index
mov r11, r10
; rax is the index of a current child of r11 node
; we start r11 with the index of the left child
mov rax, rbx
jmp sift1_while
; we will find the child with bigger value and store its index in rax
; we will load this bigger value to rdx
; rcx holds the index of the right child
sift1: lea rcx, [rax+1]
mov rdx, qword [rdi+8*rax]
; note that the right child may not exist
; then we consider the left child to be bigger
cmp rcx, rsi
jnb sift1_check
; we load the right child node's value to r8
mov r8, qword [rdi+8*rcx]
; if the right child node's value is bigger than the left child's
cmp r8, rdx
jle sift1_check
; then we will proceed the right child node
mov rdx, r8
mov rax, rcx
; compare the values of the current child node with the sifting node sift1_check: cmp r9, rdx
; if the sifting node's value is bigger then we end sifting
jnl build_next
; we store the child's value into the parent node's place
mov qword [rdi+8*r11], rdx
; we make the child's node the new parent's node
mov r11, rax
lea rax, [rax+rax+1]
; do sift if there is still at least one child
sift1_while: cmp rax, rsi
jb sift1
; we store sifted value into the target place and repeat the loop
build_next: mov qword [rdi+8*r11], r9
sub rbx, 2
sub r10, 1
jnc build_loop
; the sorting loop starts with n-1
; r10 will hold the counter of the sorting loop
lea r10, [rsi-1]
; the sorting loop
; rdx = array[0]
; r9 = array[counter]
sort_loop: mov rdx, qword [rdi]
mov r9, qword [rdi+8*r10]
; we will sift the node rax=0 which value is in r9
xor eax, eax
; we store array[counter]=array[0]
mov qword [rdi+8*r10], rdx
; now we will store in rdx the left child node's index
mov edx, 1
jmp sift2_while
; sift2 works like sift1 with changed registers
; we will find the child with bigger value and store its index in rdx
; we will load this bigger value to rsi
; rcx holds the index of the right child
sift2: lea rcx, [rdx+1]
mov rsi, qword [rdi+8*rdx]
; note that the right child may not exist
; then we consider the left child to be bigger
cmp rcx, r10
jnb sift2_check
; we load the right child node's value to r8
mov r8, qword [rdi+8*rcx]
; if the right child node's value is bigger than the left child's
cmp r8, rsi
jle sift2_check
; then we will proceed the right child node
mov rsi, r8
mov rdx, rcx
; compare the values of the current child node with the sifting node sift2_check: cmp r9, rsi
; if the sifting node's value is bigger then we end sifting
jnl sort_next
; we store the child's value into the parent node's place
mov qword [rdi+8*rax], rsi
; we make the child's node the new parent's node
mov rax, rdx
lea rdx, [rdx+rdx+1]
; do sift if there is still at least one child
sift2_while: cmp rdx, r10
jb sift2
; we store sifted value into the target place
sort_next: mov qword [rdi+8*rax], r9
; next step of the sorting loop
sub r10, 1
; the sorting loop ends with 1
jnz sort_loop
the_end:
---------- end of file: heapsort.asm, cut above this line ----------
Sysop: | Keyop |
---|---|
Location: | Huddersfield, West Yorkshire, UK |
Users: | 304 |
Nodes: | 16 (1 / 15) |
Uptime: | 01:21:39 |
Calls: | 6,825 |
Calls today: | 5 |
Files: | 12,338 |
Messages: | 5,407,461 |