Fix FillQWord_SSE2 stack usage.

This commit is contained in:
Rika Ichinose 2024-01-02 01:27:11 +03:00 committed by FPK
parent 94a56ba3d5
commit 35345fe145

View File

@ -593,14 +593,19 @@ end;
procedure FillQWord_SSE2(var x;count:SizeInt;value:QWord);assembler;nostackframe;
{ eax = x, edx = count, [esp + 4] = value }
asm
cmp $1, %edx
jle .LOneOrLess
cmp $4, %edx
jle .L2to4
jle .L4OrLess
movq 4(%esp), %xmm0
punpcklqdq %xmm0, %xmm0
{ Stack is 12 bytes:
[esp] = return address, [esp + 4] = value (not required anymore).
Convert to 8 bytes expected by FillXxxx_MoreThanTwoXMMs:
[esp] = esi, [esp + 4] = return address. }
mov (%esp), %ecx
add $4, %esp
mov %esi, (%esp)
mov %ecx, 4(%esp)
shl $3, %edx
push %esi
movdqu %xmm0, (%eax)
movdqa %xmm0, %xmm1
test $7, %eax { Since misaligning the pattern is not very trivial, shortcut if x is aligned. }
@ -618,17 +623,12 @@ asm
por %xmm2, %xmm1
jmp FillXxxx_MoreThanTwoXMMs
.LOneOrLess:
.L4OrLess: { Doing this with 64-bit half-XMM MOVQs is a lot simpler but 2x slower (Coffee Lake). :\ }
cmp $1, %edx
jl .LQuit
mov 4(%esp), %ecx
mov %ecx, (%eax)
mov 8(%esp), %ecx
mov %ecx, 4(%eax)
.LQuit:
ret $8
.L2to4:
mov 4(%esp), %ecx
mov %ecx, (%eax)
je .LSecondHalfOf1
mov %ecx, 8(%eax)
mov %ecx, -16(%eax,%edx,8)
mov %ecx, -8(%eax,%edx,8)
@ -637,6 +637,11 @@ asm
mov %ecx, 12(%eax)
mov %ecx, -12(%eax,%edx,8)
mov %ecx, -4(%eax,%edx,8)
.LQuit:
ret $8
.LSecondHalfOf1:
mov 8(%esp), %ecx
mov %ecx, 4(%eax)
end;
{ Forward declaration: the `forward` directive only announces FillQWord_Dispatch here
  (same parameter list as FillQWord_SSE2 above); its body is not part of this excerpt. }
procedure FillQWord_Dispatch(var x;count:SizeInt;value:qword); forward;