+ keep addresses of thread vars in registers if possible and likely to be an advantage

This commit is contained in:
florian 2021-09-12 19:40:51 +02:00
parent 5a617cd108
commit 4f77cbbac4

View File

@ -50,7 +50,7 @@ unit optcse;
globtype,globals, globtype,globals,
cutils,cclasses, cutils,cclasses,
nutils,compinnr, nutils,compinnr,
nbas,nld,ninl,ncal,nadd,nmem, nbas,nld,ninl,ncal,nadd,nmem,ncnv,
pass_1, pass_1,
procinfo, procinfo,
paramgr, paramgr,
@ -604,13 +604,15 @@ unit optcse;
begin begin
result:=fen_true; result:=fen_true;
consts:=pconstentries(arg); consts:=pconstentries(arg);
if (n.nodetype=realconstn) if ((n.nodetype=realconstn)
{$ifdef x86} {$ifdef x86}
{ x87 consts would end up in memory, so loading them in temps. makes no sense } { x87 consts would end up in memory, so loading them in temps. makes no sense }
and use_vectorfpu(n.resultdef) and use_vectorfpu(n.resultdef)
{$endif x86} {$endif x86}
) or
then ((n.nodetype=loadn) and (tloadnode(n).symtableentry.typ=staticvarsym) and
(vo_is_thread_var in tstaticvarsym(tloadnode(n).symtableentry).varoptions)
) then
begin begin
found:=false; found:=false;
i:=0; i:=0;
@ -645,7 +647,16 @@ unit optcse;
result:=fen_true; result:=fen_true;
if tnode(pconstentry(arg)^.valuenode).isequal(n) then if tnode(pconstentry(arg)^.valuenode).isequal(n) then
begin begin
hp:=ctemprefnode.create(pconstentry(arg)^.temp); { threadvar, so we took the address? }
if (pconstentry(arg)^.valuenode.nodetype=loadn) and (tloadnode(pconstentry(arg)^.valuenode).symtableentry.typ=staticvarsym) and
(vo_is_thread_var in tstaticvarsym(tloadnode(pconstentry(arg)^.valuenode).symtableentry).varoptions) then
begin
hp:=ctypeconvnode.create_internal(cderefnode.create(ctemprefnode.create(pconstentry(arg)^.temp)),pconstentry(arg)^.valuenode.resultdef);
tderefnode(hp).left.fileinfo:=n.fileinfo;
end
else
hp:=ctemprefnode.create(pconstentry(arg)^.temp);
hp.fileinfo:=n.fileinfo; hp.fileinfo:=n.fileinfo;
n.Free; n.Free;
n:=hp; n:=hp;
@ -700,7 +711,8 @@ unit optcse;
createblock, createblock,
deleteblock, deleteblock,
rootblock : tblocknode; rootblock : tblocknode;
i, maxassigned, regsassigned: Integer; i, max_fpu_regs_assigned, fpu_regs_assigned,
max_int_regs_assigned, int_regs_assigned: Integer;
old_current_filepos: tfileposinfo; old_current_filepos: tfileposinfo;
begin begin
{$ifdef csedebug} {$ifdef csedebug}
@ -716,19 +728,24 @@ unit optcse;
deleteblock:=nil; deleteblock:=nil;
rootblock:=nil; rootblock:=nil;
{ estimate how many registers can be used for constants } { estimate how many registers can be used for constants }
if pi_do_call in current_procinfo.flags then
max_int_regs_assigned:=length(paramanager.get_saved_registers_int(current_procinfo.procdef.proccalloption)) div 4
else
max_int_regs_assigned:=max(first_int_imreg div 4,1);
{$if defined(x86) or defined(aarch64) or defined(arm)} {$if defined(x86) or defined(aarch64) or defined(arm)}
{ x86, aarch64 and arm (neglecting fpa) use mm registers for floats } { x86, aarch64 and arm (neglecting fpa) use mm registers for floats }
if pi_do_call in current_procinfo.flags then if pi_do_call in current_procinfo.flags then
maxassigned:=length(paramanager.get_saved_registers_mm(current_procinfo.procdef.proccalloption)) div 4 max_fpu_regs_assigned:=length(paramanager.get_saved_registers_mm(current_procinfo.procdef.proccalloption)) div 5
else else
maxassigned:=max(first_mm_imreg div 4,1); max_fpu_regs_assigned:=max(first_mm_imreg div 5,1);
{$else defined(x86) or defined(aarch64) or defined(arm)} {$else defined(x86) or defined(aarch64) or defined(arm)}
if pi_do_call in current_procinfo.flags then if pi_do_call in current_procinfo.flags then
maxassigned:=length(paramanager.get_saved_registers_fpu(current_procinfo.procdef.proccalloption)) div 4 max_fpu_regs_assigned:=length(paramanager.get_saved_registers_fpu(current_procinfo.procdef.proccalloption)) div 4
else else
maxassigned:=max(first_fpu_imreg div 4,1); max_fpu_regs_assigned:=max(first_fpu_imreg div 4,1);
{$endif defined(x86) or defined(aarch64) or defined(arm)} {$endif defined(x86) or defined(aarch64) or defined(arm)}
regsassigned:=0; fpu_regs_assigned:=0;
int_regs_assigned:=0;
if Length(constentries)>0 then if Length(constentries)>0 then
begin begin
{ sort entries by weight } { sort entries by weight }
@ -736,13 +753,14 @@ unit optcse;
{ assign only the constants with the highest weight to a register } { assign only the constants with the highest weight to a register }
for i:=High(constentries) downto 0 do for i:=High(constentries) downto 0 do
begin begin
if regsassigned>=maxassigned then if (constentries[i].valuenode.nodetype=realconstn) and
break; { if there is a call, we need most likely to save/restore a register }
if { if there is a call, we need most likely to save/restore a register }
((constentries[i].weight>3) or ((constentries[i].weight>3) or
((constentries[i].weight>1) and not(pi_do_call in current_procinfo.flags))) ((constentries[i].weight>1) and not(pi_do_call in current_procinfo.flags)))
then then
begin begin
if fpu_regs_assigned>=max_fpu_regs_assigned then
break;
old_current_filepos:=current_filepos; old_current_filepos:=current_filepos;
current_filepos:=current_procinfo.entrypos; current_filepos:=current_procinfo.entrypos;
if not(assigned(createblock)) then if not(assigned(createblock)) then
@ -757,6 +775,33 @@ unit optcse;
addstatement(creates,cassignmentnode.create_internal(ctemprefnode.create(constentries[i].temp),constentries[i].valuenode)); addstatement(creates,cassignmentnode.create_internal(ctemprefnode.create(constentries[i].temp),constentries[i].valuenode));
current_filepos:=old_current_filepos; current_filepos:=old_current_filepos;
foreachnodestatic(pm_postprocess,rootnode,@replaceconsts,@constentries[i]); foreachnodestatic(pm_postprocess,rootnode,@replaceconsts,@constentries[i]);
inc(fpu_regs_assigned);
end
else if (constentries[i].valuenode.nodetype=loadn) and (tloadnode(constentries[i].valuenode).symtableentry.typ=staticvarsym) and
(vo_is_thread_var in tstaticvarsym(tloadnode(constentries[i].valuenode).symtableentry).varoptions) and
{ if there is a call, we need most likely to save/restore a register }
((constentries[i].weight>2) or
((constentries[i].weight>1) and not(pi_do_call in current_procinfo.flags)))
then
begin
if int_regs_assigned>=max_int_regs_assigned then
break;
old_current_filepos:=current_filepos;
current_filepos:=current_procinfo.entrypos;
if not(assigned(createblock)) then
begin
rootblock:=internalstatements(statements);
createblock:=internalstatements(creates);
deleteblock:=internalstatements(deletes);
end;
constentries[i].temp:=ctempcreatenode.create(voidpointertype,
voidpointertype.size,tt_persistent,true);
addstatement(creates,constentries[i].temp);
addstatement(creates,cassignmentnode.create_internal(ctemprefnode.create(constentries[i].temp),
caddrnode.create_internal(constentries[i].valuenode)));
current_filepos:=old_current_filepos;
foreachnodestatic(pm_postprocess,rootnode,@replaceconsts,@constentries[i]);
inc(int_regs_assigned);
end; end;
end; end;
end; end;