* synchronized with trunk

git-svn-id: branches/z80@44721 -
This commit is contained in:
nickysn 2020-04-13 21:10:10 +00:00
commit c3ca85e349
18 changed files with 380 additions and 43 deletions

2
.gitattributes vendored
View File

@ -106,6 +106,7 @@ compiler/arm/rarmstd.inc svneol=native#text/plain
compiler/arm/rarmsup.inc svneol=native#text/plain
compiler/arm/rgcpu.pas svneol=native#text/plain
compiler/arm/symcpu.pas svneol=native#text/plain
compiler/armgen/aoptarm.pas svneol=native#text/pascal
compiler/armgen/armpara.pas svneol=native#text/plain
compiler/assemble.pas svneol=native#text/plain
compiler/avr/aasmcpu.pas svneol=native#text/plain
@ -15811,6 +15812,7 @@ tests/test/units/math/tdivmod.pp svneol=native#text/plain
tests/test/units/math/tmask.inc svneol=native#text/plain
tests/test/units/math/tmask.pp svneol=native#text/plain
tests/test/units/math/tmask2.pp svneol=native#text/plain
tests/test/units/math/tminmaxconst.pp svneol=native#text/pascal
tests/test/units/math/tnaninf.pp svneol=native#text/plain
tests/test/units/math/tpower.pp svneol=native#text/pascal
tests/test/units/math/troundm.pp svneol=native#text/plain

View File

@ -128,8 +128,11 @@
'lsl',
'lsr',
'ror',
'sxt',
'uxt',
'sxtb',
'sxth',
'sxtw',
'uxtb',
'uxth',
'neg',
'ngc',
'mvn',

View File

@ -188,5 +188,8 @@ attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE,
attsufNONE
);

View File

@ -257,9 +257,15 @@
[ROR]
[SXT]
[SXTB]
[UXT]
[SXTH]
[SXTW]
[UXTB]
[UXTH]
[NEG]

View File

@ -128,8 +128,11 @@ A_ASR,
A_LSL,
A_LSR,
A_ROR,
A_SXT,
A_UXT,
A_SXTB,
A_SXTH,
A_SXTW,
A_UXTB,
A_UXTH,
A_NEG,
A_NGC,
A_MVN,

View File

@ -32,10 +32,11 @@ Interface
uses
globtype, globals,
cutils,
cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
cgbase, cpubase, aasmtai, aasmcpu,
aopt, aoptcpub, aoptarm;
Type
TCpuAsmOptimizer = class(TAsmOptimizer)
TCpuAsmOptimizer = class(TARMAsmOptimizer)
{ uses the same constructor as TAopObj }
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
function PostPeepHoleOptsCpu(var p: tai): boolean; override;

View File

@ -681,15 +681,32 @@ implementation
procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
var
reg: tregister;
href: treference;
i: Integer;
begin
{ use the zero register if possible }
if a=0 then
begin
if size in [OS_64,OS_S64] then
reg:=NR_XZR
href:=ref;
inc(href.offset,tcgsize2size[size]-1);
if (tcgsize2size[size]>1) and (ref.alignment=1) and (simple_ref_type(A_STP,OS_8,PF_None,ref)=sr_simple) and
(simple_ref_type(A_STP,OS_8,PF_None,href)=sr_simple) then
begin
href:=ref;
for i:=0 to tcgsize2size[size]-1 do
begin
a_load_const_ref(list,OS_8,0,href);
inc(href.offset);
end;
end
else
reg:=NR_WZR;
a_load_reg_ref(list,size,size,reg,ref);
begin
if size in [OS_64,OS_S64] then
reg:=NR_XZR
else
reg:=NR_WZR;
a_load_reg_ref(list,size,size,reg,ref);
end;
end
else
inherited;
@ -907,13 +924,13 @@ implementation
begin
case tosize of
OS_8:
list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
list.concat(taicpu.op_reg_reg(A_UXTB,reg2,makeregsize(reg1,OS_32)));
OS_16:
list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
list.concat(taicpu.op_reg_reg(A_UXTH,reg2,makeregsize(reg1,OS_32)));
OS_S8:
list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
list.concat(taicpu.op_reg_reg(A_SXTB,reg2,makeregsize(reg1,OS_32)));
OS_S16:
list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
list.concat(taicpu.op_reg_reg(A_SXTH,reg2,makeregsize(reg1,OS_32)));
{ while "mov wN, wM" automatically inserts a zero-extension and
hence we could encode a 64->32 bit move like that, the problem
is that we then can't distinguish 64->32 from 32->32 moves, and
@ -928,7 +945,7 @@ implementation
list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
OS_64,
OS_S64:
list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
list.concat(taicpu.op_reg_reg(A_SXTW,reg2,makeregsize(reg1,OS_32)));
else
internalerror(2002090901);
end;
@ -1157,7 +1174,7 @@ implementation
list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
{ mask the -1 to 255 if src was 0 (anyone find a two-instruction
branch-free version? All of mine are 3...) }
list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,makeregsize(dst,OS_32),makeregsize(dst,OS_32)),PF_B));
list.Concat(taicpu.op_reg_reg(A_UXTB,makeregsize(dst,OS_32),makeregsize(dst,OS_32)));
end;

View File

@ -30,10 +30,13 @@ Unit aoptcpu;
Interface
uses cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
uses
cgbase, cgutils, cpubase, aasmtai,
aasmcpu,
aopt, aoptobj, aoptarm;
Type
TCpuAsmOptimizer = class(TAsmOptimizer)
TCpuAsmOptimizer = class(TARMAsmOptimizer)
{ Can't be done in some cases due to the limited range of jumps }
function CanDoJumpOpts: Boolean; override;
@ -2247,6 +2250,208 @@ Implementation
RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
Result:=true;
end;
A_SXTB:
begin
{
change
sxtb reg2,reg1
strb reg2,[...]
dealloc reg2
to
strb reg1,[...]
}
if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
(taicpu(p).ops=2) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
{ the reference in strb might not use reg2 }
not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole SxtbStrb2Strb done', p);
taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
GetNextInstruction(p,hp2);
asml.remove(p);
p.free;
p:=hp2;
result:=true;
end
{
change
sxtb reg2,reg1
sxth reg3,reg2
dealloc reg2
to
sxtb reg3,reg1
}
else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
(taicpu(p).ops=2) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
(taicpu(hp1).ops = 2) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole SxtbSxth2Sxtb done', p);
AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
asml.remove(hp1);
hp1.free;
result:=true;
end
{
change
sxtb reg2,reg1
sxtb reg3,reg2
dealloc reg2
to
uxtb reg3,reg1
}
else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
(taicpu(p).ops=2) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
(taicpu(hp1).ops = 2) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
asml.remove(hp1);
hp1.free;
result:=true;
end
{
change
sxtb reg2,reg1
and reg3,reg2,#0x*FF
dealloc reg2
to
uxtb reg3,reg1
}
else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
(taicpu(p).ops=2) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
(taicpu(hp1).ops=3) and
(taicpu(hp1).oper[2]^.typ=top_const) and
((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole SxtbAndImm2Sxtb done', p);
taicpu(hp1).opcode:=A_SXTB;
taicpu(hp1).ops:=2;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
GetNextInstruction(p,hp2);
asml.remove(p);
p.free;
p:=hp2;
result:=true;
end
else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
Result:=true;
end;
A_SXTH:
begin
{
change
sxth reg2,reg1
strh reg2,[...]
dealloc reg2
to
strh reg1,[...]
}
if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
(taicpu(p).ops=2) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ the reference in strb might not use reg2 }
not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole SXTHStrh2Strh done', p);
taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
GetNextInstruction(p, hp1);
asml.remove(p);
p.free;
p:=hp1;
result:=true;
end
{
change
sxth reg2,reg1
sxth reg3,reg2
dealloc reg2
to
sxth reg3,reg1
}
else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
(taicpu(p).ops=2) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
(taicpu(hp1).ops=2) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole SxthSxth2Sxth done', p);
AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
taicpu(hp1).opcode:=A_SXTH;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
GetNextInstruction(p, hp1);
asml.remove(p);
p.free;
p:=hp1;
result:=true;
end
{
change
sxth reg2,reg1
and reg3,reg2,#65535
dealloc reg2
to
sxth reg3,reg1
}
else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
(taicpu(p).ops=2) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
(taicpu(hp1).ops=3) and
(taicpu(hp1).oper[2]^.typ=top_const) and
((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
{ reg1 might not be modified inbetween }
not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
begin
DebugMsg('Peephole SxthAndImm2Sxth done', p);
taicpu(hp1).opcode:=A_SXTH;
taicpu(hp1).ops:=2;
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
GetNextInstruction(p, hp1);
asml.remove(p);
p.free;
p:=hp1;
result:=true;
end
else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
Result:=true;
end;
A_CMP:
begin
{

View File

@ -77,14 +77,11 @@ Type
fpu_vfpv4,
fpu_fpv4_sp_d16, { 32 registers single precision, for load/store/move they can be accessed as 16 double registers }
fpu_neon_vfpv4
{ when new elements added afterwards, update also fpu_vfp_last below and
update class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas }
{ when new elements added afterwards, update
class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas }
);
Const
fpu_vfp_first = fpu_vfpv2;
fpu_vfp_last = fpu_neon_vfpv4;
fputypestrllvm : array[tfputype] of string[15] = ('',
'',
'',

View File

@ -83,7 +83,7 @@ implementation
(target_info.abi<>abi_eabihf) and
(procdefinition.proccalloption<>pocall_hardfloat) and
((cs_fp_emulation in current_settings.moduleswitches) or
(current_settings.fputype in [fpu_vfp_first..fpu_vfp_last])) then
(FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype])) then
begin
{ keep the fpu values in integer registers for now, the code
generator will move them to memory or an mmregister when necessary

View File

@ -40,7 +40,9 @@ interface
implementation
uses
verbose,globtype,globals,symdef,aasmbase,aasmtai,aasmdata,symtable,
verbose,globtype,globals,
systems,
symdef,aasmbase,aasmtai,aasmdata,symtable,
defutil,
cgbase,cgutils,
pass_1,pass_2,procinfo,ncal,
@ -99,17 +101,19 @@ implementation
fpu_fpa10,
fpu_fpa11:
expectloc:=LOC_FPUREGISTER;
fpu_vfp_first..fpu_vfp_last:
expectloc:=LOC_MMREGISTER;
else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
expectloc:=LOC_MMREGISTER
else
internalerror(2009112702);
end;
end;
end;
function tarmtypeconvnode.first_real_to_real: tnode;
begin
if not(FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
if not(FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) and
not (target_info.system in systems_wince) then
begin
case tfloatdef(left.resultdef).floattype of
s32real:

View File

@ -86,7 +86,12 @@ implementation
location.loc := LOC_FPUREGISTER;
end;
end;
fpu_vfp_first..fpu_vfp_last:
fpu_soft:
begin
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
location_copy(location,left.location);
end
else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
begin
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
location_copy(location,left.location);
@ -95,11 +100,6 @@ implementation
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
location.loc := LOC_MMREGISTER;
end;
end;
fpu_soft:
begin
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
location_copy(location,left.location);
end
else
internalerror(2009111801);

View File

@ -197,8 +197,12 @@ interface
Internalerror(2019100602);
end;
case current_settings.fputype of
fpu_none,
fpu_soft,
fpu_libgcc:
fpu_libgcc,
fpu_fpa,
fpu_fpa10,
fpu_fpa11:
current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,0));
fpu_vfpv2:
current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,2));
@ -213,8 +217,8 @@ interface
fpu_vfpv4,
fpu_neon_vfpv4:
current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,5));
else
Internalerror(2019100603);
{ else not needed anymore PM 2020/04/13
Internalerror(2019100603); }
end;
if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,2))

View File

@ -0,0 +1,52 @@
{
Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
Development Team
This unit implements an ARM optimizer object used commonly for ARM and AAarch64
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
Unit aoptarm;
{$i fpcdefs.inc}
{ $define DEBUG_PREREGSCHEDULER}
{ $define DEBUG_AOPTCPU}
Interface
uses
cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
Type
{ while ARM and AAarch64 look not very similar at a first glance,
several optimizations can be shared between both }
TARMAsmOptimizer = class(TAsmOptimizer)
End;
Implementation
uses
cutils,verbose,globtype,globals,
systems,
cpuinfo,
cgobj,procinfo,
aasmbase,aasmdata;
end.

View File

@ -3181,6 +3181,7 @@ implementation
{ must be done before code below, because we need the
typeconversions for ordconstn's as well }
case convtype of
tc_bool_2_int,
tc_int_2_bool,
tc_int_2_int:
begin
@ -3216,6 +3217,13 @@ implementation
left:=nil;
exit;
end;
if (convtype=tc_int_2_int) and (left.nodetype=typeconvn) and (ttypeconvnode(left).convtype=tc_bool_2_int) then
begin
ttypeconvnode(left).resultdef:=resultdef;
result:=left;
left:=nil;
exit;
end;
end;
end;
else

View File

@ -1082,7 +1082,9 @@ Var
begin
SetLength(EnvList, 0);
// pr_LocalVars are introduced with OS2.0
{$ifdef AMIGA68k}
if PLibrary(AOS_ExecBase)^.lib_Version >= 36 then
{$endif}
begin
ThisProcess := PProcess(FindTask(nil)); //Get the pointer to our process
LocalVars_List := @(ThisProcess^.pr_LocalVars); //get the list of pr_LocalVars as pointer

View File

@ -71,13 +71,19 @@ Const
{ Ranges of the IEEE floating point types, including denormals }
{$ifdef FPC_HAS_TYPE_SINGLE}
const
MinSingle = 1.5e-45;
MaxSingle = 3.4e+38;
{ values according to
https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Single-precision_examples
}
MinSingle = 1.1754943508e-38;
MaxSingle = 3.4028234664e+38;
{$endif FPC_HAS_TYPE_SINGLE}
{$ifdef FPC_HAS_TYPE_DOUBLE}
const
MinDouble = 5.0e-324;
MaxDouble = 1.7e+308;
{ values according to
https://en.wikipedia.org/wiki/Double-precision_floating-point_format#Double-precision_examples
}
MinDouble = 2.2250738585072014e-308;
MaxDouble = 1.7976931348623157e+308;
{$endif FPC_HAS_TYPE_DOUBLE}
{$ifdef FPC_HAS_TYPE_EXTENDED}
const

View File

@ -0,0 +1,24 @@
uses
sysutils,math;
var
s: Single;
d: Double;
begin
s := MaxSingle;
d := MaxDouble;
Writeln(IntToHex(PLongInt(@s)^, 8));
if IntToHex(PLongInt(@s)^, 8)<>'7F7FFFFF' then
halt(1);
Writeln(IntToHex(PInt64(@d)^, 16));
if IntToHex(PInt64(@d)^, 16)<>'7FEFFFFFFFFFFFFF' then
halt(2);
s := MinSingle;
d := MinDouble;
Writeln(IntToHex(PLongInt(@s)^, 8));
if IntToHex(PLongInt(@s)^, 8)<>'00800000' then
halt(3);
Writeln(IntToHex(PInt64(@d)^, 16));
if IntToHex(PInt64(@d)^, 16)<>'0010000000000000' then
halt(4);
writeln('ok');
end.