mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-11 09:26:15 +02:00
* perform floating point type conversions directly in assignment nodes
when possible, as this can save a lot of useless memory traffic (and fpu<->sse conversions on x86)

git-svn-id: trunk@9716 -
This commit is contained in:
parent
9772da7eeb
commit
e72fab1e43
@ -260,6 +260,7 @@ unit cgobj;
|
|||||||
procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize:tcgsize; reg1, reg2: tregister); virtual; abstract;
|
procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize:tcgsize; reg1, reg2: tregister); virtual; abstract;
|
||||||
procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); virtual; abstract;
|
procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); virtual; abstract;
|
||||||
procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); virtual; abstract;
|
procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); virtual; abstract;
|
||||||
|
procedure a_loadfpu_ref_ref(list: TAsmList; fromsize, tosize: tcgsize; const ref1,ref2: treference);
|
||||||
procedure a_loadfpu_loc_reg(list: TAsmList; tosize: tcgsize; const loc: tlocation; const reg: tregister);
|
procedure a_loadfpu_loc_reg(list: TAsmList; tosize: tcgsize; const loc: tlocation; const reg: tregister);
|
||||||
procedure a_loadfpu_reg_loc(list: TAsmList; fromsize: tcgsize; const reg: tregister; const loc: tlocation);
|
procedure a_loadfpu_reg_loc(list: TAsmList; fromsize: tcgsize; const reg: tregister; const loc: tlocation);
|
||||||
procedure a_paramfpu_reg(list : TAsmList;size : tcgsize;const r : tregister;const cgpara : TCGPara);virtual;
|
procedure a_paramfpu_reg(list : TAsmList;size : tcgsize;const r : tregister;const cgpara : TCGPara);virtual;
|
||||||
@ -2469,6 +2470,21 @@ implementation
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
procedure tcg.a_loadfpu_ref_ref(list: TAsmList; fromsize, tosize: tcgsize; const ref1,ref2: treference);
  var
    tmpreg: tregister;
    tmpsize: tcgsize;
  begin
    { Copy a floating point value from ref1 (of size fromsize) to ref2 }
    { (of size tosize) by way of a temporary fpu register.             }

    { The temporary register takes whichever of the two sizes compares }
    { greater (presumably the wider format), so the size change is     }
    { applied by either the load or the store below.                   }
    tmpsize:=tosize;
    if (fromsize>=tosize) then
      tmpsize:=fromsize;

    tmpreg:=getfpuregister(list,tmpsize);
    { load: fromsize -> tmpsize }
    a_loadfpu_ref_reg(list,fromsize,tmpsize,ref1,tmpreg);
    { store: tmpsize -> tosize }
    a_loadfpu_reg_ref(list,tmpsize,tosize,tmpreg,ref2);
  end;
|
||||||
|
|
||||||
|
|
||||||
procedure tcg.a_paramfpu_reg(list : TAsmList;size : tcgsize;const r : tregister;const cgpara : TCGPara);
|
procedure tcg.a_paramfpu_reg(list : TAsmList;size : tcgsize;const r : tregister;const cgpara : TCGPara);
|
||||||
var
|
var
|
||||||
ref : treference;
|
ref : treference;
|
||||||
|
@ -692,27 +692,57 @@ implementation
|
|||||||
LOC_REFERENCE,
|
LOC_REFERENCE,
|
||||||
LOC_CREFERENCE :
|
LOC_CREFERENCE :
|
||||||
begin
|
begin
|
||||||
{$warning HACK: unaligned test, maybe remove all unaligned locations (array of char) from the compiler}
|
if (left.resultdef.typ=floatdef) and
|
||||||
{ Use unaligned copy when the offset is not aligned }
|
(right.resultdef.typ=floatdef) and
|
||||||
len:=left.resultdef.size;
|
(left.location.size<>right.location.size) then
|
||||||
if (right.location.reference.offset mod sizeof(aint)<>0) or
|
begin
|
||||||
(left.location.reference.offset mod sizeof(aint)<>0) or
|
cg.a_loadfpu_ref_ref(current_asmdata.CurrAsmList,
|
||||||
(right.resultdef.alignment<sizeof(aint)) or
|
right.location.size,left.location.size,
|
||||||
((right.location.reference.alignment<>0) and
|
right.location.reference,left.location.reference)
|
||||||
(right.location.reference.alignment<sizeof(aint))) or
|
end
|
||||||
((left.location.reference.alignment<>0) and
|
|
||||||
(left.location.reference.alignment<sizeof(aint))) then
|
|
||||||
cg.g_concatcopy_unaligned(current_asmdata.CurrAsmList,right.location.reference,left.location.reference,len)
|
|
||||||
else
|
else
|
||||||
cg.g_concatcopy(current_asmdata.CurrAsmList,right.location.reference,left.location.reference,len);
|
begin
|
||||||
|
{$warning HACK: unaligned test, maybe remove all unaligned locations (array of char) from the compiler}
|
||||||
|
{ Use unaligned copy when the offset is not aligned }
|
||||||
|
len:=left.resultdef.size;
|
||||||
|
if (right.location.reference.offset mod sizeof(aint)<>0) or
|
||||||
|
(left.location.reference.offset mod sizeof(aint)<>0) or
|
||||||
|
(right.resultdef.alignment<sizeof(aint)) or
|
||||||
|
((right.location.reference.alignment<>0) and
|
||||||
|
(right.location.reference.alignment<sizeof(aint))) or
|
||||||
|
((left.location.reference.alignment<>0) and
|
||||||
|
(left.location.reference.alignment<sizeof(aint))) then
|
||||||
|
cg.g_concatcopy_unaligned(current_asmdata.CurrAsmList,right.location.reference,left.location.reference,len)
|
||||||
|
else
|
||||||
|
cg.g_concatcopy(current_asmdata.CurrAsmList,right.location.reference,left.location.reference,len);
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
LOC_MMREGISTER,
|
LOC_MMREGISTER,
|
||||||
LOC_CMMREGISTER:
|
LOC_CMMREGISTER:
|
||||||
cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,
|
begin
|
||||||
right.location.size,
|
{$ifdef x86}
|
||||||
left.location.size,
|
if not use_sse(right.resultdef) then
|
||||||
right.location.reference,
|
begin
|
||||||
left.location.register,mms_movescalar);
|
{ perform size conversion if needed (the mm-code cannot }
|
||||||
|
{ convert an extended into a double/single, since sse }
|
||||||
|
{ doesn't support extended) }
|
||||||
|
r:=cg.getfpuregister(current_asmdata.CurrAsmList,right.location.size);
|
||||||
|
tg.gettemp(current_asmdata.CurrAsmList,left.resultdef.size,tt_normal,href);
|
||||||
|
cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,right.location.size,right.location.size,right.location.reference,r);
|
||||||
|
cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,right.location.size,left.location.size,r,href);
|
||||||
|
if releaseright then
|
||||||
|
location_freetemp(current_asmdata.CurrAsmList,right.location);
|
||||||
|
releaseright:=true;
|
||||||
|
location_reset(right.location,LOC_REFERENCE,left.location.size);
|
||||||
|
right.location.reference:=href;
|
||||||
|
end;
|
||||||
|
{$endif}
|
||||||
|
cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,
|
||||||
|
right.location.size,
|
||||||
|
left.location.size,
|
||||||
|
right.location.reference,
|
||||||
|
left.location.register,mms_movescalar);
|
||||||
|
end;
|
||||||
LOC_SUBSETREG,
|
LOC_SUBSETREG,
|
||||||
LOC_CSUBSETREG:
|
LOC_CSUBSETREG:
|
||||||
cg.a_load_ref_subsetreg(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.reference,left.location.sreg);
|
cg.a_load_ref_subsetreg(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.reference,left.location.sreg);
|
||||||
@ -769,6 +799,17 @@ implementation
|
|||||||
{ we can't do direct moves between fpu and mm registers }
|
{ we can't do direct moves between fpu and mm registers }
|
||||||
if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
|
if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
|
||||||
begin
|
begin
|
||||||
|
{$ifdef x86}
|
||||||
|
if not use_sse(right.resultdef) then
|
||||||
|
begin
|
||||||
|
{ perform size conversion if needed (the mm-code cannot convert an }
|
||||||
|
{ extended into a double/single, since sse doesn't support extended) }
|
||||||
|
tg.gettemp(current_asmdata.CurrAsmList,left.resultdef.size,tt_normal,href);
|
||||||
|
cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,href);
|
||||||
|
location_reset(right.location,LOC_REFERENCE,left.location.size);
|
||||||
|
right.location.reference:=href;
|
||||||
|
end;
|
||||||
|
{$endif}
|
||||||
location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,false);
|
location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,false);
|
||||||
cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,
|
cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,
|
||||||
right.location.size,left.location.size,
|
right.location.size,left.location.size,
|
||||||
|
@ -72,6 +72,7 @@ interface
|
|||||||
function dogetcopy : tnode;override;
|
function dogetcopy : tnode;override;
|
||||||
function pass_1 : tnode;override;
|
function pass_1 : tnode;override;
|
||||||
function pass_typecheck:tnode;override;
|
function pass_typecheck:tnode;override;
|
||||||
|
function simplify : tnode;override;
|
||||||
{$ifdef state_tracking}
|
{$ifdef state_tracking}
|
||||||
function track_state_pass(exec_known:boolean):boolean;override;
|
function track_state_pass(exec_known:boolean):boolean;override;
|
||||||
{$endif state_tracking}
|
{$endif state_tracking}
|
||||||
@ -472,6 +473,22 @@ implementation
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function tassignmentnode.simplify : tnode;
  begin
    { This routine never replaces the assignment node itself; at most }
    { it wraps the right hand side in a type conversion.              }
    result:=nil;

    { Assignment nodes can carry out several floating point type      }
    { conversions on their own, so no type conversion node is         }
    { inserted for them. After inlining, however, a variable may have }
    { been replaced by a constant, and a constant can be converted at }
    { compile time. Only that case is handled here.                   }

    { nothing to do unless both sides are real and the right hand     }
    { side is a real constant                                         }
    if not (is_real(left.resultdef) and
            is_real(right.resultdef) and
            is_constrealnode(right)) then
      exit;

    { identical types need no conversion }
    if equal_defs(right.resultdef,left.resultdef) then
      exit;

    inserttypeconv(right,left.resultdef);
  end;
|
||||||
|
|
||||||
|
|
||||||
function tassignmentnode.pass_typecheck:tnode;
|
function tassignmentnode.pass_typecheck:tnode;
|
||||||
var
|
var
|
||||||
hp : tnode;
|
hp : tnode;
|
||||||
@ -553,6 +570,21 @@ implementation
|
|||||||
exit;
|
exit;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
{ floating point assignments can also perform the conversion directly }
|
||||||
|
else if is_real(left.resultdef) and is_real(right.resultdef) and
|
||||||
|
not is_constrealnode(right)
|
||||||
|
|
||||||
|
{$ifdef x86}
|
||||||
|
{ the assignment node code can't convert a double in an }
|
||||||
|
{ sse register to an extended value in memory more }
|
||||||
|
{ efficiently than a type conversion node, so don't }
|
||||||
|
{ bother implementing support for that }
|
||||||
|
and (use_sse(left.resultdef) or not(use_sse(right.resultdef)))
|
||||||
|
{$endif}
|
||||||
|
then
|
||||||
|
begin
|
||||||
|
check_ranges(fileinfo,right,left.resultdef);
|
||||||
|
end
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
{ check if the assignment may cause a range check error }
|
{ check if the assignment may cause a range check error }
|
||||||
|
Loading…
Reference in New Issue
Block a user