From ee7d5f7b2a2868722ec7b70d7a2f839a09dbe01e Mon Sep 17 00:00:00 2001 From: Jonas Maebe Date: Sat, 10 Aug 2002 17:15:31 +0000 Subject: [PATCH] * various fixes and optimizations --- compiler/powerpc/cgcpu.pas | 184 ++++++++++++++++++++++++++--------- compiler/powerpc/nppcadd.pas | 37 +++---- compiler/powerpc/nppccnv.pas | 12 ++- compiler/powerpc/nppcmat.pas | 23 +++-- compiler/powerpc/nppcmem.pas | 19 ++-- 5 files changed, 193 insertions(+), 82 deletions(-) diff --git a/compiler/powerpc/cgcpu.pas b/compiler/powerpc/cgcpu.pas index 9aec39f493..8ff2e2808b 100644 --- a/compiler/powerpc/cgcpu.pas +++ b/compiler/powerpc/cgcpu.pas @@ -96,7 +96,8 @@ unit cgcpu; procedure g_restore_standard_registers(list : taasmoutput);override; procedure g_save_all_registers(list : taasmoutput);override; procedure g_restore_all_registers(list : taasmoutput;selfused,accused,acchiused:boolean);override; - private + + private procedure a_jmp_cond(list : taasmoutput;cond : TOpCmp;l: tasmlabel); @@ -106,7 +107,15 @@ unit cgcpu; { Make sure ref is a valid reference for the PowerPC and sets the } { base to the value of the index if (base = R_NO). } - procedure fixref(list: taasmoutput; var ref: treference); + { Returns true if the reference contained a base, index and an } + { offset or symbol, in which case the base will have been changed } + { to a tempreg (which has to be freed by the caller) containing } + { the sum of part of the original reference } + function fixref(list: taasmoutput; var ref: treference): boolean; + + { returns whether a reference can be used immediately in a powerpc } + { instruction } + function issimpleref(const ref: treference): boolean; { contains the common code of a_load_reg_ref and a_load_ref_reg } procedure a_load_store(list:taasmoutput;op: tasmop;reg:tregister; @@ -153,7 +162,7 @@ const begin case locpara.loc of - LOC_REGISTER: + LOC_REGISTER,LOC_CREGISTER: a_load_const_reg(list,size,a,locpara.register); LOC_REFERENCE: begin @@ -178,7 +187,7 @@ const begin case locpara.loc of - LOC_REGISTER: + LOC_REGISTER,LOC_CREGISTER: a_load_ref_reg(list,size,r,locpara.register); LOC_REFERENCE: begin @@ -190,7 +199,7 @@ const a_load_reg_ref(list,size,tmpreg,ref); free_scratch_reg(list,tmpreg); end; - LOC_FPUREGISTER: + LOC_FPUREGISTER,LOC_CFPUREGISTER: case size of OS_32: a_loadfpu_ref_reg(list,OS_F32,r,locpara.register); @@ -215,7 +224,7 @@ const begin case locpara.loc of - LOC_REGISTER: + LOC_REGISTER,LOC_CREGISTER: a_loadaddr_ref_reg(list,r,locpara.register); LOC_REFERENCE: begin @@ -266,7 +275,8 @@ const else if ((a and $ffff) <> 0) then begin list.concat(taicpu.op_reg_const(A_LI,reg,smallint(a and $ffff))); - if ((a shr 16) <> 0) then + if ((a shr 16) <> 0) or + (smallint(a and $ffff) < 0) then list.concat(taicpu.op_reg_const(A_ADDIS,reg, smallint((a shr 16)+ord(smallint(a and $ffff) < 0)))) end @@ -286,10 +296,10 @@ const var op: TAsmOp; ref2: TReference; - + freereg: boolean; begin ref2 := ref; - FixRef(list,ref2); + freereg := fixref(list,ref2); if size in [OS_S8..OS_S16] then { storing is the same for signed and unsigned values } size := tcgsize(ord(size)-(ord(OS_S8)-ord(OS_8))); @@ -298,6 +308,8 @@ const internalerror(200109236); op := storeinstr[tcgsize2unsigned[size],ref2.index<>R_NO,false]; a_load_store(list,op,reg,ref2); + if freereg then + cg.free_scratch_reg(list,ref2.base); End; @@ -319,12 +331,15 @@ const op: tasmop; tmpreg: tregister; ref2, tmpref: treference; + freereg: boolean; begin ref2 := ref; - fixref(list,ref2); + freereg := fixref(list,ref2); op := loadinstr[size,ref2.index<>R_NO,false]; a_load_store(list,op,reg,ref2); + if freereg then + free_scratch_reg(list,ref2.base); { sign extend shortint if necessary, since there is no } { load instruction that does that automatically (JM) } if size = OS_S8 then @@ -366,7 +381,7 @@ const procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; reg1, reg2: tregister); begin - list.concat(taicpu.op_reg_reg(A_FMR,reg1,reg2)); + list.concat(taicpu.op_reg_reg(A_FMR,reg2,reg1)); end; procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref: treference; reg: tregister); @@ -379,6 +394,8 @@ const var op: tasmop; ref2: treference; + freereg: boolean; + begin { several functions call this procedure with OS_32 or OS_64 } { so this makes life easier (FK) } @@ -391,9 +408,11 @@ const internalerror(200201121); end; ref2 := ref; - fixref(list,ref2); + freereg := fixref(list,ref2); op := fpuloadinstr[size,ref2.index <> R_NO,false]; a_load_store(list,op,reg,ref2); + if freereg then + cg.free_scratch_reg(list,ref2.base); end; procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg: tregister; const ref: treference); @@ -406,13 +425,17 @@ const var op: tasmop; ref2: treference; + freereg: boolean; + begin if not(size in [OS_F32,OS_F64]) then internalerror(200201122); ref2 := ref; - fixref(list,ref2); + freereg := fixref(list,ref2); op := fpustoreinstr[size,ref2.index <> R_NO,false]; a_load_store(list,op,reg,ref2); + if freereg then + cg.free_scratch_reg(list,ref2.base); end; @@ -465,7 +488,7 @@ const ophi := TOpCG2AsmOpConstHi[op]; oplo := TOpCG2AsmOpConstLo[op]; gotrlwi := get_rlwi_const(a,l1,l2); - if (op in [OP_ADD,OP_AND,OP_OR,OP_XOR]) then + if (op in [OP_AND,OP_OR,OP_XOR]) then begin if (a = 0) then begin @@ -473,8 +496,7 @@ const list.concat(taicpu.op_reg_const(A_LI,dst,0)); exit; end - else if (a = high(aword)) and - (op in [OP_AND,OP_OR,OP_XOR]) then + else if (a = high(aword)) then begin case op of OP_OR: @@ -484,15 +506,11 @@ const end; exit; end - else if (longint(a) >= 0) and - (longint(a) <= high(word)) and + else if (a <= high(word)) and ((op <> OP_AND) or not gotrlwi) then begin - if (op = OP_ADD) then - list.concat(taicpu.op_reg_reg_const(oplo,dst,src,smallint(a))) - else - list.concat(taicpu.op_reg_reg_const(oplo,dst,src,word(a))); + list.concat(taicpu.op_reg_reg_const(oplo,dst,src,word(a))); exit; end; { all basic constant instructions also have a shifted form that } @@ -505,15 +523,58 @@ const list.concat(taicpu.op_reg_reg_const(ophi,dst,src,word(a shr 16))); exit; end; - end; + end + else if (op = OP_ADD) then + if a = 0 then + exit + else if (longint(a) >= low(smallint)) and + (longint(a) <= high(smallint)) then + begin + list.concat(taicpu.op_reg_reg_const(A_ADDI,dst,src,smallint(a))); + exit; + end; + { otherwise, the instructions we can generate depend on the } { operation } useReg := false; case op of OP_DIV,OP_IDIV: - useReg := true; + if (a = 0) then + internalerror(200208103) + else if (a = 1) then + begin + a_load_reg_reg(list,OS_INT,src,dst); + exit + end + else if ispowerof2(a,l1) then + begin + case op of + OP_DIV: + list.concat(taicpu.op_reg_reg_const(A_SRWI,dst,src,l1)); + OP_IDIV: + begin + list.concat(taicpu.op_reg_reg_const(A_SRAWI,dst,src,l1)); + list.concat(taicpu.op_reg_reg(A_ADDZE,dst,dst)); + end; + end; + exit; + end + else + usereg := true; OP_IMUL, OP_MUL: - if (longint(a) >= low(smallint)) and + if (a = 0) then + begin + list.concat(taicpu.op_reg_const(A_LI,dst,0)); + exit + end + else if (a = 1) then + begin + a_load_reg_reg(list,OS_INT,src,dst); + exit + end + else if ispowerof2(a,l1) then + list.concat(taicpu.op_reg_reg_const(A_SLWI,dst,src,l1)) + else if (longint(a) >= low(smallint)) and (longint(a) <= high(smallint)) then list.concat(taicpu.op_reg_reg_const(A_MULLI,dst,src,smallint(a))) else @@ -1090,10 +1151,11 @@ const var ref2, tmpref: treference; + freereg: boolean; begin ref2 := ref; - FixRef(list,ref2); + freereg := fixref(list,ref2); if assigned(ref2.symbol) then { add the symbol's value to the base of the reference, and if the } { reference doesn't have a base, create one } @@ -1103,8 +1165,15 @@ const tmpref.symbol := ref2.symbol; tmpref.symaddr := refs_ha; if ref2.base <> R_NO then - list.concat(taicpu.op_reg_reg_ref(A_ADDIS,r, - ref2.base,tmpref)) + begin + list.concat(taicpu.op_reg_reg_ref(A_ADDIS,r, + ref2.base,tmpref)); + if freereg then + begin + cg.free_scratch_reg(list,ref2.base); + freereg := false; + end; + end else list.concat(taicpu.op_reg_ref(A_LIS,r,tmpref)); tmpref.base := R_NO; @@ -1124,6 +1193,8 @@ const else if (ref2.base <> R_NO) and (r <> ref2.base) then list.concat(taicpu.op_reg_reg(A_MR,r,ref2.base)); + if freereg then + cg.free_scratch_reg(list,ref2.base); end; { ************* concatcopy ************ } @@ -1135,7 +1206,7 @@ const src, dst: TReference; lab: tasmlabel; count, count2: aword; - orgsrc, orgdst : boolean; + orgsrc, orgdst: boolean; begin {$ifdef extdebug} @@ -1166,11 +1237,6 @@ const exit; end; - { make sure source and dest are valid } - src := source; - fixref(list,src); - dst := dest; - fixref(list,dst); reference_reset(src); reference_reset(dst); { load the address of source into src.base } @@ -1180,8 +1246,9 @@ const a_load_ref_reg(list,OS_32,source,src.base); orgsrc := false; end - else if assigned(source.symbol) or - ((source.offset + longint(len)) > high(smallint)) then + else if not issimpleref(source) or + ((source.index <> R_NO) and + ((source.offset + longint(len)) > high(smallint))) then begin src.base := get_scratch_reg_address(list); a_loadaddr_ref_reg(list,source,src.base); @@ -1195,8 +1262,9 @@ const if not orgsrc and delsource then reference_release(exprasmlist,source); { load the address of dest into dst.base } - if assigned(dest.symbol) or - ((dest.offset + longint(len)) > high(smallint)) then + if not issimpleref(dest) or + ((dest.index <> R_NO) and + ((dest.offset + longint(len)) > high(smallint))) then begin dst.base := get_scratch_reg_address(list); a_loadaddr_ref_reg(list,dest,dst.base); @@ -1340,35 +1408,54 @@ const end; - procedure tcgppc.fixref(list: taasmoutput; var ref: treference); + function tcgppc.issimpleref(const ref: treference): boolean; + begin + if (ref.base = R_NO) and + (ref.index <> R_NO) then + internalerror(200208101); + result := + not(assigned(ref.symbol)) and + (((ref.index = R_NO) and + (ref.offset >= low(smallint)) and + (ref.offset <= high(smallint))) or + ((ref.index <> R_NO) and + (ref.offset = 0))); + end; + + function tcgppc.fixref(list: taasmoutput; var ref: treference): boolean; + + var + tmpreg: tregister; begin - If (ref.base <> R_NO) then + result := false; + if (ref.base <> R_NO) then begin if (ref.index <> R_NO) and ((ref.offset <> 0) or assigned(ref.symbol)) then begin + result := true; + tmpreg := cg.get_scratch_reg_int(list); if not assigned(ref.symbol) and (cardinal(ref.offset-low(smallint)) <= high(smallint)-low(smallint)) then begin list.concat(taicpu.op_reg_reg_const( - A_ADDI,ref.base,ref.base,ref.offset)); + A_ADDI,tmpreg,ref.base,ref.offset)); ref.offset := 0; end else begin list.concat(taicpu.op_reg_reg_reg( - A_ADD,ref.base,ref.base,ref.index)); + A_ADD,tmpreg,ref.base,ref.index)); ref.index := R_NO; end; + ref.base := tmpreg; end end else - begin - ref.base := ref.index; - ref.index := R_NO - end + if ref.index <> R_NO then + internalerror(200208102); end; @@ -1586,7 +1673,10 @@ begin end. { $Log$ - Revision 1.36 2002-08-06 20:55:23 florian + Revision 1.37 2002-08-10 17:15:31 jonas + * various fixes and optimizations + + Revision 1.36 2002/08/06 20:55:23 florian * first part of ppc calling conventions fix Revision 1.35 2002/08/06 07:12:05 jonas diff --git a/compiler/powerpc/nppcadd.pas b/compiler/powerpc/nppcadd.pas index 22ba62afab..cfa913d572 100644 --- a/compiler/powerpc/nppcadd.pas +++ b/compiler/powerpc/nppcadd.pas @@ -198,24 +198,24 @@ interface if (right.location.loc = LOC_CONSTANT) then begin {$ifdef extdebug} - if (qword(right.location.value) > high(cardinal)) then + if (qword(right.location.valuehigh) <> 0) then internalerror(2002080301); {$endif extdebug} if (nodetype in [equaln,unequaln]) then if (unsigned and (right.location.value > high(word))) or (not unsigned and - (right.location.value < low(smallint)) or - (right.location.value > high(smallint))) then + (longint(right.location.value) < low(smallint)) or + (longint(right.location.value) > high(smallint))) then // we can then maybe use a constant in the 'othersigned' case // (the sign doesn't matter for // equal/unequal) unsigned := not unsigned; if (unsigned and - (qword(right.location.value) <= high(word))) or + (right.location.value) <= high(word)) or (not(unsigned) and - (right.location.value >= low(smallint)) and - (right.location.value <= high(smallint))) then + (longint(right.location.value) >= low(smallint)) and + (longint(right.location.value) <= high(smallint))) then useconst := true else begin @@ -243,7 +243,7 @@ interface if (right.location.loc = LOC_CONSTANT) then if useconst then exprasmlist.concat(taicpu.op_reg_const(op, - left.location.register,right.location.value)) + left.location.register,longint(right.location.value))) else begin exprasmlist.concat(taicpu.op_reg_reg(op, @@ -348,7 +348,7 @@ interface left.location.register,right.location.register)) else exprasmlist.concat(taicpu.op_reg_const(A_CMPLWI, - left.location.register,right.location.value)); + left.location.register,longint(right.location.value))); location.resflags := getresflags; end; else @@ -693,11 +693,11 @@ interface location_copy(oldleft,left.location); location_copy(oldright,right.location); if left.location.loc = LOC_CONSTANT then - left.location.value := left.location.value shr 32 + left.location.valueqword := left.location.valueqword shr 32 else left.location.registerlow := left.location.registerhigh; if right.location.loc = LOC_CONSTANT then - right.location.value := right.location.value shr 32 + right.location.valueqword := right.location.valueqword shr 32 else right.location.registerlow := right.location.registerhigh; @@ -848,8 +848,8 @@ interface swapleftright; if left.location.loc = LOC_CONSTANT then if not(cs_check_overflow in aktlocalswitches) and - (left.location.value >= low(smallint)) and - (left.location.value <= high(smallint)) then + (longint(left.location.value) >= low(smallint)) and + (longint(left.location.value) <= high(smallint)) then begin // optimize exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBFIC, @@ -887,7 +887,7 @@ interface if left.location.loc = LOC_CONSTANT then swapleftright; if (right.location.loc = LOC_CONSTANT) then - cg64.a_op64_const_reg_reg(exprasmlist,op,qword(right.location.value), + cg64.a_op64_const_reg_reg(exprasmlist,op,right.location.valueqword, left.location.register64,location.register64) else cg64.a_op64_reg_reg_reg(exprasmlist,op,right.location.register64, @@ -1281,11 +1281,11 @@ interface begin case nodetype of addn: - op := A_ADDO; + op := A_ADDO_; subn: - op := A_SUBO; + op := A_SUBO_; muln: - op := A_MULLWO; + op := A_MULLWO_; else internalerror(2002072601); end; @@ -1302,7 +1302,10 @@ begin end. { $Log$ - Revision 1.6 2002-08-06 20:55:24 florian + Revision 1.7 2002-08-10 17:15:31 jonas + * various fixes and optimizations + + Revision 1.6 2002/08/06 20:55:24 florian * first part of ppc calling conventions fix Revision 1.5 2002/08/05 08:58:54 jonas diff --git a/compiler/powerpc/nppccnv.pas b/compiler/powerpc/nppccnv.pas index 65173ac512..9250e1a351 100644 --- a/compiler/powerpc/nppccnv.pas +++ b/compiler/powerpc/nppccnv.pas @@ -121,7 +121,8 @@ implementation size := resulttype.def.size; leftsize := left.resulttype.def.size; if (size < leftsize) or - ((left.location.loc <> LOC_REGISTER) and + (((newsize in [OS_64,OS_S64]) or + (left.location.loc <> LOC_REGISTER)) and (size > leftsize)) then begin { reuse the left location by default } @@ -254,8 +255,8 @@ implementation cg.free_scratch_reg(exprasmlist,valuereg); tmpfpureg := rg.getregisterfpu(exprasmlist); - exprasmlist.concat(taicpu.op_reg_ref(A_LFD,tmpfpureg, - tempconst.location.reference)); + a_loadfpu_ref_reg(exprasmlist,OS_F64,tempconst.location.reference, + tmpfpureg); tempconst.free; location.register := rg.getregisterfpu(exprasmlist); @@ -417,7 +418,10 @@ begin end. { $Log$ - Revision 1.19 2002-07-29 21:23:44 florian + Revision 1.20 2002-08-10 17:15:31 jonas + * various fixes and optimizations + + Revision 1.19 2002/07/29 21:23:44 florian * more fixes for the ppc + wrappers for the tcnvnode.first_* stuff introduced diff --git a/compiler/powerpc/nppcmat.pas b/compiler/powerpc/nppcmat.pas index e1a35f13db..a5fcf09a1e 100644 --- a/compiler/powerpc/nppcmat.pas +++ b/compiler/powerpc/nppcmat.pas @@ -371,7 +371,6 @@ implementation end; LOC_REFERENCE,LOC_CREFERENCE: begin - reference_release(exprasmlist,left.location.reference); if (left.resulttype.def.deftype=floatdef) then begin src1 := rg.getregisterfpu(exprasmlist); @@ -387,16 +386,23 @@ implementation cg.a_load_ref_reg(exprasmlist,OS_32, left.location.reference,src1); end; + reference_release(exprasmlist,left.location.reference); end; end; { choose appropriate operand } if left.resulttype.def.deftype <> floatdef then - if not(cs_check_overflow in aktlocalswitches) then - op := A_NEG - else - op := A_NEGO_ + begin + if not(cs_check_overflow in aktlocalswitches) then + op := A_NEG + else + op := A_NEGO_; + location.loc := LOC_REGISTER; + end else - op := A_FNEG; + begin + op := A_FNEG; + location.loc := LOC_FPUREGISTER; + end; { emit operation } exprasmlist.concat(taicpu.op_reg_reg(op,location.register,src1)); end; @@ -489,7 +495,10 @@ begin end. { $Log$ - Revision 1.15 2002-07-26 10:48:34 jonas + Revision 1.16 2002-08-10 17:15:31 jonas + * various fixes and optimizations + + Revision 1.15 2002/07/26 10:48:34 jonas * fixed bug in shl/shr code Revision 1.14 2002/07/20 11:58:05 florian diff --git a/compiler/powerpc/nppcmem.pas b/compiler/powerpc/nppcmem.pas index adca783a5d..be6508a171 100644 --- a/compiler/powerpc/nppcmem.pas +++ b/compiler/powerpc/nppcmem.pas @@ -397,18 +397,20 @@ implementation end; end; - if location.reference.index=R_NO then + if location.reference.base=R_NO then begin - location.reference.index:=right.location.register; + location.reference.base:=right.location.register; cg.a_op_const_reg(exprasmlist,OP_IMUL,get_mul_size, right.location.register); end else begin - if location.reference.base=R_NO then - { this wouldn't make sense for the ppc since there are } - { no scalefactors (JM) } - internalerror(2002072901) + if location.reference.index=R_NO then + begin + location.reference.index:=right.location.register; + cg.a_op_const_reg(exprasmlist,OP_IMUL,get_mul_size, + right.location.register); + end else begin cg.a_loadaddr_ref_reg(exprasmlist,location.reference, @@ -436,7 +438,10 @@ begin end. { $Log$ - Revision 1.1 2002-07-29 09:21:30 jonas + Revision 1.2 2002-08-10 17:15:31 jonas + * various fixes and optimizations + + Revision 1.1 2002/07/29 09:21:30 jonas + tppcvecnode, almost straight copy of the i386 code, can most likely be made generic if all treference type allow a base, index and offset