From fa695eefeaa86df40bbec8acbdfc994977e09787 Mon Sep 17 00:00:00 2001 From: olle Date: Tue, 1 Oct 2002 05:24:28 +0000 Subject: [PATCH] * made a_load_store more robust and to accept large offsets and cleaned up code --- compiler/powerpc/cgcpu.pas | 1943 +++++++++++++++++++++++++++++------- 1 file changed, 1557 insertions(+), 386 deletions(-) diff --git a/compiler/powerpc/cgcpu.pas b/compiler/powerpc/cgcpu.pas index 95a53aa585..8f678411ee 100644 --- a/compiler/powerpc/cgcpu.pas +++ b/compiler/powerpc/cgcpu.pas @@ -27,6 +27,7 @@ unit cgcpu; interface uses + symtype, cgbase,cgobj, aasmbase,aasmcpu,aasmtai, cpubase,cpuinfo,node,cg64f32,cginfo; @@ -47,7 +48,7 @@ unit cgcpu; procedure a_call_reg(list : taasmoutput;reg: tregister); override; procedure a_call_ref(list : taasmoutput;const ref : treference);override; - procedure a_op_const_reg(list : taasmoutput; Op: TOpCG; a: AWord; reg: TRegister); override; + procedure a_op_const_reg(list : taasmoutput; Op: TOpCG; size: TCGSize; a: AWord; reg: TRegister); override; procedure a_op_reg_reg(list : taasmoutput; Op: TOpCG; size: TCGSize; src, dst: TRegister); override; procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg; @@ -57,12 +58,12 @@ unit cgcpu; { move instructions } procedure a_load_const_reg(list : taasmoutput; size: tcgsize; a : aword;reg : tregister);override; - procedure a_load_reg_ref(list : taasmoutput; size: tcgsize; reg : tregister;const ref : treference);override; - procedure a_load_ref_reg(list : taasmoutput;size : tcgsize;const Ref : treference;reg : tregister);override; - procedure a_load_reg_reg(list : taasmoutput;fromsize, tosize : tcgsize;reg1,reg2 : tregister);override; + procedure a_load_reg_ref(list : taasmoutput; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override; + procedure a_load_ref_reg(list : taasmoutput; fromsize, tosize : tcgsize;const Ref : treference;reg : tregister);override; + procedure a_load_reg_reg(list : taasmoutput; fromsize, tosize : tcgsize;reg1,reg2 : 
tregister);override; { fpu move instructions } - procedure a_loadfpu_reg_reg(list: taasmoutput; reg1, reg2: tregister); override; + procedure a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2: tregister); override; procedure a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref: treference; reg: tregister); override; procedure a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg: tregister; const ref: treference); override; @@ -76,33 +77,34 @@ unit cgcpu; procedure g_flags2reg(list: taasmoutput; size: TCgSize; const f: TResFlags; reg: TRegister); override; - - procedure g_stackframe_entry_sysv(list : taasmoutput;localsize : longint); - procedure g_stackframe_entry_mac(list : taasmoutput;localsize : longint); + procedure g_copyvaluepara_openarray(list : taasmoutput;const ref, lenref:treference;elesize:integer);override; procedure g_stackframe_entry(list : taasmoutput;localsize : longint);override; - procedure g_restore_frame_pointer(list : taasmoutput);override; procedure g_return_from_proc(list : taasmoutput;parasize : aword); override; + procedure g_restore_frame_pointer(list : taasmoutput);override; procedure a_loadaddr_ref_reg(list : taasmoutput;const ref : treference;r : tregister);override; procedure g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean);override; - procedure g_overflowcheck(list: taasmoutput; const p: tnode); override; + procedure g_overflowcheck(list: taasmoutput; const l: tlocation; def: tdef); override; { find out whether a is of the form 11..00..11b or 00..11...00. 
If } { that's the case, we can use rlwinm to do an AND operation } function get_rlwi_const(a: aword; var l1, l2: longint): boolean; - procedure g_save_standard_registers(list : taasmoutput; usedinproc : tregisterset);override; - procedure g_restore_standard_registers(list : taasmoutput; usedinproc : tregisterset);override; + procedure g_save_standard_registers(list : taasmoutput; usedinproc : Tsupregset);override; + procedure g_restore_standard_registers(list : taasmoutput; usedinproc : Tsupregset);override; procedure g_save_all_registers(list : taasmoutput);override; - procedure g_restore_all_registers(list : taasmoutput;selfused,accused,acchiused:boolean);override; + procedure g_restore_all_registers(list : taasmoutput;accused,acchiused:boolean);override; procedure a_jmp_cond(list : taasmoutput;cond : TOpCmp;l: tasmlabel); private - + procedure g_stackframe_entry_sysv(list : taasmoutput;localsize : longint); procedure g_return_from_proc_sysv(list : taasmoutput;parasize : aword); + procedure g_stackframe_entry_aix(list : taasmoutput;localsize : longint); + procedure g_return_from_proc_aix(list : taasmoutput;parasize : aword); + procedure g_stackframe_entry_mac(list : taasmoutput;localsize : longint); procedure g_return_from_proc_mac(list : taasmoutput;parasize : aword); @@ -151,16 +153,14 @@ const implementation uses - globtype,globals,verbose,systems,cutils,symconst,symdef,rgobj,tgobj,cpupi; + globtype,globals,verbose,systems,cutils,symconst,symdef,symsym,rgobj,tgobj,cpupi; -{ parameter passing... Still needs extra support from the processor } -{ independent code generator } + { parameter passing... 
Still needs extra support from the processor } + { independent code generator } procedure tcgppc.a_param_const(list : taasmoutput;size : tcgsize;a : aword;const locpara : tparalocation); - var ref: treference; - begin case locpara.loc of LOC_REGISTER,LOC_CREGISTER: @@ -189,23 +189,29 @@ const begin case locpara.loc of LOC_REGISTER,LOC_CREGISTER: - a_load_ref_reg(list,size,r,locpara.register); + a_load_ref_reg(list,size,size,r,locpara.register); LOC_REFERENCE: begin reference_reset(ref); ref.base:=locpara.reference.index; ref.offset:=locpara.reference.offset; - tmpreg := get_scratch_reg_int(list); - a_load_ref_reg(list,size,r,tmpreg); - a_load_reg_ref(list,size,tmpreg,ref); +{$ifndef newra} + tmpreg := get_scratch_reg_int(list,size); +{$else newra} + tmpreg := rg.getregisterint(list,size); +{$endif newra} + a_load_ref_reg(list,size,size,r,tmpreg); + a_load_reg_ref(list,size,size,tmpreg,ref); +{$ifndef newra} free_scratch_reg(list,tmpreg); +{$else newra} + rg.ungetregisterint(list,tmpreg); +{$endif newra} end; LOC_FPUREGISTER,LOC_CFPUREGISTER: case size of - OS_32: - a_loadfpu_ref_reg(list,OS_F32,r,locpara.register); - OS_64: - a_loadfpu_ref_reg(list,OS_F64,r,locpara.register); + OS_F32, OS_F64: + a_loadfpu_ref_reg(list,size,r,locpara.register); else internalerror(2002072801); end; @@ -218,7 +224,6 @@ const procedure tcgppc.a_paramaddr_ref(list : taasmoutput;const r : treference;const locpara : tparalocation); - var ref: treference; tmpreg: tregister; @@ -232,10 +237,18 @@ const reference_reset(ref); ref.base := locpara.reference.index; ref.offset := locpara.reference.offset; +{$ifndef newra} tmpreg := get_scratch_reg_address(list); +{$else newra} + tmpreg := rg.getregisterint(list,OS_ADDR); +{$endif newra} a_loadaddr_ref_reg(list,r,tmpreg); - a_load_reg_ref(list,OS_ADDR,tmpreg,ref); + a_load_reg_ref(list,OS_ADDR,OS_ADDR,tmpreg,ref); +{$ifndef newra} free_scratch_reg(list,tmpreg); +{$else newra} + rg.ungetregisterint(list,tmpreg); +{$endif newra} end; else 
internalerror(2002080701); @@ -243,65 +256,101 @@ const end; - { calling a code fragment by name } + { calling a procedure by name } procedure tcgppc.a_call_name(list : taasmoutput;const s : string); var href : treference; begin - if target_info.system=system_powerpc_macos then - begin - { save our RTOC register value. Only necessary when doing pointer based } - { calls or cross TOC calls, but currently done always } - reference_reset_base(href,STACK_POINTER_REG,LA_RTOC); - list.concat(taicpu.op_reg_ref(A_STW,R_TOC,href)); - end; + { MacOS: The linker on MacOS (PPCLink) inserts a call to glue code, + if it is a cross-TOC call. If so, it also replaces the NOP + with some restore code.} list.concat(taicpu.op_sym(A_BL,objectlibrary.newasmsymbol(s))); if target_info.system=system_powerpc_macos then - list.concat(taicpu.op_reg_ref(A_LWZ,R_TOC,href)); - procinfo.flags:=procinfo.flags or pi_do_call; + list.concat(taicpu.op_none(A_NOP)); + if not(pi_do_call in current_procinfo.flags) then + internalerror(2003060703); end; - + { calling a procedure by address } procedure tcgppc.a_call_reg(list : taasmoutput;reg: tregister); - var - href : treference; - begin - list.concat(taicpu.op_reg(A_MTCTR,reg)); - if target_info.system=system_powerpc_macos then - begin - { save our RTOC register value. 
Only necessary when doing pointer based } - { calls or cross TOC calls, but currently done always } - reference_reset_base(href,STACK_POINTER_REG,LA_RTOC); - list.concat(taicpu.op_reg_ref(A_STW,R_TOC,href)); - end; - list.concat(taicpu.op_none(A_BCCTRL)); - if target_info.system=system_powerpc_macos then - list.concat(taicpu.op_reg_ref(A_LWZ,R_TOC,href)); - procinfo.flags:=procinfo.flags or pi_do_call; - end; - - { calling a code fragment through a reference } - procedure tcgppc.a_call_ref(list : taasmoutput;const ref : treference); var - href : treference; tmpreg : tregister; + tmpref : treference; + begin if target_info.system=system_powerpc_macos then begin - { save our RTOC register value. Only necessary when doing pointer based } - { calls or cross TOC calls, but currently done always } - reference_reset_base(href,STACK_POINTER_REG,LA_RTOC); - list.concat(taicpu.op_reg_ref(A_STW,R_TOC,href)); - end; - tmpreg := get_scratch_reg_int(list); - a_load_ref_reg(list,OS_ADDR,ref,tmpreg); - list.concat(taicpu.op_reg(A_MTCTR,tmpreg)); - free_scratch_reg(list,tmpreg); - list.concat(taicpu.op_none(A_BCCTRL)); + {Generate instruction to load the procedure address from + the transition vector.} + //TODO: Support cross-TOC calls. +{$ifndef newra} + tmpreg := get_scratch_reg_int(list,OS_INT); +{$else newra} + tmpreg := rg.getregisterint(list,OS_INT); +{$endif newra} + reference_reset(tmpref); + tmpref.offset := 0; + //tmpref.symaddr := refs_full; + tmpref.base:= reg; + list.concat(taicpu.op_reg_ref(A_LWZ,tmpreg,tmpref)); + list.concat(taicpu.op_reg(A_MTCTR,tmpreg)); +{$ifndef newra} + free_scratch_reg(list,tmpreg); +{$else newra} + rg.ungetregisterint(list,tmpreg); +{$endif newra} + end + else + list.concat(taicpu.op_reg(A_MTCTR,reg)); + list.concat(taicpu.op_none(A_BCTRL)); + //if target_info.system=system_powerpc_macos then + // //NOP is not needed here. 
+ // list.concat(taicpu.op_none(A_NOP)); + if not(pi_do_call in current_procinfo.flags) then + internalerror(2003060704); + //list.concat(tai_comment.create(strpnew('***** a_call_reg'))); + end; + + + { calling a procedure by address } + procedure tcgppc.a_call_ref(list : taasmoutput;const ref : treference); + + var + tmpreg : tregister; + tmpref : treference; + + begin +{$ifndef newra} + tmpreg := get_scratch_reg_int(list,OS_ADDR); +{$else newra} + tmpreg := rg.getregisterint(list,OS_ADDR); +{$endif newra} + a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,tmpreg); if target_info.system=system_powerpc_macos then - list.concat(taicpu.op_reg_ref(A_LWZ,R_TOC,href)); - procinfo.flags:=procinfo.flags or pi_do_call; + begin + {Generate instruction to load the procedure address from + the transition vector.} + //TODO: Support cross-TOC calls. + reference_reset(tmpref); + tmpref.offset := 0; + //tmpref.symaddr := refs_full; + tmpref.base:= tmpreg; + list.concat(taicpu.op_reg_ref(A_LWZ,tmpreg,tmpref)); + end; + list.concat(taicpu.op_reg(A_MTCTR,tmpreg)); +{$ifndef newra} + free_scratch_reg(list,tmpreg); +{$else newra} + rg.ungetregisterint(list,tmpreg); +{$endif newra} + list.concat(taicpu.op_none(A_BCTRL)); + //if target_info.system=system_powerpc_macos then + // //NOP is not needed here. 
+ // list.concat(taicpu.op_none(A_NOP)); + if not(pi_do_call in current_procinfo.flags) then + internalerror(2003060705); + //list.concat(tai_comment.create(strpnew('***** a_call_ref'))); end; {********************** load instructions ********************} @@ -327,7 +376,7 @@ const end; - procedure tcgppc.a_load_reg_ref(list : taasmoutput; size: TCGSize; reg : tregister;const ref : treference); + procedure tcgppc.a_load_reg_ref(list : taasmoutput; fromsize, tosize: TCGSize; reg : tregister;const ref : treference); const StoreInstr: Array[OS_8..OS_32,boolean, boolean] of TAsmOp = @@ -342,20 +391,24 @@ const begin ref2 := ref; freereg := fixref(list,ref2); - if size in [OS_S8..OS_S16] then + if tosize in [OS_S8..OS_S16] then { storing is the same for signed and unsigned values } - size := tcgsize(ord(size)-(ord(OS_S8)-ord(OS_8))); + tosize := tcgsize(ord(tosize)-(ord(OS_S8)-ord(OS_8))); { 64 bit stuff should be handled separately } - if size in [OS_64,OS_S64] then + if tosize in [OS_64,OS_S64] then internalerror(200109236); - op := storeinstr[tcgsize2unsigned[size],ref2.index<>R_NO,false]; + op := storeinstr[tcgsize2unsigned[tosize],ref2.index.number<>NR_NO,false]; a_load_store(list,op,reg,ref2); if freereg then +{$ifndef newra} cg.free_scratch_reg(list,ref2.base); +{$else newra} + rg.ungetregisterint(list,ref2.base); +{$endif newra} End; - procedure tcgppc.a_load_ref_reg(list : taasmoutput;size : tcgsize;const ref: treference;reg : tregister); + procedure tcgppc.a_load_ref_reg(list : taasmoutput; fromsize,tosize : tcgsize;const ref: treference;reg : tregister); const LoadInstr: Array[OS_8..OS_S32,boolean, boolean] of TAsmOp = @@ -376,17 +429,23 @@ const freereg: boolean; begin - if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then + { TODO: optimize/take into consideration fromsize/tosize. 
Will } + { probably only matter for OS_S8 loads though } + if not(fromsize in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then internalerror(2002090902); ref2 := ref; freereg := fixref(list,ref2); - op := loadinstr[size,ref2.index<>R_NO,false]; + op := loadinstr[fromsize,ref2.index.number<>NR_NO,false]; a_load_store(list,op,reg,ref2); if freereg then +{$ifndef newra} free_scratch_reg(list,ref2.base); +{$else newra} + rg.ungetregisterint(list,ref2.base); +{$endif newra} { sign extend shortint if necessary, since there is no } { load instruction that does that automatically (JM) } - if size = OS_S8 then + if fromsize = OS_S8 then list.concat(taicpu.op_reg_reg(A_EXTSB,reg,reg)); end; @@ -394,13 +453,17 @@ const procedure tcgppc.a_load_reg_reg(list : taasmoutput;fromsize, tosize : tcgsize;reg1,reg2 : tregister); begin - if (reg1 <> reg2) or + if (reg1.enum<>R_INTREGISTER) or (reg1.number = 0) then + internalerror(200303101); + if (reg2.enum<>R_INTREGISTER) or (reg2.number = 0) then + internalerror(200303102); + if (reg1.number<>reg2.number) or (tcgsize2size[tosize] < tcgsize2size[fromsize]) or ((tcgsize2size[tosize] = tcgsize2size[fromsize]) and (tosize <> fromsize) and not(fromsize in [OS_32,OS_S32])) then begin - case fromsize of + case tosize of OS_8: list.concat(taicpu.op_reg_reg_const_const_const(A_RLWINM, reg2,reg1,0,31-8+1,31)); @@ -419,7 +482,7 @@ const end; - procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; reg1, reg2: tregister); + procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2: tregister); begin list.concat(taicpu.op_reg_reg(A_FMR,reg2,reg1)); @@ -444,17 +507,21 @@ const case size of OS_32,OS_F32: size:=OS_F32; - OS_64,OS_F64: + OS_64,OS_F64,OS_C64: size:=OS_F64; else internalerror(200201121); end; ref2 := ref; freereg := fixref(list,ref2); - op := fpuloadinstr[size,ref2.index <> R_NO,false]; + op := fpuloadinstr[size,ref2.index.number <> NR_NO,false]; a_load_store(list,op,reg,ref2); if freereg then +{$ifndef newra} 
cg.free_scratch_reg(list,ref2.base); +{$else newra} + rg.ungetregisterint(list,ref2.base); +{$endif newra} end; @@ -475,17 +542,18 @@ const internalerror(200201122); ref2 := ref; freereg := fixref(list,ref2); - op := fpustoreinstr[size,ref2.index <> R_NO,false]; + op := fpustoreinstr[size,ref2.index.number <> NR_NO,false]; a_load_store(list,op,reg,ref2); if freereg then +{$ifndef newra} cg.free_scratch_reg(list,ref2.base); +{$else newra} + rg.ungetregisterint(list,ref2.base); +{$endif newra} end; - procedure tcgppc.a_op_const_reg(list : taasmoutput; Op: TOpCG; a: AWord; reg: TRegister); - - var - scratch_register: TRegister; + procedure tcgppc.a_op_const_reg(list : taasmoutput; Op: TOpCG; size: TCGSize; a: AWord; reg: TRegister); begin a_op_const_reg_reg(list,op,OS_32,a,reg,reg); @@ -515,13 +583,15 @@ const end; begin + if src.enum<>R_INTREGISTER then + internalerror(200303102); if op = OP_SUB then begin {$ifopt q+} {$q-} {$define overflowon} {$endif} - a_op_const_reg_reg(list,OP_ADD,size,aword(-a),src,dst); + a_op_const_reg_reg(list,OP_ADD,size,aword(-longint(a)),src,dst); {$ifdef overflowon} {$q+} {$undef overflowon} @@ -536,7 +606,9 @@ const if (a = 0) then begin if op = OP_AND then - list.concat(taicpu.op_reg_const(A_LI,dst,0)); + list.concat(taicpu.op_reg_const(A_LI,dst,0)) + else + a_load_reg_reg(list,size,size,src,dst); exit; end else if (a = high(aword)) then @@ -546,6 +618,8 @@ const list.concat(taicpu.op_reg_const(A_LI,dst,-1)); OP_XOR: list.concat(taicpu.op_reg_reg(A_NOT,dst,src)); + OP_AND: + a_load_reg_reg(list,size,size,src,dst); end; exit; end @@ -631,13 +705,21 @@ const OP_OR: { try to use rlwimi } if gotrlwi and - (src = dst) then + (src.number = dst.number) then begin - scratchreg := get_scratch_reg_int(list); +{$ifndef newra} + scratchreg := get_scratch_reg_int(list,OS_INT); +{$else newra} + scratchreg := rg.getregisterint(list,OS_INT); +{$endif newra} list.concat(taicpu.op_reg_const(A_LI,scratchreg,-1)); 
list.concat(taicpu.op_reg_reg_const_const_const(A_RLWIMI,dst, scratchreg,0,l1,l2)); +{$ifndef newra} free_scratch_reg(list,scratchreg); +{$else newra} + rg.ungetregisterint(list,scratchreg); +{$endif newra} end else do_lo_hi; @@ -654,7 +736,9 @@ const begin if (a and 31) <> 0 Then list.concat(taicpu.op_reg_reg_const( - TOpCG2AsmOpConstLo[Op],dst,src,a and 31)); + TOpCG2AsmOpConstLo[Op],dst,src,a and 31)) + else + a_load_reg_reg(list,size,size,src,dst); if (a shr 5) <> 0 then internalError(68991); end @@ -665,10 +749,18 @@ const { perform the operation } if useReg then begin - scratchreg := get_scratch_reg_int(list); +{$ifndef newra} + scratchreg := get_scratch_reg_int(list,OS_INT); +{$else newra} + scratchreg := rg.getregisterint(list,OS_INT); +{$endif newra} a_load_const_reg(list,OS_32,a,scratchreg); a_op_reg_reg_reg(list,op,OS_32,scratchreg,src,dst); +{$ifndef newra} free_scratch_reg(list,scratchreg); +{$else newra} + rg.ungetregisterint(list,scratchreg); +{$endif newra} end; end; @@ -700,6 +792,7 @@ const p: taicpu; scratch_register: TRegister; signed: boolean; + r:Tregister; begin signed := cmp_op in [OC_GT,OC_LT,OC_GTE,OC_LTE]; @@ -708,25 +801,42 @@ const if (cmp_op in [OC_EQ,OC_NE]) and (a > $ffff) then signed := true; + r.enum:=R_CR0; if signed then if (longint(a) >= low(smallint)) and (longint(a) <= high(smallint)) Then - list.concat(taicpu.op_reg_reg_const(A_CMPWI,R_CR0,reg,longint(a))) + list.concat(taicpu.op_reg_reg_const(A_CMPWI,r,reg,longint(a))) else begin - scratch_register := get_scratch_reg_int(list); +{$ifndef newra} + scratch_register := get_scratch_reg_int(list,OS_INT); +{$else newra} + scratch_register := rg.getregisterint(list,OS_INT); +{$endif newra} a_load_const_reg(list,OS_32,a,scratch_register); - list.concat(taicpu.op_reg_reg_reg(A_CMPW,R_CR0,reg,scratch_register)); + list.concat(taicpu.op_reg_reg_reg(A_CMPW,r,reg,scratch_register)); +{$ifndef newra} free_scratch_reg(list,scratch_register); +{$else newra} + 
rg.ungetregisterint(list,scratch_register); +{$endif newra} end else if (a <= $ffff) then - list.concat(taicpu.op_reg_reg_const(A_CMPLWI,R_CR0,reg,a)) + list.concat(taicpu.op_reg_reg_const(A_CMPLWI,r,reg,a)) else begin - scratch_register := get_scratch_reg_int(list); +{$ifndef newra} + scratch_register := get_scratch_reg_int(list,OS_32); +{$else newra} + scratch_register := rg.getregisterint(list,OS_INT); +{$endif newra} a_load_const_reg(list,OS_32,a,scratch_register); - list.concat(taicpu.op_reg_reg_reg(A_CMPLW,R_CR0,reg,scratch_register)); + list.concat(taicpu.op_reg_reg_reg(A_CMPLW,r,reg,scratch_register)); +{$ifndef newra} free_scratch_reg(list,scratch_register); +{$else newra} + rg.ungetregisterint(list,scratch_register); +{$endif newra} end; a_jmp(list,A_BC,TOpCmp2AsmCond[cmp_op],0,l); end; @@ -738,22 +848,24 @@ const var p: taicpu; op: tasmop; + r:Tregister; begin if cmp_op in [OC_GT,OC_LT,OC_GTE,OC_LTE] then op := A_CMPW else op := A_CMPLW; - list.concat(taicpu.op_reg_reg_reg(op,R_CR0,reg1,reg2)); + r.enum:=R_CR0; + list.concat(taicpu.op_reg_reg_reg(op,r,reg2,reg1)); a_jmp(list,A_BC,TOpCmp2AsmCond[cmp_op],0,l); end; - procedure tcgppc.g_save_standard_registers(list : taasmoutput; usedinproc : tregisterset); + procedure tcgppc.g_save_standard_registers(list : taasmoutput; usedinproc : Tsupregset); begin {$warning FIX ME} end; - procedure tcgppc.g_restore_standard_registers(list : taasmoutput; usedinproc : tregisterset); + procedure tcgppc.g_restore_standard_registers(list : taasmoutput; usedinproc : Tsupregset); begin {$warning FIX ME} end; @@ -763,7 +875,7 @@ const {$warning FIX ME} end; - procedure tcgppc.g_restore_all_registers(list : taasmoutput;selfused,accused,acchiused:boolean); + procedure tcgppc.g_restore_all_registers(list : taasmoutput;accused,acchiused:boolean); begin {$warning FIX ME} end; @@ -784,9 +896,11 @@ const var c: tasmcond; + r:Tregister; begin c := flags_to_cond(f); - a_jmp(list,A_BC,c.cond,ord(c.cr)-ord(R_CR0),l); + r.enum:=R_CR0; + 
a_jmp(list,A_BC,c.cond,ord(c.cr)-ord(r.enum),l); end; procedure tcgppc.g_flags2reg(list: taasmoutput; size: TCgSize; const f: TResFlags; reg: TRegister); @@ -801,15 +915,17 @@ const testbit := ((ord(f.cr)-ord(R_CR0)) * 4); case f.flag of F_EQ,F_NE: - bitvalue := f.flag = F_EQ; + begin + inc(testbit,2); + bitvalue := f.flag = F_EQ; + end; F_LT,F_GE: begin - inc(testbit); bitvalue := f.flag = F_LT; end; F_GT,F_LE: begin - inc(testbit,2); + inc(testbit); bitvalue := f.flag = F_GT; end; else @@ -819,7 +935,7 @@ const list.concat(taicpu.op_reg(A_MFCR,reg)); { we will move the bit that has to be tested to bit 0 by rotating } { left } - testbit := (32 - testbit) and 31; + testbit := (testbit + 1) and 31; { extract bit } list.concat(taicpu.op_reg_reg_const_const_const( A_RLWINM,reg,reg,testbit,31,31)); @@ -885,27 +1001,53 @@ const procedure tcgppc.g_stackframe_entry(list : taasmoutput;localsize : longint); begin - case target_info.system of - system_powerpc_macos: + case target_info.abi of + abi_powerpc_macos: g_stackframe_entry_mac(list,localsize); - system_powerpc_linux: - g_stackframe_entry_sysv(list,localsize) + abi_powerpc_sysv: + g_stackframe_entry_sysv(list,localsize); + abi_powerpc_aix: + g_stackframe_entry_aix(list,localsize); else internalerror(2204001); end; end; + procedure tcgppc.g_return_from_proc(list : taasmoutput;parasize : aword); + + begin + case target_info.abi of + abi_powerpc_macos: + g_return_from_proc_mac(list,parasize); + abi_powerpc_sysv: + g_return_from_proc_sysv(list,parasize); + abi_powerpc_aix: + g_return_from_proc_aix(list,parasize); + else + internalerror(2204001); + end; + end; + + + procedure tcgppc.g_stackframe_entry_aix(list : taasmoutput;localsize : longint); + begin + g_stackframe_entry_sysv(list,localsize); + end; + procedure tcgppc.g_stackframe_entry_sysv(list : taasmoutput;localsize : longint); { generated the entry code of a procedure/function. 
Note: localsize is the } { sum of the size necessary for local variables and the maximum possible } { combined size of ALL the parameters of a procedure called by the current } { one } - var regcounter,firstregfpu,firstreggpr : TRegister; - href : treference; + var regcounter,firstregfpu,firstreggpr: TRegister; + href,href2 : treference; usesfpr,usesgpr,gotgot : boolean; parastart : aword; offset : aword; + r,r2,rsp:Tregister; + regcounter2: Tsuperregister; + hp: tparaitem; begin { we do our own localsize calculation } @@ -914,53 +1056,79 @@ const { procedure, but currently this isn't checked, so save them always } { following is the entry code as described in "Altivec Programming } { Interface Manual", bar the saving of AltiVec registers } - a_reg_alloc(list,STACK_POINTER_REG); - a_reg_alloc(list,R_0); - { allocate registers containing reg parameters } - for regcounter := R_3 to R_10 do - a_reg_alloc(list,regcounter); + rsp.enum:=R_INTREGISTER; + rsp.number:=NR_STACK_POINTER_REG; + a_reg_alloc(list,rsp); + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + a_reg_alloc(list,r); + + if current_procinfo.procdef.parast.symtablelevel>1 then + begin + r.enum:=R_INTREGISTER; + r.number:=NR_R11; + a_reg_alloc(list,r); + end; + usesfpr:=false; - for regcounter:=R_F14 to R_F31 do - if regcounter in rg.usedbyproc then - begin - usesfpr:=true; - firstregfpu:=regcounter; - break; - end; + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter.enum:=R_F14 to R_F31 do + if regcounter.enum in rg.used_in_proc_other then + begin + usesfpr:= true; + firstregfpu:=regcounter; + break; + end; usesgpr:=false; - for regcounter:=R_14 to R_31 do - if regcounter in rg.usedbyproc then + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter2:=firstsaveintreg to RS_R31 do begin - usesgpr:=true; - firstreggpr:=regcounter; - break; + if regcounter2 in rg.used_in_proc_int then + begin + usesgpr:=true; + firstreggpr.enum := R_INTREGISTER; + 
firstreggpr.number := regcounter2 shl 8; + break; + end; end; { save link register? } - if (procinfo.flags and pi_do_call)<>0 then - begin - { save return address... } - list.concat(taicpu.op_reg(A_MFLR,R_0)); - { ... in caller's rframe } - reference_reset_base(href,STACK_POINTER_REG,4); - list.concat(taicpu.op_reg_ref(A_STW,R_0,href)); - a_reg_dealloc(list,R_0); - end; + if not (po_assembler in current_procinfo.procdef.procoptions) then + if (pi_do_call in current_procinfo.flags) then + begin + { save return address... } + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + list.concat(taicpu.op_reg(A_MFLR,r)); + { ... in caller's rframe } + reference_reset_base(href,rsp,4); + list.concat(taicpu.op_reg_ref(A_STW,r,href)); + a_reg_dealloc(list,r); + end; - if usesfpr or usesgpr then + { !!! always allocate space for all registers for now !!! } + if not (po_assembler in current_procinfo.procdef.procoptions) then +{ if usesfpr or usesgpr then } begin - a_reg_alloc(list,R_11); + r.enum:=R_INTREGISTER; + r.number:=NR_R12; + a_reg_alloc(list,r); { save end of fpr save area } - list.concat(taicpu.op_reg_reg_const(A_ORI,R_11,STACK_POINTER_REG,0)); + list.concat(taicpu.op_reg_reg(A_MR,r,rsp)); end; { calculate the size of the locals } +{ if usesgpr then - inc(localsize,(ord(R_31)-ord(firstreggpr)+1)*4); + inc(localsize,((NR_R31-firstreggpr.number) shr 8+1)*4); if usesfpr then - inc(localsize,(ord(R_F31)-ord(firstregfpu)+1)*8); + inc(localsize,(ord(R_F31)-ord(firstregfpu.enum)+1)*8); +} + { !!! always allocate space for all registers for now !!! 
} + if not (po_assembler in current_procinfo.procdef.procoptions) then + inc(localsize,(31-13+1)*4+(31-14+1)*8); { align to 16 bytes } localsize:=align(localsize,16); @@ -969,34 +1137,56 @@ const localsize:=align(localsize,16); - tppcprocinfo(procinfo).localsize:=localsize; + tppcprocinfo(current_procinfo).localsize:=localsize; - reference_reset_base(href,R_1,-localsize); - a_load_store(list,A_STWU,R_1,href); + if (localsize <> 0) then + begin + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + if (localsize <= high(smallint)) then + begin + reference_reset_base(href,r,-localsize); + a_load_store(list,A_STWU,r,href); + end + else + begin + reference_reset_base(href,r,0); + { can't use getregisterint here, the register colouring } + { is already done when we get here } + href.index.enum := R_INTREGISTER; + href.index.number := NR_R11; + a_reg_alloc(list,href.index); + a_load_const_reg(list,OS_S32,-localsize,href.index); + a_load_store(list,A_STWUX,r,href); + a_reg_dealloc(list,href.index); + end; + end; { no GOT pointer loaded yet } gotgot:=false; + r.enum := R_INTREGISTER; + r.NUMBER := NR_R12; if usesfpr then begin { save floating-point registers if (cs_create_pic in aktmoduleswitches) and not(usesgpr) then begin - list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_savefpr_'+tostr(ord(firstregfpu)-ord(R_F14)+14)+'_g'),0)); + a_call_name(objectlibrary.newasmsymbol('_savefpr_'+tostr(ord(firstregfpu)-ord(R_F14)+14)+'_g'); gotgot:=true; end else - list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_savefpr_'+tostr(ord(firstregfpu)-ord(R_F14)+14)),0)); + a_call_name(objectlibrary.newasmsymbol('_savefpr_'+tostr(ord(firstregfpu)-ord(R_F14)+14)); } - for regcounter:=firstregfpu to R_F31 do - if regcounter in rg.usedbyproc then + reference_reset_base(href,r,-8); + for regcounter.enum:=firstregfpu.enum to R_F31 do + if regcounter.enum in rg.used_in_proc_other then begin - { reference_reset_base(href,R_1,-localsize); - 
a_load_store(list,A_STWU,R_1,href); - } + a_loadfpu_reg_ref(list,OS_F64,regcounter,href); + dec(href.offset,8); end; { compute end of gpr save area } - list.concat(taicpu.op_reg_reg_const(A_ADDI,R_11,R_11,-(ord(R_F31)-ord(firstregfpu)+1)*8)); + a_op_const_reg(list,OP_ADD,OS_ADDR,aword(href.offset+8),r); end; { save gprs and fetch GOT pointer } @@ -1005,18 +1195,57 @@ const { if cs_create_pic in aktmoduleswitches then begin - list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_savegpr_'+tostr(ord(firstreggpr)-ord(R_14)+14)+'_g'),0)); + a_call_name(objectlibrary.newasmsymbol('_savegpr_'+tostr(ord(firstreggpr)-ord(R_14)+14)+'_g'); gotgot:=true; end else - list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_savegpr_'+tostr(ord(firstreggpr)-ord(R_14)+14)),0)) + a_call_name(objectlibrary.newasmsymbol('_savegpr_'+tostr(ord(firstreggpr)-ord(R_14)+14)) } - reference_reset_base(href,R_11,-(ord(R_31)-ord(firstreggpr)+1)*4); - list.concat(taicpu.op_reg_ref(A_STMW,firstreggpr,href)); + reference_reset_base(href,r,-4); + for regcounter2:=firstsaveintreg to RS_R31 do + begin + if regcounter2 in rg.used_in_proc_int then + begin + usesgpr:=true; + r.enum := R_INTREGISTER; + r.number := regcounter2 shl 8; + a_load_reg_ref(list,OS_INT,OS_INT,r,href); + dec(href.offset,4); + end; + end; +{ + r.enum:=R_INTREGISTER; + r.number:=NR_R12; + reference_reset_base(href,r,-((NR_R31-firstreggpr.number) shr 8+1)*4); + list.concat(taicpu.op_reg_ref(A_STMW,firstreggpr,href)); +} end; + if assigned(current_procinfo.procdef.parast) then + begin + if not (po_assembler in current_procinfo.procdef.procoptions) then + begin + { copy memory parameters to local parast } + r.enum:=R_INTREGISTER; + r.number:=NR_R12; + hp:=tparaitem(current_procinfo.procdef.para.first); + while assigned(hp) do + begin + if (hp.calleeparaloc.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then + begin + reference_reset_base(href,current_procinfo.framepointer,tvarsym(hp.parasym).adjusted_address); + 
reference_reset_base(href2,r,hp.callerparaloc.reference.offset); + cg.a_load_ref_ref(list,hp.calleeparaloc.size,hp.calleeparaloc.size,href2,href); + end; + hp := tparaitem(hp.next); + end; + end; + end; + + r.enum:=R_INTREGISTER; + r.number:=NR_R12; if usesfpr or usesgpr then - a_reg_dealloc(list,R_11); + a_reg_dealloc(list,r); { PIC code support, } if cs_create_pic in aktmoduleswitches then @@ -1026,9 +1255,12 @@ const begin {!!!!!!!!!!!!!} end; - a_reg_alloc(list,R_31); + r.enum:=R_INTREGISTER; + r.number:=NR_R31; + r2.enum:=R_LR; + a_reg_alloc(list,r); { place GOT ptr in r31 } - list.concat(taicpu.op_reg_reg(A_MFSPR,R_31,R_LR)); + list.concat(taicpu.op_reg_reg(A_MFSPR,r,r2)); end; { save the CR if necessary ( !!! always done currently ) } { still need to find out where this has to be done for SystemV @@ -1038,97 +1270,333 @@ const new_reference(STACK_POINTER_REG,LA_CR))); a_reg_dealloc(list,R_0); } { now comes the AltiVec context save, not yet implemented !!! } + + { if we're in a nested procedure, we've to save R11 } + if current_procinfo.procdef.parast.symtablelevel>2 then + begin + r.enum:=R_INTREGISTER; + r.number:=NR_R11; + reference_reset_base(href,rsp,PARENT_FRAMEPOINTER_OFFSET); + list.concat(taicpu.op_reg_ref(A_STW,r,href)); + end; end; + + procedure tcgppc.g_return_from_proc_aix(list : taasmoutput;parasize : aword); + begin + g_return_from_proc_sysv(list,parasize); + end; + + procedure tcgppc.g_return_from_proc_sysv(list : taasmoutput;parasize : aword); var - regcounter,firstregfpu,firstreggpr : TRegister; + regcounter,firstregfpu,firstreggpr: TRegister; href : treference; usesfpr,usesgpr,genret : boolean; + r,r2:Tregister; + regcounter2:Tsuperregister; + localsize: aword; begin - { release parameter registers } - for regcounter := R_3 to R_10 do - a_reg_dealloc(list,regcounter); + localsize := 0; { AltiVec context restore, not yet implemented !!! 
} usesfpr:=false; - for regcounter:=R_F14 to R_F31 do - if regcounter in rg.usedbyproc then + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter.enum:=R_F14 to R_F31 do + if regcounter.enum in rg.used_in_proc_other then + begin + usesfpr:=true; + firstregfpu:=regcounter; + break; + end; + + usesgpr:=false; + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter2:=firstsaveintreg to RS_R31 do + begin + if regcounter2 in rg.used_in_proc_int then + begin + usesgpr:=true; + firstreggpr.enum:=R_INTREGISTER; + firstreggpr.number:=regcounter2 shl 8; + break; + end; + end; + + if not (po_assembler in current_procinfo.procdef.procoptions) then + inc(localsize,(31-13+1)*4+(31-14+1)*8); + + { align to 16 bytes } + localsize:=align(localsize,16); + + inc(localsize,tg.lasttemp); + + localsize:=align(localsize,16); + + tppcprocinfo(current_procinfo).localsize:=localsize; + + + { no return (blr) generated yet } + genret:=true; + if usesgpr or usesfpr then + begin + { address of gpr save area to r11 } + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + r2.enum:=R_INTREGISTER; + r2.number:=NR_R12; + a_op_const_reg_reg(list,OP_ADD,OS_ADDR,tppcprocinfo(current_procinfo).localsize,r,r2); + if usesfpr then + begin + reference_reset_base(href,r2,-8); + for regcounter.enum := firstregfpu.enum to R_F31 do + if (regcounter.enum in rg.used_in_proc_other) then + begin + a_loadfpu_ref_reg(list,OS_F64,href,regcounter); + dec(href.offset,8); + end; + inc(href.offset,4); + end + else + reference_reset_base(href,r2,-4); + + for regcounter2:=firstsaveintreg to RS_R31 do + begin + if regcounter2 in rg.used_in_proc_int then + begin + usesgpr:=true; + r.enum := R_INTREGISTER; + r.number := regcounter2 shl 8; + a_load_ref_reg(list,OS_INT,OS_INT,href,r); + dec(href.offset,4); + end; + end; + +(* + reference_reset_base(href,r2,-((NR_R31-ord(firstreggpr.number)) shr 8+1)*4); + list.concat(taicpu.op_reg_ref(A_LMW,firstreggpr,href)); 
+*) + end; + +(* + { restore fprs and return } + if usesfpr then + begin + { address of fpr save area to r11 } + r.enum:=R_INTREGISTER; + r.number:=NR_R12; + list.concat(taicpu.op_reg_reg_const(A_ADDI,r,r,(ord(R_F31)-ord(firstregfpu.enum)+1)*8)); + { + if (pi_do_call in current_procinfo.flags) then + a_call_name(objectlibrary.newasmsymbol('_restfpr_'+tostr(ord(firstregfpu)-ord(R_F14)+14)+ + '_x') + else + { leaf node => lr haven't to be restored } + a_call_name('_restfpr_'+tostr(ord(firstregfpu.enum)-ord(R_F14)+14)+ + '_l'); + genret:=false; + } + end; +*) + + { if we didn't generate the return code, we've to do it now } + if genret then + begin + { adjust r1 } + r.enum:=R_INTREGISTER; + r.number:=NR_R1; + a_op_const_reg(list,OP_ADD,OS_ADDR,tppcprocinfo(current_procinfo).localsize,r); + { load link register? } + if not (po_assembler in current_procinfo.procdef.procoptions) then + if (pi_do_call in current_procinfo.flags) then + begin + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + reference_reset_base(href,r,4); + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + list.concat(taicpu.op_reg_ref(A_LWZ,r,href)); + list.concat(taicpu.op_reg(A_MTLR,r)); + end; + list.concat(taicpu.op_none(A_BLR)); + end; + end; + + function save_regs(list : taasmoutput):longint; + {Generates code which saves used non-volatile registers in + the save area right below the address the stackpointer point to. 
+ Returns the actual used save area size.} + + var regcounter,firstregfpu,firstreggpr: TRegister; + usesfpr,usesgpr: boolean; + href : treference; + offset: integer; + r,r2:Tregister; + regcounter2: Tsuperregister; + + begin + usesfpr:=false; + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter.enum:=R_F14 to R_F31 do + if regcounter.enum in rg.used_in_proc_other then begin usesfpr:=true; firstregfpu:=regcounter; break; end; - usesgpr:=false; - for regcounter:=R_14 to R_30 do - if regcounter in rg.usedbyproc then + usesgpr:=false; + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter2:=firstsaveintreg to RS_R31 do + begin + if regcounter2 in rg.used_in_proc_int then + begin + usesgpr:=true; + firstreggpr.enum:=R_INTREGISTER; + firstreggpr.number:=regcounter2 shl 8; + break; + end; + end; + offset:= 0; + + { save floating-point registers } + if usesfpr then + for regcounter.enum := firstregfpu.enum to R_F31 do + begin + offset:= offset - 8; + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + reference_reset_base(href, r, offset); + list.concat(taicpu.op_reg_ref(A_STFD, regcounter, href)); + end; + (* Optimiztion in the future: a_call_name(list,'_savefXX'); *) + + { save gprs in gpr save area } + if usesgpr then + if firstreggpr.enum < R_30 then + begin + offset:= offset - 4 * (ord(R_31) - ord(firstreggpr.enum) + 1); + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + reference_reset_base(href,r,offset); + list.concat(taicpu.op_reg_ref(A_STMW,firstreggpr,href)); + {STMW stores multiple registers} + end + else + begin + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + r2 := firstreggpr; + convert_register_to_enum(firstreggpr); + for regcounter.enum := firstreggpr.enum to R_31 do + begin + offset:= offset - 4; + reference_reset_base(href, r, offset); + list.concat(taicpu.op_reg_ref(A_STW, r2, href)); + inc(r2.number,NR_R1-NR_R0); + end; + end; + + { now comes the AltiVec 
context save, not yet implemented !!! } + + save_regs:= -offset; + end; + + procedure restore_regs(list : taasmoutput); + {Generates code which restores used non-volatile registers from + the save area right below the address the stackpointer point to.} + + var regcounter,firstregfpu,firstreggpr: TRegister; + usesfpr,usesgpr: boolean; + href : treference; + offset: integer; + r,r2:Tregister; + regcounter2: Tsuperregister; + + begin + usesfpr:=false; + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter.enum:=R_F14 to R_F31 do + if regcounter.enum in rg.used_in_proc_other then begin - usesgpr:=true; - firstreggpr:=regcounter; + usesfpr:=true; + firstregfpu:=regcounter; break; end; - { no return (blr) generated yet } - genret:=true; - if usesgpr then + usesgpr:=false; + if not (po_assembler in current_procinfo.procdef.procoptions) then + for regcounter2:=RS_R13 to RS_R31 do begin - { address of gpr save area to r11 } - if usesfpr then - list.concat(taicpu.op_reg_reg_const(A_ADDI,R_11,R_1,tppcprocinfo(procinfo).localsize-(ord(R_F31)-ord(firstregfpu)+1)*8)) - else - list.concat(taicpu.op_reg_reg_const(A_ADDI,R_11,R_1,tppcprocinfo(procinfo).localsize)); - - { restore gprs } - { at least for now we use LMW } - { - list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_restgpr_14'),0)); - } - reference_reset_base(href,R_11,-(ord(R_31)-ord(firstreggpr)+1)*4); - list.concat(taicpu.op_reg_ref(A_LMW,firstreggpr,href)); + if regcounter2 in rg.used_in_proc_int then + begin + usesgpr:=true; + firstreggpr.enum:=R_INTREGISTER; + firstreggpr.number:=regcounter2 shl 8; + break; + end; end; - { restore fprs and return } - if usesfpr then + offset:= 0; + + { restore fp registers } + if usesfpr then + for regcounter.enum := firstregfpu.enum to R_F31 do begin - { address of fpr save area to r11 } - list.concat(taicpu.op_reg_reg_const(A_ADDI,R_11,R_11,(ord(R_F31)-ord(firstregfpu)+1)*8)); - { - if (procinfo.flags and pi_do_call)<>0 then - 
list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_restfpr_'+tostr(ord(firstregfpu)-ord(R_F14)+14)+ - '_x'),0)) - else - { leaf node => lr haven't to be restored } - list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_restfpr_'+tostr(ord(firstregfpu)-ord(R_F14)+14)+ - '_l'),0)); - genret:=false; - } + offset:= offset - 8; + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + reference_reset_base(href, r, offset); + list.concat(taicpu.op_reg_ref(A_LFD, regcounter, href)); end; - { if we didn't generate the return code, we've to do it now } - if genret then + (* Optimiztion in the future: a_call_name(list,'_restfXX'); *) + + { restore gprs } + if usesgpr then + if firstreggpr.enum < R_30 then begin - { adjust r1 } - a_op_const_reg(list,OP_ADD,tppcprocinfo(procinfo).localsize,R_1); - { load link register? } - if (procinfo.flags and pi_do_call)<>0 then - begin - reference_reset_base(href,STACK_POINTER_REG,4); - list.concat(taicpu.op_reg_ref(A_LWZ,R_0,href)); - list.concat(taicpu.op_reg(A_MTLR,R_0)); - end; - list.concat(taicpu.op_none(A_BLR)); + offset:= offset - 4 * (ord(R_31) - ord(firstreggpr.enum) + 1); + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + reference_reset_base(href,r,offset); //-220 + list.concat(taicpu.op_reg_ref(A_LMW,firstreggpr,href)); + {LMW loads multiple registers} + end + else + begin + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + r2 := firstreggpr; + convert_register_to_enum(firstreggpr); + for regcounter.enum := firstreggpr.enum to R_31 do + begin + offset:= offset - 4; + reference_reset_base(href, r, offset); + list.concat(taicpu.op_reg_ref(A_LWZ, r2, href)); + inc(r2.number,NR_R1-NR_R0); + end; end; - end; + + { now comes the AltiVec context restore, not yet implemented !!! } + end; + procedure tcgppc.g_stackframe_entry_mac(list : taasmoutput;localsize : longint); { generated the entry code of a procedure/function. 
Note: localsize is the } { sum of the size necessary for local variables and the maximum possible } { combined size of ALL the parameters of a procedure called by the current } - { one } + { one } + + const + macosLinkageAreaSize = 24; + var regcounter: TRegister; href : treference; + registerSaveAreaSize : longint; + r,r2,rsp:Tregister; + regcounter2: Tsuperregister; begin if (localsize mod 8) <> 0 then internalerror(58991); @@ -1136,33 +1604,40 @@ const { procedure, but currently this isn't checked, so save them always } { following is the entry code as described in "Altivec Programming } { Interface Manual", bar the saving of AltiVec registers } - a_reg_alloc(list,STACK_POINTER_REG); - a_reg_alloc(list,R_0); - { allocate registers containing reg parameters } - for regcounter := R_3 to R_10 do - a_reg_alloc(list,regcounter); - { save return address... } - list.concat(taicpu.op_reg_reg(A_MFSPR,R_0,R_LR)); + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + rsp.enum:=R_INTREGISTER; + rsp.number:=NR_STACK_POINTER_REG; + a_reg_alloc(list,rsp); + a_reg_alloc(list,r); + + { save return address in callers frame} + r2.enum:=R_LR; + list.concat(taicpu.op_reg_reg(A_MFSPR,r,r2)); { ... in caller's frame } - reference_reset_base(href,STACK_POINTER_REG,8); - list.concat(taicpu.op_reg_ref(A_STW,R_0,href)); - a_reg_dealloc(list,R_0); - { save floating-point registers } - { !!! has to be optimized: only save registers that are used } - list.concat(taicpu.op_sym_ofs(A_BL,objectlibrary.newasmsymbol('_savef14'),0)); - { save gprs in gpr save area } - { !!! has to be optimized: only save registers that are used } - reference_reset_base(href,STACK_POINTER_REG,-220); - list.concat(taicpu.op_reg_ref(A_STMW,R_13,href)); - { save the CR if necessary ( !!! 
always done currently ) } - a_reg_alloc(list,R_0); - list.concat(taicpu.op_reg_reg(A_MFSPR,R_0,R_CR)); - reference_reset_base(href,stack_pointer_reg,LA_CR); - list.concat(taicpu.op_reg_ref(A_STW,R_0,href)); - a_reg_dealloc(list,R_0); + reference_reset_base(href,rsp,8); + list.concat(taicpu.op_reg_ref(A_STW,r,href)); + a_reg_dealloc(list,r); + + { save non-volatile registers in callers frame} + registerSaveAreaSize:= save_regs(list); + + { save the CR if necessary in callers frame ( !!! always done currently ) } + a_reg_alloc(list,r); + r2.enum:=R_CR; + list.concat(taicpu.op_reg_reg(A_MFSPR,r,r2)); + reference_reset_base(href,rsp,LA_CR); + list.concat(taicpu.op_reg_ref(A_STW,r,href)); + a_reg_dealloc(list,r); + + (* { save pointer to incoming arguments } list.concat(taicpu.op_reg_reg_const(A_ORI,R_31,STACK_POINTER_REG,0)); + *) + + (* a_reg_alloc(list,R_12); + { 0 or 8 based on SP alignment } list.concat(taicpu.op_reg_reg_const_const_const(A_RLWINM, R_12,STACK_POINTER_REG,0,28,28)); @@ -1171,8 +1646,94 @@ const -localsize)); { establish new alignment } list.concat(taicpu.op_reg_reg_reg(A_STWUX,STACK_POINTER_REG,STACK_POINTER_REG,R_12)); + a_reg_dealloc(list,R_12); - { now comes the AltiVec context save, not yet implemented !!! 
} + *) + + { allocate stack frame } + localsize:= align(localsize + macosLinkageAreaSize + registerSaveAreaSize, 16); + inc(localsize,tg.lasttemp); + localsize:=align(localsize,16); + tppcprocinfo(current_procinfo).localsize:=localsize; + + if (localsize <> 0) then + begin + r.enum:=R_INTREGISTER; + r.number:=NR_STACK_POINTER_REG; + if (localsize <= high(smallint)) then + begin + reference_reset_base(href,r,-localsize); + a_load_store(list,A_STWU,r,href); + end + else + begin + reference_reset_base(href,r,0); + href.index.enum := R_INTREGISTER; + href.index.number := NR_R11; + a_reg_alloc(list,href.index); + a_load_const_reg(list,OS_S32,-localsize,href.index); + a_load_store(list,A_STWUX,r,href); + a_reg_dealloc(list,href.index); + end; + end; + end; + + procedure tcgppc.g_return_from_proc_mac(list : taasmoutput;parasize : aword); + + var + regcounter: TRegister; + href : treference; + r,r2,rsp:Tregister; + regcounter2: Tsuperregister; + begin + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + rsp.enum:=R_INTREGISTER; + rsp.number:=NR_STACK_POINTER_REG; + a_reg_alloc(list,r); + + { restore stack pointer } + reference_reset_base(href,rsp,LA_SP); + list.concat(taicpu.op_reg_ref(A_LWZ,rsp,href)); + (* + list.concat(taicpu.op_reg_reg_const(A_ORI,rsp,R_31,0)); + *) + + { restore the CR if necessary from callers frame + ( !!! 
always done currently ) } + reference_reset_base(href,rsp,LA_CR); + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + list.concat(taicpu.op_reg_ref(A_LWZ,r,href)); + r2.enum:=R_CR; + list.concat(taicpu.op_reg_reg(A_MTSPR,r,r2)); + a_reg_dealloc(list,r); + + (* + { restore return address from callers frame } + reference_reset_base(href,STACK_POINTER_REG,8); + list.concat(taicpu.op_reg_ref(A_LWZ,R_0,href)); + *) + + { restore non-volatile registers from callers frame } + restore_regs(list); + + (* + { return to caller } + list.concat(taicpu.op_reg_reg(A_MTSPR,R_0,R_LR)); + list.concat(taicpu.op_none(A_BLR)); + *) + + { restore return address from callers frame } + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + r2.enum:=R_LR; + reference_reset_base(href,rsp,8); + list.concat(taicpu.op_reg_ref(A_LWZ,r,href)); + + { return to caller } + list.concat(taicpu.op_reg_reg(A_MTSPR,r,r2)); + list.concat(taicpu.op_none(A_BLR)); end; @@ -1183,72 +1744,115 @@ const end; - procedure tcgppc.g_return_from_proc(list : taasmoutput;parasize : aword); - - begin - case target_info.system of - system_powerpc_macos: - g_return_from_proc_mac(list,parasize); - system_powerpc_linux: - g_return_from_proc_sysv(list,parasize) - else - internalerror(2204001); - end; - end; - - procedure tcgppc.a_loadaddr_ref_reg(list : taasmoutput;const ref : treference;r : tregister); var ref2, tmpref: treference; freereg: boolean; + r2,tmpreg:Tregister; begin ref2 := ref; freereg := fixref(list,ref2); if assigned(ref2.symbol) then - { add the symbol's value to the base of the reference, and if the } - { reference doesn't have a base, create one } begin - reference_reset(tmpref); - tmpref.offset := ref2.offset; - tmpref.symbol := ref2.symbol; - tmpref.symaddr := refs_ha; - if ref2.base <> R_NO then + if target_info.system = system_powerpc_macos then begin - list.concat(taicpu.op_reg_reg_ref(A_ADDIS,r, - ref2.base,tmpref)); - if freereg then + if macos_direct_globals then begin - cg.free_scratch_reg(list,ref2.base); - 
freereg := false; + reference_reset(tmpref); + tmpref.offset := ref2.offset; + tmpref.symbol := ref2.symbol; + tmpref.base.number := NR_NO; + r2.enum:=R_INTREGISTER; + r2.number:=NR_RTOC; + list.concat(taicpu.op_reg_reg_ref(A_ADDI,r,r2,tmpref)); + end + else + begin + reference_reset(tmpref); + tmpref.symbol := ref2.symbol; + tmpref.offset := 0; + tmpref.base.enum := R_INTREGISTER; + tmpref.base.number := NR_RTOC; + list.concat(taicpu.op_reg_ref(A_LWZ,r,tmpref)); + + if ref2.offset <> 0 then + begin + reference_reset(tmpref); + tmpref.offset := ref2.offset; + tmpref.base:= r; + list.concat(taicpu.op_reg_ref(A_LA,r,tmpref)); + end; end; + + if ref2.base.number <> NR_NO then + list.concat(taicpu.op_reg_reg_reg(A_ADD,r,r,ref2.base)); + + //list.concat(tai_comment.create(strpnew('*** a_loadaddr_ref_reg'))); end else - list.concat(taicpu.op_reg_ref(A_LIS,r,tmpref)); - tmpref.base := R_NO; - tmpref.symaddr := refs_l; - { can be folded with one of the next instructions by the } - { optimizer probably } - list.concat(taicpu.op_reg_reg_ref(A_ADDI,r,r,tmpref)); + begin + + { add the symbol's value to the base of the reference, and if the } + { reference doesn't have a base, create one } + reference_reset(tmpref); + tmpref.offset := ref2.offset; + tmpref.symbol := ref2.symbol; + tmpref.symaddr := refs_ha; + if ref2.base.number<> NR_NO then + begin + list.concat(taicpu.op_reg_reg_ref(A_ADDIS,r, + ref2.base,tmpref)); + if freereg then + begin +{$ifndef newra} + cg.free_scratch_reg(list,ref2.base); +{$else newra} + rg.ungetregisterint(list,ref2.base); +{$endif newra} + freereg := false; + end; + end + else + list.concat(taicpu.op_reg_ref(A_LIS,r,tmpref)); + tmpref.base.number := NR_NO; + tmpref.symaddr := refs_l; + { can be folded with one of the next instructions by the } + { optimizer probably } + list.concat(taicpu.op_reg_reg_ref(A_ADDI,r,r,tmpref)); + end end else if ref2.offset <> 0 Then - if ref2.base <> R_NO then - 
a_op_const_reg_reg(list,OP_ADD,OS_32,ref2.offset,ref2.base,r) - { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never} - { occurs, so now only ref.offset has to be loaded } - else a_load_const_reg(list,OS_32,ref2.offset,r) - else if ref.index <> R_NO Then + if ref2.base.number <> NR_NO then + a_op_const_reg_reg(list,OP_ADD,OS_32,aword(ref2.offset),ref2.base,r) + { FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never} + { occurs, so now only ref.offset has to be loaded } + else + a_load_const_reg(list,OS_32,ref2.offset,r) + else if ref.index.number <> NR_NO Then list.concat(taicpu.op_reg_reg_reg(A_ADD,r,ref2.base,ref2.index)) - else if (ref2.base <> R_NO) and - (r <> ref2.base) then + else if (ref2.base.number <> NR_NO) and + (r.number <> ref2.base.number) then list.concat(taicpu.op_reg_reg(A_MR,r,ref2.base)); if freereg then +{$ifndef newra} cg.free_scratch_reg(list,ref2.base); +{$else newra} + rg.ungetregisterint(list,ref2.base); +{$endif newra} end; { ************* concatcopy ************ } +{$ifndef ppc603} + const + maxmoveunit = 8; +{$else ppc603} + const + maxmoveunit = 4; +{$endif ppc603} + procedure tcgppc.g_concatcopy(list : taasmoutput;const source,dest : treference;len : aword; delsource,loadref : boolean); var @@ -1257,6 +1861,8 @@ const lab: tasmlabel; count, count2: aword; orgsrc, orgdst: boolean; + r:Tregister; + size: tcgsize; begin {$ifdef extdebug} @@ -1266,41 +1872,60 @@ const { make sure short loads are handled as optimally as possible } if not loadref then - if (len <= 8) and + if (len <= maxmoveunit) and (byte(len) in [1,2,4,8]) then begin if len < 8 then begin - a_load_ref_ref(list,int_cgsize(len),source,dest); + size := int_cgsize(len); + a_load_ref_ref(list,size,size,source,dest); if delsource then - reference_release(list,source); + begin + reference_release(list,source); + tg.ungetiftemp(list,source); + end; end else begin - a_reg_alloc(list,R_F0); - a_loadfpu_ref_reg(list,OS_F64,source,R_F0); + 
r.enum:=R_F0; + a_reg_alloc(list,r); + a_loadfpu_ref_reg(list,OS_F64,source,r); if delsource then - reference_release(list,source); - a_loadfpu_reg_ref(list,OS_F64,R_F0,dest); - a_reg_dealloc(list,R_F0); + begin + reference_release(list,source); + tg.ungetiftemp(list,source); + end; + a_loadfpu_reg_ref(list,OS_F64,r,dest); + a_reg_dealloc(list,r); end; exit; end; + count := len div maxmoveunit; + reference_reset(src); reference_reset(dst); { load the address of source into src.base } if loadref then begin +{$ifndef newra} src.base := get_scratch_reg_address(list); - a_load_ref_reg(list,OS_32,source,src.base); +{$else newra} + src.base := rg.getregisterint(list,OS_ADDR); +{$endif newra} + a_load_ref_reg(list,OS_32,OS_32,source,src.base); orgsrc := false; end - else if not issimpleref(source) or - ((source.index <> R_NO) and + else if (count > 4) or + not issimpleref(source) or + ((source.index.number <> NR_NO) and ((source.offset + longint(len)) > high(smallint))) then begin +{$ifndef newra} src.base := get_scratch_reg_address(list); +{$else newra} + src.base := rg.getregisterint(list,OS_ADDR); +{$endif newra} a_loadaddr_ref_reg(list,source,src.base); orgsrc := false; end @@ -1312,11 +1937,16 @@ const if not orgsrc and delsource then reference_release(list,source); { load the address of dest into dst.base } - if not issimpleref(dest) or - ((dest.index <> R_NO) and + if (count > 4) or + not issimpleref(dest) or + ((dest.index.number <> NR_NO) and ((dest.offset + longint(len)) > high(smallint))) then begin +{$ifndef newra} dst.base := get_scratch_reg_address(list); +{$else newra} + dst.base := rg.getregisterint(list,OS_ADDR); +{$endif newra} a_loadaddr_ref_reg(list,dest,dst.base); orgdst := false; end @@ -1326,7 +1956,7 @@ const orgdst := true; end; - count := len div 8; +{$ifndef ppc603} if count > 4 then { generate a loop } begin @@ -1338,19 +1968,29 @@ const inc(src.offset,8); list.concat(taicpu.op_reg_reg_const(A_SUBI,src.base,src.base,8)); 
list.concat(taicpu.op_reg_reg_const(A_SUBI,dst.base,dst.base,8)); - countreg := get_scratch_reg_int(list); +{$ifndef newra} + countreg := get_scratch_reg_int(list,OS_INT); +{$else newra} + countreg := rg.getregisterint(list,OS_INT); +{$endif newra} a_load_const_reg(list,OS_32,count,countreg); { explicitely allocate R_0 since it can be used safely here } { (for holding date that's being copied) } - a_reg_alloc(list,R_F0); + r.enum:=R_F0; + a_reg_alloc(list,r); objectlibrary.getlabel(lab); a_label(list, lab); list.concat(taicpu.op_reg_reg_const(A_SUBIC_,countreg,countreg,1)); - list.concat(taicpu.op_reg_ref(A_LFDU,R_F0,src)); - list.concat(taicpu.op_reg_ref(A_STFDU,R_F0,dst)); + r.enum:=R_F0; + list.concat(taicpu.op_reg_ref(A_LFDU,r,src)); + list.concat(taicpu.op_reg_ref(A_STFDU,r,dst)); a_jmp(list,A_BC,C_NE,0,lab); +{$ifndef newra} free_scratch_reg(list,countreg); - a_reg_dealloc(list,R_F0); +{$else newra} + rg.ungetregisterint(list,countreg); +{$endif newra} + a_reg_dealloc(list,r); len := len mod 8; end; @@ -1358,43 +1998,106 @@ const if count > 0 then { unrolled loop } begin - a_reg_alloc(list,R_F0); + r.enum:=R_F0; + a_reg_alloc(list,r); for count2 := 1 to count do begin - a_loadfpu_ref_reg(list,OS_F64,src,R_F0); - a_loadfpu_reg_ref(list,OS_F64,R_F0,dst); + a_loadfpu_ref_reg(list,OS_F64,src,r); + a_loadfpu_reg_ref(list,OS_F64,r,dst); inc(src.offset,8); inc(dst.offset,8); end; - a_reg_dealloc(list,R_F0); + a_reg_dealloc(list,r); len := len mod 8; end; if (len and 4) <> 0 then begin - a_reg_alloc(list,R_0); - a_load_ref_reg(list,OS_32,src,R_0); - a_load_reg_ref(list,OS_32,R_0,dst); + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + a_reg_alloc(list,r); + a_load_ref_reg(list,OS_32,OS_32,src,r); + a_load_reg_ref(list,OS_32,OS_32,r,dst); inc(src.offset,4); inc(dst.offset,4); - a_reg_dealloc(list,R_0); + a_reg_dealloc(list,r); end; +{$else not ppc603} + if count > 4 then + { generate a loop } + begin + { the offsets are zero after the a_loadaddress_ref_reg and just } + { 
have to be set to 4. I put an Inc there so debugging may be } + { easier (should offset be different from zero here, it will be } + { easy to notice in the generated assembler } + inc(dst.offset,4); + inc(src.offset,4); + list.concat(taicpu.op_reg_reg_const(A_SUBI,src.base,src.base,4)); + list.concat(taicpu.op_reg_reg_const(A_SUBI,dst.base,dst.base,4)); +{$ifndef newra} + countreg := get_scratch_reg_int(list,OS_INT); +{$else newra} + countreg := rg.getregisterint(list,OS_INT); +{$endif newra} + a_load_const_reg(list,OS_32,count,countreg); + { explicitely allocate R_0 since it can be used safely here } + { (for holding date that's being copied) } + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + a_reg_alloc(list,r); + objectlibrary.getlabel(lab); + a_label(list, lab); + list.concat(taicpu.op_reg_reg_const(A_SUBIC_,countreg,countreg,1)); + list.concat(taicpu.op_reg_ref(A_LWZU,r,src)); + list.concat(taicpu.op_reg_ref(A_STWU,r,dst)); + a_jmp(list,A_BC,C_NE,0,lab); +{$ifndef newra} + free_scratch_reg(list,countreg); +{$else newra} + rg.ungetregisterint(list,countreg); +{$endif newra} + a_reg_dealloc(list,r); + len := len mod 4; + end; + + count := len div 4; + if count > 0 then + { unrolled loop } + begin + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + a_reg_alloc(list,r); + for count2 := 1 to count do + begin + a_load_ref_reg(list,OS_32,OS_32,src,r); + a_load_reg_ref(list,OS_32,OS_32,r,dst); + inc(src.offset,4); + inc(dst.offset,4); + end; + a_reg_dealloc(list,r); + len := len mod 4; + end; +{$endif not ppc603} { copy the leftovers } if (len and 2) <> 0 then begin - a_reg_alloc(list,R_0); - a_load_ref_reg(list,OS_16,src,R_0); - a_load_reg_ref(list,OS_16,R_0,dst); + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + a_reg_alloc(list,r); + a_load_ref_reg(list,OS_16,OS_16,src,r); + a_load_reg_ref(list,OS_16,OS_16,r,dst); inc(src.offset,2); inc(dst.offset,2); - a_reg_dealloc(list,R_0); + a_reg_dealloc(list,r); end; if (len and 1) <> 0 then begin - a_reg_alloc(list,R_0); - 
a_load_ref_reg(list,OS_8,src,R_0); - a_load_reg_ref(list,OS_8,R_0,dst); - a_reg_dealloc(list,R_0); + r.enum:=R_INTREGISTER; + r.number:=NR_R0; + a_reg_alloc(list,r); + a_load_ref_reg(list,OS_8,OS_8,src,r); + a_load_reg_ref(list,OS_8,OS_8,r,dst); + a_reg_dealloc(list,r); end; if orgsrc then begin @@ -1402,26 +2105,160 @@ const reference_release(list,source); end else +{$ifndef newra} free_scratch_reg(list,src.base); +{$else newra} + rg.ungetregisterint(list,src.base); +{$endif newra} if not orgdst then +{$ifndef newra} free_scratch_reg(list,dst.base); +{$else newra} + rg.ungetregisterint(list,dst.base); +{$endif newra} + if delsource then + tg.ungetiftemp(list,source); end; + procedure tcgppc.g_copyvaluepara_openarray(list : taasmoutput;const ref, lenref:treference;elesize:integer); + var + power,len : longint; +{$ifndef __NOWINPECOFF__} + again,ok : tasmlabel; +{$endif} + r,r2,rsp:Tregister; + begin + {$warning !!!! FIX ME !!!!} + internalerror(200305231); +{!!!! + lenref:=ref; + inc(lenref.offset,4); + { get stack space } + r.enum:=R_INTREGISTER; + r.number:=NR_EDI; + rsp.enum:=R_INTREGISTER; + rsp.number:=NR_ESP; + r2.enum:=R_INTREGISTER; + rg.getexplicitregisterint(list,NR_EDI); + list.concat(Taicpu.op_ref_reg(A_MOV,S_L,lenref,r)); + list.concat(Taicpu.op_reg(A_INC,S_L,r)); + if (elesize<>1) then + begin + if ispowerof2(elesize, power) then + list.concat(Taicpu.op_const_reg(A_SHL,S_L,power,r)) + else + list.concat(Taicpu.op_const_reg(A_IMUL,S_L,elesize,r)); + end; +{$ifndef __NOWINPECOFF__} + { windows guards only a few pages for stack growing, } + { so we have to access every page first } + if target_info.system=system_i386_win32 then + begin + objectlibrary.getlabel(again); + objectlibrary.getlabel(ok); + a_label(list,again); + list.concat(Taicpu.op_const_reg(A_CMP,S_L,winstackpagesize,r)); + a_jmp_cond(list,OC_B,ok); + list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,rsp)); + r2.number:=NR_EAX; + list.concat(Taicpu.op_reg(A_PUSH,S_L,r)); + 
list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize,r)); + a_jmp_always(list,again); - procedure tcgppc.g_overflowcheck(list: taasmoutput; const p: tnode); + a_label(list,ok); + list.concat(Taicpu.op_reg_reg(A_SUB,S_L,r,rsp)); + rg.ungetregisterint(list,r); + { now reload EDI } + rg.getexplicitregisterint(list,NR_EDI); + list.concat(Taicpu.op_ref_reg(A_MOV,S_L,lenref,r)); + list.concat(Taicpu.op_reg(A_INC,S_L,r)); + + if (elesize<>1) then + begin + if ispowerof2(elesize, power) then + list.concat(Taicpu.op_const_reg(A_SHL,S_L,power,r)) + else + list.concat(Taicpu.op_const_reg(A_IMUL,S_L,elesize,r)); + end; + end + else +{$endif __NOWINPECOFF__} + list.concat(Taicpu.op_reg_reg(A_SUB,S_L,r,rsp)); + { align stack on 4 bytes } + list.concat(Taicpu.op_const_reg(A_AND,S_L,$fffffff4,rsp)); + { load destination } + a_load_reg_reg(list,OS_INT,OS_INT,rsp,r); + + { don't destroy the registers! } + r2.number:=NR_ECX; + list.concat(Taicpu.op_reg(A_PUSH,S_L,r2)); + r2.number:=NR_ESI; + list.concat(Taicpu.op_reg(A_PUSH,S_L,r2)); + + { load count } + r2.number:=NR_ECX; + a_load_ref_reg(list,OS_INT,lenref,r2); + + { load source } + r2.number:=NR_ESI; + a_load_ref_reg(list,OS_INT,ref,r2); + + { scheduled .... 
} + r2.number:=NR_ECX; + list.concat(Taicpu.op_reg(A_INC,S_L,r2)); + + { calculate size } + len:=elesize; + opsize:=S_B; + if (len and 3)=0 then + begin + opsize:=S_L; + len:=len shr 2; + end + else + if (len and 1)=0 then + begin + opsize:=S_W; + len:=len shr 1; + end; + + if ispowerof2(len, power) then + list.concat(Taicpu.op_const_reg(A_SHL,S_L,power,r2)) + else + list.concat(Taicpu.op_const_reg(A_IMUL,S_L,len,r2)); + list.concat(Taicpu.op_none(A_REP,S_NO)); + case opsize of + S_B : list.concat(Taicpu.Op_none(A_MOVSB,S_NO)); + S_W : list.concat(Taicpu.Op_none(A_MOVSW,S_NO)); + S_L : list.concat(Taicpu.Op_none(A_MOVSD,S_NO)); + end; + rg.ungetregisterint(list,r); + r2.number:=NR_ESI; + list.concat(Taicpu.op_reg(A_POP,S_L,r2)); + r2.number:=NR_ECX; + list.concat(Taicpu.op_reg(A_POP,S_L,r2)); + + { patch the new address } + a_load_reg_ref(list,OS_INT,rsp,ref); +!!!!} + end; + + procedure tcgppc.g_overflowcheck(list: taasmoutput; const l: tlocation; def: tdef); var hl : tasmlabel; + r:Tregister; begin if not(cs_check_overflow in aktlocalswitches) then exit; objectlibrary.getlabel(hl); - if not ((p.resulttype.def.deftype=pointerdef) or - ((p.resulttype.def.deftype=orddef) and - (torddef(p.resulttype.def).typ in [u64bit,u16bit,u32bit,u8bit,uchar, + if not ((def.deftype=pointerdef) or + ((def.deftype=orddef) and + (torddef(def).typ in [u64bit,u16bit,u32bit,u8bit,uchar, bool8bit,bool16bit,bool32bit]))) then begin - list.concat(taicpu.op_reg(A_MCRXR,R_CR7)); + r.enum:=R_CR7; + list.concat(taicpu.op_reg(A_MCRXR,r)); a_jmp(list,A_BC,C_OV,7,hl) end else @@ -1433,43 +2270,18 @@ const {***************** This is private property, keep out! :) *****************} - - procedure tcgppc.g_return_from_proc_mac(list : taasmoutput;parasize : aword); - - var - regcounter: TRegister; - href : treference; - begin - { release parameter registers } - for regcounter := R_3 to R_10 do - a_reg_dealloc(list,regcounter); - { AltiVec context restore, not yet implemented !!! 
} - - { restore SP } - list.concat(taicpu.op_reg_reg_const(A_ORI,STACK_POINTER_REG,R_31,0)); - { restore gprs } - reference_reset_base(href,STACK_POINTER_REG,-220); - list.concat(taicpu.op_reg_ref(A_LMW,R_13,href)); - { restore return address ... } - reference_reset_base(href,STACK_POINTER_REG,8); - list.concat(taicpu.op_reg_ref(A_LWZ,R_0,href)); - { ... and return from _restf14 } - list.concat(taicpu.op_sym_ofs(A_B,objectlibrary.newasmsymbol('_restf14'),0)); - end; - - function tcgppc.issimpleref(const ref: treference): boolean; begin - if (ref.base = R_NO) and - (ref.index <> R_NO) then + if (ref.base.number = NR_NO) and + (ref.index.number <> NR_NO) then internalerror(200208101); result := not(assigned(ref.symbol)) and - (((ref.index = R_NO) and + (((ref.index.number = NR_NO) and (ref.offset >= low(smallint)) and (ref.offset <= high(smallint))) or - ((ref.index <> R_NO) and + ((ref.index.number <> NR_NO) and (ref.offset = 0))); end; @@ -1478,15 +2290,39 @@ const var tmpreg: tregister; +{$ifdef newra} + orgindex: tregister; + freeindex: boolean; +{$endif newra} begin result := false; - if (ref.base <> R_NO) then + if (ref.base.number = NR_NO) then begin - if (ref.index <> R_NO) and + ref.base := ref.index; + ref.base.number := NR_NO; + end; + if (ref.base.number <> NR_NO) then + begin + if (ref.index.number <> NR_NO) and ((ref.offset <> 0) or assigned(ref.symbol)) then begin result := true; - tmpreg := cg.get_scratch_reg_int(list); +{$ifndef newra} + tmpreg := cg.get_scratch_reg_int(list,OS_INT); +{$else newra} + { references are often freed before they are used. 
Since we allocate } + { a register here, we must first reallocate the index register, since } + { otherwise it may be overwritten (and it's still used afterwards) } + freeindex := false; + if ((ref.index.number shr 8) >= first_supreg) and + ((ref.index.number shr 8) in rg.unusedregsint) then + begin + rg.getexplicitregisterint(list,ref.index.number); + orgindex := ref.index; + freeindex := true; + end; + tmpreg := rg.getregisterint(list,OS_ADDR); +{$endif newra} if not assigned(ref.symbol) and (cardinal(ref.offset-low(smallint)) <= high(smallint)-low(smallint)) then @@ -1499,13 +2335,19 @@ const begin list.concat(taicpu.op_reg_reg_reg( A_ADD,tmpreg,ref.base,ref.index)); - ref.index := R_NO; + ref.index.number := NR_NO; end; ref.base := tmpreg; +{$ifdef newra} + if freeindex then + begin + rg.ungetregisterint(list,orgindex); + end; +{$endif newra} end end else - if ref.index <> R_NO then + if ref.index.number <> NR_NO then internalerror(200208102); end; @@ -1515,7 +2357,8 @@ const function tcgppc.get_rlwi_const(a: aword; var l1, l2: longint): boolean; var - temp, testbit: longint; + temp : longint; + testbit : aword; compare: boolean; begin @@ -1582,30 +2425,103 @@ const var tmpreg: tregister; + tmpregUsed: Boolean; tmpref: treference; + largeOffset: Boolean; begin - tmpreg := R_NO; - if assigned(ref.symbol) or - (cardinal(ref.offset-low(smallint)) > - high(smallint)-low(smallint)) then + tmpreg.number := NR_NO; + + if target_info.system = system_powerpc_macos then begin + largeOffset:= (cardinal(ref.offset-low(smallint)) > + high(smallint)-low(smallint)); + +{$ifndef newra} tmpreg := get_scratch_reg_address(list); - reference_reset(tmpref); - tmpref.symbol := ref.symbol; - tmpref.offset := ref.offset; - tmpref.symaddr := refs_ha; - if ref.base <> R_NO then - list.concat(taicpu.op_reg_reg_ref(A_ADDIS,tmpreg, - ref.base,tmpref)) +{$else newra} + tmpreg := rg.getregisterint(list,OS_ADDR); +{$endif newra} + tmpregUsed:= false; + + if assigned(ref.symbol) then + begin 
//Load symbol's value + reference_reset(tmpref); + tmpref.symbol := ref.symbol; + tmpref.base.enum:= R_INTREGISTER; + tmpref.base.number:= NR_RTOC; + if macos_direct_globals then + list.concat(taicpu.op_reg_ref(A_LA,tmpreg,tmpref)) + else + list.concat(taicpu.op_reg_ref(A_LWZ,tmpreg,tmpref)); + tmpregUsed:= true; + end; + + if largeOffset then + begin //Add hi part of offset + reference_reset(tmpref); + tmpref.offset := Hi(ref.offset); + if tmpregUsed then + list.concat(taicpu.op_reg_reg_ref(A_ADDIS,tmpreg, + tmpreg,tmpref)) + else + list.concat(taicpu.op_reg_ref(A_LIS,tmpreg,tmpref)); + tmpregUsed:= true; + end; + + if tmpregUsed then + begin + //Add content of base register + if ref.base.number <> NR_NO then + list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg, + ref.base,tmpreg)); + + //Make ref ready to be used by op + ref.symbol:= nil; + ref.base:= tmpreg; + if largeOffset then + ref.offset := Lo(ref.offset); + list.concat(taicpu.op_reg_ref(op,reg,ref)); + //list.concat(tai_comment.create(strpnew('*** a_load_store indirect global'))); + end else - list.concat(taicpu.op_reg_ref(A_LIS,tmpreg,tmpref)); - ref.base := tmpreg; - ref.symaddr := refs_l; + list.concat(taicpu.op_reg_ref(op,reg,ref)); + end + else {if target_info.system <> system_powerpc_macos} + begin + if assigned(ref.symbol) or + (cardinal(ref.offset-low(smallint)) > + high(smallint)-low(smallint)) then + begin +{$ifndef newra} + tmpreg := get_scratch_reg_address(list); +{$else newra} + tmpreg := rg.getregisterint(list,OS_ADDR); +{$endif newra} + reference_reset(tmpref); + tmpref.symbol := ref.symbol; + tmpref.offset := ref.offset; + tmpref.symaddr := refs_ha; + if ref.base.number <> NR_NO then + list.concat(taicpu.op_reg_reg_ref(A_ADDIS,tmpreg, + ref.base,tmpref)) + else + list.concat(taicpu.op_reg_ref(A_LIS,tmpreg,tmpref)); + ref.base := tmpreg; + ref.symaddr := refs_l; + list.concat(taicpu.op_reg_ref(op,reg,ref)); + end + else + list.concat(taicpu.op_reg_ref(op,reg,ref)); end; - 
list.concat(taicpu.op_reg_ref(op,reg,ref)); - if (tmpreg <> R_NO) then + + + if (tmpreg.number <> NR_NO) then +{$ifndef newra} free_scratch_reg(list,tmpreg); +{$else newra} + rg.ungetregisterint(list,tmpreg); +{$endif newra} end; @@ -1667,22 +2583,31 @@ const var tmpreg: tregister; tmpreg64: tregister64; + newop: TOpCG; issub: boolean; begin case op of OP_AND,OP_OR,OP_XOR: begin - cg.a_op_const_reg_reg(list,op,OS_32,cardinal(value),regsrc.reglo,regdst.reglo); - cg.a_op_const_reg_reg(list,op,OS_32,value shr 32,regsrc.reghi, + cg.a_op_const_reg_reg(list,op,OS_32,aword(value),regsrc.reglo,regdst.reglo); + cg.a_op_const_reg_reg(list,op,OS_32,aword(value shr 32),regsrc.reghi, regdst.reghi); end; OP_ADD, OP_SUB: begin + if (int64(value) < 0) then + begin + if op = OP_ADD then + op := OP_SUB + else + op := OP_ADD; + int64(value) := -int64(value); + end; if (longint(value) <> 0) then begin issub := op = OP_SUB; - if (longint(value)-ord(issub) >= -32768) and - (longint(value)-ord(issub) <= 32767) then + if (int64(value) > 0) and + (int64(value)-ord(issub) <= 32767) then begin list.concat(taicpu.op_reg_reg_const(ops[issub,1], regdst.reglo,regsrc.reglo,longint(value))); @@ -1691,28 +2616,46 @@ const end else if ((value shr 32) = 0) then begin - tmpreg := cg.get_scratch_reg_int(list); +{$ifndef newra} + tmpreg := cg.get_scratch_reg_int(list,OS_32); +{$else newra} + tmpreg := rg.getregisterint(list,OS_32); +{$endif newra} cg.a_load_const_reg(list,OS_32,cardinal(value),tmpreg); list.concat(taicpu.op_reg_reg_reg(ops[issub,2], regdst.reglo,regsrc.reglo,tmpreg)); +{$ifndef newra} cg.free_scratch_reg(list,tmpreg); +{$else newra} + rg.ungetregisterint(list,tmpreg); +{$endif newra} list.concat(taicpu.op_reg_reg(ops[issub,3], regdst.reghi,regsrc.reghi)); end else begin - tmpreg64.reglo := cg.get_scratch_reg_int(list); - tmpreg64.reghi := cg.get_scratch_reg_int(list); +{$ifndef newra} + tmpreg64.reglo := cg.get_scratch_reg_int(list,OS_32); + tmpreg64.reghi := 
cg.get_scratch_reg_int(list,OS_32); +{$else newra} + tmpreg64.reglo := rg.getregisterint(list,OS_32); + tmpreg64.reghi := rg.getregisterint(list,OS_32); +{$endif newra} a_load64_const_reg(list,value,tmpreg64); a_op64_reg_reg_reg(list,op,tmpreg64,regsrc,regdst); +{$ifndef newra} cg.free_scratch_reg(list,tmpreg64.reghi); cg.free_scratch_reg(list,tmpreg64.reglo); +{$else newra} + rg.ungetregisterint(list,tmpreg64.reglo); + rg.ungetregisterint(list,tmpreg64.reghi); +{$endif newra} end end else begin cg.a_load_reg_reg(list,OS_INT,OS_INT,regsrc.reglo,regdst.reglo); - cg.a_op_const_reg_reg(list,op,OS_32,value shr 32,regsrc.reghi, + cg.a_op_const_reg_reg(list,op,OS_32,aword(value shr 32),regsrc.reghi, regdst.reghi); end; end; @@ -1728,7 +2671,235 @@ begin end. { $Log$ - Revision 1.58 2002-09-17 18:54:06 jonas + Revision 1.117 2002-10-01 05:24:28 olle + * made a_load_store more robust and to accept large offsets and cleaned up code + + Revision 1.116 2003/07/23 11:02:23 jonas + * don't use rg.getregisterint() anymore in g_stackframe_entry_*, because + the register colouring has already occurred then, use a hard-coded + register instead + + Revision 1.115 2003/07/20 20:39:20 jonas + * fixed newra bug due to the fact that we sometimes need a temp reg + when loading/storing to memory (base+index+offset is not possible) + and because a reference is often freed before it is last used, this + temp register was sometimes the same as one of the reference regs + + Revision 1.114 2003/07/20 16:15:58 jonas + * fixed bug in g_concatcopy with -dnewra + + Revision 1.113 2003/07/06 20:25:03 jonas + * fixed ppc compiler + + Revision 1.112 2003/07/05 20:11:42 jonas + * create_paraloc_info() is now called separately for the caller and + callee info + * fixed ppc cycle + + Revision 1.111 2003/07/02 22:18:04 peter + * paraloc split in callerparaloc,calleeparaloc + * sparc calling convention updates + + Revision 1.110 2003/06/18 10:12:36 olle + * macos: fixes of loading-code + + Revision 
1.109 2003/06/14 22:32:43 jonas + * ppc compiles with -dnewra, haven't tried to compile anything with it + yet though + + Revision 1.108 2003/06/13 21:19:31 peter + * current_procdef removed, use current_procinfo.procdef instead + + Revision 1.107 2003/06/09 14:54:26 jonas + * (de)allocation of registers for parameters is now performed properly + (and checked on the ppc) + - removed obsolete allocation of all parameter registers at the start + of a procedure (and deallocation at the end) + + Revision 1.106 2003/06/08 18:19:27 jonas + - removed duplicate identifier + + Revision 1.105 2003/06/07 18:57:04 jonas + + added freeintparaloc + * ppc get/freeintparaloc now check whether the parameter regs are + properly allocated/deallocated (and get an extra list para) + * ppc a_call_* now internalerrors if pi_do_call is not yet set + * fixed lot of missing pi_do_call's + + Revision 1.104 2003/06/04 11:58:58 jonas + * calculate localsize also in g_return_from_proc since it's now called + before g_stackframe_entry (still have to fix macos) + * compilation fixes (cycle doesn't work yet though) + + Revision 1.103 2003/06/01 21:38:06 peter + * getregisterfpu size parameter added + * op_const_reg size parameter added + * sparc updates + + Revision 1.102 2003/06/01 13:42:18 jonas + * fix for bug in fixref that Peter found during the Sparc conversion + + Revision 1.101 2003/05/30 18:52:10 jonas + * fixed bug with intregvars + * locapara.loc can also be LOC_CFPUREGISTER -> also fixed + rcgppc.a_param_ref, which previously got bogus size values + + Revision 1.100 2003/05/29 21:17:27 jonas + * compile with -dppc603 to not use unaligned float loads in move() and + g_concatcopy, because the 603 and 604 take an exception for those + (and netbsd doesn't even handle those in the kernel). There are + still some of those left that could cause problems though (e.g. 
+ in the set helpers) + + Revision 1.99 2003/05/29 10:06:09 jonas + * also free temps in g_concatcopy if delsource is true + + Revision 1.98 2003/05/28 23:58:18 jonas + * added missing initialization of rg.usedint{in,by}proc + * ppc now also saves/restores used fpu registers + * ncgcal doesn't add used registers to usedby/inproc anymore, except for + i386 + + Revision 1.97 2003/05/28 23:18:31 florian + * started to fix and clean up the sparc port + + Revision 1.96 2003/05/24 11:59:42 jonas + * fixed integer typeconversion problems + + Revision 1.95 2003/05/23 18:51:26 jonas + * fixed support for nested procedures and more parameters than those + which fit in registers (untested/probably not working: calling a + nested procedure from a deeper nested procedure) + + Revision 1.94 2003/05/20 23:54:00 florian + + basic darwin support added + + Revision 1.93 2003/05/15 22:14:42 florian + * fixed last commit, changing lastsaveintreg to r31 caused some strange problems + + Revision 1.92 2003/05/15 21:37:00 florian + * sysv entry code saves r13 now as well + + Revision 1.91 2003/05/15 19:39:09 florian + * fixed ppc compiler which was broken by Peter's changes + + Revision 1.90 2003/05/12 18:43:50 jonas + * fixed g_concatcopy + + Revision 1.89 2003/05/11 20:59:23 jonas + * fixed bug with large offsets in entrycode + + Revision 1.88 2003/05/11 11:45:08 jonas + * fixed shifts + + Revision 1.87 2003/05/11 11:07:33 jonas + * fixed optimizations in a_op_const_reg_reg() + + Revision 1.86 2003/04/27 11:21:36 peter + * aktprocdef renamed to current_procinfo.procdef + * procinfo renamed to current_procinfo + * procinfo will now be stored in current_module so it can be + cleaned up properly + * gen_main_procsym changed to create_main_proc and release_main_proc + to also generate a tprocinfo structure + * fixed unit implicit initfinal + + Revision 1.85 2003/04/26 22:56:11 jonas + * fix to a_op64_const_reg_reg + + Revision 1.84 2003/04/26 16:08:41 jonas + * fixed g_flags2reg + + 
Revision 1.83 2003/04/26 15:25:29 florian + * fixed cmp_reg_reg_reg, cmp operands were emitted in the wrong order + + Revision 1.82 2003/04/25 20:55:34 florian + * stack frame calculations are now completely done using the code generator + routines instead of generating directly assembler so also large stack frames + are handled properly + + Revision 1.81 2003/04/24 11:24:00 florian + * fixed several issues with nested procedures + + Revision 1.80 2003/04/23 22:18:01 peter + * fixes to get rtl compiled + + Revision 1.79 2003/04/23 12:35:35 florian + * fixed several issues with powerpc + + applied a patch from Jonas for nested function calls (PowerPC only) + * ... + + Revision 1.78 2003/04/16 09:26:55 jonas + * assembler procedures now again get a stackframe if they have local + variables. No space is reserved for a function result however. + Also, the register parameters aren't automatically saved on the stack + anymore in assembler procedures. + + Revision 1.77 2003/04/06 16:39:11 jonas + * don't generate entry/exit code for assembler procedures + + Revision 1.76 2003/03/22 18:01:13 jonas + * fixed linux entry/exit code generation + + Revision 1.75 2003/03/19 14:26:26 jonas + * fixed R_TOC bugs introduced by new register allocator conversion + + Revision 1.74 2003/03/13 22:57:45 olle + * change in a_loadaddr_ref_reg + + Revision 1.73 2003/03/12 22:43:38 jonas + * more powerpc and generic fixes related to the new register allocator + + Revision 1.72 2003/03/11 21:46:24 jonas + * lots of new regallocator fixes, both in generic and ppc-specific code + (ppc compiler still can't compile the linux system unit though) + + Revision 1.71 2003/02/19 22:00:16 daniel + * Code generator converted to new register notation + - Horribly outdated todo.txt removed + + Revision 1.70 2003/01/13 17:17:50 olle + * changed global var access, TOC now contains pointers to globals + * fixed handling of function pointers + + Revision 1.69 2003/01/09 22:00:53 florian + * fixed some PowerPC 
issues + + Revision 1.68 2003/01/08 18:43:58 daniel + * Tregister changed into a record + + Revision 1.67 2002/12/15 19:22:01 florian + * fixed some crashes and a rte 201 + + Revision 1.66 2002/11/28 10:55:16 olle + * macos: changing code gen for references to globals + + Revision 1.65 2002/11/07 15:50:23 jonas + * fixed bctr(l) problems + + Revision 1.64 2002/11/04 18:24:19 olle + * macos: globals are located in TOC and relative r2, instead of absolute + + Revision 1.63 2002/10/28 22:24:28 olle + * macos entry/exit: only used registers are saved + - macos entry/exit: stackptr not saved in r31 anymore + * macos entry/exit: misc fixes + + Revision 1.62 2002/10/19 23:51:48 olle + * macos stack frame size computing updated + + macos epilogue: control register now restored + * macos prologue and epilogue: fp reg now saved and restored + + Revision 1.61 2002/10/19 12:50:36 olle + * reorganized prologue and epilogue routines + + Revision 1.60 2002/10/02 21:49:51 florian + * all A_BL instructions replaced by calls to a_call_name + + Revision 1.59 2002/10/02 13:24:58 jonas + * changed a_call_* so that no superfluous code is generated anymore + + Revision 1.58 2002/09/17 18:54:06 jonas * a_load_reg_reg() now has two size parameters: source and dest. This allows some optimizations on architectures that don't encode the register size in the register name.