From 1b8fc23dfdf514a714914de40bae216a6f7b7ab1 Mon Sep 17 00:00:00 2001 From: "J. Gareth \"Curious Kit\" Moreton" Date: Thu, 21 Apr 2022 01:02:17 +0100 Subject: [PATCH] * x86: Code generation fixes where FLAGS register is not properly allocated. (cherry picked from commit 6f24c8b4efccea67d092062009f413cc789a052c) --- compiler/hlcgobj.pas | 3 +++ compiler/i386/cgcpu.pas | 6 +++++ compiler/i386/n386add.pas | 12 +++++++++ compiler/i386/n386mat.pas | 4 +++ compiler/i8086/cgcpu.pas | 10 ++++---- compiler/i8086/n8086mat.pas | 6 ++++- compiler/x86/cgx86.pas | 8 ++++++ compiler/x86/nx86add.pas | 50 +++++++++++++++++++++++++++++++------ compiler/x86/nx86cnv.pas | 7 +++++- compiler/x86/nx86mat.pas | 19 +++++++++++--- compiler/x86/nx86set.pas | 29 +++++++++++++++++++-- compiler/x86_64/nx64cnv.pas | 3 +++ 12 files changed, 137 insertions(+), 20 deletions(-) diff --git a/compiler/hlcgobj.pas b/compiler/hlcgobj.pas index 4ec97a3e80..939ed34f08 100644 --- a/compiler/hlcgobj.pas +++ b/compiler/hlcgobj.pas @@ -4281,6 +4281,9 @@ implementation LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE : begin a_cmp_const_loc_label(list,p.resultdef,OC_NE,0,p.location,truelabel); +{$ifdef x86} { x86 always uses the flags in some way for conditional jumps } + a_reg_dealloc(list,NR_DEFAULTFLAGS); +{$endif x86} a_jmp_always(list,falselabel); end; LOC_JUMP: diff --git a/compiler/i386/cgcpu.pas b/compiler/i386/cgcpu.pas index 20949cba2c..f68d018521 100644 --- a/compiler/i386/cgcpu.pas +++ b/compiler/i386/cgcpu.pas @@ -452,8 +452,10 @@ unit cgcpu; current_asmdata.getjumplabel(again); current_asmdata.getjumplabel(ok); a_label(list,again); + cg.a_reg_alloc(list,NR_DEFAULTFLAGS); list.concat(Taicpu.op_const_reg(A_CMP,S_L,winstackpagesize,NR_EDI)); a_jmp_cond(list,OC_B,ok); + cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,NR_ESP)); list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI)); list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize,NR_EDI)); @@ -679,8 +681,10 @@ unit cgcpu; { so we've to do some tricks here } current_asmdata.getjumplabel(l1); current_asmdata.getjumplabel(l2); + cg.a_reg_alloc(list,NR_DEFAULTFLAGS); list.Concat(taicpu.op_const_reg(A_TEST,S_B,32,NR_CL)); cg.a_jmp_flags(list,F_E,l1); + cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); tmpreg:=cg.getintregister(list,OS_32); case op of OP_SHL: @@ -812,8 +816,10 @@ unit cgcpu; { so we've to do some tricks here } current_asmdata.getjumplabel(l1); current_asmdata.getjumplabel(l2); + cg.a_reg_alloc(list,NR_DEFAULTFLAGS); list.Concat(taicpu.op_const_reg(A_TEST,S_B,32,NR_CL)); cg.a_jmp_flags(list,F_E,l1); + cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); case op of OP_SHL: begin diff --git a/compiler/i386/n386add.pas b/compiler/i386/n386add.pas index 8f7d746e8d..986ea3ae1b 100644 --- a/compiler/i386/n386add.pas +++ b/compiler/i386/n386add.pas @@ -341,14 +341,18 @@ interface tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference); href:=left.location.reference; inc(href.offset,4); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_ref(A_CMP,S_L,aint(hi(right.location.value64)),href); firstjmp64bitcmp; + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); if assigned(hlab) then cg.a_jmp_always(current_asmdata.CurrAsmList,hlab) else begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_ref(A_CMP,S_L,aint(lo(right.location.value64)),left.location.reference); secondjmp64bitcmp; + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); end; location_freetemp(current_asmdata.CurrAsmList,left.location); exit; @@ -372,10 +376,12 @@ interface LOC_REGISTER, LOC_CREGISTER : begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi); firstjmp64bitcmp; emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo); secondjmp64bitcmp; + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); end; LOC_CREFERENCE, LOC_REFERENCE : @@ -383,22 +389,28 @@ interface tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference); href:=right.location.reference; inc(href.offset,4); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi); firstjmp64bitcmp; emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo); secondjmp64bitcmp; + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); location_freetemp(current_asmdata.CurrAsmList,right.location); end; LOC_CONSTANT : begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi)); firstjmp64bitcmp; + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); if assigned(hlab) then cg.a_jmp_always(current_asmdata.CurrAsmList,hlab) else begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo)); secondjmp64bitcmp; + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); end; end; else diff --git a/compiler/i386/n386mat.pas b/compiler/i386/n386mat.pas index 34cd229564..2b47c3ce5e 100644 --- a/compiler/i386/n386mat.pas +++ b/compiler/i386/n386mat.pas @@ -88,9 +88,11 @@ implementation exit; hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); hreg1:=left.location.register; + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_TEST,S_L,hreg1,hreg1); current_asmdata.getjumplabel(hl); cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); if power=1 then emit_reg(A_INC,S_L,hreg1) else @@ -194,8 +196,10 @@ implementation { so we've to do some tricks here } current_asmdata.getjumplabel(l2); current_asmdata.getjumplabel(l3); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_reg(A_TEST,S_B,32,NR_CL); cg.a_jmp_flags(current_asmdata.CurrAsmList,F_E,l2); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); if nodetype=shln then begin emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo); diff --git a/compiler/i8086/cgcpu.pas b/compiler/i8086/cgcpu.pas index ec5700ee57..3aa44ec679 100644 --- a/compiler/i8086/cgcpu.pas +++ b/compiler/i8086/cgcpu.pas @@ -2026,8 +2026,8 @@ unit cgcpu; else list.concat(taicpu.op_const_reg(A_CMP,S_W,longint(a and $ffff),reg)); gen_cmp32_jmp2(list, cmp_op, hl_skip, l); - a_label(list,hl_skip); cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); + a_label(list,hl_skip); end else inherited a_cmp_const_reg_label(list, size, cmp_op, a, reg, l); @@ -2051,8 +2051,8 @@ unit cgcpu; dec(tmpref.offset,2); list.concat(taicpu.op_const_ref(A_CMP,S_W,longint(a and $ffff),tmpref)); gen_cmp32_jmp2(list, cmp_op, hl_skip, l); - a_label(list,hl_skip); cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); + a_label(list,hl_skip); end else inherited a_cmp_const_ref_label(list, size, cmp_op, a, ref, l); @@ -2073,8 +2073,8 @@ unit cgcpu; gen_cmp32_jmp1(list, cmp_op, hl_skip, l); list.concat(taicpu.op_reg_reg(A_CMP,S_W,reg1,reg2)); gen_cmp32_jmp2(list, cmp_op, hl_skip, l); - a_label(list,hl_skip); cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); + a_label(list,hl_skip); end else inherited a_cmp_reg_reg_label(list, size, cmp_op, reg1, reg2, l); @@ -2099,8 +2099,8 @@ unit cgcpu; dec(tmpref.offset,2); list.concat(taicpu.op_ref_reg(A_CMP,S_W,tmpref,reg)); gen_cmp32_jmp2(list, cmp_op, hl_skip, l); - a_label(list,hl_skip); cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); + a_label(list,hl_skip); end else inherited a_cmp_ref_reg_label(list, size, cmp_op, ref, reg, l); @@ -2125,8 +2125,8 @@ unit cgcpu; dec(tmpref.offset,2); list.concat(taicpu.op_reg_ref(A_CMP,S_W,reg,tmpref)); gen_cmp32_jmp2(list, cmp_op, hl_skip, l); - a_label(list,hl_skip); cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); + a_label(list,hl_skip); end else inherited a_cmp_reg_ref_label(list, size, cmp_op, reg, ref, l); diff --git a/compiler/i8086/n8086mat.pas b/compiler/i8086/n8086mat.pas index 01465aabfc..c997ba0807 100644 --- a/compiler/i8086/n8086mat.pas +++ b/compiler/i8086/n8086mat.pas @@ -157,10 +157,12 @@ implementation end else begin - { a jump, but less operations } + { a jump, but fewer operations } + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_TEST,S_W,hreg1,hreg1); current_asmdata.getjumplabel(hl); cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); if power=1 then emit_reg(A_INC,S_W,hreg1) else @@ -256,10 +258,12 @@ implementation d:=tordconstnode(right).value.svalue; if d>=$8000 then begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_reg(A_CMP,S_W,aint(d),hreg1); location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); emit_const_reg(A_MOV,S_W,0,location.register); emit_const_reg(A_SBB,S_W,-1,location.register); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); end else begin diff --git a/compiler/x86/cgx86.pas b/compiler/x86/cgx86.pas index 97aeb4b39f..4d29b81222 100644 --- a/compiler/x86/cgx86.pas +++ b/compiler/x86/cgx86.pas @@ -2386,8 +2386,10 @@ unit cgx86; exit; end; {$endif x86_64} + cg.a_reg_alloc(list,NR_DEFAULTFLAGS); list.concat(taicpu.op_const_ref(A_CMP,TCgSize2OpSize[size],a,tmpref)); a_jmp_cond(list,cmp_op,l); + cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); end; @@ -2397,8 +2399,10 @@ unit cgx86; begin check_register_size(size,reg1); check_register_size(size,reg2); + cg.a_reg_alloc(list,NR_DEFAULTFLAGS); list.concat(taicpu.op_reg_reg(A_CMP,TCgSize2OpSize[size],reg1,reg2)); a_jmp_cond(list,cmp_op,l); + cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); end; @@ -2409,8 +2413,10 @@ unit cgx86; tmpref:=ref; make_simple_ref(list,tmpref); check_register_size(size,reg); + cg.a_reg_alloc(list,NR_DEFAULTFLAGS); list.concat(taicpu.op_ref_reg(A_CMP,TCgSize2OpSize[size],tmpref,reg)); a_jmp_cond(list,cmp_op,l); + cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); end; @@ -2421,8 +2427,10 @@ unit cgx86; tmpref:=ref; make_simple_ref(list,tmpref); check_register_size(size,reg); + cg.a_reg_alloc(list,NR_DEFAULTFLAGS); list.concat(taicpu.op_reg_ref(A_CMP,TCgSize2OpSize[size],reg,tmpref)); a_jmp_cond(list,cmp_op,l); + cg.a_reg_dealloc(list,NR_DEFAULTFLAGS); end; diff --git a/compiler/x86/nx86add.pas b/compiler/x86/nx86add.pas index 52562c6e67..c37eff03f7 100644 --- a/compiler/x86/nx86add.pas +++ b/compiler/x86/nx86add.pas @@ -39,7 +39,7 @@ unit nx86add; procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean); procedure force_left_and_right_fpureg; procedure prepare_x87_locations(out refnode: tnode); - procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize); + procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize;AllocFlags:boolean); procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean); procedure second_cmpfloatvector; @@ -93,7 +93,13 @@ unit nx86add; hl4 : tasmlabel; r : Tregister; href : treference; + overflowcheck: boolean; + comparison: boolean; begin + overflowcheck:=needoverflowcheck; + comparison:= + (op=A_CMP) or (op=A_TEST) or (op=A_BT) or is_boolean(resultdef); + { at this point, left.location.loc should be LOC_REGISTER } if right.location.loc=LOC_REGISTER then begin @@ -114,6 +120,10 @@ unit nx86add; emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register); if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then location_swap(left.location,right.location); + + if comparison then + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register); end; end @@ -126,6 +136,10 @@ unit nx86add; cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register); r:=cg.getintregister(current_asmdata.CurrAsmList,opsize); hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r); + + if comparison then + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r); cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register); end @@ -193,12 +207,14 @@ unit nx86add; r:=cg.getintregister(current_asmdata.CurrAsmList,opsize); hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r); emit_reg(A_NOT,TCGSize2Opsize[opsize],r); + + if comparison or (mboverflow and overflowcheck) then + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register); end else - begin - emit_op_right_left(op,opsize); - end; + emit_op_right_left(op,opsize,comparison or (mboverflow and overflowcheck)); end; end; end; @@ -216,6 +232,9 @@ unit nx86add; cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4) else cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4); + + if not comparison then + cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false); cg.a_label(current_asmdata.CurrAsmList,hl4); end; @@ -345,7 +364,7 @@ unit nx86add; end; - procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize); + procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize;AllocFlags:boolean); {$ifdef x86_64} var tmpreg : tregister; @@ -357,11 +376,18 @@ unit nx86add; case right.location.loc of LOC_REGISTER, LOC_CREGISTER : - current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register)); + begin + if AllocFlags then + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register)); + end; LOC_REFERENCE, LOC_CREFERENCE : begin tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference); + if AllocFlags then + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register)); end; LOC_CONSTANT : @@ -373,11 +399,18 @@ unit nx86add; begin tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize); cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg); + if AllocFlags then + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register)); end else {$endif x86_64} - current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register)); + begin + if AllocFlags then + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + + current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register)); + end; end; else internalerror(200203232); @@ -623,7 +656,7 @@ unit nx86add; ((nf_swapped in flags) and (nodetype = gten)) then swapleftright; hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false); - emit_op_right_left(A_AND,opsize); + emit_op_right_left(A_AND,opsize,False); op:=A_CMP; { warning: ugly hack, we need a JE so change the node to equaln } nodetype:=equaln; @@ -1591,6 +1624,7 @@ unit nx86add; {$endif x86_64} then begin + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference); location_freetemp(current_asmdata.CurrAsmList,left.location); end diff --git a/compiler/x86/nx86cnv.pas b/compiler/x86/nx86cnv.pas index 3a92259371..c1974fc123 100644 --- a/compiler/x86/nx86cnv.pas +++ b/compiler/x86/nx86cnv.pas @@ -329,6 +329,7 @@ implementation location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef)); if (left.location.loc=LOC_REGISTER) and (torddef(left.resultdef).ordtype=u64bit) then begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); if use_bt then begin {$if defined(cpu64bitalu)} @@ -391,17 +392,18 @@ implementation constant to save space. } current_asmdata.getglobaldatalabel(l1); current_asmdata.getjumplabel(l2); - if not(signtested) then begin if use_bt then begin {$if defined(cpu64bitalu) or defined(cpu32bitalu)} inc(leftref.offset,4); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_ref(A_BT,S_L,31,leftref); dec(leftref.offset,4); {$elseif defined(cpu16bitalu)} inc(leftref.offset,6); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_ref(A_BT,S_W,15,leftref); dec(leftref.offset,6); {$endif} @@ -412,6 +414,7 @@ implementation { reading a byte, instead of word is faster on a true } { 8088, because of the 8-bit data bus } inc(leftref.offset,7); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_ref(A_TEST,S_B,aint($80),leftref); dec(leftref.offset,7); {$else i8086} @@ -425,9 +428,11 @@ implementation cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NC,l2) else cg.a_jmp_flags(current_asmdata.CurrAsmList,F_E,l2); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint))); current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1)); { I got this constant from a test program (FK) } + { It's actually the bit representation of 2^64 as a Single [Kit] } current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_32bit($5f800000)); reference_reset_symbol(href,l1,0,4,[]); tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,href); diff --git a/compiler/x86/nx86mat.pas b/compiler/x86/nx86mat.pas index 6dc16bc9f5..0ccee9db0c 100644 --- a/compiler/x86/nx86mat.pas +++ b/compiler/x86/nx86mat.pas @@ -266,6 +266,7 @@ interface tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference); cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg); inc(left.location.reference.offset,4); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg); end else @@ -276,6 +277,7 @@ interface tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference); cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg); inc(left.location.reference.offset,2); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg); inc(left.location.reference.offset,2); cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg); @@ -288,11 +290,15 @@ interface tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference); cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg); inc(left.location.reference.offset,2); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg); end else {$endif} - emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference); + begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference); + end; location_reset(location,LOC_FLAGS,OS_NO); location.resflags:=F_E; end; @@ -308,6 +314,7 @@ interface if is_64bit(resultdef) then begin hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo); end else @@ -315,6 +322,7 @@ interface if is_64bit(resultdef) then begin hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reghi),left.location.register64.reghi); emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reglo),left.location.register64.reglo); emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo); @@ -322,12 +330,14 @@ interface else if is_32bit(resultdef) then begin hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_OR,S_L,cg.GetNextReg(left.location.register),left.location.register); end else {$endif} begin hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register); end; location_reset(location,LOC_FLAGS,OS_NO); @@ -499,15 +509,18 @@ interface peephole optimizer. [Kit] } emit_reg_reg(A_XOR,opsize,location.register,location.register); - cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP } begin hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize); emit_const_reg(A_MOV,opsize,aint(d),hreg2); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_reg_reg(A_CMP,opsize,hreg2,hreg1); end else - emit_const_reg(A_CMP,opsize,aint(d),hreg1); + begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + emit_const_reg(A_CMP,opsize,aint(d),hreg1); + end; { NOTE: SBB and SETAE are both 3 bytes long without the REX prefix, both use an ALU for their execution and take a single cycle to run. The only difference is that SETAE does not modify the flags, diff --git a/compiler/x86/nx86set.pas b/compiler/x86/nx86set.pas index fdca279a9a..765e29c84e 100644 --- a/compiler/x86/nx86set.pas +++ b/compiler/x86/nx86set.pas @@ -523,6 +523,8 @@ implementation { "x in [y..z]" expression } adjustment := 0; + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + for i:=1 to numparts do if setparts[i].range then { use fact that a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) } @@ -548,6 +550,7 @@ implementation { (this will never overflow since we check at the } { beginning whether stop-start <> 255) } cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_B,setparts[i].stop-setparts[i].start+1,pleftreg,l); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); end else { if setparts[i].start = 0 and setparts[i].stop = 255, } @@ -589,6 +592,7 @@ implementation begin if left.location.loc=LOC_CONSTANT then begin + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); location.resflags:=F_NE; case right.location.loc of LOC_REGISTER, @@ -624,6 +628,7 @@ implementation emit_const_reg(A_MOV,S_W,1,hreg); emit_reg_reg(A_SHL,S_W,NR_CL,hreg); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); case right.location.loc of LOC_REGISTER, LOC_CREGISTER : @@ -648,6 +653,7 @@ implementation hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,u32inttype,true); hreg:=left.location.register; + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); case right.location.loc of LOC_REGISTER, LOC_CREGISTER : @@ -688,15 +694,18 @@ implementation left.location.size := OS_16; cg.a_load_loc_reg(current_asmdata.CurrAsmList,OS_16,left.location,NR_CX); cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,15,NR_CX,l); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); { set the zero flag } current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_TEST,S_B,0,NR_AL)); cg.a_jmp_always(current_asmdata.CurrAsmList,l2); + cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_16); cg.a_label(current_asmdata.CurrAsmList,l); emit_const_reg(A_MOV,S_W,1,hreg); emit_reg_reg(A_SHL,S_W,NR_CL,hreg); cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_const_reg(A_TEST,S_W,right.location.value,hreg); cg.a_label(current_asmdata.CurrAsmList,l2); @@ -720,6 +729,7 @@ implementation hreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,opsize); cg.a_load_reg_reg(current_asmdata.CurrAsmList,left.location.size,opsize,left.location.register,hreg); cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,31,hreg,l); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); { reset carry flag } current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLC,S_NO)); cg.a_jmp_always(current_asmdata.CurrAsmList,l2); @@ -732,6 +742,7 @@ implementation end; else begin + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_const_ref(A_CMP,TCGSize2OpSize[orgopsize],31,left.location.reference); cg.a_jmp_flags(current_asmdata.CurrAsmList,F_BE,l); { reset carry flag } @@ -763,10 +774,12 @@ implementation LOC_REFERENCE,LOC_CREFERENCE: begin inc(right.location.reference.offset,(left.location.value-setbase) shr 3); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_const_ref(A_TEST,S_B,1 shl ((left.location.value-setbase) and 7),right.location.reference); end; LOC_REGISTER,LOC_CREGISTER: begin + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_const_reg(A_TEST,TCGSize2OpSize[right.location.size],1 shl (left.location.value-setbase),right.location.register); end; else @@ -819,21 +832,27 @@ implementation { BE will be false for negative values } cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,tsetdef(right.resultdef).setmax-tsetdef(right.resultdef).setbase,pleftreg,l); { set the zero flag } + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_TEST,S_B,0,NR_AL)); cg.a_jmp_always(current_asmdata.CurrAsmList,l2); cg.a_label(current_asmdata.CurrAsmList,l); + cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_const_reg(A_MOV,S_W,1,pleftreg); emit_reg_reg(A_SHL,S_W,NR_CL,pleftreg); cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX); case right.location.loc of LOC_REGISTER, LOC_CREGISTER : - emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register); + begin + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register); + end; LOC_CREFERENCE, LOC_REFERENCE : begin if not use_small then add_extra_offset(extra_offset_reg,right.location.reference); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_reg_ref(A_TEST,S_W,pleftreg,right.location.reference); end; else @@ -852,11 +871,15 @@ implementation cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX); case right.location.loc of LOC_REGISTER, LOC_CREGISTER : - emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register); + begin + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); + emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register); + end; LOC_CREFERENCE, LOC_REFERENCE : begin if not use_small then add_extra_offset(extra_offset_reg,right.location.reference); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); emit_reg_ref(A_TEST,S_W,pleftreg,right.location.reference); end; else @@ -887,6 +910,7 @@ implementation { BE will be false for negative values } cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,tsetdef(right.resultdef).setmax-tsetdef(right.resultdef).setbase,pleftreg,l); + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); { reset carry flag } current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLC,S_NO)); cg.a_jmp_always(current_asmdata.CurrAsmList,l2); @@ -910,6 +934,7 @@ implementation end else begin + cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS); case right.location.loc of LOC_REGISTER, LOC_CREGISTER : emit_reg_reg(A_BT,S_L,pleftreg,right.location.register); diff --git a/compiler/x86_64/nx64cnv.pas b/compiler/x86_64/nx64cnv.pas index d39de187fc..3c483ef708 100644 --- a/compiler/x86_64/nx64cnv.pas +++ b/compiler/x86_64/nx64cnv.pas @@ -118,6 +118,7 @@ implementation case left.location.loc of LOC_REGISTER : begin + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_reg(A_BT,S_Q,63,left.location.register); current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_Q,left.location.register,location.register)); end; @@ -126,6 +127,7 @@ implementation href:=left.location.reference; tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,href); inc(href.offset,4); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); emit_const_ref(A_BT,S_L,31,href); dec(href.offset,4); current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_Q,href,location.register)); @@ -135,6 +137,7 @@ implementation end; cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NC,l2); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint))); current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1)); reference_reset_symbol(href,l1,0,4,[]);