* x86: Code generation fixes where the FLAGS register is not properly allocated.

(cherry picked from commit 6f24c8b4ef)
This commit is contained in:
J. Gareth "Curious Kit" Moreton 2022-04-21 01:02:17 +01:00 committed by florian
parent 7d3534de20
commit 1b8fc23dfd
12 changed files with 137 additions and 20 deletions

View File

@ -4281,6 +4281,9 @@ implementation
LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE :
begin
a_cmp_const_loc_label(list,p.resultdef,OC_NE,0,p.location,truelabel);
{$ifdef x86} { x86 always uses the flags in some way for conditional jumps }
a_reg_dealloc(list,NR_DEFAULTFLAGS);
{$endif x86}
a_jmp_always(list,falselabel);
end;
LOC_JUMP:

View File

@ -452,8 +452,10 @@ unit cgcpu;
current_asmdata.getjumplabel(again);
current_asmdata.getjumplabel(ok);
a_label(list,again);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(Taicpu.op_const_reg(A_CMP,S_L,winstackpagesize,NR_EDI));
a_jmp_cond(list,OC_B,ok);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,NR_ESP));
list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize,NR_EDI));
@ -679,8 +681,10 @@ unit cgcpu;
{ so we've to do some tricks here }
current_asmdata.getjumplabel(l1);
current_asmdata.getjumplabel(l2);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.Concat(taicpu.op_const_reg(A_TEST,S_B,32,NR_CL));
cg.a_jmp_flags(list,F_E,l1);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
tmpreg:=cg.getintregister(list,OS_32);
case op of
OP_SHL:
@ -812,8 +816,10 @@ unit cgcpu;
{ so we've to do some tricks here }
current_asmdata.getjumplabel(l1);
current_asmdata.getjumplabel(l2);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.Concat(taicpu.op_const_reg(A_TEST,S_B,32,NR_CL));
cg.a_jmp_flags(list,F_E,l1);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
case op of
OP_SHL:
begin

View File

@ -341,14 +341,18 @@ interface
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
href:=left.location.reference;
inc(href.offset,4);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_ref(A_CMP,S_L,aint(hi(right.location.value64)),href);
firstjmp64bitcmp;
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if assigned(hlab) then
cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
else
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_ref(A_CMP,S_L,aint(lo(right.location.value64)),left.location.reference);
secondjmp64bitcmp;
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end;
location_freetemp(current_asmdata.CurrAsmList,left.location);
exit;
@ -372,10 +376,12 @@ interface
LOC_REGISTER,
LOC_CREGISTER :
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
firstjmp64bitcmp;
emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
secondjmp64bitcmp;
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end;
LOC_CREFERENCE,
LOC_REFERENCE :
@ -383,22 +389,28 @@ interface
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
href:=right.location.reference;
inc(href.offset,4);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
firstjmp64bitcmp;
emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
secondjmp64bitcmp;
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
location_freetemp(current_asmdata.CurrAsmList,right.location);
end;
LOC_CONSTANT :
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
firstjmp64bitcmp;
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if assigned(hlab) then
cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
else
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
secondjmp64bitcmp;
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end;
end;
else

View File

@ -88,9 +88,11 @@ implementation
exit;
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
hreg1:=left.location.register;
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_TEST,S_L,hreg1,hreg1);
current_asmdata.getjumplabel(hl);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if power=1 then
emit_reg(A_INC,S_L,hreg1)
else
@ -194,8 +196,10 @@ implementation
{ so we've to do some tricks here }
current_asmdata.getjumplabel(l2);
current_asmdata.getjumplabel(l3);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_reg(A_TEST,S_B,32,NR_CL);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_E,l2);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if nodetype=shln then
begin
emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);

View File

@ -2026,8 +2026,8 @@ unit cgcpu;
else
list.concat(taicpu.op_const_reg(A_CMP,S_W,longint(a and $ffff),reg));
gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
a_label(list,hl_skip);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
a_label(list,hl_skip);
end
else
inherited a_cmp_const_reg_label(list, size, cmp_op, a, reg, l);
@ -2051,8 +2051,8 @@ unit cgcpu;
dec(tmpref.offset,2);
list.concat(taicpu.op_const_ref(A_CMP,S_W,longint(a and $ffff),tmpref));
gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
a_label(list,hl_skip);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
a_label(list,hl_skip);
end
else
inherited a_cmp_const_ref_label(list, size, cmp_op, a, ref, l);
@ -2073,8 +2073,8 @@ unit cgcpu;
gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
list.concat(taicpu.op_reg_reg(A_CMP,S_W,reg1,reg2));
gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
a_label(list,hl_skip);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
a_label(list,hl_skip);
end
else
inherited a_cmp_reg_reg_label(list, size, cmp_op, reg1, reg2, l);
@ -2099,8 +2099,8 @@ unit cgcpu;
dec(tmpref.offset,2);
list.concat(taicpu.op_ref_reg(A_CMP,S_W,tmpref,reg));
gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
a_label(list,hl_skip);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
a_label(list,hl_skip);
end
else
inherited a_cmp_ref_reg_label(list, size, cmp_op, ref, reg, l);
@ -2125,8 +2125,8 @@ unit cgcpu;
dec(tmpref.offset,2);
list.concat(taicpu.op_reg_ref(A_CMP,S_W,reg,tmpref));
gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
a_label(list,hl_skip);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
a_label(list,hl_skip);
end
else
inherited a_cmp_reg_ref_label(list, size, cmp_op, reg, ref, l);

View File

@ -157,10 +157,12 @@ implementation
end
else
begin
{ a jump, but less operations }
{ a jump, but fewer operations }
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_TEST,S_W,hreg1,hreg1);
current_asmdata.getjumplabel(hl);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if power=1 then
emit_reg(A_INC,S_W,hreg1)
else
@ -256,10 +258,12 @@ implementation
d:=tordconstnode(right).value.svalue;
if d>=$8000 then
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_reg(A_CMP,S_W,aint(d),hreg1);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
emit_const_reg(A_MOV,S_W,0,location.register);
emit_const_reg(A_SBB,S_W,-1,location.register);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end
else
begin

View File

@ -2386,8 +2386,10 @@ unit cgx86;
exit;
end;
{$endif x86_64}
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_const_ref(A_CMP,TCgSize2OpSize[size],a,tmpref));
a_jmp_cond(list,cmp_op,l);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
end;
@ -2397,8 +2399,10 @@ unit cgx86;
begin
check_register_size(size,reg1);
check_register_size(size,reg2);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_reg(A_CMP,TCgSize2OpSize[size],reg1,reg2));
a_jmp_cond(list,cmp_op,l);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
end;
@ -2409,8 +2413,10 @@ unit cgx86;
tmpref:=ref;
make_simple_ref(list,tmpref);
check_register_size(size,reg);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_ref_reg(A_CMP,TCgSize2OpSize[size],tmpref,reg));
a_jmp_cond(list,cmp_op,l);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
end;
@ -2421,8 +2427,10 @@ unit cgx86;
tmpref:=ref;
make_simple_ref(list,tmpref);
check_register_size(size,reg);
cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
list.concat(taicpu.op_reg_ref(A_CMP,TCgSize2OpSize[size],reg,tmpref));
a_jmp_cond(list,cmp_op,l);
cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
end;

View File

@ -39,7 +39,7 @@ unit nx86add;
procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
procedure force_left_and_right_fpureg;
procedure prepare_x87_locations(out refnode: tnode);
procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize;AllocFlags:boolean);
procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
procedure second_cmpfloatvector;
@ -93,7 +93,13 @@ unit nx86add;
hl4 : tasmlabel;
r : Tregister;
href : treference;
overflowcheck: boolean;
comparison: boolean;
begin
overflowcheck:=needoverflowcheck;
comparison:=
(op=A_CMP) or (op=A_TEST) or (op=A_BT) or is_boolean(resultdef);
{ at this point, left.location.loc should be LOC_REGISTER }
if right.location.loc=LOC_REGISTER then
begin
@ -114,6 +120,10 @@ unit nx86add;
emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
location_swap(left.location,right.location);
if comparison then
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
end;
end
@ -126,6 +136,10 @@ unit nx86add;
cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
if comparison then
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
end
@ -193,12 +207,14 @@ unit nx86add;
r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
if comparison or (mboverflow and overflowcheck) then
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
end
else
begin
emit_op_right_left(op,opsize);
end;
emit_op_right_left(op,opsize,comparison or (mboverflow and overflowcheck));
end;
end;
end;
@ -216,6 +232,9 @@ unit nx86add;
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
else
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
if not comparison then
cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
cg.a_label(current_asmdata.CurrAsmList,hl4);
end;
@ -345,7 +364,7 @@ unit nx86add;
end;
procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize;AllocFlags:boolean);
{$ifdef x86_64}
var
tmpreg : tregister;
@ -357,11 +376,18 @@ unit nx86add;
case right.location.loc of
LOC_REGISTER,
LOC_CREGISTER :
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
begin
if AllocFlags then
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
end;
LOC_REFERENCE,
LOC_CREFERENCE :
begin
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
if AllocFlags then
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
end;
LOC_CONSTANT :
@ -373,11 +399,18 @@ unit nx86add;
begin
tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
if AllocFlags then
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
end
else
{$endif x86_64}
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
begin
if AllocFlags then
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
end;
end;
else
internalerror(200203232);
@ -623,7 +656,7 @@ unit nx86add;
((nf_swapped in flags) and (nodetype = gten)) then
swapleftright;
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
emit_op_right_left(A_AND,opsize);
emit_op_right_left(A_AND,opsize,False);
op:=A_CMP;
{ warning: ugly hack, we need a JE so change the node to equaln }
nodetype:=equaln;
@ -1591,6 +1624,7 @@ unit nx86add;
{$endif x86_64}
then
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
location_freetemp(current_asmdata.CurrAsmList,left.location);
end

View File

@ -329,6 +329,7 @@ implementation
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
if (left.location.loc=LOC_REGISTER) and (torddef(left.resultdef).ordtype=u64bit) then
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if use_bt then
begin
{$if defined(cpu64bitalu)}
@ -391,17 +392,18 @@ implementation
constant to save space. }
current_asmdata.getglobaldatalabel(l1);
current_asmdata.getjumplabel(l2);
if not(signtested) then
begin
if use_bt then
begin
{$if defined(cpu64bitalu) or defined(cpu32bitalu)}
inc(leftref.offset,4);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_ref(A_BT,S_L,31,leftref);
dec(leftref.offset,4);
{$elseif defined(cpu16bitalu)}
inc(leftref.offset,6);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_ref(A_BT,S_W,15,leftref);
dec(leftref.offset,6);
{$endif}
@ -412,6 +414,7 @@ implementation
{ reading a byte, instead of word is faster on a true }
{ 8088, because of the 8-bit data bus }
inc(leftref.offset,7);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_ref(A_TEST,S_B,aint($80),leftref);
dec(leftref.offset,7);
{$else i8086}
@ -425,9 +428,11 @@ implementation
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NC,l2)
else
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_E,l2);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
{ I got this constant from a test program (FK) }
{ It's actually the bit representation of 2^64 as a Single [Kit] }
current_asmdata.asmlists[al_typedconsts].concat(Tai_const.Create_32bit($5f800000));
reference_reset_symbol(href,l1,0,4,[]);
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,href);

View File

@ -266,6 +266,7 @@ interface
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
inc(left.location.reference.offset,4);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
end
else
@ -276,6 +277,7 @@ interface
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
inc(left.location.reference.offset,2);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
inc(left.location.reference.offset,2);
cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
@ -288,11 +290,15 @@ interface
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
inc(left.location.reference.offset,2);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
end
else
{$endif}
emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
end;
location_reset(location,LOC_FLAGS,OS_NO);
location.resflags:=F_E;
end;
@ -308,6 +314,7 @@ interface
if is_64bit(resultdef) then
begin
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
end
else
@ -315,6 +322,7 @@ interface
if is_64bit(resultdef) then
begin
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
@ -322,12 +330,14 @@ interface
else if is_32bit(resultdef) then
begin
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_OR,S_L,cg.GetNextReg(left.location.register),left.location.register);
end
else
{$endif}
begin
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
end;
location_reset(location,LOC_FLAGS,OS_NO);
@ -499,15 +509,18 @@ interface
peephole optimizer. [Kit] }
emit_reg_reg(A_XOR,opsize,location.register,location.register);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
begin
hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
emit_const_reg(A_MOV,opsize,aint(d),hreg2);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
end
else
emit_const_reg(A_CMP,opsize,aint(d),hreg1);
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_reg(A_CMP,opsize,aint(d),hreg1);
end;
{ NOTE: SBB and SETAE are both 3 bytes long without the REX prefix,
both use an ALU for their execution and take a single cycle to
run. The only difference is that SETAE does not modify the flags,

View File

@ -523,6 +523,8 @@ implementation
{ "x in [y..z]" expression }
adjustment := 0;
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
for i:=1 to numparts do
if setparts[i].range then
{ use fact that a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
@ -548,6 +550,7 @@ implementation
{ (this will never overflow since we check at the }
{ beginning whether stop-start <> 255) }
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_B,setparts[i].stop-setparts[i].start+1,pleftreg,l);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
end
else
{ if setparts[i].start = 0 and setparts[i].stop = 255, }
@ -589,6 +592,7 @@ implementation
begin
if left.location.loc=LOC_CONSTANT then
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
location.resflags:=F_NE;
case right.location.loc of
LOC_REGISTER,
@ -624,6 +628,7 @@ implementation
emit_const_reg(A_MOV,S_W,1,hreg);
emit_reg_reg(A_SHL,S_W,NR_CL,hreg);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
case right.location.loc of
LOC_REGISTER,
LOC_CREGISTER :
@ -648,6 +653,7 @@ implementation
hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,u32inttype,true);
hreg:=left.location.register;
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
case right.location.loc of
LOC_REGISTER,
LOC_CREGISTER :
@ -688,15 +694,18 @@ implementation
left.location.size := OS_16;
cg.a_load_loc_reg(current_asmdata.CurrAsmList,OS_16,left.location,NR_CX);
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,15,NR_CX,l);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
{ set the zero flag }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_TEST,S_B,0,NR_AL));
cg.a_jmp_always(current_asmdata.CurrAsmList,l2);
cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_16);
cg.a_label(current_asmdata.CurrAsmList,l);
emit_const_reg(A_MOV,S_W,1,hreg);
emit_reg_reg(A_SHL,S_W,NR_CL,hreg);
cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_const_reg(A_TEST,S_W,right.location.value,hreg);
cg.a_label(current_asmdata.CurrAsmList,l2);
@ -720,6 +729,7 @@ implementation
hreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,opsize);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,left.location.size,opsize,left.location.register,hreg);
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,31,hreg,l);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
{ reset carry flag }
current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLC,S_NO));
cg.a_jmp_always(current_asmdata.CurrAsmList,l2);
@ -732,6 +742,7 @@ implementation
end;
else
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_const_ref(A_CMP,TCGSize2OpSize[orgopsize],31,left.location.reference);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_BE,l);
{ reset carry flag }
@ -763,10 +774,12 @@ implementation
LOC_REFERENCE,LOC_CREFERENCE:
begin
inc(right.location.reference.offset,(left.location.value-setbase) shr 3);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_const_ref(A_TEST,S_B,1 shl ((left.location.value-setbase) and 7),right.location.reference);
end;
LOC_REGISTER,LOC_CREGISTER:
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_const_reg(A_TEST,TCGSize2OpSize[right.location.size],1 shl (left.location.value-setbase),right.location.register);
end;
else
@ -819,21 +832,27 @@ implementation
{ BE will be false for negative values }
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,tsetdef(right.resultdef).setmax-tsetdef(right.resultdef).setbase,pleftreg,l);
{ set the zero flag }
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_TEST,S_B,0,NR_AL));
cg.a_jmp_always(current_asmdata.CurrAsmList,l2);
cg.a_label(current_asmdata.CurrAsmList,l);
cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_const_reg(A_MOV,S_W,1,pleftreg);
emit_reg_reg(A_SHL,S_W,NR_CL,pleftreg);
cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX);
case right.location.loc of
LOC_REGISTER, LOC_CREGISTER :
emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register);
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register);
end;
LOC_CREFERENCE, LOC_REFERENCE :
begin
if not use_small then
add_extra_offset(extra_offset_reg,right.location.reference);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg_ref(A_TEST,S_W,pleftreg,right.location.reference);
end;
else
@ -852,11 +871,15 @@ implementation
cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_CX);
case right.location.loc of
LOC_REGISTER, LOC_CREGISTER :
emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register);
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg_reg(A_TEST,S_W,pleftreg,right.location.register);
end;
LOC_CREFERENCE, LOC_REFERENCE :
begin
if not use_small then
add_extra_offset(extra_offset_reg,right.location.reference);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg_ref(A_TEST,S_W,pleftreg,right.location.reference);
end;
else
@ -887,6 +910,7 @@ implementation
{ BE will be false for negative values }
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opsize,OC_BE,tsetdef(right.resultdef).setmax-tsetdef(right.resultdef).setbase,pleftreg,l);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
{ reset carry flag }
current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLC,S_NO));
cg.a_jmp_always(current_asmdata.CurrAsmList,l2);
@ -910,6 +934,7 @@ implementation
end
else
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
case right.location.loc of
LOC_REGISTER, LOC_CREGISTER :
emit_reg_reg(A_BT,S_L,pleftreg,right.location.register);

View File

@ -118,6 +118,7 @@ implementation
case left.location.loc of
LOC_REGISTER :
begin
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_reg(A_BT,S_Q,63,left.location.register);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_Q,left.location.register,location.register));
end;
@ -126,6 +127,7 @@ implementation
href:=left.location.reference;
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,href);
inc(href.offset,4);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_ref(A_BT,S_L,31,href);
dec(href.offset,4);
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_Q,href,location.register));
@ -135,6 +137,7 @@ implementation
end;
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NC,l2);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
reference_reset_symbol(href,l1,0,4,[]);