fpc/compiler/x86/nx86set.pas
peter c95a859f0a * generic tlocation
* move tlocation to cgutils
2004-10-31 21:45:02 +00:00

503 lines
20 KiB
ObjectPascal

{
$Id$
Copyright (c) 1998-2002 by Florian Klaempfl
Generate x86 assembler for in/case nodes
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit nx86set;
{$i fpcdefs.inc}
interface
uses
node,nset,pass_1,ncgset;
type
{ x86 flavour of the "in" node: both compiler passes inherited from
  tinnode are overridden (declared here in the order the passes run). }
tx86innode = class(tinnode)
  { first pass; hands back a tnode }
  function pass_1 : tnode;override;
  { second pass }
  procedure pass_2;override;
end;
implementation
uses
globtype,systems,
verbose,globals,
symconst,symdef,defutil,
aasmbase,aasmtai,aasmcpu,
cgbase,pass_2,tgobj,
ncon,
cpubase,
cga,cgobj,cgutils,ncgutil,
cgx86;
{*****************************************************************************
TX86INNODE
*****************************************************************************}
{ First pass of the x86 "in" node.

  Runs firstpass on both operands, announces that the result is expected
  in the CPU flags and updates the integer register estimate.
  Always returns nil. Returns early (after the operands' first pass) when
  codegenerror is set. }
function tx86innode.pass_1 : tnode;
  begin
    result:=nil;
    { this is the only difference from the generic version }
    expectloc:=LOC_FLAGS;

    firstpass(right);
    firstpass(left);
    if codegenerror then
      exit;

    left_right_max;
    { A smallset may need one additional integer register.
      The decision is based on the location predicted by the first pass
      (expectloc): right.location is only set up by pass_2 (see the
      location_reset/secondpass calls there), so it cannot be consulted
      here. }
    if (left.nodetype<>ordconstn) and
       not(right.expectloc in [LOC_CREGISTER,LOC_REGISTER]) and
       (right.registersint<1) then
      inc(registersint);
  end;
{ Second pass of the x86 "in" node: emits the code for "left in right" and
  leaves the result in the CPU flags (location is always reset to LOC_FLAGS;
  location.resflags tells which flag carries the result).

  Three strategies are used:
    - genjumps : right is a set constant that analizeset could describe with
                 a few single elements/ranges. left is compared against
                 those parts directly and right is never passed to
                 secondpass.
    - use_small: right is a smallset and left cannot be larger than 31, so a
                 single TEST/BT on the 32 bit set value is sufficient.
    - otherwise: a range checked BT against a constant set value, or a
                 TEST/BT against the set through right.location.reference. }
procedure tx86innode.pass_2;
  type
    Tsetpart=record
      range : boolean;    {Part is a range.}
      start,stop : byte;  {Start/stop when range; Stop=element when an element.}
    end;
  var
    genjumps,
    use_small,
    ranges     : boolean;
    hreg,hreg2,
    pleftreg   : tregister;
    opsize     : tcgsize;
    setparts   : array[1..8] of Tsetpart;
    i,numparts : byte;
    adjustment : longint;
    l,l2       : tasmlabel;
{$ifdef CORRECT_SET_IN_FPC}
    AM         : tasmop;
{$endif CORRECT_SET_IN_FPC}

  { Splits the set constant Aset^ into single elements and ranges.

    The parts are stored in setparts[1..numparts] in ascending order and
    "ranges" is set when at least one part is a range. Every new part costs
    one compare; the budget is 8 compares with cs_littlesize, 5 otherwise,
    and 3 when is_small is true.

    Returns true when the whole set fits in the budget. Returns false as
    soon as the budget is exceeded; setparts/numparts/ranges then describe
    only the parts found so far.
    Stops with internalerror(2002072301) for an empty set. }
  function analizeset(Aset:pconstset;is_small:boolean):boolean;
    var
      compares,maxcompares:word;
      i:byte;
    begin
      if tnormalset(Aset^)=[] then
        {The expression...
             if expr in []
         ...is always false. It should be optimized away in the
         resulttype pass, and thus never occur here. Since we
         do generate wrong code for it, do internalerror.}
        internalerror(2002072301);
      analizeset:=false;
      ranges:=false;
      numparts:=0;
      compares:=0;
      { Lots of comparisons take a lot of time, so do not allow
        too many comparisons. 8 comparisons are, however, still
        smaller than emitting the set }
      if cs_littlesize in aktglobalswitches then
        maxcompares:=8
      else
        maxcompares:=5;
      { when smallset is possible allow only 3 compares the smallset
        code is for littlesize also smaller when more compares are used }
      if is_small then
        maxcompares:=3;
      for i:=0 to 255 do
        if i in tnormalset(Aset^) then
          begin
            { a new part starts when there is no part yet or when i does
              not directly follow the last element of the current part }
            if (numparts=0) or (i<>setparts[numparts].stop+1) then
              begin
                {Set element is a separate element.}
                inc(compares);
                if compares>maxcompares then
                  exit;
                inc(numparts);
                setparts[numparts].range:=false;
                setparts[numparts].stop:=i;
              end
            else
              {Set element is part of a range.}
              if not setparts[numparts].range then
                begin
                  {Transform an element into a range.}
                  setparts[numparts].range:=true;
                  setparts[numparts].start:=setparts[numparts].stop;
                  setparts[numparts].stop:=i;
                  ranges := true;
                  { there's only one compare per range anymore. Only a }
                  { sub is added, but that's much faster than a        }
                  { cmp/jcc combo so neglect its effect                }
                  { inc(compares);
                    if compares>maxcompares then
                      exit; }
                end
              else
                begin
                  {Extend a range.}
                  setparts[numparts].stop:=i;
                end;
          end;
      analizeset:=true;
    end;

  begin
    { We check first if we can generate jumps, this can be done
      because the resulttype.def is already set in firstpass }

    { check if we can use smallset operation using btl which is limited
      to 32 bits, the left side may also not contain higher values !!
      Only the bits 0..31 exist in a smallset, so the highest possible
      value of left has to be strictly below 32. }
    use_small:=(tsetdef(right.resulttype.def).settype=smallset) and
               ((left.resulttype.def.deftype=orddef) and (torddef(left.resulttype.def).high<32) or
                (left.resulttype.def.deftype=enumdef) and (tenumdef(left.resulttype.def).max<32));

    { Can we generate jumps? Possible for all types of sets }
    genjumps:=(right.nodetype=setconstn) and
              analizeset(tsetconstnode(right).value_set,use_small);
    { calculate both operators }
    { the complex one first }
    firstcomplex(self);
    secondpass(left);
    { Only process the right if we are not generating jumps }
    if not genjumps then
      begin
        secondpass(right);
      end;
    if codegenerror then
      exit;

    { of course not commutative }
    if nf_swaped in flags then
      swapleftright;

    if genjumps then
      begin
        { It gives us advantage to check for the set elements
          separately instead of using the SET_IN_BYTE procedure.
          To do: Build in support for LOC_JUMP }

        opsize := def_cgsize(left.resulttype.def);
        { If register is used, use only lower 8 bits }
        if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
          begin
            { for ranges we always need a 32bit register, because then we }
            { use the register as base in a reference (JM)                }
            if ranges then
              begin
                pleftreg:=cg.makeregsize(exprasmlist,left.location.register,OS_INT);
                cg.a_load_reg_reg(exprasmlist,left.location.size,OS_INT,left.location.register,pleftreg);
                if opsize<>OS_INT then
                  cg.a_op_const_reg(exprasmlist,OP_AND,OS_INT,255,pleftreg);
                opsize:=OS_INT;
              end
            else
              { otherwise simply use the lower 8 bits (no "and" }
              { necessary this way) (JM)                        }
              begin
                pleftreg:=cg.makeregsize(exprasmlist,left.location.register,OS_8);
                opsize := OS_8;
              end;
          end
        else
          begin
            { load the value in a register }
            pleftreg:=cg.getintregister(exprasmlist,OS_32);
            opsize:=OS_32;
            cg.a_load_ref_reg(exprasmlist,OS_8,OS_32,left.location.reference,pleftreg);
          end;

        { Get a label to jump to the end }
        location_reset(location,LOC_FLAGS,OS_NO);

        { It's better to use the zero flag when there are
          no ranges }
        if ranges then
          location.resflags:=F_C
        else
          location.resflags:=F_E;

        objectlibrary.getlabel(l);

        { how much have we already subtracted from the x in the }
        { "x in [y..z]" expression                               }
        adjustment := 0;

        for i:=1 to numparts do
          if setparts[i].range then
            { use fact that a <= x <= b <=> cardinal(x-a) <= cardinal(b-a) }
            begin
              { is the range different from all legal values? }
              if (setparts[i].stop-setparts[i].start <> 255) then
                begin
                  { yes, is the lower bound <> 0? }
                  if (setparts[i].start <> 0) then
                    begin
                      { left lives in a LOC_CREGISTER: do the subtraction on
                        a copy in a freshly allocated register }
                      if (left.location.loc = LOC_CREGISTER) then
                        begin
                          hreg:=cg.getintregister(exprasmlist,OS_INT);
                          cg.a_load_reg_reg(exprasmlist,opsize,OS_INT,pleftreg,hreg);
                          pleftreg:=hreg;
                          opsize:=OS_INT;
                        end;
                      cg.a_op_const_reg(exprasmlist,OP_SUB,opsize,setparts[i].start-adjustment,pleftreg);
                    end;
                  { new total value subtracted from x:           }
                  { adjustment + (setparts[i].start - adjustment) }
                  adjustment := setparts[i].start;

                  { check if result < b-a+1 (not "result <= b-a", since }
                  { we need a carry in case the element is in the range }
                  { (this will never overflow since we check at the     }
                  { beginning whether stop-start <> 255)                }
                  cg.a_cmp_const_reg_label(exprasmlist,opsize,OC_B,setparts[i].stop-setparts[i].start+1,pleftreg,l);
                end
              else
                { if setparts[i].start = 0 and setparts[i].stop = 255,  }
                { it's always true since "in" is only allowed for bytes }
                begin
                  exprasmlist.concat(taicpu.op_none(A_STC,S_NO));
                  cg.a_jmp_always(exprasmlist,l);
                end;
            end
          else
            begin
              { Emit code to check if left is an element; the constant is
                corrected by what was already subtracted from left }
              exprasmlist.concat(taicpu.op_const_reg(A_CMP,TCGSize2OpSize[opsize],setparts[i].stop-adjustment,
                pleftreg));
              { Result should be in carry flag when ranges are used }
              if ranges then
                exprasmlist.concat(taicpu.op_none(A_STC,S_NO));
              { If found, jump to end }
              cg.a_jmp_flags(exprasmlist,F_E,l);
            end;
        if ranges and
           { if the last one was a range, the carry flag is already }
           { set appropriately                                      }
           not(setparts[numparts].range) then
          exprasmlist.concat(taicpu.op_none(A_CLC,S_NO));
        { To compensate for not doing a second pass }
        right.location.reference.symbol:=nil;
        { Now place the end label }
        cg.a_label(exprasmlist,l);
      end
    else
      begin
        location_reset(location,LOC_FLAGS,OS_NO);

        { We will now generate code to check the set itself, no jmps,
          handle smallsets separate, because it allows faster checks }
        if use_small then
          begin
            if left.nodetype=ordconstn then
              begin
                { constant element: test its bit mask against the set }
                location.resflags:=F_NE;
                case right.location.loc of
                  LOC_REGISTER,
                  LOC_CREGISTER:
                    begin
                      emit_const_reg(A_TEST,S_L,
                        1 shl (tordconstnode(left).value and 31),right.location.register);
                    end;
                  LOC_REFERENCE,
                  LOC_CREFERENCE :
                    begin
                      emit_const_ref(A_TEST,S_L,1 shl (tordconstnode(left).value and 31),
                        right.location.reference);
                    end;
                  else
                    internalerror(200203312);
                end;
              end
            else
              begin
                { variable element: get it into a 32 bit register (hreg) }
                case left.location.loc of
                  LOC_REGISTER,
                  LOC_CREGISTER:
                    begin
                      hreg:=cg.makeregsize(exprasmlist,left.location.register,OS_32);
                      cg.a_load_reg_reg(exprasmlist,left.location.size,OS_32,left.location.register,hreg);
                    end;
                  else
                    begin
                      { the set element is never smaller than a byte
                        and because it's a small set we need only 5 bits
                        but 8 bits are easier to load }
                      hreg:=cg.getintregister(exprasmlist,OS_32);
                      cg.a_load_ref_reg(exprasmlist,OS_8,OS_32,left.location.reference,hreg);
                    end;
                end;

                { bit test of the set with hreg as bit number }
                case right.location.loc of
                  LOC_REGISTER,
                  LOC_CREGISTER :
                    begin
                      emit_reg_reg(A_BT,S_L,hreg,right.location.register);
                    end;
                  LOC_CONSTANT :
                    begin
                      { We have to load the value into a register because
                        btl does not accept values only refs or regs (PFV) }
                      hreg2:=cg.getintregister(exprasmlist,OS_32);
                      cg.a_load_const_reg(exprasmlist,OS_32,right.location.value,hreg2);
                      emit_reg_reg(A_BT,S_L,hreg,hreg2);
                    end;
                  LOC_CREFERENCE,
                  LOC_REFERENCE :
                    begin
                      emit_reg_ref(A_BT,S_L,hreg,right.location.reference);
                    end;
                  else
                    internalerror(2002032210);
                end;
                location.resflags:=F_C;
              end;
          end
        else
          begin
            if right.location.loc=LOC_CONSTANT then
              begin
                { constant set value, but left may be larger than 31:
                  l  = left is <= 31, do the bit test
                  l2 = end; reached with a cleared carry when left > 31 }
                location.resflags:=F_C;
                objectlibrary.getlabel(l);
                objectlibrary.getlabel(l2);

                { load constants to a register }
                if left.nodetype=ordconstn then
                  location_force_reg(exprasmlist,left.location,OS_INT,true);

                case left.location.loc of
                  LOC_REGISTER,
                  LOC_CREGISTER:
                    begin
                      hreg:=cg.makeregsize(exprasmlist,left.location.register,OS_32);
                      cg.a_load_reg_reg(exprasmlist,left.location.size,OS_32,left.location.register,hreg);
                      cg.a_cmp_const_reg_label(exprasmlist,OS_32,OC_BE,31,hreg,l);
                      { reset carry flag }
                      exprasmlist.concat(taicpu.op_none(A_CLC,S_NO));
                      cg.a_jmp_always(exprasmlist,l2);
                      cg.a_label(exprasmlist,l);
                      { We have to load the value into a register because
                        btl does not accept values only refs or regs (PFV) }
                      hreg2:=cg.getintregister(exprasmlist,OS_32);
                      cg.a_load_const_reg(exprasmlist,OS_32,right.location.value,hreg2);
                      emit_reg_reg(A_BT,S_L,hreg,hreg2);
                    end;
                  else
                    begin
{$ifdef CORRECT_SET_IN_FPC}
                      if m_tp in aktmodeswitches then
                        begin
                          {***WARNING only correct if
                           reference is 32 bits (PM) *****}
                          emit_const_ref(A_CMP,S_L,31,reference_copy(left.location.reference));
                        end
                      else
{$endif CORRECT_SET_IN_FPC}
                        begin
                          emit_const_ref(A_CMP,S_B,31,left.location.reference);
                        end;
                      cg.a_jmp_flags(exprasmlist,F_BE,l);
                      { reset carry flag }
                      exprasmlist.concat(taicpu.op_none(A_CLC,S_NO));
                      cg.a_jmp_always(exprasmlist,l2);
                      cg.a_label(exprasmlist,l);
                      { NOTE(review): left was compared as a byte (S_B) above
                        but is loaded with OS_32 here - confirm that reading
                        32 bits from this reference is intended }
                      hreg:=cg.getintregister(exprasmlist,OS_32);
                      cg.a_load_ref_reg(exprasmlist,OS_32,OS_32,left.location.reference,hreg);
                      { We have to load the value into a register because
                        btl does not accept values only refs or regs (PFV) }
                      hreg2:=cg.getintregister(exprasmlist,OS_32);
                      cg.a_load_const_reg(exprasmlist,OS_32,right.location.value,hreg2);
                      emit_reg_reg(A_BT,S_L,hreg,hreg2);
                    end;
                end;
                cg.a_label(exprasmlist,l2);
              end { of right.location.loc=LOC_CONSTANT }
            { do search in a normal set which could have >32 elements
              but also used if the left side contains higher values > 32 }
            else if left.nodetype=ordconstn then
              begin
                { constant element: address the byte of the set holding the
                  bit (value shr 3) and test the bit in it (value and 7) }
                location.resflags:=F_NE;
                inc(right.location.reference.offset,tordconstnode(left).value shr 3);
                emit_const_ref(A_TEST,S_B,1 shl (tordconstnode(left).value and 7),right.location.reference);
              end
            else
              begin
                if (left.location.loc=LOC_REGISTER) then
                  pleftreg:=cg.makeregsize(exprasmlist,left.location.register,OS_32)
                else
                  pleftreg:=cg.getintregister(exprasmlist,OS_32);
                cg.a_load_loc_reg(exprasmlist,OS_32,left.location,pleftreg);
                location_freetemp(exprasmlist,left.location);
                emit_reg_ref(A_BT,S_L,pleftreg,right.location.reference);
                { tg.ungetiftemp(exprasmlist,right.location.reference) happens below }
                location.resflags:=F_C;
              end;
          end;
      end;
    { right has only been through secondpass when no jumps were generated }
    if not genjumps then
      location_freetemp(exprasmlist,right.location);
  end;
{ unit start-up: make cinnode refer to the x86 specific "in" node class }
initialization
  cinnode:=tx86innode;
end.
{
$Log$
Revision 1.8 2004-10-31 21:45:04 peter
* generic tlocation
* move tlocation to cgutils
Revision 1.7 2004/10/24 20:10:08 peter
* -Or fixes
Revision 1.6 2004/10/01 17:32:16 peter
* fix resizing of LOC_CREGISTER
Revision 1.5 2004/09/25 14:23:55 peter
* ungetregister is now only used for cpuregisters, renamed to
ungetcpuregister
* renamed (get|unget)explicitregister(s) to ..cpuregister
* removed location-release/reference_release
Revision 1.4 2004/06/16 20:07:11 florian
* dwarf branch merged
Revision 1.3 2004/05/22 23:34:28 peter
tai_regalloc.allocation changed to ratype to notify rgobj of register size changes
Revision 1.2.2.1 2004/04/28 18:35:42 peter
* cardinal fixes for x86-64
Revision 1.2 2004/02/27 10:21:06 florian
* top_symbol killed
+ refaddr to treference added
+ refsymbol to treference added
* top_local stuff moved to an extra record to save memory
+ aint introduced
* tppufile.get/putint64/aint implemented
Revision 1.1 2004/02/22 12:04:04 florian
+ nx86set added
* some more x86-64 fixes
}