+ $fputype directive support

+ single data type operations with sse unit * fixed more x86-64 stuff
2025-10-23 17:51:37 +02:00 · 2003-12-25 01:07:09 +00:00 · 2003-12-25 01:07:09 +00:00 · 06442fa677
commit 06442fa677
parent 31a33a1a7b
13 changed files with 335 additions and 202 deletions
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@ -48,8 +48,6 @@ interface
         LOC_MMXREGISTER,  { MMX register }
         { MMX register variable }
         LOC_CMMXREGISTER,
-         LOC_SSEREGISTER,
-         LOC_CSSEREGISTER,
         { multimedia register }
         LOC_MMREGISTER,
         { Constant multimedia reg which shouldn't be modified }
@ -254,8 +252,6 @@ interface
            'LOC_CFPUREG',
            'LOC_MMXREG',
            'LOC_CMMXREG',
-            'LOC_SSEREG',
-            'LOC_CSSEREG',
            'LOC_MMREG',
            'LOC_CMMREG');

@ -587,7 +583,12 @@ finalization
 end.
 {
  $Log$
-  Revision 1.82  2003-12-22 23:10:21  peter
+  Revision 1.83  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.82  2003/12/22 23:10:21  peter
    * use low(longint) instead of $8000000

  Revision 1.81  2003/12/21 19:42:42  florian
--- a/compiler/defutil.pas
+++ b/compiler/defutil.pas
@ -160,6 +160,9 @@ interface
    {# Returns true, if def is a single type }
    function is_single(def : tdef) : boolean;

+    {# Returns true, if def is a double type }
+    function is_double(def : tdef) : boolean;
+
    {# Returns true, if def is a 64 bit integer type }
    function is_64bitint(def : tdef) : boolean;

@ -215,7 +218,7 @@ implementation
      end;


-    { returns true, if def is a currency type }
+    { returns true, if def is a single type }
    function is_single(def : tdef) : boolean;
      begin
        result:=(def.deftype=floatdef) and
@ -223,6 +226,14 @@ implementation
      end;


+    { returns true, if def is a double type }
+    function is_double(def : tdef) : boolean;
+      begin
+        result:=(def.deftype=floatdef) and
+          (tfloatdef(def).typ=s64real);
+      end;
+
+
    function range_to_basetype(low,high:TConstExprInt):tbasetype;
      begin
        { generate a unsigned range if high<0 and low>=0 }
@ -833,7 +844,12 @@ implementation
 end.
 {
  $Log$
-  Revision 1.7  2003-11-10 18:05:16  florian
+  Revision 1.8  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.7  2003/11/10 18:05:16  florian
    + is_single added

  Revision 1.6  2003/10/01 20:34:48  peter
--- a/compiler/globals.pas
+++ b/compiler/globals.pas
@ -1721,7 +1721,7 @@ implementation
        initoptprocessor:=ClassAthlon64;
        initspecificoptprocessor:=ClassAthlon64;

-        initfputype:=fpu_sse2;
+        initfputype:=fpu_sse64;

        initpackenum:=4;
        {$IFDEF testvarsets}
@ -1744,7 +1744,12 @@ implementation
 end.
 {
  $Log$
-  Revision 1.117  2003-12-20 12:38:51  florian
+  Revision 1.118  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.117  2003/12/20 12:38:51  florian
    * some x86-64 compilation fixe

  Revision 1.116  2003/11/30 19:35:29  florian
--- a/compiler/i386/cpuinfo.pas
+++ b/compiler/i386/cpuinfo.pas
@ -65,7 +65,8 @@ Type
      fpu_soft,
      fpu_x87,
      fpu_sse,
-      fpu_sse2
+      fpu_sse2,
+      fpu_sse3
     );


@ -112,15 +113,24 @@ Const
     'SOFT',
     'X87',
     'SSE',
-     'SSE2'
+     'SSE2',
+     'SSE3'
   );

+   sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2,fpu_sse3];
+   sse_doublescalar : set of tfputype = [];
+
 Implementation

 end.
 {
  $Log$
-  Revision 1.20  2003-12-01 18:43:31  peter
+  Revision 1.21  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.20  2003/12/01 18:43:31  peter
    * s128real type is not compatible with s80real

  Revision 1.19  2003/11/12 16:05:39  florian
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@ -35,7 +35,6 @@ interface
         protected
          function  first_addstring : tnode; override;
         private
-          procedure pass_left_and_right(var pushedfpu:boolean);
          function  getresflags(unsigned : boolean) : tresflags;
          procedure left_must_be_reg(opsize:TOpSize;noswap:boolean);
          procedure emit_op_right_left(op:TAsmOp;opsize:TOpSize);
@ -43,7 +42,6 @@ interface
          procedure set_result_location(cmpop,unsigned:boolean);
          procedure second_addstring;
          procedure second_addboolean;
-          procedure second_addfloat;
          procedure second_addsmallset;
          procedure second_addmmxset;
          procedure second_mul;
@ -71,25 +69,6 @@ interface
      const
         opsize_2_cgsize : array[S_B..S_L] of tcgsize = (OS_8,OS_16,OS_32);

-    procedure ti386addnode.pass_left_and_right(var pushedfpu:boolean);
-      begin
-        { calculate the operator which is more difficult }
-        firstcomplex(self);
-
-        { in case of constant put it to the left }
-        if (left.nodetype=ordconstn) then
-         swapleftright;
-        secondpass(left);
-
-        { are too few registers free? }
-        if location.loc=LOC_FPUREGISTER then
-          pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location)
-        else
-          pushedfpu:=false;
-        secondpass(right);
-      end;
-
-
    function ti386addnode.getresflags(unsigned : boolean) : tresflags;
      begin
         case nodetype of
@ -540,145 +519,6 @@ interface
      end;


-{*****************************************************************************
-                                AddFloat
-*****************************************************************************}
-
-    procedure ti386addnode.second_addfloat;
-      var
-        op         : TAsmOp;
-        resflags   : tresflags;
-        pushedfpu,
-        cmpop      : boolean;
-      begin
-        pass_left_and_right(pushedfpu);
-
-        cmpop:=false;
-        case nodetype of
-          addn :
-            op:=A_FADDP;
-          muln :
-            op:=A_FMULP;
-          subn :
-            op:=A_FSUBP;
-          slashn :
-            op:=A_FDIVP;
-          ltn,lten,gtn,gten,
-          equaln,unequaln :
-            begin
-              op:=A_FCOMPP;
-              cmpop:=true;
-            end;
-          else
-            internalerror(2003042214);
-        end;
-
-        if (right.location.loc<>LOC_FPUREGISTER) then
-         begin
-           cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
-           if (right.location.loc <> LOC_CFPUREGISTER) and
-              pushedfpu then
-             location_freetemp(exprasmlist,left.location);
-           if (left.location.loc<>LOC_FPUREGISTER) then
-            begin
-              cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
-              if (left.location.loc <> LOC_CFPUREGISTER) and
-                 pushedfpu then
-                location_freetemp(exprasmlist,left.location);
-            end
-           else
-            begin
-              { left was on the stack => swap }
-              toggleflag(nf_swaped);
-            end;
-
-           { releases the right reference }
-           location_release(exprasmlist,right.location);
-         end
-        { the nominator in st0 }
-        else if (left.location.loc<>LOC_FPUREGISTER) then
-         begin
-           cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
-           if (left.location.loc <> LOC_CFPUREGISTER) and
-              pushedfpu then
-             location_freetemp(exprasmlist,left.location);
-         end
-        else
-         begin
-           { fpu operands are always in the wrong order on the stack }
-           toggleflag(nf_swaped);
-         end;
-
-        { releases the left reference }
-        if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
-          location_release(exprasmlist,left.location);
-
-        { if we swaped the tree nodes, then use the reverse operator }
-        if nf_swaped in flags then
-          begin
-             if (nodetype=slashn) then
-               op:=A_FDIVRP
-             else if (nodetype=subn) then
-               op:=A_FSUBRP;
-          end;
-        { to avoid the pentium bug
-        if (op=FDIVP) and (opt_processors=pentium) then
-          cg.a_call_name(exprasmlist,'EMUL_FDIVP')
-        else
-        }
-        { the Intel assemblers want operands }
-        if op<>A_FCOMPP then
-          begin
-             emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
-             tcgx86(cg).dec_fpu_stack;
-          end
-        else
-          begin
-             emit_none(op,S_NO);
-             tcgx86(cg).dec_fpu_stack;
-             tcgx86(cg).dec_fpu_stack;
-          end;
-
-        { on comparison load flags }
-        if cmpop then
-         begin
-           cg.getexplicitregister(exprasmlist,NR_AX);
-           emit_reg(A_FNSTSW,S_NO,NR_AX);
-           emit_none(A_SAHF,S_NO);
-           cg.ungetregister(exprasmlist,NR_AX);
-           if nf_swaped in flags then
-            begin
-              case nodetype of
-                  equaln : resflags:=F_E;
-                unequaln : resflags:=F_NE;
-                     ltn : resflags:=F_A;
-                    lten : resflags:=F_AE;
-                     gtn : resflags:=F_B;
-                    gten : resflags:=F_BE;
-              end;
-            end
-           else
-            begin
-              case nodetype of
-                  equaln : resflags:=F_E;
-                unequaln : resflags:=F_NE;
-                     ltn : resflags:=F_B;
-                    lten : resflags:=F_BE;
-                     gtn : resflags:=F_A;
-                    gten : resflags:=F_AE;
-              end;
-            end;
-           location_reset(location,LOC_FLAGS,OS_NO);
-           location.resflags:=resflags;
-         end
-        else
-         begin
-           location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
-           location.register:=NR_ST;
-         end;
-      end;
-
-
 {*****************************************************************************
                                AddSmallSet
 *****************************************************************************}
@ -1588,7 +1428,12 @@ begin
 end.
 {
  $Log$
-  Revision 1.91  2003-12-24 00:10:02  florian
+  Revision 1.92  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.91  2003/12/24 00:10:02  florian
    - delete parameter in cg64 methods removed

  Revision 1.90  2003/12/23 22:13:41  peter
--- a/compiler/i386/n386mat.pas
+++ b/compiler/i386/n386mat.pas
@ -470,6 +470,14 @@ implementation
               location.register:=NR_ST;
               emit_none(A_FCHS,S_NO);
            end;
+          {
+          LOC_MMREGISTER,
+          LOC_CMMREGISTER:
+            begin
+            end;
+          }
+          else
+            internalerror(200312241);
        end;
      end;

@ -579,7 +587,12 @@ begin
 end.
 {
  $Log$
-  Revision 1.66  2003-12-10 17:28:41  peter
+  Revision 1.67  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.66  2003/12/10 17:28:41  peter
    * int64 shl/shr > 63 returns 0

  Revision 1.65  2003/10/10 17:48:14  peter
--- a/compiler/ncgld.pas
+++ b/compiler/ncgld.pas
@ -591,6 +591,21 @@ implementation
                    cg.a_loadmm_reg_ref(exprasmlist,right.location.register,left.location.reference);
                end;
 {$endif SUPPORT_MMX}
+              LOC_MMREGISTER,
+              LOC_CMMREGISTER:
+                begin
+                  if left.resulttype.def.deftype=arraydef then
+                    begin
+                    end
+                  else
+                    begin
+                      cgsize:=def_cgsize(left.resulttype.def);
+                      if left.location.loc=LOC_CMMREGISTER then
+                        cg.a_loadmm_reg_reg(exprasmlist,right.location.size,left.location.size,right.location.register,left.location.register,mms_movescalar)
+                      else
+                        cg.a_loadmm_reg_ref(exprasmlist,right.location.size,left.location.size,right.location.register,left.location.reference,mms_movescalar);
+                    end;
+                end;
              LOC_REGISTER,
              LOC_CREGISTER :
                begin
@ -657,7 +672,6 @@ implementation
                end;
 {$endif cpuflags}
            end;
-
         end;

        if releaseright then
@ -892,7 +906,12 @@ begin
 end.
 {
  $Log$
-  Revision 1.103  2003-12-24 00:10:02  florian
+  Revision 1.104  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.103  2003/12/24 00:10:02  florian
    - delete parameter in cg64 methods removed

  Revision 1.102  2003/12/06 01:15:22  florian
--- a/compiler/scandir.pas
+++ b/compiler/scandir.pas
@ -291,6 +291,9 @@ implementation

    procedure dir_fputype;
      begin
+        current_scanner.skipspace;
+        if not(SetFPUType(upper(current_scanner.readcomment),false)) then
+          comment(V_Error,'Illegal FPU type');
      end;

    procedure dir_goto;
@ -922,6 +925,7 @@ implementation
        AddDirective('EXTENDEDSYNTAX',directive_all, {$ifdef FPCPROCVAR}@{$endif}dir_extendedsyntax);
        AddDirective('EXTERNALSYM',directive_all, {$ifdef FPCPROCVAR}@{$endif}dir_externalsym);
        AddDirective('FATAL',directive_all, {$ifdef FPCPROCVAR}@{$endif}dir_fatal);
+        AddDirective('FPUTYPE',directive_all, {$ifdef FPCPROCVAR}@{$endif}dir_fputype);
        AddDirective('GOTO',directive_all, {$ifdef FPCPROCVAR}@{$endif}dir_goto);
        AddDirective('HINT',directive_all, {$ifdef FPCPROCVAR}@{$endif}dir_hint);
        AddDirective('HINTS',directive_all, {$ifdef FPCPROCVAR}@{$endif}dir_hints);
@ -988,7 +992,12 @@ implementation
 end.
 {
  $Log$
-  Revision 1.28  2003-11-12 16:05:39  florian
+  Revision 1.29  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.28  2003/11/12 16:05:39  florian
    * assembler readers OOPed
    + typed currency constants
    + typed 128 bit float constants if the CPU supports it
--- a/compiler/x86/aasmcpu.pas
+++ b/compiler/x86/aasmcpu.pas
@ -1531,7 +1531,9 @@ implementation
            202,
            209,
            210,
-            217,218,219 : ;
+            217,218: ;
+            219 :
+              inc(len);
            216 :
              begin
                inc(codes);
@ -1825,11 +1827,16 @@ implementation
            202,
            209,
            210,
-            217,218,219 :
+            217,218 :
              begin
                { these are dissambler hints or 32 bit prefixes which
                  are not needed }
              end;
+            219 :
+              begin
+                bytes[0]:=$f3;
+                sec.writebytes(bytes,1);
+              end;
            31,
            48,49,50,
            224,225,226 :
@ -2344,7 +2351,12 @@ implementation
 end.
 {
  $Log$
-  Revision 1.40  2003-12-15 21:25:49  peter
+  Revision 1.41  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.40  2003/12/15 21:25:49  peter
    * reg allocations for imaginary register are now inserted just
      before reg allocation
    * tregister changed to enum to allow compile time check
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@ -755,25 +755,36 @@ unit cgx86;
             else
               internalerror(200312202);
           end
+         else if shufflescalar(shuffle) then
+           list.concat(taicpu.op_reg_reg(get_scalar_mm_op(fromsize,tosize),S_NO,reg1,reg2))
         else
-           begin
-             if shufflescalar(shuffle) then
-               list.concat(taicpu.op_reg_reg(get_scalar_mm_op(fromsize,tosize),S_NO,reg1,reg2))
-             else
-               internalerror(200312201);
-           end;
+           internalerror(200312201);
       end;


    procedure tcgx86.a_loadmm_ref_reg(list: taasmoutput; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
       begin
-         list.concat(taicpu.op_ref_reg(A_MOVQ,S_NO,ref,reg));
+         if shuffle=nil then
+           begin
+             list.concat(taicpu.op_ref_reg(A_MOVQ,S_NO,ref,reg));
+           end
+         else if shufflescalar(shuffle) then
+           list.concat(taicpu.op_ref_reg(get_scalar_mm_op(fromsize,tosize),S_NO,ref,reg))
+         else
+           internalerror(200312252);
       end;


    procedure tcgx86.a_loadmm_reg_ref(list: taasmoutput; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle);
       begin
-         list.concat(taicpu.op_reg_ref(A_MOVQ,S_NO,reg,ref));
+         if shuffle=nil then
+           begin
+             list.concat(taicpu.op_ref_reg(A_MOVQ,S_NO,ref,reg));
+           end
+         else if shufflescalar(shuffle) then
+           list.concat(taicpu.op_reg_ref(get_scalar_mm_op(fromsize,tosize),S_NO,reg,ref))
+         else
+           internalerror(200312252);
       end;


@ -792,7 +803,7 @@ unit cgx86;
     var
       l : tlocation;
     begin
-       l.loc:=LOC_REGISTER;
+       l.loc:=LOC_MMREGISTER;
       l.register:=src;
       l.size:=size;
       opmm_loc_reg(list,op,size,l,dst,shuffle);
@ -804,7 +815,7 @@ unit cgx86;
        opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
          ( { scalar }
            ( { OS_F32 }
-              A_NOP,A_ADDSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP
+              A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
            ),
          { Intel did again a "nice" job: they added packed double operations (*PD) to SSE2 but
            no scalar ones (*SD)
@ -1908,7 +1919,12 @@ unit cgx86;
 end.
 {
  $Log$
-  Revision 1.95  2003-12-24 01:47:23  florian
+  Revision 1.96  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.95  2003/12/24 01:47:23  florian
    * first fixes to compile the x86-64 system unit

  Revision 1.94  2003/12/24 00:10:03  florian
--- a/compiler/x86/cpubase.pas
+++ b/compiler/x86/cpubase.pas
@ -313,8 +313,8 @@ uses

    const
      { declare aliases }
-      LOC_MMREGISTER = LOC_SSEREGISTER;
-      LOC_CMMREGISTER = LOC_CSSEREGISTER;
+      LOC_SSEREGISTER = LOC_MMREGISTER;
+      LOC_CSSEREGISTER = LOC_CMMREGISTER;

      max_operands = 3;

@ -421,6 +421,8 @@ implementation
            cgsize2subreg:=R_SUBQ;
          OS_M64:
            cgsize2subreg:=R_SUBNONE;
+          OS_F32,OS_F64:
+            cgsize2subreg:=R_SUBWHOLE;
          else
            internalerror(200301231);
        end;
@ -532,7 +534,12 @@ implementation
 end.
 {
  $Log$
-  Revision 1.32  2003-12-19 22:08:44  daniel
+  Revision 1.33  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.32  2003/12/19 22:08:44  daniel
    * Some work to restore the MMX capabilities

  Revision 1.31  2003/12/15 21:25:49  peter
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@ -34,20 +34,188 @@ unit nx86add;

    type
       tx86addnode = class(tcgaddnode)
-         procedure second_floataddsse;
+         procedure second_addfloat;override;
+         procedure second_addfloatsse;
+         procedure pass_left_and_right(var pushedfpu:boolean);
       end;


  implementation

    uses
+      globals,
      verbose,
      aasmtai,
-      cgbase,cgobj,
-      ncgutil,
+      cpuinfo,
+      cgbase,cgobj,cgx86,cga,
+      pass_2,ncgutil,
      defutil;

-    procedure tx86addnode.second_floataddsse;
+{*****************************************************************************
+                                AddFloat
+*****************************************************************************}
+
+    procedure tx86addnode.pass_left_and_right(var pushedfpu:boolean);
+      begin
+        { calculate the operator which is more difficult }
+        firstcomplex(self);
+
+        { in case of constant put it to the left }
+        if (left.nodetype=ordconstn) then
+         swapleftright;
+        secondpass(left);
+
+        { are too few registers free? }
+        if location.loc=LOC_FPUREGISTER then
+          pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location)
+        else
+          pushedfpu:=false;
+        secondpass(right);
+      end;
+
+
+    procedure tx86addnode.second_addfloat;
+      var
+        op         : TAsmOp;
+        resflags   : tresflags;
+        pushedfpu,
+        cmpop      : boolean;
+      begin
+        if (is_single(resulttype.def) and (aktfputype in sse_singlescalar)) or
+          (is_double(resulttype.def) and (aktfputype in sse_doublescalar)) then
+          begin
+            second_addfloatsse;
+            exit;
+          end;
+        pass_left_and_right(pushedfpu);
+
+        cmpop:=false;
+        case nodetype of
+          addn :
+            op:=A_FADDP;
+          muln :
+            op:=A_FMULP;
+          subn :
+            op:=A_FSUBP;
+          slashn :
+            op:=A_FDIVP;
+          ltn,lten,gtn,gten,
+          equaln,unequaln :
+            begin
+              op:=A_FCOMPP;
+              cmpop:=true;
+            end;
+          else
+            internalerror(2003042214);
+        end;
+
+        if (right.location.loc<>LOC_FPUREGISTER) then
+         begin
+           cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
+           if (right.location.loc <> LOC_CFPUREGISTER) and
+              pushedfpu then
+             location_freetemp(exprasmlist,left.location);
+           if (left.location.loc<>LOC_FPUREGISTER) then
+            begin
+              cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
+              if (left.location.loc <> LOC_CFPUREGISTER) and
+                 pushedfpu then
+                location_freetemp(exprasmlist,left.location);
+            end
+           else
+            begin
+              { left was on the stack => swap }
+              toggleflag(nf_swaped);
+            end;
+
+           { releases the right reference }
+           location_release(exprasmlist,right.location);
+         end
+        { the nominator in st0 }
+        else if (left.location.loc<>LOC_FPUREGISTER) then
+         begin
+           cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
+           if (left.location.loc <> LOC_CFPUREGISTER) and
+              pushedfpu then
+             location_freetemp(exprasmlist,left.location);
+         end
+        else
+         begin
+           { fpu operands are always in the wrong order on the stack }
+           toggleflag(nf_swaped);
+         end;
+
+        { releases the left reference }
+        if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
+          location_release(exprasmlist,left.location);
+
+        { if we swaped the tree nodes, then use the reverse operator }
+        if nf_swaped in flags then
+          begin
+             if (nodetype=slashn) then
+               op:=A_FDIVRP
+             else if (nodetype=subn) then
+               op:=A_FSUBRP;
+          end;
+        { to avoid the pentium bug
+        if (op=FDIVP) and (opt_processors=pentium) then
+          cg.a_call_name(exprasmlist,'EMUL_FDIVP')
+        else
+        }
+        { the Intel assemblers want operands }
+        if op<>A_FCOMPP then
+          begin
+             emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
+             tcgx86(cg).dec_fpu_stack;
+          end
+        else
+          begin
+             emit_none(op,S_NO);
+             tcgx86(cg).dec_fpu_stack;
+             tcgx86(cg).dec_fpu_stack;
+          end;
+
+        { on comparison load flags }
+        if cmpop then
+         begin
+           cg.getexplicitregister(exprasmlist,NR_AX);
+           emit_reg(A_FNSTSW,S_NO,NR_AX);
+           emit_none(A_SAHF,S_NO);
+           cg.ungetregister(exprasmlist,NR_AX);
+           if nf_swaped in flags then
+            begin
+              case nodetype of
+                  equaln : resflags:=F_E;
+                unequaln : resflags:=F_NE;
+                     ltn : resflags:=F_A;
+                    lten : resflags:=F_AE;
+                     gtn : resflags:=F_B;
+                    gten : resflags:=F_BE;
+              end;
+            end
+           else
+            begin
+              case nodetype of
+                  equaln : resflags:=F_E;
+                unequaln : resflags:=F_NE;
+                     ltn : resflags:=F_B;
+                    lten : resflags:=F_BE;
+                     gtn : resflags:=F_A;
+                    gten : resflags:=F_AE;
+              end;
+            end;
+           location_reset(location,LOC_FLAGS,OS_NO);
+           location.resflags:=resflags;
+         end
+        else
+         begin
+           location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
+           location.register:=NR_ST;
+         end;
+      end;
+
+
+    procedure tx86addnode.second_addfloatsse;
      var
        op : topcg;
      begin
@ -88,7 +256,12 @@ unit nx86add;
 end.
 {
  $Log$
-  Revision 1.2  2003-12-23 14:38:07  florian
+  Revision 1.3  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.2  2003/12/23 14:38:07  florian
    + second_floataddsse implemented

  Revision 1.1  2003/10/13 01:58:04  florian
--- a/compiler/x86_64/cpuinfo.pas
+++ b/compiler/x86_64/cpuinfo.pas
@ -61,7 +61,7 @@ Type

   tfputype =
     (no_fpuprocessor,
-      fpu_sse2
+      fpu_sse64
     );

 Const
@ -98,16 +98,23 @@ Const
   );

   fputypestr : array[tfputype] of string[6] = ('',
-     'SSE2'
+     'SSE64'
   );

+   sse_singlescalar : set of tfputype = [fpu_sse64];
+   sse_doublescalar : set of tfputype = [fpu_sse64];

 Implementation

 end.
 {
  $Log$
-  Revision 1.9  2003-12-22 19:00:17  florian
+  Revision 1.10  2003-12-25 01:07:09  florian
+    + $fputype directive support
+    + single data type operations with sse unit
+    * fixed more x86-64 stuff
+
+  Revision 1.9  2003/12/22 19:00:17  florian
    * fixed some x86-64 issues

  Revision 1.8  2003/12/20 12:38:51  florian